diff --git a/.gitattributes b/.gitattributes index 4542312f253759c65c6081284dc83b511b7d25ea..9f5ca5672f1b4b60c52b43753ee7c88b66a30bcd 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ FA/GreyWolf_NetConfig/gradle/wrapper/gradle-wrapper.jar filter=lfs diff=lfs merge=lfs -text +dev/team_x/PATEO_CarVoiceAssistant/data.zip filter=lfs diff=lfs merge=lfs -text diff --git a/FA/PATEO_CarVoiceAssistant/build-profile.json5 b/FA/PATEO_CarVoiceAssistant/build-profile.json5 new file mode 100644 index 0000000000000000000000000000000000000000..bbc7585c28a5e39e2dc02296b468a0b19957a747 --- /dev/null +++ b/FA/PATEO_CarVoiceAssistant/build-profile.json5 @@ -0,0 +1,28 @@ +{ + "app": { + "signingConfigs": [ + ], + "compileSdkVersion": 8, + "compatibleSdkVersion": 8, + "products": [ + { + "name": "default", + "signingConfig": "default", + } + ] + }, + "modules": [ + { + "name": "entry", + "srcPath": "./entry", + "targets": [ + { + "name": "default", + "applyToProducts": [ + "default" + ] + } + ] + } + ] +} \ No newline at end of file diff --git a/FA/PATEO_CarVoiceAssistant/entry/build-profile.json5 b/FA/PATEO_CarVoiceAssistant/entry/build-profile.json5 new file mode 100644 index 0000000000000000000000000000000000000000..5dd873a5083b7b62a817c1770955d252f3c22a27 --- /dev/null +++ b/FA/PATEO_CarVoiceAssistant/entry/build-profile.json5 @@ -0,0 +1,10 @@ +{ + "apiType": 'faMode', + "buildOption": { + }, + "targets": [ + { + "name": "default" + } + ] +} \ No newline at end of file diff --git a/FA/PATEO_CarVoiceAssistant/entry/hvigorfile.js b/FA/PATEO_CarVoiceAssistant/entry/hvigorfile.js new file mode 100644 index 0000000000000000000000000000000000000000..79ea2ec043f9b978a35f02d58e9348d1a26f04f9 --- /dev/null +++ b/FA/PATEO_CarVoiceAssistant/entry/hvigorfile.js @@ -0,0 +1,2 @@ +// Script for compiling build behavior. It is built in the build plug-in and cannot be modified currently. 
+module.exports = require('@ohos/hvigor-ohos-plugin').legacyHapTasks diff --git a/FA/PATEO_CarVoiceAssistant/entry/package-lock.json b/FA/PATEO_CarVoiceAssistant/entry/package-lock.json new file mode 100644 index 0000000000000000000000000000000000000000..15bc7145be1490029883067847743ea7134cf545 --- /dev/null +++ b/FA/PATEO_CarVoiceAssistant/entry/package-lock.json @@ -0,0 +1,5 @@ +{ + "name": "entry", + "version": "1.0.0", + "lockfileVersion": 1 +} diff --git a/FA/PATEO_CarVoiceAssistant/entry/package.json b/FA/PATEO_CarVoiceAssistant/entry/package.json new file mode 100644 index 0000000000000000000000000000000000000000..c7685ac4e7c0d79df04c96744f0d8f22cb4a9025 --- /dev/null +++ b/FA/PATEO_CarVoiceAssistant/entry/package.json @@ -0,0 +1,14 @@ +{ + "license": "ISC", + "devDependencies": {}, + "name": "entry", + "ohos": { + "org": "huawei", + "directoryLevel": "module", + "buildTool": "hvigor" + }, + "description": "example description", + "repository": {}, + "version": "1.0.0", + "dependencies": {} +} diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/config.json b/FA/PATEO_CarVoiceAssistant/entry/src/main/config.json new file mode 100644 index 0000000000000000000000000000000000000000..95b4e92d4d47ec62ffcc6cc2665a3e39475c8326 --- /dev/null +++ b/FA/PATEO_CarVoiceAssistant/entry/src/main/config.json @@ -0,0 +1,90 @@ +{ + "app": { + "bundleName": "com.pateo.voiceassistant", + "vendor": "example", + "version": { + "code": 1000000, + "name": "1.0.0" + } + }, + "deviceConfig": {}, + "module": { + "package": "com.pateo.entry", + "name": ".entry", + "mainAbility": ".MainAbility", + "deviceType": [ + "default", + "tablet" + ], + "reqPermissions": [ + { + "name": "ohos.permission.INTERNET" + }, + { + "name": "ohos.permission.OPERATE_DIRECTORY" + }, + { + "name": "ohos.permission.MICROPHONE" + }, + { + "name": "ohos.permission.USE_BLUETOOTH" + }, + { + "name": "ohos.permission.MODIFY_AUDIO_SETTINGS" + }, + { + "name": "ohos.permission.ACCESS_NOTIFICATION_POLICY" + }, + { 
+ "name": "ohos.permission.LOCATION" + } + ], + "distro": { + "deliveryWithInstall": true, + "moduleName": "entry", + "moduleType": "entry", + "installationFree": false + }, + "abilities": [ + { + "skills": [ + { + "entities": [ + "entity.system.home" + ], + "actions": [ + "action.system.home" + ] + } + ], + "orientation": "unspecified", + "formsEnabled": false, + "name": ".MainAbility", + "srcLanguage": "ets", + "srcPath": "MainAbility", + "icon": "$media:icon", + "description": "$string:MainAbility_desc", + "label": "$string:MainAbility_label", + "type": "page", + "visible": true, + "launchType": "standard" + } + ], + "js": [ + { + "mode": { + "syntax": "ets", + "type": "pageAbility" + }, + "pages": [ + "pages/index" + ], + "name": ".MainAbility", + "window": { + "designWidth": 720, + "autoDesignWidth": false + } + } + ] + } +} \ No newline at end of file diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/ets/MainAbility/app.ets b/FA/PATEO_CarVoiceAssistant/entry/src/main/ets/MainAbility/app.ets new file mode 100644 index 0000000000000000000000000000000000000000..58ff7c4932ab6cff0162341916c094ecdcfebf67 --- /dev/null +++ b/FA/PATEO_CarVoiceAssistant/entry/src/main/ets/MainAbility/app.ets @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+export default { // Application-level object of MainAbility; both hooks below only write one log line.
+  onCreate() { // NOTE(review): presumably invoked by the framework when the application starts - confirm
+    console.info('Application onCreate')
+
+  },
+  onDestroy() { // NOTE(review): presumably invoked by the framework when the application is torn down - confirm
+    console.info('Application onDestroy')
+  },
+}
\ No newline at end of file
diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/ets/MainAbility/models/asrModel.ets b/FA/PATEO_CarVoiceAssistant/entry/src/main/ets/MainAbility/models/asrModel.ets
new file mode 100644
index 0000000000000000000000000000000000000000..ec1f2097e85b81f9aa2e1b796ee38580c7de3669
--- /dev/null
+++ b/FA/PATEO_CarVoiceAssistant/entry/src/main/ets/MainAbility/models/asrModel.ets
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+export default class AsrModel { // Shape of the JSON message that pages/index.ets parses from the ASR-result event payload.
+  op: 'realTimeASRResult' | 'nluResult' // message kind: index.ets shows 'realTimeASRResult' as live transcript and executes 'nluResult' as a command
+  isFinish: boolean // NOTE(review): not read anywhere in this patch; presumably marks the final transcript - confirm
+  text: string // transcript text displayed for 'realTimeASRResult' messages
+  needDeclare: boolean // when true, index.ets restarts recognition after TTS playback stops
+  tts: string // reply text that index.ets displays and speaks
+  intentName: string // intent id, e.g. 'UIControl', 'CAR_AIR_CONDITION_CTRL', 'CAR_SKYLIGHT_CTRL', 'CAR_SUNSHADE_CTRL'
+  control: string // hotword id (e.g. 'OpenAir') used when intentName is 'UIControl'
+  modeType: string // sub-command of the intent, e.g. 'TEMPERATURE_SET', 'AIRVOLUME_ADJUST', 'DOACTION'
+  action:string // operation, e.g. 'OPEN', 'INCREASE', 'DECREASE'
+  value: string // numeric text; index.ets converts it with parseInt for the *_SET mode types
+  positions: string // 'FRONT' or 'BACK' for the DEFROST / DEMIST mode types
+}
diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/ets/MainAbility/pages/index.ets b/FA/PATEO_CarVoiceAssistant/entry/src/main/ets/MainAbility/pages/index.ets
new file mode 100644
index 0000000000000000000000000000000000000000..92780ac43f390588e38bf0bc9381a9d3a0e728ee
--- /dev/null
+++ b/FA/PATEO_CarVoiceAssistant/entry/src/main/ets/MainAbility/pages/index.ets
@@ -0,0 +1,619 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import LogUtil from '../utils/logUtil'
+import carvoiceassistant from '@ohos.carvoiceassistant'
+import AsrModel from '../models/asrModel';
+import window from '@ohos.window';
+import app from '@system.app';
+
+const voiceManager = carvoiceassistant.getManager();
+
+const screenWidth = 720
+const screenHeight = 1280 //1160
+const cardSpace = 20 // gap between cards
+
+const tag = 'VoiceAppIndex'
+
+// Valid air-conditioner ranges. They are also used to build the spoken limit prompts,
+// so the check and the announcement can no longer drift apart.
+const MIN_AIR_TEMP = 16
+const MAX_AIR_TEMP = 34
+const MIN_AIR_WIND = 1
+const MAX_AIR_WIND = 10
+
+// Mode shown initially and restored whenever a mode is switched off.
+const DEFAULT_AIR_MODE = '制冷'
+// Placeholder shown while the recognizer is listening.
+const LISTENING_TEXT = '我正在听...'
+// Prompt spoken when a command cannot be handled.
+const UNSUPPORTED_TTS = '暂不支持此功能'
+// Longer TTS texts are cut to this many characters before playback.
+const MAX_TTS_LENGTH = 80
+// A single trailing character from this list is removed from displayed/spoken text.
+// NOTE(review): the original compared against ',' twice; the second comparison was
+// presumably meant to be the full-width comma, which is included here - confirm.
+const TRAILING_PUNCTUATION = ['.', '。', ',', '，']
+
+// Hotwords registered with the voice service; `url` is the id handled by handleHotwords().
+const hotwords = [
+  { title: '打开空调', url: 'OpenAir' },
+  { title: '关闭空调', url: 'CloseAir' },
+  { title: '空调温度调高', url: 'AirTempSetUp' },
+  { title: '空调温度调低', url: 'AirTempSetDown' },
+  { title: '打开天窗', url: 'OpenWindow' },
+  { title: '关闭天窗', url: 'CloseWindow' },
+]
+
+// Frames of the sunroof animation, ordered from the first frame (wind0) to the last (wind10).
+const windowImages = [
+  $r('app.media.wind0'),
+  $r('app.media.wind1'),
+  $r('app.media.wind2'),
+  $r('app.media.wind3'),
+  $r('app.media.wind4'),
+  $r('app.media.wind5'),
+  $r('app.media.wind6'),
+  $r('app.media.wind7'),
+  $r('app.media.wind8'),
+  $r('app.media.wind9'),
+  $r('app.media.wind10')]
+
+/**
+ * Main page of the voice-assistant demo: a voice card, an air-conditioner card and a
+ * sunroof card. Voice results delivered by `voiceManager` update the simulated state
+ * and every state change is confirmed through TTS.
+ */
+@Entry
+@Component
+struct Index {
+  @State speakerDialogVisible: boolean = false
+  @State isRecognizing: boolean = false
+  @State isEnableWakeUp: boolean = false
+  @State voiceText: string = ''
+  @State isAirOpen: boolean = false
+  @State airTemp: number = 24
+  @State airWind: number = 3
+  @State airMode: string = DEFAULT_AIR_MODE
+  @State isWindowOpen: boolean = false
+  @State tempColor: string = '#FFFFFF'
+  @State airWindColor: string = '#FFFFFF'
+  @State airModeColor: string = '#FFFFFF'
+  @State windowImage:Resource = $r('app.media.wind0')
+  // Set when the user stops recognition by tapping the ball; the next NLU result is dropped.
+  private isUserStopRecognizing = false
+  // Copied from the last NLU result; when true, recognition restarts after TTS playback stops.
+  private needDeclare = false
+
+  /** Requests full-screen mode for the top window; a failure is only logged. */
+  async fullWindow() {
+    try {
+      const win = await window.getTopWindow();
+      await win.setFullScreen(true);
+    } catch (err) {
+      LogUtil.info(tag, 'fullscreen error:' + err);
+    }
+  }
+
+  /** Reads the current voice-service state, configures the service and subscribes to its events. */
+  aboutToAppear() {
+    this.fullWindow()
+
+    this.isEnableWakeUp = voiceManager.isEnableWakeUp()
+    this.isRecognizing = voiceManager.isRecognizing()
+
+    // Turn on wake-up-free mode
+    voiceManager.enableWakeUp()
+    voiceManager.registerHotwords(JSON.stringify(hotwords))
+    voiceManager.setCoord(23.025978, 113.754969)
+
+    // Remove listeners possibly left by an earlier page instance before subscribing again.
+    this.unregisterVoiceEvents()
+
+    const eventType = carvoiceassistant.EventType
+    voiceManager.on(eventType.VoiceAssistantEventTypeOnWakeUp, () => {
+      LogUtil.info(tag, '语音服务被唤醒')
+    })
+    voiceManager.on(eventType.VoiceAssistantEventTypeRecognizeStateChanged, (err, data) => {
+      this.onRecognizeStateChanged(data)
+    })
+    voiceManager.on(eventType.VoiceAssistantEventTypeAsrResult, (err, data) => {
+      this.onAsrResult(data)
+    })
+    voiceManager.on(eventType.VoiceAssistantEventTypeTTSPlayStateChanged, (err, data) => {
+      this.onTtsPlayStateChanged(data)
+    })
+  }
+
+  /** Unsubscribes from the voice events so callbacks no longer reach a page that is going away. */
+  aboutToDisappear() {
+    this.unregisterVoiceEvents()
+  }
+
+  /** Removes the listeners for the four event types this page subscribes to. */
+  private unregisterVoiceEvents() {
+    const eventType = carvoiceassistant.EventType
+    voiceManager.off(eventType.VoiceAssistantEventTypeRecognizeStateChanged)
+    voiceManager.off(eventType.VoiceAssistantEventTypeOnWakeUp)
+    voiceManager.off(eventType.VoiceAssistantEventTypeAsrResult)
+    voiceManager.off(eventType.VoiceAssistantEventTypeTTSPlayStateChanged)
+  }
+
+  /**
+   * Mirrors the recognizer state into the UI and shows/clears the listening placeholder.
+   * @param data event payload; its `isRecognizing` entry is read
+   */
+  private onRecognizeStateChanged(data) {
+    if (!data) {
+      return
+    }
+    this.isRecognizing = data['isRecognizing']
+    if (this.isRecognizing) {
+      this.voiceText = LISTENING_TEXT
+    } else if (this.voiceText == LISTENING_TEXT) {
+      this.voiceText = ''
+    }
+  }
+
+  /**
+   * Parses one ASR event and dispatches it by `op`.
+   * @param data event payload; its `result` entry must hold the JSON text of an AsrModel
+   */
+  private onAsrResult(data) {
+    if (!data) {
+      return
+    }
+    let json: AsrModel
+    try {
+      json = JSON.parse(data['result'])
+    } catch (err) {
+      // A malformed payload must not break the event callback.
+      LogUtil.info(tag, 'invalid asr result:' + err)
+      return
+    }
+    if (json == null || typeof json != 'object') {
+      return
+    }
+
+    if (json.op == 'realTimeASRResult') {
+      this.voiceText = this.stripTrailingPunctuation(json.text)
+    } else if (json.op == 'nluResult') {
+      this.handleNluResult(json, data)
+    }
+  }
+
+  /**
+   * Executes a parsed command: hotwords go to handleHotwords(), everything else to
+   * handleOtherControl() after the server-provided reply (if any) was shown and spoken.
+   * @param json parsed message
+   * @param data raw event payload, logged for diagnostics
+   */
+  private handleNluResult(json: AsrModel, data) {
+    if (this.isUserStopRecognizing) {
+      // The user cancelled by tapping the ball: swallow exactly this one result.
+      this.isUserStopRecognizing = false
+      return
+    }
+
+    LogUtil.info(tag, JSON.stringify(data));
+
+    if (json.intentName == 'UIControl') {
+      this.handleHotwords(json.control);
+      return
+    }
+
+    const hasServerTts = typeof json.tts == 'string' && json.tts.length > 0
+    if (hasServerTts) {
+      const tts = this.stripTrailingPunctuation(json.tts)
+      this.voiceText = tts;
+      this.speakTTS(tts);
+    }
+    this.needDeclare = json.needDeclare;
+    // The fallback prompt may only be spoken when the server supplied no reply of its own.
+    this.handleOtherControl(json, !hasServerTts);
+  }
+
+  /**
+   * Clears the spoken text when playback stops and, if the last result asked for it,
+   * starts listening again.
+   * @param data event payload; its `isPlaying` entry is read
+   */
+  private onTtsPlayStateChanged(data) {
+    if (!data) {
+      return
+    }
+    if (data["isPlaying"] == false) {
+      if (this.needDeclare) {
+        this.isUserStopRecognizing = false;
+        this.needDeclare = false;
+        voiceManager.startRecognize();
+      }
+      this.voiceText = '';
+    }
+  }
+
+  /**
+   * Returns `text` without one trailing character listed in TRAILING_PUNCTUATION.
+   * Non-string or empty input yields ''.
+   */
+  private stripTrailingPunctuation(text: string): string {
+    if (typeof text != 'string' || text.length == 0) {
+      return ''
+    }
+    const last = text.charAt(text.length - 1)
+    return TRAILING_PUNCTUATION.indexOf(last) >= 0 ? text.slice(0, text.length - 1) : text
+  }
+
+  /**
+   * Runs the action bound to a registered hotword.
+   * @param url hotword id as declared in `hotwords`; unknown ids are ignored
+   */
+  handleHotwords(url: string) {
+    switch (url) {
+      case 'OpenAir':
+        this.openAir()
+        break
+      case 'CloseAir':
+        this.closeAir()
+        break
+      case 'AirTempSetUp':
+        this.changeTemp(this.airTemp + 1)
+        break
+      case 'AirTempSetDown':
+        this.changeTemp(this.airTemp - 1)
+        break
+      case 'OpenWindow':
+        this.openWindow()
+        break
+      case 'CloseWindow':
+        this.closeWindow()
+        break
+      default:
+        break
+    }
+  }
+
+  /**
+   * Dispatches a non-hotword command by intent.
+   * @param model parsed command
+   * @param needSpeakTTS true when the server sent no reply text, i.e. this page may speak
+   *                     the "not supported" prompt itself
+   */
+  handleOtherControl(model: AsrModel, needSpeakTTS: boolean) {
+    if (model.intentName == 'CAR_AIR_CONDITION_CTRL') {
+      this.handleAirConditionControl(model, needSpeakTTS)
+    } else if (model.intentName == 'CAR_SKYLIGHT_CTRL') {
+      this.handleSkylightControl(model)
+    } else if (model.intentName == 'CAR_SUNSHADE_CTRL') {
+      this.handleSunshadeControl(model)
+    } else if (needSpeakTTS) {
+      this.speakTTS(UNSUPPORTED_TTS)
+    }
+  }
+
+  /** Handles the CAR_AIR_CONDITION_CTRL intent; unknown mode types are ignored. */
+  private handleAirConditionControl(model: AsrModel, needSpeakTTS: boolean) {
+    const isOpen = model.action == 'OPEN'
+    switch (model.modeType) {
+      case 'TEMPERATURE_ADJUST':
+        if (model.action == 'INCREASE') {
+          this.changeTemp(this.airTemp + 1)
+        } else if (model.action == 'DECREASE') {
+          this.changeTemp(this.airTemp - 1)
+        } else if (needSpeakTTS) {
+          this.speakTTS(UNSUPPORTED_TTS)
+        }
+        break
+      case 'TEMPERATURE_SET':
+        this.changeTemp(parseInt(model.value, 10))
+        break
+      case 'OUT_LOOP':
+        if (isOpen) {
+          this.openAirOutLoop()
+        } else {
+          this.closeAirOutLoop()
+        }
+        break
+      case 'IN_LOOP':
+        if (isOpen) {
+          this.openAirInLoop()
+        } else {
+          this.closeAirInLoop()
+        }
+        break
+      case 'AIRVOLUME_SET':
+        this.changeAirWind(parseInt(model.value, 10))
+        break
+      case 'AIRVOLUME_ADJUST':
+        if (model.action == 'INCREASE') {
+          this.changeAirWind(this.airWind + 1)
+        } else if (model.action == 'DECREASE') {
+          this.changeAirWind(this.airWind - 1)
+        } else if (needSpeakTTS) {
+          this.speakTTS(UNSUPPORTED_TTS)
+        }
+        break
+      case 'HOT':
+        this.changeMode('制热', isOpen)
+        break
+      case 'COLD':
+        this.changeMode('制冷', isOpen)
+        break
+      case 'VENTILATION':
+        this.changeMode('通风', isOpen)
+        break
+      case 'AREF':
+        this.changeMode('除湿', isOpen)
+        break
+      case 'DEFROST':
+        this.changeMode(this.positionPrefix(model.positions) + '除霜', isOpen)
+        break
+      case 'DEMIST':
+        this.changeMode(this.positionPrefix(model.positions) + '除雾', isOpen)
+        break
+      case 'AUTO':
+        this.changeMode('自动', isOpen)
+        break
+      case 'DOACTION':
+        if (isOpen) {
+          this.openAir()
+        } else {
+          this.closeAir()
+        }
+        break
+      default:
+        break
+    }
+  }
+
+  /** Maps 'FRONT' / 'BACK' to the prefix used in the mode name; anything else gives ''. */
+  private positionPrefix(positions: string): string {
+    if (positions == 'FRONT') {
+      return '前'
+    }
+    if (positions == 'BACK') {
+      return '后'
+    }
+    return ''
+  }
+
+  /** Handles the CAR_SKYLIGHT_CTRL intent (open, close, or open half for SKYLIGHT_SET). */
+  private handleSkylightControl(model: AsrModel) {
+    if (model.modeType == 'DOACTION') {
+      if (model.action == 'OPEN') {
+        this.openWindow()
+      } else {
+        this.closeWindow()
+      }
+    } else if (model.modeType == 'SKYLIGHT_SET') {
+      this.openWindowHalf()
+    }
+  }
+
+  /** Handles the CAR_SUNSHADE_CTRL intent. */
+  private handleSunshadeControl(model: AsrModel) {
+    if (model.modeType == 'DOACTION') {
+      if (model.action == 'OPEN') {
+        this.openSunshade()
+      } else {
+        this.closeSunshade()
+      }
+    }
+  }
+
+  /** Marks the air conditioner as on and confirms by voice. */
+  openAir() {
+    this.isAirOpen = true;
+    this.speakTTS('空调已打开')
+  }
+
+  /** Marks the air conditioner as off and confirms by voice. */
+  closeAir() {
+    this.isAirOpen = false;
+    this.speakTTS('空调已关闭')
+  }
+
+  /**
+   * Sets the fan level, flashes the value and confirms by voice.
+   * Levels outside MIN_AIR_WIND..MAX_AIR_WIND only trigger the limit prompt; NaN is ignored.
+   * @param wind requested fan level
+   */
+  changeAirWind(wind: number) {
+    if (isNaN(wind)) {
+      LogUtil.info(tag, 'ignore invalid air volume:' + wind)
+      return
+    }
+    // The original tested `wind > 11`, which let level 11 through although the prompt says 10.
+    if (wind > MAX_AIR_WIND) {
+      this.speakTTS('风量最高' + MAX_AIR_WIND + '级')
+      return
+    }
+    if (wind < MIN_AIR_WIND) {
+      this.speakTTS('风量最低' + MIN_AIR_WIND + '级')
+      return
+    }
+
+    this.airWind = wind
+    this.flash((color) => {
+      this.airWindColor = color
+    })
+    this.speakTTS('风量已设置到' + wind + '级')
+  }
+
+  /**
+   * Sets the target temperature, flashes the value and confirms by voice.
+   * Values outside MIN_AIR_TEMP..MAX_AIR_TEMP only trigger the limit prompt; NaN is ignored.
+   * @param temp requested temperature in degrees Celsius
+   */
+  changeTemp(temp: number) {
+    if (isNaN(temp)) {
+      LogUtil.info(tag, 'ignore invalid temperature:' + temp)
+      return
+    }
+    if (temp > MAX_AIR_TEMP) {
+      this.speakTTS('空调温度最高' + MAX_AIR_TEMP + '摄氏度')
+      return
+    }
+    if (temp < MIN_AIR_TEMP) {
+      this.speakTTS('空调温度最低' + MIN_AIR_TEMP + '摄氏度')
+      return
+    }
+
+    this.airTemp = temp
+    this.flash((color) => {
+      this.tempColor = color
+    })
+    this.speakTTS('空调温度已设置到' + temp + '摄氏度')
+  }
+
+  /**
+   * Blinks a label four times between red and white.
+   * Runs in the background on purpose: callers continue (and speak) while it plays.
+   * @param apply writes the given colour into the state variable to animate
+   */
+  private flash(apply: (color: string) => void) {
+    this.animationRepeat(4, async () => {
+      await this.animationStep({ duration: 200 }, () => apply('#FF0000'))
+      await this.animationStep({ duration: 200 }, () => apply('#FFFFFF'))
+    }).catch((err) => {
+      LogUtil.info(tag, 'flash animation error:' + err)
+    })
+  }
+
+  /**
+   * Runs `animation` `repeatCount` times, each run starting after the previous one settled.
+   */
+  async animationRepeat(repeatCount: number, animation: () => Promise<void> | void) {
+    for (let i = 0; i < repeatCount; i++) {
+      await animation()
+    }
+  }
+
+  /**
+   * Wraps animateTo() in a promise that resolves with true from the animation's onFinish.
+   * A caller-supplied onFinish is still invoked first; `value` itself is not modified.
+   * @param value animation parameters
+   * @param event closure that applies the state change to animate
+   */
+  animationStep(value: AnimateParam, event: () => void) {
+    return new Promise(resolve => {
+      const userOnFinish = value.onFinish
+      const params: AnimateParam = Object.assign({}, value, {
+        onFinish: () => {
+          if (userOnFinish) userOnFinish()
+          resolve(true)
+        }
+      })
+      animateTo(params, event)
+    })
+  }
+
+  /**
+   * Switches the displayed mode (or falls back to DEFAULT_AIR_MODE when a mode is
+   * turned off), flashes the label and confirms by voice.
+   * @param mode   mode label
+   * @param isOpen true to switch the mode on, false to switch it off
+   */
+  changeMode(mode: string, isOpen: boolean) {
+    this.airMode = isOpen ? mode : DEFAULT_AIR_MODE
+    this.flash((color) => {
+      this.airModeColor = color
+    })
+    this.speakTTS('空调' + mode + '已' + (isOpen ? '打开' : '关闭'))
+  }
+
+  openAirOutLoop() {
+    this.speakTTS('空调外循环已打开')
+  }
+
+  openAirInLoop() {
+    this.speakTTS('空调内循环已打开')
+  }
+
+  closeAirOutLoop() {
+    this.speakTTS('空调外循环已关闭')
+  }
+
+  closeAirInLoop() {
+    this.speakTTS('空调内循环已关闭')
+  }
+
+  /** Marks the sunroof as open, confirms by voice and plays all frames forward. */
+  async openWindow() {
+    this.isWindowOpen = true
+    this.speakTTS('天窗已打开')
+    await this.playWindowFrames(windowImages)
+  }
+
+  /** Like openWindow(), but stops after the first half of the frames (rounded up). */
+  async openWindowHalf() {
+    this.isWindowOpen = true
+    this.speakTTS('天窗已打开一半')
+    await this.playWindowFrames(windowImages.slice(0, Math.ceil(windowImages.length / 2)))
+  }
+
+  /** Marks the sunroof as closed, confirms by voice and plays all frames backward. */
+  async closeWindow() {
+    this.isWindowOpen = false
+    this.speakTTS('天窗已关闭')
+    await this.playWindowFrames([...windowImages].reverse())
+  }
+
+  /** Shows the given frames one after another. */
+  private async playWindowFrames(frames: Resource[]) {
+    for (const frame of frames) {
+      await this.windowAnimation(frame)
+    }
+  }
+
+  /**
+   * Shows one sunroof frame and resolves with true 200 ms later.
+   * @param resource image resource of the frame
+   */
+  windowAnimation(resource) {
+    return new Promise(resolve => {
+      this.windowImage = resource
+      setTimeout(() => {
+        resolve(true)
+      }, 200)
+    })
+  }
+
+  openSunshade() {
+    this.speakTTS('遮阳帘已打开')
+  }
+
+  closeSunshade() {
+    this.speakTTS('遮阳帘已关闭')
+  }
+
+  /**
+   * Plays `tts` through the voice service. Empty or missing text is skipped and text
+   * longer than MAX_TTS_LENGTH characters is truncated.
+   */
+  speakTTS(tts: string) {
+    if (!tts || tts.length == 0) {
+      return
+    }
+    const text = tts.length > MAX_TTS_LENGTH ? tts.slice(0, MAX_TTS_LENGTH) : tts
+    voiceManager.playTTS(text);
+  }
+
+  /** Forwards the speaker selection to the voice service. */
+  changeSpeaker(speaker: string) {
+    voiceManager.changeSpeakerType(speaker);
+  }
+
+  /** Click handler of the voice ball: toggles recognition. */
+  onRecognizingClick() {
+    if (this.isRecognizing) {
+      // Remember the manual stop so the result of the aborted utterance is dropped.
+      this.isUserStopRecognizing = true;
+      voiceManager.stopRecognize();
+    } else {
+      this.isUserStopRecognizing = false;
+      this.needDeclare = false;
+      voiceManager.startRecognize();
+    }
+  }
+
+  /** Click handler of the close icon: terminates the application. */
+  onQuitApp() {
+    app.terminate()
+  }
+
+  // Shared look of the three cards. Width is derived from screenHeight and height from
+  // screenWidth because build() rotates the whole page by 90 degrees.
+  @Styles
+  cardStyle() {
+    .height(screenWidth - cardSpace * 2)
+    .width((screenHeight - cardSpace * 4) / 3)
+    .borderRadius(40)
+    .clip(true)
+    .linearGradient({
+      colors: [[0x2F4668, 1.0], [0x5C7CA1, 0.0]],
+      angle: 0
+    })
+  }
+
+  build() {
+    Stack({ alignContent: Alignment.Center }) {
+      Flex({ direction: FlexDirection.Row, alignItems: ItemAlign.Start, justifyContent: FlexAlign.SpaceBetween }) {
+        // Voice card
+        Stack({ alignContent: Alignment.Bottom }) {
+          Image($r('app.media.voice_bottom'))
+            .width('100%')
+            .height(148)
+          Flex({ direction: FlexDirection.Column }) {
+            Stack({ alignContent: Alignment.TopStart }) {
+              Image($r('app.media.icon_close'))
+                .objectFit(ImageFit.Contain)
+                .padding({ left: 15, top: 15 })
+                .width(84)
+                .height(84)
+                .onClick(this.onQuitApp)
+            }
+            .width('100%')
+            .height(84)
+
+            Image($r('app.media.voice_ball'))
+              .margin({ top: 0 })
+              .width(255)
+              .height(255)
+              .objectFit(ImageFit.Contain)
+              .onClick(this.onRecognizingClick.bind(this))
+
+            Text(this.isRecognizing ? "" : "试试说:你好博泰")
+              .padding({ left: 30, right: 30, top: 10 })
+              .fontColor("#CCFFFFFF")
+              .fontSize(40)
+              .textAlign(TextAlign.Center)
+            Text(this.voiceText)
+              .height(300)
+              .width('100%')
+              .padding(30)
+              .fontColor(Color.White)
+              .maxLines(100)
+              .fontSize(40)
+              .textAlign(TextAlign.Center)
+              .align(Alignment.Center)
+              .textOverflow({ overflow: TextOverflow.Ellipsis })
+          }
+          .width('100%')
+          .height('100%')
+        }
+        .cardStyle()
+
+        // Air-conditioner card
+        Column() {
+          Stack() {
+            Image(this.isAirOpen ? (this.airMode == '制热' ? $r('app.media.wind_hot') : $r('app.media.wind_cold')) : $r('app.media.wind_close'))
+              .width('100%')
+              .height(141)
+            Image($r('app.media.seat'))
+              .width('100%')
+              .height(305)
+              .objectFit(ImageFit.Contain)
+          }
+          .margin({ top: 30 })
+          .width('100%')
+          .height(320)
+
+          Flex({ alignItems: ItemAlign.Center }) {
+            Row() {
+              Image($r('app.media.icon_temp'))
+                .width(46)
+                .height(46)
+              Text('' + this.airTemp)
+                .fontColor(this.tempColor)
+                .fontSize(45)
+            }
+            .padding(10)
+
+            Row() {
+              Image($r('app.media.icon_wind'))
+                .width(46)
+                .height(46)
+              Text('' + this.airWind)
+                .fontColor(this.airWindColor)
+                .fontSize(45)
+            }
+            .padding(10)
+
+            Text(this.airMode)
+              .fontColor(this.airModeColor)
+              .fontSize(45)
+              .padding(10)
+          }
+          .width('90%')
+
+          Button({ type: ButtonType.Normal }) {
+            Stack() {
+              Image(this.isAirOpen ? $r('app.media.btn_focus') : $r('app.media.btn_normal'))
+                .width('100%')
+                .height('100%')
+              Image($r('app.media.icon_close'))
+                .width(54)
+                .height(54)
+                .margin({ top: -5 })
+            }
+            .width('100%')
+            .height('100%')
+          }
+          .margin({ top: 60 })
+          .backgroundColor('#00000000')
+          .width(190)
+          .height(88)
+        }
+        .cardStyle()
+
+        // Sunroof card
+        Column() {
+          Image(this.windowImage)
+            .width('100%')
+            .height(314)
+            .margin({ top: 80 })
+          Button({ type: ButtonType.Normal }) {
+            Stack() {
+              Image(this.isWindowOpen ? $r('app.media.btn_focus') : $r('app.media.btn_normal'))
+                .width('100%')
+                .height('100%')
+              Text(this.isWindowOpen ? '已打开' : '已关闭')
+                .fontColor(Color.White)
+                .fontSize(26)
+                .margin({ top: -5 })
+            }
+            .width('100%')
+            .height('100%')
+          }
+          .margin({ top: 80 })
+          .backgroundColor('#00000000')
+          .width(190)
+          .height(88)
+        }
+        .cardStyle()
+      }
+      .padding(cardSpace)
+      .width('100%')
+      .height('100%')
+
+    }
+    // The original string had no leading '#', unlike every other colour string in this file.
+    .backgroundColor("#FF17212E")
+    // The page is laid out in landscape and rotated by 90 degrees, so width and height are swapped.
+    .width(screenHeight + 'px')
+    .height(screenWidth + 'px')
+    .translate({
+      x: -(screenHeight - screenWidth) / 2 + 'px',
+      y: (screenHeight - screenWidth) / 2 + 'px'
+    })
+    .rotate({ z: 1, angle: 90, centerX: '50%', centerY: '50%' })
+
+  }
+}
\ No newline at end of file
diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/ets/MainAbility/utils/logUtil.ets b/FA/PATEO_CarVoiceAssistant/entry/src/main/ets/MainAbility/utils/logUtil.ets
new file mode 100644
index 0000000000000000000000000000000000000000..8b4f6825df5067c85b891b9996e0099da5f6e138
--- /dev/null
+++ b/FA/PATEO_CarVoiceAssistant/entry/src/main/ets/MainAbility/utils/logUtil.ets
@@ -0,0 +1,20 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Minimal logging helper: prefixes every message with "[VA][<tag>]" and writes it
+ * with console.info.
+ */
+export default class LogUtil {
+  /**
+   * Writes one info-level line of the form "[VA][<tag>]<str>".
+   * @param tag name of the component that logs (primitive string, not the String wrapper type)
+   * @param str message text
+   */
+  static info(tag: string, str: string): void {
+    console.info(`[VA][${tag}]${str}`)
+  }
+}
\ No newline at end of file
diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/element/color.json b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/element/color.json
new file mode 100644
index 0000000000000000000000000000000000000000..1bbc9aa9617e97c45440e1d3d66afc1154837012
--- /dev/null
+++ b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/element/color.json
@@ -0,0 +1,8 @@
+{
+  "color": [
+    {
+      "name": "white",
+      "value": "#FFFFFF"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/element/string.json b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/element/string.json
new file mode 100644
index 0000000000000000000000000000000000000000..0732e50fc7a5f98003a60d05568925bcdf4a24f7
--- /dev/null
+++ b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/element/string.json
@@ -0,0 +1,16 @@
+{
+  "string": [
+    {
+      "name": "entry_desc",
+      "value": "description"
+    },
+    {
+      "name": "MainAbility_desc",
+      "value": "description"
+    },
+    {
+      "name": "MainAbility_label",
+      "value": "语音助理"
+    }
+  ]
+}
\ No newline at end of file
diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/btn_focus.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/btn_focus.png
new file mode 100644
index 0000000000000000000000000000000000000000..a9a37f4194ca5f009501f2ed7c470c1051e59d05
Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/btn_focus.png differ
diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/btn_normal.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/btn_normal.png
new file mode 100644
index 0000000000000000000000000000000000000000..3be7c2f97a0b5ea5e05ea149e1036727207fdc02
Binary files /dev/null and 
b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/btn_normal.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/icon.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/icon.png new file mode 100644 index 0000000000000000000000000000000000000000..ce307a8827bd75456441ceb57d530e4c8d45d36c Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/icon.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/icon_close.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/icon_close.png new file mode 100644 index 0000000000000000000000000000000000000000..a5ead6a63f2c54037c18d4afa5f5f47da57f0898 Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/icon_close.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/icon_temp.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/icon_temp.png new file mode 100644 index 0000000000000000000000000000000000000000..b89e22b10fc1cbeb379df3fd9eab187c9a58b297 Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/icon_temp.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/icon_wind.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/icon_wind.png new file mode 100644 index 0000000000000000000000000000000000000000..aa7adc4a237b48ff3d3cb04e9da075b9c9cf7d5b Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/icon_wind.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/seat.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/seat.png new file mode 100644 index 0000000000000000000000000000000000000000..e5851247e2c30cfc34d220a102ecfbfd4d51618d Binary files /dev/null and 
b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/seat.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/voice_ball.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/voice_ball.png new file mode 100644 index 0000000000000000000000000000000000000000..73c9ac08aa2a077b0daac29ce592310fce36c772 Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/voice_ball.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/voice_bottom.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/voice_bottom.png new file mode 100644 index 0000000000000000000000000000000000000000..767e991efed40aabf82801e98092846f3e011705 Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/voice_bottom.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind0.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind0.png new file mode 100644 index 0000000000000000000000000000000000000000..543892f3b9e244cd07268aa26c62fb7b2f75cb57 Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind0.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind1.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind1.png new file mode 100644 index 0000000000000000000000000000000000000000..ae0603532aadba7e85cd6c942a926e9f26bf8949 Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind1.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind10.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind10.png new file mode 100644 index 0000000000000000000000000000000000000000..07697814265ecdc0ca8f86f35a2407d8ec3e610c Binary files /dev/null and 
b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind10.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind2.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind2.png new file mode 100644 index 0000000000000000000000000000000000000000..dfaa138c6dd2eb3d76fe597d68449f70e47594a4 Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind2.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind3.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind3.png new file mode 100644 index 0000000000000000000000000000000000000000..ce69a4db8d357ac65307aa7d40e0b1a9f474d29b Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind3.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind4.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind4.png new file mode 100644 index 0000000000000000000000000000000000000000..bdb0be9a2ab966e228e420d4e4b45825531bbbf9 Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind4.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind5.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind5.png new file mode 100644 index 0000000000000000000000000000000000000000..a26dab9ae0a024e47dab473057e67247940d6908 Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind5.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind6.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind6.png new file mode 100644 index 0000000000000000000000000000000000000000..51bb521b62c3f43cbc43b5e1c8f72f2ef6b44bec Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind6.png differ diff --git 
a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind7.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind7.png new file mode 100644 index 0000000000000000000000000000000000000000..5f4e3a164cd3487822f995ac66d6696e287b3bcd Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind7.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind8.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind8.png new file mode 100644 index 0000000000000000000000000000000000000000..712767f3fce9d9cec6a43768d093ddbfb03b5009 Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind8.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind9.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind9.png new file mode 100644 index 0000000000000000000000000000000000000000..5b17378a4b5e24714ce7818c008ac3df9523b905 Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind9.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind_close.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind_close.png new file mode 100644 index 0000000000000000000000000000000000000000..bbd46ba9cb64bff600f024ea6d7ec886251f3d07 Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind_close.png differ diff --git a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind_cold.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind_cold.png new file mode 100644 index 0000000000000000000000000000000000000000..853a8edebc3384e4061f11366d65a59fd8026ed6 Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind_cold.png differ diff --git 
a/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind_hot.png b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind_hot.png new file mode 100644 index 0000000000000000000000000000000000000000..15ce1d7928761e6cec941bc66e571f90e07bd2f4 Binary files /dev/null and b/FA/PATEO_CarVoiceAssistant/entry/src/main/resources/base/media/wind_hot.png differ diff --git a/FA/PATEO_CarVoiceAssistant/hvigorfile.js b/FA/PATEO_CarVoiceAssistant/hvigorfile.js new file mode 100644 index 0000000000000000000000000000000000000000..cfe6efdfb22b766357f554f94fafb02e172c9605 --- /dev/null +++ b/FA/PATEO_CarVoiceAssistant/hvigorfile.js @@ -0,0 +1,2 @@ +// Script for compiling build behavior. It is built in the build plug-in and cannot be modified currently. +module.exports = require('@ohos/hvigor-ohos-plugin').legacyAppTasks \ No newline at end of file diff --git a/FA/PATEO_CarVoiceAssistant/package-lock.json b/FA/PATEO_CarVoiceAssistant/package-lock.json new file mode 100644 index 0000000000000000000000000000000000000000..05b25058bafccbd7ad9a203d83ae5a88d17cb846 --- /dev/null +++ b/FA/PATEO_CarVoiceAssistant/package-lock.json @@ -0,0 +1,1226 @@ +{ + "name": "voiceassistant", + "version": "1.0.0", + "lockfileVersion": 1, + "requires": true, + "dependencies": { + "@ohos/hos-sdkmanager-common": { + "version": "1.0.4", + "resolved": "https://repo.harmonyos.com/npm/@ohos/hos-sdkmanager-common/-/@ohos/hos-sdkmanager-common-1.0.4.tgz", + "integrity": "sha512-LWrfF8Js+u54BcEAdyjzsA81iGBA4LPvQdQ1ig/pX6mvTieUPSvtjtAzdI8nnGVmJRLrHwAMHEO/syd9d8UAFw==", + "requires": { + "@ohos/sdkmanager-common": "^1.1.8" + } + }, + "@ohos/hvigor": { + "version": "1.2.2", + "resolved": "https://repo.harmonyos.com/npm/@ohos/hvigor/-/@ohos/hvigor-1.2.2.tgz", + "integrity": "sha512-GfXCf7pDnyEcxWkG7Edd23XPxgym1vY37zdNt/Gj0pZmw0f+FrP+blfCzVWfYQYlI76krzd96+kvD4lgWkIPBQ==", + "requires": { + "@ohos/hvigor-base": "1.2.2", + "fs-extra": "10.0.1", + "interpret": "1.4.0", + "liftoff": "4.0.0", + 
"mute-stdout": "1.0.0", + "pretty-hrtime": "1.0.0", + "v8flags": "3.2.0", + "yargs": "7.1.2" + } + }, + "@ohos/hvigor-base": { + "version": "1.2.2", + "resolved": "https://repo.harmonyos.com/npm/@ohos/hvigor-base/-/@ohos/hvigor-base-1.2.2.tgz", + "integrity": "sha512-omwL/qjTE7DTCQdHZnMKuAxKlH1JOgOxaVAae1ca8j/oPuNgY6Spn+mpYRDHIktvav6axMmHT9zV1hJykY4GEg==", + "requires": { + "fs-extra": "10.0.1", + "json5": "2.2.0", + "log4js": "6.4.1", + "once": "1.4.0", + "pretty-hrtime": "1.0.0" + } + }, + "@ohos/hvigor-ohos-plugin": { + "version": "1.2.2", + "resolved": "https://repo.harmonyos.com/npm/@ohos/hvigor-ohos-plugin/-/@ohos/hvigor-ohos-plugin-1.2.2.tgz", + "integrity": "sha512-gir91UxlhMbf2E/NjTYWJGrsNGBKD/1YYbaEdmswD/qW18UDY7jyuqSVyV/gj0h9+iji+gd53rXWzpLLztG5lg==", + "requires": { + "@ohos/hos-sdkmanager-common": "1.0.4", + "@ohos/hvigor-base": "1.2.2", + "@ohos/sdkmanager-common": "1.1.8", + "adm-zip": "0.5.9", + "ajv": "8.10.0", + "execa": "5.1.1", + "fast-xml-parser": "4.0.3", + "fs-extra": "10.0.1", + "glob": "7.2.0", + "iconv-lite": "0.6.3", + "json5": "2.2.0", + "lodash": "4.17.21", + "pretty-hrtime": "1.0.3", + "resolve-package-path": "4.0.3" + }, + "dependencies": { + "pretty-hrtime": { + "version": "1.0.3", + "resolved": "https://repo.huaweicloud.com/repository/npm/pretty-hrtime/-/pretty-hrtime-1.0.3.tgz", + "integrity": "sha512-66hKPCr+72mlfiSjlEB1+45IjXSqvVAIy6mocupoww4tBFE9R9IhwwUGoI4G++Tc9Aq+2rxOt0RFU6gPcrte0A==" + } + } + }, + "@ohos/hypium": { + "version": "1.0.2", + "resolved": "https://repo.harmonyos.com/npm/@ohos/hypium/-/@ohos/hypium-1.0.2.tgz", + "integrity": "sha512-HWW62q6hbd0PhvPTH96lo4j//owFwyCLj6Q2jmVLDW6mOt1dwJJwUQ7qUH+Cni/6MmcnyXKvt9I0moGAPK1aCw==" + }, + "@ohos/sdkmanager-common": { + "version": "1.1.8", + "resolved": "https://repo.harmonyos.com/npm/@ohos/sdkmanager-common/-/@ohos/sdkmanager-common-1.1.8.tgz", + "integrity": "sha512-mxq69+6Zg/ybeQGnOtkBzOTbNBkEdiYehRKWsAD/je53v1W+ahauLqe90pNZEiBuVYugzb6z2EaJtAXYZtE8gQ==" + }, + "adm-zip": { 
+ "version": "0.5.9", + "resolved": "https://repo.huaweicloud.com/repository/npm/adm-zip/-/adm-zip-0.5.9.tgz", + "integrity": "sha512-s+3fXLkeeLjZ2kLjCBwQufpI5fuN+kIGBxu6530nVQZGVol0d7Y/M88/xw9HGGUcJjKf8LutN3VPRUBq6N7Ajg==" + }, + "ajv": { + "version": "8.10.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/ajv/-/ajv-8.10.0.tgz", + "integrity": "sha512-bzqAEZOjkrUMl2afH8dknrq5KEk2SrwdBROR+vH1EKVQTqaUbJVPdc/gEdggTMM0Se+s+Ja4ju4TlNcStKl2Hw==", + "requires": { + "fast-deep-equal": "^3.1.1", + "json-schema-traverse": "^1.0.0", + "require-from-string": "^2.0.2", + "uri-js": "^4.2.2" + } + }, + "ansi-regex": { + "version": "2.1.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/ansi-regex/-/ansi-regex-2.1.1.tgz", + "integrity": "sha512-TIGnTpdo+E3+pCyAluZvtED5p5wCqLdezCyhPZzKPcxvFplEt4i+W7OONCKgeZFT3+y5NZZfOOS/Bdcanm1MYA==" + }, + "array-each": { + "version": "1.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/array-each/-/array-each-1.0.1.tgz", + "integrity": "sha512-zHjL5SZa68hkKHBFBK6DJCTtr9sfTCPCaph/L7tMSLcTFgy+zX7E+6q5UArbtOtMBCtxdICpfTCspRse+ywyXA==" + }, + "array-slice": { + "version": "1.1.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/array-slice/-/array-slice-1.1.0.tgz", + "integrity": "sha512-B1qMD3RBP7O8o0H2KbrXDyB0IccejMF15+87Lvlor12ONPRHP6gTjXMNkt/d3ZuOGbAe66hFmaCfECI24Ufp6w==" + }, + "balanced-match": { + "version": "1.0.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/balanced-match/-/balanced-match-1.0.2.tgz", + "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==" + }, + "brace-expansion": { + "version": "1.1.11", + "resolved": "https://repo.huaweicloud.com/repository/npm/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "requires": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "braces": { + 
"version": "3.0.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/braces/-/braces-3.0.2.tgz", + "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "requires": { + "fill-range": "^7.0.1" + } + }, + "call-bind": { + "version": "1.0.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/call-bind/-/call-bind-1.0.2.tgz", + "integrity": "sha512-7O+FbCihrB5WGbFYesctwmTKae6rOiIzmz1icreWJ+0aA7LJfuqhEso2T9ncpcFtzMQtzXf2QGGueWJGTYsqrA==", + "requires": { + "function-bind": "^1.1.1", + "get-intrinsic": "^1.0.2" + } + }, + "camelcase": { + "version": "3.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/camelcase/-/camelcase-3.0.0.tgz", + "integrity": "sha512-4nhGqUkc4BqbBBB4Q6zLuD7lzzrHYrjKGeYaEji/3tFR5VdJu9v+LilhGIVe8wxEJPPOeWo7eg8dwY13TZ1BNg==" + }, + "cliui": { + "version": "3.2.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/cliui/-/cliui-3.2.0.tgz", + "integrity": "sha512-0yayqDxWQbqk3ojkYqUKqaAQ6AfNKeKWRNA8kR0WXzAsdHpP4BIaOmMAG87JGuO6qcobyW4GjxHd9PmhEd+T9w==", + "requires": { + "string-width": "^1.0.1", + "strip-ansi": "^3.0.1", + "wrap-ansi": "^2.0.0" + } + }, + "code-point-at": { + "version": "1.1.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/code-point-at/-/code-point-at-1.1.0.tgz", + "integrity": "sha512-RpAVKQA5T63xEj6/giIbUEtZwJ4UFIc3ZtvEkiaUERylqe8xb5IvqcgOurZLahv93CLKfxcw5YI+DZcUBRyLXA==" + }, + "concat-map": { + "version": "0.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==" + }, + "cross-spawn": { + "version": "7.0.3", + "resolved": "https://repo.huaweicloud.com/repository/npm/cross-spawn/-/cross-spawn-7.0.3.tgz", + "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "requires": { + "path-key": "^3.1.0", + 
"shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "dependencies": { + "which": { + "version": "2.0.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "requires": { + "isexe": "^2.0.0" + } + } + } + }, + "date-format": { + "version": "4.0.13", + "resolved": "https://repo.huaweicloud.com/repository/npm/date-format/-/date-format-4.0.13.tgz", + "integrity": "sha512-bnYCwf8Emc3pTD8pXnre+wfnjGtfi5ncMDKy7+cWZXbmRAsdWkOQHrfC1yz/KiwP5thDp2kCHWYWKBX4HP1hoQ==" + }, + "debug": { + "version": "4.3.4", + "resolved": "https://repo.huaweicloud.com/repository/npm/debug/-/debug-4.3.4.tgz", + "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "requires": { + "ms": "2.1.2" + } + }, + "decamelize": { + "version": "1.2.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/decamelize/-/decamelize-1.2.0.tgz", + "integrity": "sha512-z2S+W9X73hAUUki+N+9Za2lBlun89zigOyGrsax+KUQ6wKW4ZoWpEYBkGhQjwAjjDCkWxhY0VKEhk8wzY7F5cA==" + }, + "define-properties": { + "version": "1.1.4", + "resolved": "https://repo.huaweicloud.com/repository/npm/define-properties/-/define-properties-1.1.4.tgz", + "integrity": "sha512-uckOqKcfaVvtBdsVkdPv3XjveQJsNQqmhXgRi8uhvWWuPYZCNlzT8qAyblUgNoXdHdjMTzAqeGjAoli8f+bzPA==", + "requires": { + "has-property-descriptors": "^1.0.0", + "object-keys": "^1.1.1" + } + }, + "detect-file": { + "version": "1.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/detect-file/-/detect-file-1.0.0.tgz", + "integrity": "sha512-DtCOLG98P007x7wiiOmfI0fi3eIKyWiLTGJ2MDnVi/E04lWGbf+JzrRHMm0rgIIZJGtHpKpbVgLWHrv8xXpc3Q==" + }, + "error-ex": { + "version": "1.3.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/error-ex/-/error-ex-1.3.2.tgz", + "integrity": 
"sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==", + "requires": { + "is-arrayish": "^0.2.1" + } + }, + "execa": { + "version": "5.1.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/execa/-/execa-5.1.1.tgz", + "integrity": "sha512-8uSpZZocAZRBAPIEINJj3Lo9HyGitllczc27Eh5YYojjMFMn8yHMDMaUHE2Jqfq05D/wucwI4JGURyXt1vchyg==", + "requires": { + "cross-spawn": "^7.0.3", + "get-stream": "^6.0.0", + "human-signals": "^2.1.0", + "is-stream": "^2.0.0", + "merge-stream": "^2.0.0", + "npm-run-path": "^4.0.1", + "onetime": "^5.1.2", + "signal-exit": "^3.0.3", + "strip-final-newline": "^2.0.0" + } + }, + "expand-tilde": { + "version": "2.0.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/expand-tilde/-/expand-tilde-2.0.2.tgz", + "integrity": "sha512-A5EmesHW6rfnZ9ysHQjPdJRni0SRar0tjtG5MNtm9n5TUvsYU8oozprtRD4AqHxcZWWlVuAmQo2nWKfN9oyjTw==", + "requires": { + "homedir-polyfill": "^1.0.1" + } + }, + "extend": { + "version": "3.0.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/extend/-/extend-3.0.2.tgz", + "integrity": "sha512-fjquC59cD7CyW6urNXK0FBufkZcoiGG80wTuPujX590cB5Ttln20E2UB4S/WARVqhXffZl2LNgS+gQdPIIim/g==" + }, + "fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://repo.huaweicloud.com/repository/npm/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==" + }, + "fast-xml-parser": { + "version": "4.0.3", + "resolved": "https://repo.huaweicloud.com/repository/npm/fast-xml-parser/-/fast-xml-parser-4.0.3.tgz", + "integrity": "sha512-xhQbg3a/EYNHwK0cxIG1nZmVkHX/0tWihamn5pU4Mhd9KEVE2ga8ZJiqEUgB2sApElvAATOdMTLjgqIpvYDUkQ==", + "requires": { + "strnum": "^1.0.5" + } + }, + "fill-range": { + "version": "7.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/fill-range/-/fill-range-7.0.1.tgz", + "integrity": 
"sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "requires": { + "to-regex-range": "^5.0.1" + } + }, + "find-up": { + "version": "1.1.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/find-up/-/find-up-1.1.2.tgz", + "integrity": "sha512-jvElSjyuo4EMQGoTwo1uJU5pQMwTW5lS1x05zzfJuTIyLR3zwO27LYrxNg+dlvKpGOuGy/MzBdXh80g0ve5+HA==", + "requires": { + "path-exists": "^2.0.0", + "pinkie-promise": "^2.0.0" + } + }, + "findup-sync": { + "version": "5.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/findup-sync/-/findup-sync-5.0.0.tgz", + "integrity": "sha512-MzwXju70AuyflbgeOhzvQWAvvQdo1XL0A9bVvlXsYcFEBM87WR4OakL4OfZq+QRmr+duJubio+UtNQCPsVESzQ==", + "requires": { + "detect-file": "^1.0.0", + "is-glob": "^4.0.3", + "micromatch": "^4.0.4", + "resolve-dir": "^1.0.1" + } + }, + "fined": { + "version": "2.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/fined/-/fined-2.0.0.tgz", + "integrity": "sha512-OFRzsL6ZMHz5s0JrsEr+TpdGNCtrVtnuG3x1yzGNiQHT0yaDnXAj8V/lWcpJVrnoDpcwXcASxAZYbuXda2Y82A==", + "requires": { + "expand-tilde": "^2.0.2", + "is-plain-object": "^5.0.0", + "object.defaults": "^1.1.0", + "object.pick": "^1.3.0", + "parse-filepath": "^1.0.2" + } + }, + "flagged-respawn": { + "version": "2.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/flagged-respawn/-/flagged-respawn-2.0.0.tgz", + "integrity": "sha512-Gq/a6YCi8zexmGHMuJwahTGzXlAZAOsbCVKduWXC6TlLCjjFRlExMJc4GC2NYPYZ0r/brw9P7CpRgQmlPVeOoA==" + }, + "flatted": { + "version": "3.2.7", + "resolved": "https://repo.huaweicloud.com/repository/npm/flatted/-/flatted-3.2.7.tgz", + "integrity": "sha512-5nqDSxl8nn5BSNxyR3n4I6eDmbolI6WT+QqR547RwxQapgjQBmtktdP+HTBb/a/zLsbzERTONyUB5pefh5TtjQ==" + }, + "for-in": { + "version": "1.0.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/for-in/-/for-in-1.0.2.tgz", + "integrity": 
"sha512-7EwmXrOjyL+ChxMhmG5lnW9MPt1aIeZEwKhQzoBUdTV0N3zuwWDZYVJatDvZ2OyzPUvdIAZDsCetk3coyMfcnQ==" + }, + "for-own": { + "version": "1.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/for-own/-/for-own-1.0.0.tgz", + "integrity": "sha512-0OABksIGrxKK8K4kynWkQ7y1zounQxP+CWnyclVwj81KW3vlLlGUx57DKGcP/LH216GzqnstnPocF16Nxs0Ycg==", + "requires": { + "for-in": "^1.0.1" + } + }, + "fs-extra": { + "version": "10.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/fs-extra/-/fs-extra-10.0.1.tgz", + "integrity": "sha512-NbdoVMZso2Lsrn/QwLXOy6rm0ufY2zEOKCDzJR/0kBsb0E6qed0P3iYK+Ath3BfvXEeu4JhEtXLgILx5psUfag==", + "requires": { + "graceful-fs": "^4.2.0", + "jsonfile": "^6.0.1", + "universalify": "^2.0.0" + } + }, + "fs.realpath": { + "version": "1.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/fs.realpath/-/fs.realpath-1.0.0.tgz", + "integrity": "sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==" + }, + "function-bind": { + "version": "1.1.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/function-bind/-/function-bind-1.1.1.tgz", + "integrity": "sha512-yIovAzMX49sF8Yl58fSCWJ5svSLuaibPxXQJFLmBObTuCr0Mf1KiPopGM9NiFjiYBCbfaa2Fh6breQ6ANVTI0A==" + }, + "get-caller-file": { + "version": "1.0.3", + "resolved": "https://repo.huaweicloud.com/repository/npm/get-caller-file/-/get-caller-file-1.0.3.tgz", + "integrity": "sha512-3t6rVToeoZfYSGd8YoLFR2DJkiQrIiUrGcjvFX2mDw3bn6k2OtwHN0TNCLbBO+w8qTvimhDkv+LSscbJY1vE6w==" + }, + "get-intrinsic": { + "version": "1.1.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/get-intrinsic/-/get-intrinsic-1.1.2.tgz", + "integrity": "sha512-Jfm3OyCxHh9DJyc28qGk+JmfkpO41A4XkneDSujN9MDXrm4oDKdHvndhZ2dN94+ERNfkYJWDclW6k2L/ZGHjXA==", + "requires": { + "function-bind": "^1.1.1", + "has": "^1.0.3", + "has-symbols": "^1.0.3" + } + }, + "get-stream": { + "version": "6.0.1", + "resolved": 
"https://repo.huaweicloud.com/repository/npm/get-stream/-/get-stream-6.0.1.tgz", + "integrity": "sha512-ts6Wi+2j3jQjqi70w5AlN8DFnkSwC+MqmxEzdEALB2qXZYV3X/b1CTfgPLGJNMeAWxdPfU8FO1ms3NUfaHCPYg==" + }, + "glob": { + "version": "7.2.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/glob/-/glob-7.2.0.tgz", + "integrity": "sha512-lmLf6gtyrPq8tTjSmrO94wBeQbFR3HbLHbuyD69wuyQkImp2hWqMGB47OX65FBkPffO641IP9jWa1z4ivqG26Q==", + "requires": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.0.4", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + } + }, + "global-modules": { + "version": "1.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/global-modules/-/global-modules-1.0.0.tgz", + "integrity": "sha512-sKzpEkf11GpOFuw0Zzjzmt4B4UZwjOcG757PPvrfhxcLFbq0wpsgpOqxpxtxFiCG4DtG93M6XRVbF2oGdev7bg==", + "requires": { + "global-prefix": "^1.0.1", + "is-windows": "^1.0.1", + "resolve-dir": "^1.0.0" + } + }, + "global-prefix": { + "version": "1.0.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/global-prefix/-/global-prefix-1.0.2.tgz", + "integrity": "sha512-5lsx1NUDHtSjfg0eHlmYvZKv8/nVqX4ckFbM+FrGcQ+04KWcWFo9P5MxPZYSzUvyzmdTbI7Eix8Q4IbELDqzKg==", + "requires": { + "expand-tilde": "^2.0.2", + "homedir-polyfill": "^1.0.1", + "ini": "^1.3.4", + "is-windows": "^1.0.1", + "which": "^1.2.14" + } + }, + "graceful-fs": { + "version": "4.2.10", + "resolved": "https://repo.huaweicloud.com/repository/npm/graceful-fs/-/graceful-fs-4.2.10.tgz", + "integrity": "sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA==" + }, + "has": { + "version": "1.0.3", + "resolved": "https://repo.huaweicloud.com/repository/npm/has/-/has-1.0.3.tgz", + "integrity": "sha512-f2dvO0VU6Oej7RkWJGrehjbzMAjFp5/VKPp5tTpWIV4JHHZK1/BxbFRtf/siA2SWTe09caDmVtYYzWEIbBS4zw==", + "requires": { + "function-bind": "^1.1.1" + } + }, + "has-property-descriptors": { + "version": "1.0.0", + "resolved": 
"https://repo.huaweicloud.com/repository/npm/has-property-descriptors/-/has-property-descriptors-1.0.0.tgz", + "integrity": "sha512-62DVLZGoiEBDHQyqG4w9xCuZ7eJEwNmJRWw2VY84Oedb7WFcA27fiEVe8oUQx9hAUJ4ekurquucTGwsyO1XGdQ==", + "requires": { + "get-intrinsic": "^1.1.1" + } + }, + "has-symbols": { + "version": "1.0.3", + "resolved": "https://repo.huaweicloud.com/repository/npm/has-symbols/-/has-symbols-1.0.3.tgz", + "integrity": "sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A==" + }, + "homedir-polyfill": { + "version": "1.0.3", + "resolved": "https://repo.huaweicloud.com/repository/npm/homedir-polyfill/-/homedir-polyfill-1.0.3.tgz", + "integrity": "sha512-eSmmWE5bZTK2Nou4g0AI3zZ9rswp7GRKoKXS1BLUkvPviOqs4YTN1djQIqrXy9k5gEtdLPy86JjRwsNM9tnDcA==", + "requires": { + "parse-passwd": "^1.0.0" + } + }, + "hosted-git-info": { + "version": "2.8.9", + "resolved": "https://repo.huaweicloud.com/repository/npm/hosted-git-info/-/hosted-git-info-2.8.9.tgz", + "integrity": "sha512-mxIDAb9Lsm6DoOJ7xH+5+X4y1LU/4Hi50L9C5sIswK3JzULS4bwk1FvjdBgvYR4bzT4tuUQiC15FE2f5HbLvYw==" + }, + "human-signals": { + "version": "2.1.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/human-signals/-/human-signals-2.1.0.tgz", + "integrity": "sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw==" + }, + "iconv-lite": { + "version": "0.6.3", + "resolved": "https://repo.huaweicloud.com/repository/npm/iconv-lite/-/iconv-lite-0.6.3.tgz", + "integrity": "sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==", + "requires": { + "safer-buffer": ">= 2.1.2 < 3.0.0" + } + }, + "inflight": { + "version": "1.0.6", + "resolved": "https://repo.huaweicloud.com/repository/npm/inflight/-/inflight-1.0.6.tgz", + "integrity": "sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==", + "requires": { + "once": "^1.3.0", + "wrappy": "1" + } + }, + 
"inherits": { + "version": "2.0.4", + "resolved": "https://repo.huaweicloud.com/repository/npm/inherits/-/inherits-2.0.4.tgz", + "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==" + }, + "ini": { + "version": "1.3.8", + "resolved": "https://repo.huaweicloud.com/repository/npm/ini/-/ini-1.3.8.tgz", + "integrity": "sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==" + }, + "interpret": { + "version": "1.4.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/interpret/-/interpret-1.4.0.tgz", + "integrity": "sha512-agE4QfB2Lkp9uICn7BAqoscw4SZP9kTE2hxiFI3jBPmXJfdqiahTbUuKGsMoN2GtqL9AxhYioAcVvgsb1HvRbA==" + }, + "invert-kv": { + "version": "1.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/invert-kv/-/invert-kv-1.0.0.tgz", + "integrity": "sha512-xgs2NH9AE66ucSq4cNG1nhSFghr5l6tdL15Pk+jl46bmmBapgoaY/AacXyaDznAqmGL99TiLSQgO/XazFSKYeQ==" + }, + "is-absolute": { + "version": "1.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/is-absolute/-/is-absolute-1.0.0.tgz", + "integrity": "sha512-dOWoqflvcydARa360Gvv18DZ/gRuHKi2NU/wU5X1ZFzdYfH29nkiNZsF3mp4OJ3H4yo9Mx8A/uAGNzpzPN3yBA==", + "requires": { + "is-relative": "^1.0.0", + "is-windows": "^1.0.1" + } + }, + "is-arrayish": { + "version": "0.2.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/is-arrayish/-/is-arrayish-0.2.1.tgz", + "integrity": "sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==" + }, + "is-core-module": { + "version": "2.10.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/is-core-module/-/is-core-module-2.10.0.tgz", + "integrity": "sha512-Erxj2n/LDAZ7H8WNJXd9tw38GYM3dv8rk8Zcs+jJuxYTW7sozH+SS8NtrSjVL1/vpLvWi1hxy96IzjJ3EHTJJg==", + "requires": { + "has": "^1.0.3" + } + }, + "is-extglob": { + "version": "2.1.1", + "resolved": 
"https://repo.huaweicloud.com/repository/npm/is-extglob/-/is-extglob-2.1.1.tgz", + "integrity": "sha512-SbKbANkN603Vi4jEZv49LeVJMn4yGwsbzZworEoyEiutsN3nJYdbO36zfhGJ6QEDpOZIFkDtnq5JRxmvl3jsoQ==" + }, + "is-fullwidth-code-point": { + "version": "1.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/is-fullwidth-code-point/-/is-fullwidth-code-point-1.0.0.tgz", + "integrity": "sha512-1pqUqRjkhPJ9miNq9SwMfdvi6lBJcd6eFxvfaivQhaH3SgisfiuudvFntdKOmxuee/77l+FPjKrQjWvmPjWrRw==", + "requires": { + "number-is-nan": "^1.0.0" + } + }, + "is-glob": { + "version": "4.0.3", + "resolved": "https://repo.huaweicloud.com/repository/npm/is-glob/-/is-glob-4.0.3.tgz", + "integrity": "sha512-xelSayHH36ZgE7ZWhli7pW34hNbNl8Ojv5KVmkJD4hBdD3th8Tfk9vYasLM+mXWOZhFkgZfxhLSnrwRr4elSSg==", + "requires": { + "is-extglob": "^2.1.1" + } + }, + "is-number": { + "version": "7.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/is-number/-/is-number-7.0.0.tgz", + "integrity": "sha512-41Cifkg6e8TylSpdtTpeLVMqvSBEVzTttHvERD741+pnZ8ANv0004MRL43QKPDlK9cGvNp6NZWZUBlbGXYxxng==" + }, + "is-plain-object": { + "version": "5.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/is-plain-object/-/is-plain-object-5.0.0.tgz", + "integrity": "sha512-VRSzKkbMm5jMDoKLbltAkFQ5Qr7VDiTFGXxYFXXowVj387GeGNOCsOH6Msy00SGZ3Fp84b1Naa1psqgcCIEP5Q==" + }, + "is-relative": { + "version": "1.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/is-relative/-/is-relative-1.0.0.tgz", + "integrity": "sha512-Kw/ReK0iqwKeu0MITLFuj0jbPAmEiOsIwyIXvvbfa6QfmN9pkD1M+8pdk7Rl/dTKbH34/XBFMbgD4iMJhLQbGA==", + "requires": { + "is-unc-path": "^1.0.0" + } + }, + "is-stream": { + "version": "2.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/is-stream/-/is-stream-2.0.1.tgz", + "integrity": "sha512-hFoiJiTl63nn+kstHGBtewWSKnQLpyb155KHheA1l39uvtO9nWIop1p3udqPcUd/xbF1VLMO4n7OI6p7RbngDg==" + }, + "is-unc-path": { + "version": "1.0.0", + "resolved": 
"https://repo.huaweicloud.com/repository/npm/is-unc-path/-/is-unc-path-1.0.0.tgz", + "integrity": "sha512-mrGpVd0fs7WWLfVsStvgF6iEJnbjDFZh9/emhRDcGWTduTfNHd9CHeUwH3gYIjdbwo4On6hunkztwOaAw0yllQ==", + "requires": { + "unc-path-regex": "^0.1.2" + } + }, + "is-utf8": { + "version": "0.2.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/is-utf8/-/is-utf8-0.2.1.tgz", + "integrity": "sha512-rMYPYvCzsXywIsldgLaSoPlw5PfoB/ssr7hY4pLfcodrA5M/eArza1a9VmTiNIBNMjOGr1Ow9mTyU2o69U6U9Q==" + }, + "is-windows": { + "version": "1.0.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/is-windows/-/is-windows-1.0.2.tgz", + "integrity": "sha512-eXK1UInq2bPmjyX6e3VHIzMLobc4J94i4AWn+Hpq3OU5KkrRC96OAcR3PRJ/pGu6m8TRnBHP9dkXQVsT/COVIA==" + }, + "isexe": { + "version": "2.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==" + }, + "isobject": { + "version": "3.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/isobject/-/isobject-3.0.1.tgz", + "integrity": "sha512-WhB9zCku7EGTj/HQQRz5aUQEUeoQZH2bWcltRErOpymJ4boYE6wL9Tbr23krRPSZ+C5zqNSrSw+Cc7sZZ4b7vg==" + }, + "json-schema-traverse": { + "version": "1.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==" + }, + "json5": { + "version": "2.2.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/json5/-/json5-2.2.0.tgz", + "integrity": "sha512-f+8cldu7X/y7RAJurMEJmdoKXGB/X550w2Nr3tTbezL6RwEE/iMcm+tZnXeoZtKuOq6ft8+CqzEkrIgx1fPoQA==", + "requires": { + "minimist": "^1.2.5" + } + }, + "jsonfile": { + "version": "6.1.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/jsonfile/-/jsonfile-6.1.0.tgz", + "integrity": 
"sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ==", + "requires": { + "graceful-fs": "^4.1.6", + "universalify": "^2.0.0" + } + }, + "kind-of": { + "version": "6.0.3", + "resolved": "https://repo.huaweicloud.com/repository/npm/kind-of/-/kind-of-6.0.3.tgz", + "integrity": "sha512-dcS1ul+9tmeD95T+x28/ehLgd9mENa3LsvDTtzm3vyBEO7RPptvAD+t44WVXaUjTBRcrpFeFlC8WCruUR456hw==" + }, + "lcid": { + "version": "1.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/lcid/-/lcid-1.0.0.tgz", + "integrity": "sha512-YiGkH6EnGrDGqLMITnGjXtGmNtjoXw9SVUzcaos8RBi7Ps0VBylkq+vOcY9QE5poLasPCR849ucFUkl0UzUyOw==", + "requires": { + "invert-kv": "^1.0.0" + } + }, + "liftoff": { + "version": "4.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/liftoff/-/liftoff-4.0.0.tgz", + "integrity": "sha512-rMGwYF8q7g2XhG2ulBmmJgWv25qBsqRbDn5gH0+wnuyeFt7QBJlHJmtg5qEdn4pN6WVAUMgXnIxytMFRX9c1aA==", + "requires": { + "extend": "^3.0.2", + "findup-sync": "^5.0.0", + "fined": "^2.0.0", + "flagged-respawn": "^2.0.0", + "is-plain-object": "^5.0.0", + "object.map": "^1.0.1", + "rechoir": "^0.8.0", + "resolve": "^1.20.0" + } + }, + "load-json-file": { + "version": "1.1.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/load-json-file/-/load-json-file-1.1.0.tgz", + "integrity": "sha512-cy7ZdNRXdablkXYNI049pthVeXFurRyb9+hA/dZzerZ0pGTx42z+y+ssxBaVV2l70t1muq5IdKhn4UtcoGUY9A==", + "requires": { + "graceful-fs": "^4.1.2", + "parse-json": "^2.2.0", + "pify": "^2.0.0", + "pinkie-promise": "^2.0.0", + "strip-bom": "^2.0.0" + } + }, + "lodash": { + "version": "4.17.21", + "resolved": "https://repo.huaweicloud.com/repository/npm/lodash/-/lodash-4.17.21.tgz", + "integrity": "sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==" + }, + "log4js": { + "version": "6.4.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/log4js/-/log4js-6.4.1.tgz", + "integrity": 
"sha512-iUiYnXqAmNKiIZ1XSAitQ4TmNs8CdZYTAWINARF3LjnsLN8tY5m0vRwd6uuWj/yNY0YHxeZodnbmxKFUOM2rMg==", + "requires": { + "date-format": "^4.0.3", + "debug": "^4.3.3", + "flatted": "^3.2.4", + "rfdc": "^1.3.0", + "streamroller": "^3.0.2" + } + }, + "make-iterator": { + "version": "1.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/make-iterator/-/make-iterator-1.0.1.tgz", + "integrity": "sha512-pxiuXh0iVEq7VM7KMIhs5gxsfxCux2URptUQaXo4iZZJxBAzTPOLE2BumO5dbfVYq/hBJFBR/a1mFDmOx5AGmw==", + "requires": { + "kind-of": "^6.0.2" + } + }, + "map-cache": { + "version": "0.2.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/map-cache/-/map-cache-0.2.2.tgz", + "integrity": "sha512-8y/eV9QQZCiyn1SprXSrCmqJN0yNRATe+PO8ztwqrvrbdRLA3eYJF0yaR0YayLWkMbsQSKWS9N2gPcGEc4UsZg==" + }, + "merge-stream": { + "version": "2.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/merge-stream/-/merge-stream-2.0.0.tgz", + "integrity": "sha512-abv/qOcuPfk3URPfDzmZU1LKmuw8kT+0nIHvKrKgFrwifol/doWcdA4ZqsWQ8ENrFKkd67Mfpo/LovbIUsbt3w==" + }, + "micromatch": { + "version": "4.0.5", + "resolved": "https://repo.huaweicloud.com/repository/npm/micromatch/-/micromatch-4.0.5.tgz", + "integrity": "sha512-DMy+ERcEW2q8Z2Po+WNXuw3c5YaUSFjAO5GsJqfEl7UjvtIuFKO6ZrKvcItdy98dwFI2N1tg3zNIdKaQT+aNdA==", + "requires": { + "braces": "^3.0.2", + "picomatch": "^2.3.1" + } + }, + "mimic-fn": { + "version": "2.1.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/mimic-fn/-/mimic-fn-2.1.0.tgz", + "integrity": "sha512-OqbOk5oEQeAZ8WXWydlu9HJjz9WVdEIvamMCcXmuqUYjTknH/sqsWvhQ3vgwKFRR1HpjvNBKQ37nbJgYzGqGcg==" + }, + "minimatch": { + "version": "3.1.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "requires": { + "brace-expansion": "^1.1.7" + } + }, + "minimist": { + "version": "1.2.6", + "resolved": 
"https://repo.huaweicloud.com/repository/npm/minimist/-/minimist-1.2.6.tgz", + "integrity": "sha512-Jsjnk4bw3YJqYzbdyBiNsPWHPfO++UGG749Cxs6peCu5Xg4nrena6OVxOYxrQTqww0Jmwt+Ref8rggumkTLz9Q==" + }, + "ms": { + "version": "2.1.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/ms/-/ms-2.1.2.tgz", + "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" + }, + "mute-stdout": { + "version": "1.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/mute-stdout/-/mute-stdout-1.0.0.tgz", + "integrity": "sha512-MaSQenn0f9oxIjtCufclpV00MuYTiHaXPbdcfPIM+quMqoa8cXywjHHx4LhhIAZlXqPWMdcUpYviajfmHtHRJw==" + }, + "normalize-package-data": { + "version": "2.5.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/normalize-package-data/-/normalize-package-data-2.5.0.tgz", + "integrity": "sha512-/5CMN3T0R4XTj4DcGaexo+roZSdSFW/0AOOTROrjxzCG1wrWXEsGbRKevjlIL+ZDE4sZlJr5ED4YW0yqmkK+eA==", + "requires": { + "hosted-git-info": "^2.1.4", + "resolve": "^1.10.0", + "semver": "2 || 3 || 4 || 5", + "validate-npm-package-license": "^3.0.1" + } + }, + "npm-run-path": { + "version": "4.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/npm-run-path/-/npm-run-path-4.0.1.tgz", + "integrity": "sha512-S48WzZW777zhNIrn7gxOlISNAqi9ZC/uQFnRdbeIHhZhCA6UqpkOT8T1G7BvfdgP4Er8gF4sUbaS0i7QvIfCWw==", + "requires": { + "path-key": "^3.0.0" + } + }, + "number-is-nan": { + "version": "1.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/number-is-nan/-/number-is-nan-1.0.1.tgz", + "integrity": "sha512-4jbtZXNAsfZbAHiiqjLPBiCl16dES1zI4Hpzzxw61Tk+loF+sBDBKx1ICKKKwIqQ7M0mFn1TmkN7euSncWgHiQ==" + }, + "object-keys": { + "version": "1.1.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/object-keys/-/object-keys-1.1.1.tgz", + "integrity": "sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==" + }, + "object.assign": { + "version": "4.1.4", + "resolved": 
"https://repo.huaweicloud.com/repository/npm/object.assign/-/object.assign-4.1.4.tgz", + "integrity": "sha512-1mxKf0e58bvyjSCtKYY4sRe9itRk3PJpquJOjeIkz885CczcI4IvJJDLPS72oowuSh+pBxUFROpX+TU++hxhZQ==", + "requires": { + "call-bind": "^1.0.2", + "define-properties": "^1.1.4", + "has-symbols": "^1.0.3", + "object-keys": "^1.1.1" + } + }, + "object.defaults": { + "version": "1.1.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/object.defaults/-/object.defaults-1.1.0.tgz", + "integrity": "sha512-c/K0mw/F11k4dEUBMW8naXUuBuhxRCfG7W+yFy8EcijU/rSmazOUd1XAEEe6bC0OuXY4HUKjTJv7xbxIMqdxrA==", + "requires": { + "array-each": "^1.0.1", + "array-slice": "^1.0.0", + "for-own": "^1.0.0", + "isobject": "^3.0.0" + } + }, + "object.map": { + "version": "1.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/object.map/-/object.map-1.0.1.tgz", + "integrity": "sha512-3+mAJu2PLfnSVGHwIWubpOFLscJANBKuB/6A4CxBstc4aqwQY0FWcsppuy4jU5GSB95yES5JHSI+33AWuS4k6w==", + "requires": { + "for-own": "^1.0.0", + "make-iterator": "^1.0.0" + } + }, + "object.pick": { + "version": "1.3.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/object.pick/-/object.pick-1.3.0.tgz", + "integrity": "sha512-tqa/UMy/CCoYmj+H5qc07qvSL9dqcs/WZENZ1JbtWBlATP+iVOe778gE6MSijnyCnORzDuX6hU+LA4SZ09YjFQ==", + "requires": { + "isobject": "^3.0.1" + } + }, + "once": { + "version": "1.4.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/once/-/once-1.4.0.tgz", + "integrity": "sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==", + "requires": { + "wrappy": "1" + } + }, + "onetime": { + "version": "5.1.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/onetime/-/onetime-5.1.2.tgz", + "integrity": "sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg==", + "requires": { + "mimic-fn": "^2.1.0" + } + }, + "os-locale": { + "version": "1.4.0", + "resolved": 
"https://repo.huaweicloud.com/repository/npm/os-locale/-/os-locale-1.4.0.tgz", + "integrity": "sha512-PRT7ZORmwu2MEFt4/fv3Q+mEfN4zetKxufQrkShY2oGvUms9r8otu5HfdyIFHkYXjO7laNsoVGmM2MANfuTA8g==", + "requires": { + "lcid": "^1.0.0" + } + }, + "parse-filepath": { + "version": "1.0.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/parse-filepath/-/parse-filepath-1.0.2.tgz", + "integrity": "sha512-FwdRXKCohSVeXqwtYonZTXtbGJKrn+HNyWDYVcp5yuJlesTwNH4rsmRZ+GrKAPJ5bLpRxESMeS+Rl0VCHRvB2Q==", + "requires": { + "is-absolute": "^1.0.0", + "map-cache": "^0.2.0", + "path-root": "^0.1.1" + } + }, + "parse-json": { + "version": "2.2.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/parse-json/-/parse-json-2.2.0.tgz", + "integrity": "sha512-QR/GGaKCkhwk1ePQNYDRKYZ3mwU9ypsKhB0XyFnLQdomyEqk3e8wpW3V5Jp88zbxK4n5ST1nqo+g9juTpownhQ==", + "requires": { + "error-ex": "^1.2.0" + } + }, + "parse-passwd": { + "version": "1.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/parse-passwd/-/parse-passwd-1.0.0.tgz", + "integrity": "sha512-1Y1A//QUXEZK7YKz+rD9WydcE1+EuPr6ZBgKecAB8tmoW6UFv0NREVJe1p+jRxtThkcbbKkfwIbWJe/IeE6m2Q==" + }, + "path-exists": { + "version": "2.1.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/path-exists/-/path-exists-2.1.0.tgz", + "integrity": "sha512-yTltuKuhtNeFJKa1PiRzfLAU5182q1y4Eb4XCJ3PBqyzEDkAZRzBrKKBct682ls9reBVHf9udYLN5Nd+K1B9BQ==", + "requires": { + "pinkie-promise": "^2.0.0" + } + }, + "path-is-absolute": { + "version": "1.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==" + }, + "path-key": { + "version": "3.1.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==" + }, + "path-parse": { + 
"version": "1.0.7", + "resolved": "https://repo.huaweicloud.com/repository/npm/path-parse/-/path-parse-1.0.7.tgz", + "integrity": "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==" + }, + "path-root": { + "version": "0.1.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/path-root/-/path-root-0.1.1.tgz", + "integrity": "sha512-QLcPegTHF11axjfojBIoDygmS2E3Lf+8+jI6wOVmNVenrKSo3mFdSGiIgdSHenczw3wPtlVMQaFVwGmM7BJdtg==", + "requires": { + "path-root-regex": "^0.1.0" + } + }, + "path-root-regex": { + "version": "0.1.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/path-root-regex/-/path-root-regex-0.1.2.tgz", + "integrity": "sha512-4GlJ6rZDhQZFE0DPVKh0e9jmZ5egZfxTkp7bcRDuPlJXbAwhxcl2dINPUAsjLdejqaLsCeg8axcLjIbvBjN4pQ==" + }, + "path-type": { + "version": "1.1.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/path-type/-/path-type-1.1.0.tgz", + "integrity": "sha512-S4eENJz1pkiQn9Znv33Q+deTOKmbl+jj1Fl+qiP/vYezj+S8x+J3Uo0ISrx/QoEvIlOaDWJhPaRd1flJ9HXZqg==", + "requires": { + "graceful-fs": "^4.1.2", + "pify": "^2.0.0", + "pinkie-promise": "^2.0.0" + } + }, + "picomatch": { + "version": "2.3.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/picomatch/-/picomatch-2.3.1.tgz", + "integrity": "sha512-JU3teHTNjmE2VCGFzuY8EXzCDVwEqB2a8fsIvwaStHhAWJEeVd1o1QD80CU6+ZdEXXSLbSsuLwJjkCBWqRQUVA==" + }, + "pify": { + "version": "2.3.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/pify/-/pify-2.3.0.tgz", + "integrity": "sha512-udgsAY+fTnvv7kI7aaxbqwWNb0AHiB0qBO89PZKPkoTmGOgdbrHDKD+0B2X4uTfJ/FT1R09r9gTsjUjNJotuog==" + }, + "pinkie": { + "version": "2.0.4", + "resolved": "https://repo.huaweicloud.com/repository/npm/pinkie/-/pinkie-2.0.4.tgz", + "integrity": "sha512-MnUuEycAemtSaeFSjXKW/aroV7akBbY+Sv+RkyqFjgAe73F+MR0TBWKBRDkmfWq/HiFmdavfZ1G7h4SPZXaCSg==" + }, + "pinkie-promise": { + "version": "2.0.1", + "resolved": 
"https://repo.huaweicloud.com/repository/npm/pinkie-promise/-/pinkie-promise-2.0.1.tgz", + "integrity": "sha512-0Gni6D4UcLTbv9c57DfxDGdr41XfgUjqWZu492f0cIGr16zDU06BWP/RAEvOuo7CQ0CNjHaLlM59YJJFm3NWlw==", + "requires": { + "pinkie": "^2.0.0" + } + }, + "pretty-hrtime": { + "version": "1.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/pretty-hrtime/-/pretty-hrtime-1.0.0.tgz", + "integrity": "sha512-CU2l5CYUAptUYq/671ajexQfXuxJFwwg0n243Kdkx8bTjeenedsWgu8TGHPm03vLfNtk3aTXgySKPp3Usykudw==" + }, + "punycode": { + "version": "2.1.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/punycode/-/punycode-2.1.1.tgz", + "integrity": "sha512-XRsRjdf+j5ml+y/6GKHPZbrF/8p2Yga0JPtdqTIY2Xe5ohJPD9saDJJLPvp9+NSBprVvevdXZybnj2cv8OEd0A==" + }, + "read-pkg": { + "version": "1.1.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/read-pkg/-/read-pkg-1.1.0.tgz", + "integrity": "sha512-7BGwRHqt4s/uVbuyoeejRn4YmFnYZiFl4AuaeXHlgZf3sONF0SOGlxs2Pw8g6hCKupo08RafIO5YXFNOKTfwsQ==", + "requires": { + "load-json-file": "^1.0.0", + "normalize-package-data": "^2.3.2", + "path-type": "^1.0.0" + } + }, + "read-pkg-up": { + "version": "1.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/read-pkg-up/-/read-pkg-up-1.0.1.tgz", + "integrity": "sha512-WD9MTlNtI55IwYUS27iHh9tK3YoIVhxis8yKhLpTqWtml739uXc9NWTpxoHkfZf3+DkCCsXox94/VWZniuZm6A==", + "requires": { + "find-up": "^1.0.0", + "read-pkg": "^1.0.0" + } + }, + "rechoir": { + "version": "0.8.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/rechoir/-/rechoir-0.8.0.tgz", + "integrity": "sha512-/vxpCXddiX8NGfGO/mTafwjq4aFa/71pvamip0++IQk3zG8cbCj0fifNPrjjF1XMXUne91jL9OoxmdykoEtifQ==", + "requires": { + "resolve": "^1.20.0" + } + }, + "require-directory": { + "version": "2.1.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/require-directory/-/require-directory-2.1.1.tgz", + "integrity": "sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==" + 
}, + "require-from-string": { + "version": "2.0.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/require-from-string/-/require-from-string-2.0.2.tgz", + "integrity": "sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw==" + }, + "require-main-filename": { + "version": "1.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/require-main-filename/-/require-main-filename-1.0.1.tgz", + "integrity": "sha512-IqSUtOVP4ksd1C/ej5zeEh/BIP2ajqpn8c5x+q99gvcIG/Qf0cud5raVnE/Dwd0ua9TXYDoDc0RE5hBSdz22Ug==" + }, + "resolve": { + "version": "1.22.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/resolve/-/resolve-1.22.1.tgz", + "integrity": "sha512-nBpuuYuY5jFsli/JIs1oldw6fOQCBioohqWZg/2hiaOybXOft4lonv85uDOKXdf8rhyK159cxU5cDcK/NKk8zw==", + "requires": { + "is-core-module": "^2.9.0", + "path-parse": "^1.0.7", + "supports-preserve-symlinks-flag": "^1.0.0" + } + }, + "resolve-dir": { + "version": "1.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/resolve-dir/-/resolve-dir-1.0.1.tgz", + "integrity": "sha512-R7uiTjECzvOsWSfdM0QKFNBVFcK27aHOUwdvK53BcW8zqnGdYp0Fbj82cy54+2A4P2tFM22J5kRfe1R+lM/1yg==", + "requires": { + "expand-tilde": "^2.0.0", + "global-modules": "^1.0.0" + } + }, + "resolve-package-path": { + "version": "4.0.3", + "resolved": "https://repo.huaweicloud.com/repository/npm/resolve-package-path/-/resolve-package-path-4.0.3.tgz", + "integrity": "sha512-SRpNAPW4kewOaNUt8VPqhJ0UMxawMwzJD8V7m1cJfdSTK9ieZwS6K7Dabsm4bmLFM96Z5Y/UznrpG5kt1im8yA==", + "requires": { + "path-root": "^0.1.1" + } + }, + "rfdc": { + "version": "1.3.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/rfdc/-/rfdc-1.3.0.tgz", + "integrity": "sha512-V2hovdzFbOi77/WajaSMXk2OLm+xNIeQdMMuB7icj7bk6zi2F8GGAxigcnDFpJHbNyNcgyJDiP+8nOrY5cZGrA==" + }, + "safer-buffer": { + "version": "2.1.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/safer-buffer/-/safer-buffer-2.1.2.tgz", + "integrity": 
"sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==" + }, + "semver": { + "version": "5.7.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/semver/-/semver-5.7.1.tgz", + "integrity": "sha512-sauaDf/PZdVgrLTNYHRtpXa1iRiKcaebiKQ1BJdpQlWH2lCvexQdX55snPFyK7QzpudqbCI0qXFfOasHdyNDGQ==" + }, + "set-blocking": { + "version": "2.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/set-blocking/-/set-blocking-2.0.0.tgz", + "integrity": "sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==" + }, + "shebang-command": { + "version": "2.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "requires": { + "shebang-regex": "^3.0.0" + } + }, + "shebang-regex": { + "version": "3.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==" + }, + "signal-exit": { + "version": "3.0.7", + "resolved": "https://repo.huaweicloud.com/repository/npm/signal-exit/-/signal-exit-3.0.7.tgz", + "integrity": "sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==" + }, + "spdx-correct": { + "version": "3.1.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/spdx-correct/-/spdx-correct-3.1.1.tgz", + "integrity": "sha512-cOYcUWwhCuHCXi49RhFRCyJEK3iPj1Ziz9DpViV3tbZOwXD49QzIN3MpOLJNxh2qwq2lJJZaKMVw9qNi4jTC0w==", + "requires": { + "spdx-expression-parse": "^3.0.0", + "spdx-license-ids": "^3.0.0" + } + }, + "spdx-exceptions": { + "version": "2.3.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/spdx-exceptions/-/spdx-exceptions-2.3.0.tgz", + "integrity": 
"sha512-/tTrYOC7PPI1nUAgx34hUpqXuyJG+DTHJTnIULG4rDygi4xu/tfgmq1e1cIRwRzwZgo4NLySi+ricLkZkw4i5A==" + }, + "spdx-expression-parse": { + "version": "3.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/spdx-expression-parse/-/spdx-expression-parse-3.0.1.tgz", + "integrity": "sha512-cbqHunsQWnJNE6KhVSMsMeH5H/L9EpymbzqTQ3uLwNCLZ1Q481oWaofqH7nO6V07xlXwY6PhQdQ2IedWx/ZK4Q==", + "requires": { + "spdx-exceptions": "^2.1.0", + "spdx-license-ids": "^3.0.0" + } + }, + "spdx-license-ids": { + "version": "3.0.12", + "resolved": "https://repo.huaweicloud.com/repository/npm/spdx-license-ids/-/spdx-license-ids-3.0.12.tgz", + "integrity": "sha512-rr+VVSXtRhO4OHbXUiAF7xW3Bo9DuuF6C5jH+q/x15j2jniycgKbxU09Hr0WqlSLUs4i4ltHGXqTe7VHclYWyA==" + }, + "streamroller": { + "version": "3.1.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/streamroller/-/streamroller-3.1.2.tgz", + "integrity": "sha512-wZswqzbgGGsXYIrBYhOE0yP+nQ6XRk7xDcYwuQAGTYXdyAUmvgVFE0YU1g5pvQT0m7GBaQfYcSnlHbapuK0H0A==", + "requires": { + "date-format": "^4.0.13", + "debug": "^4.3.4", + "fs-extra": "^8.1.0" + }, + "dependencies": { + "fs-extra": { + "version": "8.1.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/fs-extra/-/fs-extra-8.1.0.tgz", + "integrity": "sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g==", + "requires": { + "graceful-fs": "^4.2.0", + "jsonfile": "^4.0.0", + "universalify": "^0.1.0" + } + }, + "jsonfile": { + "version": "4.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/jsonfile/-/jsonfile-4.0.0.tgz", + "integrity": "sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg==", + "requires": { + "graceful-fs": "^4.1.6" + } + }, + "universalify": { + "version": "0.1.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/universalify/-/universalify-0.1.2.tgz", + "integrity": 
"sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==" + } + } + }, + "string-width": { + "version": "1.0.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/string-width/-/string-width-1.0.2.tgz", + "integrity": "sha512-0XsVpQLnVCXHJfyEs8tC0zpTVIr5PKKsQtkT29IwupnPTjtPmQ3xT/4yCREF9hYkV/3M3kzcUTSAZT6a6h81tw==", + "requires": { + "code-point-at": "^1.0.0", + "is-fullwidth-code-point": "^1.0.0", + "strip-ansi": "^3.0.0" + } + }, + "strip-ansi": { + "version": "3.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/strip-ansi/-/strip-ansi-3.0.1.tgz", + "integrity": "sha512-VhumSSbBqDTP8p2ZLKj40UjBCV4+v8bUSEpUb4KjRgWk9pbqGF4REFj6KEagidb2f/M6AzC0EmFyDNGaw9OCzg==", + "requires": { + "ansi-regex": "^2.0.0" + } + }, + "strip-bom": { + "version": "2.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/strip-bom/-/strip-bom-2.0.0.tgz", + "integrity": "sha512-kwrX1y7czp1E69n2ajbG65mIo9dqvJ+8aBQXOGVxqwvNbsXdFM6Lq37dLAY3mknUwru8CfcCbfOLL/gMo+fi3g==", + "requires": { + "is-utf8": "^0.2.0" + } + }, + "strip-final-newline": { + "version": "2.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/strip-final-newline/-/strip-final-newline-2.0.0.tgz", + "integrity": "sha512-BrpvfNAE3dcvq7ll3xVumzjKjZQ5tI1sEUIKr3Uoks0XUl45St3FlatVqef9prk4jRDzhW6WZg+3bk93y6pLjA==" + }, + "strnum": { + "version": "1.0.5", + "resolved": "https://repo.huaweicloud.com/repository/npm/strnum/-/strnum-1.0.5.tgz", + "integrity": "sha512-J8bbNyKKXl5qYcR36TIO8W3mVGVHrmmxsd5PAItGkmyzwJvybiw2IVq5nqd0i4LSNSkB/sx9VHllbfFdr9k1JA==" + }, + "supports-preserve-symlinks-flag": { + "version": "1.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/supports-preserve-symlinks-flag/-/supports-preserve-symlinks-flag-1.0.0.tgz", + "integrity": "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w==" + }, + "to-regex-range": { + "version": "5.0.1", + "resolved": 
"https://repo.huaweicloud.com/repository/npm/to-regex-range/-/to-regex-range-5.0.1.tgz", + "integrity": "sha512-65P7iz6X5yEr1cwcgvQxbbIw7Uk3gOy5dIdtZ4rDveLqhrdJP+Li/Hx6tyK0NEb+2GCyneCMJiGqrADCSNk8sQ==", + "requires": { + "is-number": "^7.0.0" + } + }, + "unc-path-regex": { + "version": "0.1.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/unc-path-regex/-/unc-path-regex-0.1.2.tgz", + "integrity": "sha512-eXL4nmJT7oCpkZsHZUOJo8hcX3GbsiDOa0Qu9F646fi8dT3XuSVopVqAcEiVzSKKH7UoDti23wNX3qGFxcW5Qg==" + }, + "universalify": { + "version": "2.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/universalify/-/universalify-2.0.0.tgz", + "integrity": "sha512-hAZsKq7Yy11Zu1DE0OzWjw7nnLZmJZYTDZZyEFHZdUhV8FkH5MCfoU1XMaxXovpyW5nq5scPqq0ZDP9Zyl04oQ==" + }, + "uri-js": { + "version": "4.4.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "requires": { + "punycode": "^2.1.0" + } + }, + "v8flags": { + "version": "3.2.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/v8flags/-/v8flags-3.2.0.tgz", + "integrity": "sha512-mH8etigqMfiGWdeXpaaqGfs6BndypxusHHcv2qSHyZkGEznCd/qAXCWWRzeowtL54147cktFOC4P5y+kl8d8Jg==", + "requires": { + "homedir-polyfill": "^1.0.1" + } + }, + "validate-npm-package-license": { + "version": "3.0.4", + "resolved": "https://repo.huaweicloud.com/repository/npm/validate-npm-package-license/-/validate-npm-package-license-3.0.4.tgz", + "integrity": "sha512-DpKm2Ui/xN7/HQKCtpZxoRWBhZ9Z0kqtygG8XCgNQ8ZlDnxuQmWhj566j8fN4Cu3/JmbhsDo7fcAJq4s9h27Ew==", + "requires": { + "spdx-correct": "^3.0.0", + "spdx-expression-parse": "^3.0.0" + } + }, + "which": { + "version": "1.3.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/which/-/which-1.3.1.tgz", + "integrity": "sha512-HxJdYWq1MTIQbJ3nw0cqssHoTNU267KlrDuGZ1WYlxDStUtKUhOaJmh112/TZmHxxUfuJqPXSOm7tDyas0OSIQ==", + "requires": { 
+ "isexe": "^2.0.0" + } + }, + "which-module": { + "version": "1.0.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/which-module/-/which-module-1.0.0.tgz", + "integrity": "sha512-F6+WgncZi/mJDrammbTuHe1q0R5hOXv/mBaiNA2TCNT/LTHusX0V+CJnj9XT8ki5ln2UZyyddDgHfCzyrOH7MQ==" + }, + "wrap-ansi": { + "version": "2.1.0", + "resolved": "https://repo.huaweicloud.com/repository/npm/wrap-ansi/-/wrap-ansi-2.1.0.tgz", + "integrity": "sha512-vAaEaDM946gbNpH5pLVNR+vX2ht6n0Bt3GXwVB1AuAqZosOvHNF3P7wDnh8KLkSqgUh0uh77le7Owgoz+Z9XBw==", + "requires": { + "string-width": "^1.0.1", + "strip-ansi": "^3.0.1" + } + }, + "wrappy": { + "version": "1.0.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/wrappy/-/wrappy-1.0.2.tgz", + "integrity": "sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==" + }, + "y18n": { + "version": "3.2.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/y18n/-/y18n-3.2.2.tgz", + "integrity": "sha512-uGZHXkHnhF0XeeAPgnKfPv1bgKAYyVvmNL1xlKsPYZPaIHxGti2hHqvOCQv71XMsLxu1QjergkqogUnms5D3YQ==" + }, + "yargs": { + "version": "7.1.2", + "resolved": "https://repo.huaweicloud.com/repository/npm/yargs/-/yargs-7.1.2.tgz", + "integrity": "sha512-ZEjj/dQYQy0Zx0lgLMLR8QuaqTihnxirir7EwUHp1Axq4e3+k8jXU5K0VLbNvedv1f4EWtBonDIZm0NUr+jCcA==", + "requires": { + "camelcase": "^3.0.0", + "cliui": "^3.2.0", + "decamelize": "^1.1.1", + "get-caller-file": "^1.0.1", + "os-locale": "^1.4.0", + "read-pkg-up": "^1.0.1", + "require-directory": "^2.1.1", + "require-main-filename": "^1.0.1", + "set-blocking": "^2.0.0", + "string-width": "^1.0.2", + "which-module": "^1.0.0", + "y18n": "^3.2.1", + "yargs-parser": "^5.0.1" + } + }, + "yargs-parser": { + "version": "5.0.1", + "resolved": "https://repo.huaweicloud.com/repository/npm/yargs-parser/-/yargs-parser-5.0.1.tgz", + "integrity": "sha512-wpav5XYiddjXxirPoCTUPbqM0PXvJ9hiBMvuJgInvo4/lAOTZzUprArw17q2O1P2+GHhbBr18/iQwjL5Z9BqfA==", + "requires": { + "camelcase": "^3.0.0", 
+ "object.assign": "^4.1.0" + } + } + } +} diff --git a/FA/PATEO_CarVoiceAssistant/package.json b/FA/PATEO_CarVoiceAssistant/package.json new file mode 100644 index 0000000000000000000000000000000000000000..7b2076179b1a2f9b855e9a5babbc4f8f8578699d --- /dev/null +++ b/FA/PATEO_CarVoiceAssistant/package.json @@ -0,0 +1,18 @@ +{ + "license": "ISC", + "devDependencies": {}, + "name": "voiceassistant", + "ohos": { + "org": "huawei", + "directoryLevel": "project", + "buildTool": "hvigor" + }, + "description": "example description", + "repository": {}, + "version": "1.0.0", + "dependencies": { + "@ohos/hypium": "1.0.2", + "@ohos/hvigor-ohos-plugin": "1.2.2", + "@ohos/hvigor": "1.2.2" + } +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/data.zip b/dev/team_x/PATEO_CarVoiceAssistant/data.zip new file mode 100644 index 0000000000000000000000000000000000000000..d45de0585cb876e544b902adf2d34b71a7291fce --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/data.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5cdd4904896de51c90269b8aa92b2535f684c96626cd4148610e041ca8ad1dcc +size 71961545 diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/etc/init/BUILD.gn b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/etc/init/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..cb3076f5bbaa9df5e822de8e5d5db5fb9d477f33 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/etc/init/BUILD.gn @@ -0,0 +1,28 @@ +# Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + + +import("//build/ohos.gni") + +################################################################################ + +ohos_prebuilt_etc("voice_assistant_service.rc") { + if (use_musl) { + source = "voice_assistant_service.cfg" + } else { + source = "voice_assistant_service.rc" + } + relative_install_dir = "init" + part_name = "voiceassistant" + subsystem_name = "miscservices" +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/etc/init/voice_assistant_service.cfg b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/etc/init/voice_assistant_service.cfg new file mode 100644 index 0000000000000000000000000000000000000000..c33a20ebd065094064dcf3645ad68398b7c2efad --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/etc/init/voice_assistant_service.cfg @@ -0,0 +1,16 @@ +{ + "jobs" : [{ + "name" : "boot", + "cmds" : [ + "start voice_assistant_service" + ] + } + ], + "services" : [{ + "name" : "voice_assistant_service", + "path" : ["/system/bin/sa_main", "/system/profile/voice_assistant_service.xml"], + "uid" : "system", + "gid" : ["system", "shell"] + } + ] +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/etc/init/voice_assistant_service.rc b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/etc/init/voice_assistant_service.rc new file mode 100644 index 0000000000000000000000000000000000000000..76cdd950edb685e6385992bc6af961e040f7c752 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/etc/init/voice_assistant_service.rc @@ -0,0 +1,21 @@ +# Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +on boot + start voice_assistant_service +service voice_assistant_service /system/bin/sa_main /system/profile/voice_assistant_service.xml + class z_core + user system + group system shell + capabilities SYS_TIME + seclabel u:r:time_service:s0 diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/include/audio_record_manager.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/include/audio_record_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..f246fa1423ebd3c8b408ec63335ec35747bf74eb --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/include/audio_record_manager.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef CAR_VOICE_ASSISTANT_AUDIO_RECORD_MANAGER_H +#define CAR_VOICE_ASSISTANT_AUDIO_RECORD_MANAGER_H + +#include "audio_capturer.h" +#include "refbase.h" +#include +#include +#include +#include + +namespace OHOS { +namespace CarVoiceAssistant { + enum AudioRecordStatus { + AudioRecordStatusNone, + AudioRecordStatusStarting, + AudioRecordStatusRunning, + }; + + class IAudioRecordCallback : public virtual RefBase { + public: + virtual void AudioRecordStatusChanged(AudioRecordStatus status) = 0; + virtual void ReceiveAudioBuffer(void* data, size_t length) = 0; + }; + + class AudioRecordManager : public RefBase { + public: + AudioRecordManager(); + ~AudioRecordManager(); + + void SetCallback(wptr callback); + + bool StartRecord(); + void StopRecord(); + AudioRecordStatus GetStatus(); + + void OnStateChange(const AudioStandard::CapturerState state); + + private: + wptr callback_; + AudioRecordStatus status_; + bool recordingTag_; //内部标记tag,用于退出取buffer线程 + + std::unique_ptr audioCapturer_; + + void StartCapture(); + }; + +} +} + +#endif \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/include/i_wakeup_manager.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/include/i_wakeup_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..3d0ff4c7a22c95a8b1d4768fa527fee822217714 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/include/i_wakeup_manager.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef CAR_VOICE_ASSISTANT_I_WAKEUP_MANAGER_H +#define CAR_VOICE_ASSISTANT_I_WAKEUP_MANAGER_H + +#include "refbase.h" + +#include + +namespace OHOS { +namespace CarVoiceAssistant { + + enum WakeUpStatus { + WakeUpStatusNotInit, //未初始化 + WakeUpStatusInitilazed, //已初始化 + WakeUpStatusRecognizing, // 热词识别中 + }; + + class IWakeUpCallback : public virtual RefBase { + public: + virtual void WakeUpCallback(std::string text) = 0; //唤醒回调,返回识别的文字 + }; + + class IWakeUpManager : public virtual RefBase { + public: + IWakeUpManager() = default; + virtual ~IWakeUpManager() = default; + + virtual void Init() = 0; //唤醒引擎初始化 + virtual void Process(void* data, size_t length) = 0; //处理pcm流 + virtual void SetCallback(wptr callback) = 0; //设置回调 + virtual void SetNeedClearBeforeProcess() = 0; //处理pcm流前,先清除当前状态 + }; +} +} + +#endif \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/include/tts_manager.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/include/tts_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..d84b4bb4193aa95dd4a30feb0dadf215fe71cf30 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/include/tts_manager.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CAR_VOICE_ASSISTANT_TTS_MANAGER_H +#define CAR_VOICE_ASSISTANT_TTS_MANAGER_H + +#include "i_voice_cloud_manager.h" +#include "media_data_source.h" +#include "refbase.h" +#include +#include +#include +#include +#include + +namespace OHOS { +namespace CarVoiceAssistant { + + class MediaDataSource : public OHOS::Media::IMediaDataSource { + public: + MediaDataSource(void* data, size_t size); + ~MediaDataSource(); + + int32_t ReadAt(int64_t pos, uint32_t length, const std::shared_ptr& mem) override; + int32_t ReadAt(uint32_t length, const std::shared_ptr& mem) override; + int32_t GetSize(int64_t& size) override; + + private: + void* data_; + int64_t size_; + }; + + class ITTSManagerCallback : public virtual RefBase { + public: + virtual void AudioPlayerStatusChanged(bool isPlaying) = 0; + }; + + class TTSManager : public RefBase { + public: + TTSManager(); + ~TTSManager(); + void RequestPlay(std::string text); + void CancelAll(); + void ChangeSpeakerType(std::string speakerType); + bool GetAudioPlayerIsPlaying(); + + void OnPlayStateChanged(bool isPlaying); + + void SetCallback(wptr callback); + + IVoiceCloudManager* voiceCloudManager_; + + private: + std::string currentText_; + std::mutex mutex_; + std::string speakerType_; + + std::shared_ptr player_; + std::shared_ptr source_; + std::recursive_mutex player_recursive_mutex_; + + wptr callback_; + bool isAudioPlaying_; + + void RunRequest(std::string text); + + void PlayTTS(void* data, size_t length); + void CancelPlayTTS(); + }; +} +} + +#endif \ No newline at end of file diff --git 
a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/include/voice_cloud_loader.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/include/voice_cloud_loader.h new file mode 100644 index 0000000000000000000000000000000000000000..624c2e90ea39b3595627d1b0a225dc767e64cc1d --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/include/voice_cloud_loader.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CAR_VOICE_ASSISTANT_VOICE_CLOUD_LOADER_H +#define CAR_VOICE_ASSISTANT_VOICE_CLOUD_LOADER_H + +#include "i_voice_cloud_manager.h" + +namespace OHOS { +namespace CarVoiceAssistant { + + void LoadVoiceCloud(); + IVoiceCloudManager* CreateVoiceCloudManager(); + void DestoryVoiceCloudManager(IVoiceCloudManager* manager); +} +} + +#endif \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/include/wakeup_manager.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/include/wakeup_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..2db5e901b52f2da43b1d28c2d1b3df8c05a07012 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/include/wakeup_manager.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef CAR_VOICE_ASSISTANT_WAKEUP_MANAGER_H +#define CAR_VOICE_ASSISTANT_WAKEUP_MANAGER_H + +#include "i_wakeup_manager.h" +#include "refbase.h" +#include +#include +#include + +namespace OHOS { +namespace CarVoiceAssistant { + + class WakeUpManager : public IWakeUpManager { + public: + WakeUpManager(); + ~WakeUpManager(); + + void Init(); + void Process(void* data, size_t length); + void SetCallback(wptr callback); + void SetNeedClearBeforeProcess(); + + private: + WakeUpStatus status_; + int lastVadResult_; + + FILE* file_; + + std::recursive_mutex recursive_mutex_; + bool needClearBeforeProcess_; + + VadInst* pVad_; + ps_decoder_t* decoder_; + cmd_ln_t* config_; + + wptr callback_; + + void RunInit(); + void RunDecode(FILE* fp); + void ClearState(); + }; +} +} + +#endif \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/src/audio_record_manager.cpp b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/src/audio_record_manager.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e90339264a2664fe13503878894e1a6cda0f22cd --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/src/audio_record_manager.cpp @@ -0,0 +1,216 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. 
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "audio_record_manager.h"
+#include "voice_assistant_log.h"
+#include "common_utils.h"
+#include
+
+using namespace std;
+using namespace std::chrono;
+using namespace OHOS;
+using namespace OHOS::AudioStandard;
+
+namespace OHOS {
+namespace CarVoiceAssistant {
+
+    // Adapter that forwards capturer state changes to the owning AudioRecordManager.
+    class AudioCapturerCallbackTestImpl : public AudioCapturerCallback {
+    public:
+        AudioCapturerCallbackTestImpl(wptr manager)
+            : manager_(manager)
+        {
+        }
+
+        void OnStateChange(const CapturerState state) override
+        {
+            VOICE_ASSISTANT_LOGI("AudioCapturerCallbackTestImpl:: OnStateChange");
+            if (manager_ != nullptr) {
+                manager_->OnStateChange(state);
+            }
+        }
+
+        wptr manager_;
+    };
+
+    // The capturer is created lazily by StartRecord(); until then audioCapturer_ is null.
+    AudioRecordManager::AudioRecordManager()
+        : callback_(nullptr)
+        , status_(AudioRecordStatusNone)
+        , recordingTag_(false)
+        , audioCapturer_(nullptr)
+    {
+    }
+
+    // Stops (if running) and releases the capturer.
+    AudioRecordManager::~AudioRecordManager()
+    {
+        // StartRecord() may never have been called, or may have failed and reset the
+        // pointer, so the capturer must be checked before it is touched.
+        if (audioCapturer_ != nullptr) {
+            if (status_ == AudioRecordStatusRunning) {
+                audioCapturer_->Flush();
+                audioCapturer_->Stop();
+            }
+            audioCapturer_->SetCapturerCallback(nullptr);
+            audioCapturer_->Release();
+        }
+        this->callback_ = nullptr;
+    }
+
+    // Registers the listener that receives captured audio buffers.
+    void AudioRecordManager::SetCallback(wptr callback)
+    {
+        callback_ = callback;
+    }
+
+    // Creates the capturer on first use (16 kHz, mono, S16LE PCM from the microphone),
+    // starts the stream and spawns the detached capture thread.
+    // Returns false when the capturer cannot be created, configured or started.
+    bool AudioRecordManager::StartRecord()
+    {
+        VOICE_ASSISTANT_LOGI("StartRecord ");
+
+        if (audioCapturer_ == nullptr) {
+            VOICE_ASSISTANT_LOGI("AudioCapturer::Create");
+            AudioCapturerOptions capturerOptions;
+            capturerOptions.streamInfo.samplingRate = static_cast(16000);
+            capturerOptions.streamInfo.encoding = AudioEncodingType::ENCODING_PCM;
+            capturerOptions.streamInfo.format = AudioSampleFormat::SAMPLE_S16LE;
+            capturerOptions.streamInfo.channels = AudioChannel::MONO;
+            capturerOptions.capturerInfo.sourceType = SourceType::SOURCE_TYPE_MIC;
+            capturerOptions.capturerInfo.capturerFlags = 0;
+
+            audioCapturer_ = AudioCapturer::Create(capturerOptions);
+            if (audioCapturer_ == nullptr) {
+                VOICE_ASSISTANT_LOGI("AudioCapturer::Create failed");
+                return false;
+            }
+
+            int32_t ret = 0;
+            shared_ptr cb1 = make_shared(this);
+            ret = audioCapturer_->SetCapturerCallback(cb1);
+            if (ret) {
+                VOICE_ASSISTANT_LOGI("AudioCapturerTest: SetCapturerCallback failed %d", ret);
+                // Drop the half-configured capturer; otherwise the next StartRecord()
+                // would skip this block and run without a state callback.
+                audioCapturer_->Release();
+                audioCapturer_ = nullptr;
+                return false;
+            }
+        }
+
+        int32_t status = audioCapturer_->SetBufferDuration(20);
+        if (status) {
+            // Not fatal: the capturer keeps its default buffer duration.
+            VOICE_ASSISTANT_LOGI("Failed to set buffer duration");
+        }
+
+        VOICE_ASSISTANT_LOGI("Starting Stream");
+        if (!audioCapturer_->Start()) {
+            VOICE_ASSISTANT_LOGI("Start stream failed");
+            audioCapturer_->Release();
+            audioCapturer_ = nullptr;
+            return false;
+        }
+
+        recordingTag_ = true;
+        status_ = AudioRecordStatusStarting;
+
+        // NOTE(review): the detached thread keeps using `this`; nothing here waits for
+        // it before the manager is destroyed - confirm the owner outlives the capture.
+        std::thread serviceThread(&AudioRecordManager::StartCapture, this);
+        serviceThread.detach();
+        return true;
+    }
+
+    // Capture-thread body: repeatedly fills one buffer from the capturer and hands it to
+    // the callback until recording is stopped, a read fails or the capturer goes away.
+    void AudioRecordManager::StartCapture()
+    {
+        VOICE_ASSISTANT_LOGI("Capturing started");
+        size_t bufferLen = 0;
+        if (audioCapturer_->GetBufferSize(bufferLen) < 0) {
+            VOICE_ASSISTANT_LOGI(" GetMinimumBufferSize failed");
+            return;
+        }
+
+        auto buffer = std::make_unique(bufferLen);
+        if (buffer == nullptr) {
+            VOICE_ASSISTANT_LOGI("AudioCapturerTest: Failed to allocate buffer");
+            return;
+        }
+
+        VOICE_ASSISTANT_LOGI("AudioPerf Capturer First Frame Read, BUFFER_LEN = %zu", bufferLen);
+        while (true) {
+            int32_t bytesRead = 0;
+            bool capturerGone = false;
+            while (static_cast<size_t>(bytesRead) < bufferLen) {
+                if (audioCapturer_ == nullptr) {
+                    capturerGone = true;
+                    break;
+                }
+                int32_t len = audioCapturer_->Read(*(buffer.get() + bytesRead), bufferLen - bytesRead, true);
+                if (len >= 0) {
+                    bytesRead += len;
+                } else {
+                    // Negative values are error codes; surface them through bytesRead.
+                    bytesRead = len;
+                    break;
+                }
+            }
+
+            // Without a capturer nothing more can be read; leaving avoids a busy spin
+            // while recordingTag_ is still set.
+            if (capturerGone || !recordingTag_) {
+                break;
+            }
+
+            if (bytesRead < 0) {
+                VOICE_ASSISTANT_LOGI("Bytes read failed. error code %d", bytesRead);
+                break;
+            } else if (bytesRead == 0) {
+                continue;
+            }
+
+            if (callback_ != nullptr) {
+                callback_->ReceiveAudioBuffer(buffer.get(), bytesRead);
+            }
+        }
+
+        VOICE_ASSISTANT_LOGI("Quit loop");
+    }
+
+    // Stops capturing. The capturer is kept so a later StartRecord() can reuse it.
+    void AudioRecordManager::StopRecord()
+    {
+        VOICE_ASSISTANT_LOGI("StopRecord");
+        if (audioCapturer_ == nullptr) {
+            return;
+        }
+
+        // Cleared first so the capture thread leaves its loop once the read returns.
+        recordingTag_ = false;
+
+        if (!audioCapturer_->Flush()) {
+            VOICE_ASSISTANT_LOGI("StopRecord: flush failed");
+        }
+
+        if (!audioCapturer_->Stop()) {
+            VOICE_ASSISTANT_LOGI("StopRecord: Stop failed");
+        }
+
+        status_ = AudioRecordStatusNone;
+    }
+
+    // Returns the current recording status.
+    AudioRecordStatus AudioRecordManager::GetStatus()
+    {
+        return status_;
+    }
+
+    // Mirrors capturer state changes into status_ / recordingTag_.
+    void AudioRecordManager::OnStateChange(const CapturerState state)
+    {
+        switch (state) {
+            case CAPTURER_PREPARED:
+                VOICE_ASSISTANT_LOGI("AudioRecordManager: OnStateChange CAPTURER_PREPARED");
+                break;
+            case CAPTURER_RUNNING:
+                VOICE_ASSISTANT_LOGI("AudioRecordManager: OnStateChange CAPTURER_RUNNING");
+                status_ = AudioRecordStatusRunning;
+                break;
+            case CAPTURER_STOPPED:
+                VOICE_ASSISTANT_LOGI("AudioRecordManager: OnStateChange CAPTURER_STOPPED");
+                status_ = AudioRecordStatusNone;
+                recordingTag_ = false;
+                break;
+            case CAPTURER_RELEASED:
+                VOICE_ASSISTANT_LOGI("AudioRecordManager: OnStateChange CAPTURER_RELEASED");
+                break;
+            default:
+                VOICE_ASSISTANT_LOGI("AudioRecordManager: OnStateChange NOT A VALID state");
+                break;
+        }
+    }
+}
+}
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/src/tts_manager.cpp b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/src/tts_manager.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7cca51e29d02b4042e20afae625003223a71d391
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/src/tts_manager.cpp
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co.,
Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#include "tts_manager.h"
+#include "common_utils.h"
+#include "voice_assistant_log.h"
+#include "voice_cloud_loader.h"
+#include
+#include
+#include
+#include
+
+namespace OHOS {
+namespace CarVoiceAssistant {
+
+    // Player listener that translates media-player state changes into the manager's
+    // "is TTS audio playing" flag.
+    class AudioPlayerCallback : public OHOS::Media::PlayerCallback {
+    public:
+        void OnError(OHOS::Media::PlayerErrorType errorType, int32_t errorCode) override;
+        void OnInfo(OHOS::Media::PlayerOnInfoType type, int32_t extra, const OHOS::Media::Format& infoBody) override;
+
+        void PrintState(OHOS::Media::PlayerStates state);
+
+        wptr manager_;
+    };
+
+    // Player errors are only logged.
+    void AudioPlayerCallback::OnError(OHOS::Media::PlayerErrorType errorType, int32_t errorCode)
+    {
+        VOICE_ASSISTANT_LOGI("AudioPlayerCallback::OnError:%{public}d", errorCode);
+    }
+
+    // On a state-change notification: STARTED means playing; PAUSED, STOPPED,
+    // PLAYBACK_COMPLETE, IDLE and STATE_ERROR mean not playing. Other states are ignored.
+    void AudioPlayerCallback::OnInfo(OHOS::Media::PlayerOnInfoType type, int32_t extra, const OHOS::Media::Format& infoBody)
+    {
+        if (type == OHOS::Media::INFO_TYPE_STATE_CHANGE) {
+            OHOS::Media::PlayerStates state = static_cast(extra);
+            PrintState(state);
+            if (state == OHOS::Media::PlayerStates::PLAYER_STARTED) {
+                if (manager_ != nullptr) {
+                    manager_->OnPlayStateChanged(true);
+                }
+            } else if (state == OHOS::Media::PlayerStates::PLAYER_PAUSED
+                || state == OHOS::Media::PlayerStates::PLAYER_STOPPED
+                || state == OHOS::Media::PlayerStates::PLAYER_PLAYBACK_COMPLETE
+                || state == OHOS::Media::PlayerStates::PLAYER_IDLE
+                || state == OHOS::Media::PlayerStates::PLAYER_STATE_ERROR) {
+                if (manager_ != nullptr) {
+                    manager_->OnPlayStateChanged(false);
+                }
+            }
+        }
+    }
+
+    // Logs a readable name for `state`.
+    void AudioPlayerCallback::PrintState(OHOS::Media::PlayerStates state)
+    {
+        static const std::map STATE_MAP = {
+            { OHOS::Media::PlayerStates::PLAYER_STATE_ERROR, "Error" },
+            { OHOS::Media::PlayerStates::PLAYER_IDLE, "Idle" },
+            { OHOS::Media::PlayerStates::PLAYER_INITIALIZED, "Initialized" },
+            { OHOS::Media::PlayerStates::PLAYER_PREPARED, "Prepared" },
+            { OHOS::Media::PlayerStates::PLAYER_STARTED, "Started" },
+            { OHOS::Media::PlayerStates::PLAYER_PAUSED, "Paused" },
+            { OHOS::Media::PlayerStates::PLAYER_STOPPED, "Stopped" },
+            { OHOS::Media::PlayerStates::PLAYER_PLAYBACK_COMPLETE, "Complete" },
+        };
+
+        // `state` comes from an integer cast in OnInfo, so it may be a value the table
+        // does not list; map::at would throw out of the player callback in that case.
+        auto entry = STATE_MAP.find(state);
+        const char* stateName = (entry != STATE_MAP.end()) ? entry->second.c_str() : "Unknown";
+        VOICE_ASSISTANT_LOGI("AudioPlayerCallback::PrintState:%{public}s", stateName);
+    }
+
+    // Takes ownership of `data`; it is released with free() in the destructor.
+    MediaDataSource::MediaDataSource(void* data, size_t size)
+        : data_(data)
+        , size_(size)
+    {
+    }
+
+    MediaDataSource::~MediaDataSource()
+    {
+        if (data_) {
+            free(data_);
+        }
+        data_ = nullptr;
+    }
+
+    // Copies up to `length` bytes starting at `pos` into `mem`.
+    // Returns the number of bytes copied, SOURCE_ERROR_EOF when `pos` is at or past the
+    // end, or SOURCE_ERROR_IO for an unusable destination or a negative offset.
+    int32_t MediaDataSource::ReadAt(int64_t pos, uint32_t length, const std::shared_ptr& mem)
+    {
+        VOICE_ASSISTANT_LOGI("ReadAt pos %{public}lld length %{public}u", pos, length);
+
+        if (mem == nullptr) {
+            VOICE_ASSISTANT_LOGE("mem is nullptr");
+            return OHOS::Media::SOURCE_ERROR_IO;
+        }
+
+        if (mem->GetSize() <= 0) {
+            VOICE_ASSISTANT_LOGE("mem size should large than 0");
+            return OHOS::Media::SOURCE_ERROR_IO;
+        }
+
+        // A negative offset would make the memcpy below read in front of data_.
+        if (pos < 0) {
+            VOICE_ASSISTANT_LOGE("pos is negative");
+            return OHOS::Media::SOURCE_ERROR_IO;
+        }
+
+        if (pos >= size_) {
+            VOICE_ASSISTANT_LOGE("is eof");
+            return OHOS::Media::SOURCE_ERROR_EOF;
+        }
+
+        // Clamp first to what is left in the source, then to what fits in `mem`.
+        length = std::min(static_cast(size_ - pos), length);
+
+        uint32_t realLen = std::min(length, static_cast(mem->GetSize()));
+
+        if (mem->GetBase() == nullptr) {
+            VOICE_ASSISTANT_LOGE("mem->GetBase() is nullptr");
+            return OHOS::Media::SOURCE_ERROR_IO;
+        }
+
+        memcpy(mem->GetBase(), (char*)data_ + pos, static_cast(realLen));
+
+        VOICE_ASSISTANT_LOGI("length %{public}u realLen %{public}u", length, realLen);
+        return realLen;
+    }
+
+    // Sequential read is not supported; always reports 0 bytes.
+    int32_t MediaDataSource::ReadAt(uint32_t length, const std::shared_ptr& mem)
+    {
+        (void)length;
+        (void)mem;
+        return 0;
+    }
+
+    // Reports the total size of the in-memory source.
+    int32_t MediaDataSource::GetSize(int64_t& size)
+    {
+        size = size_;
+        return OHOS::Media::MSERR_OK;
+    }
+
+    TTSManager::TTSManager()
+        : voiceCloudManager_(nullptr)
+        , currentText_("")
+        , speakerType_("common")
+        , player_(nullptr)
+        , callback_(nullptr)
+        , isAudioPlaying_(false)
+    {
+        curl_global_init(CURL_GLOBAL_ALL);
+    }
+
+    TTSManager::~TTSManager()
+    {
+        curl_global_cleanup();
+        if (player_ != nullptr) {
+            if (player_->IsPlaying()) {
+                player_->Stop();
+            }
+            player_->Release();
+            player_ = nullptr;
+        }
+        voiceCloudManager_ = nullptr;
+    }
+
+    // Remembers `text` as the latest request and fetches its audio on a detached thread.
+    void TTSManager::RequestPlay(std::string text)
+    {
+        mutex_.lock();
+        currentText_ = text;
+        mutex_.unlock();
+
+        std::thread requestThread(&TTSManager::RunRequest, this, text);
+        requestThread.detach();
+    }
+
+    // Worker body: requests TTS audio for `text` and plays it only if `text` is still
+    // the most recent request when the audio arrives.
+    void TTSManager::RunRequest(std::string text)
+    {
+        if (voiceCloudManager_ == nullptr) {
+            VOICE_ASSISTANT_LOGI("voiceCloudManager_ is nullptr");
+            return;
+        }
+
+        MemoryStruct memoryStruct = voiceCloudManager_->RequestTTS(text, speakerType_);
+
+        if (memoryStruct.size == 0) {
+            VOICE_ASSISTANT_LOGI("RunRequest:TTS data is zero");
+            return;
+        }
+
+        mutex_.lock();
+        bool isNeedPlay = currentText_.compare(text) == 0;
+        mutex_.unlock();
+
+        if (isNeedPlay) {
+            // Ownership of the buffer moves to the MediaDataSource created by PlayTTS.
+            PlayTTS(memoryStruct.memory, memoryStruct.size);
+        } else {
+            // Superseded or cancelled: nothing else will free the buffer, so release it
+            // the same way MediaDataSource would.
+            free(memoryStruct.memory);
+        }
+    }
+
+    // Forgets the pending request (so in-flight downloads are dropped) and stops playback.
+    void TTSManager::CancelAll()
+    {
+        std::lock_guard lock(mutex_);
+        currentText_ = "";
+        CancelPlayTTS();
+    }
+
+    void TTSManager::ChangeSpeakerType(std::string speakerType)
+    {
+        speakerType_ = speakerType;
+    }
+
+    // Plays `length` bytes of TTS audio from `data`; takes ownership of `data`.
+    void TTSManager::PlayTTS(void* data, size_t length)
+    {
+        CancelPlayTTS();
+        VOICE_ASSISTANT_LOGI("TTS Play tts length:%{public}zu", length);
+
+        player_recursive_mutex_.lock();
+        // Wrap the buffer first so it is freed on every path, including the early
+        // return below.
+        source_ = std::make_shared(data, length);
+        if (player_ == nullptr) {
+            player_ = OHOS::Media::PlayerFactory::CreatePlayer();
+            if (player_ == nullptr) {
+                VOICE_ASSISTANT_LOGE("TTS CreatePlayer failed");
+                source_ = nullptr;
+                player_recursive_mutex_.unlock();
+                return;
+            }
+            std::shared_ptr cb = std::make_shared();
+            cb->manager_ = this;
+            player_->SetPlayerCallback(cb);
+        }
+        player_->SetSource(source_);
+        player_->Prepare();
+        player_->Play();
+        player_recursive_mutex_.unlock();
+        OnPlayStateChanged(true);
+    }
+
+    // Stops and releases the current player, if any, and reports "not playing".
+    void TTSManager::CancelPlayTTS()
+    {
+        VOICE_ASSISTANT_LOGI("TTS Cancel Play ");
+        player_recursive_mutex_.lock();
+        if (player_ != nullptr) {
+            if (player_->IsPlaying()) {
+                player_->Stop();
+                player_->Reset();
+            }
+            player_->Release();
+            player_ = nullptr;
+        }
+        player_recursive_mutex_.unlock();
+
+        OnPlayStateChanged(false);
+    }
+
+    bool TTSManager::GetAudioPlayerIsPlaying()
+    {
+        return isAudioPlaying_;
+    }
+
+    // Records the new playing state and notifies the listener, but only on a change.
+    void TTSManager::OnPlayStateChanged(bool isPlaying)
+    {
+        if (isAudioPlaying_ == isPlaying) {
+            return;
+        }
+        isAudioPlaying_ = isPlaying;
+        if (callback_ != nullptr) {
+            // Forward the actual state; a constant `true` hid every stop/complete event.
+            callback_->AudioPlayerStatusChanged(isPlaying);
+        }
+    }
+
+    void TTSManager::SetCallback(wptr callback)
+    {
+        callback_ = callback;
+    }
+
+}
+}
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/src/voice_cloud_loader.cpp b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/src/voice_cloud_loader.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e837c9fe0300845d745988f6c13fb44050a45242
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/src/voice_cloud_loader.cpp
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "voice_cloud_loader.h"
+#include "voice_assistant_log.h"
+#include
+
+namespace OHOS {
+namespace CarVoiceAssistant {
+
+    // Signatures of the two entry points exported by the voice-cloud plug-in.
+    using CreateVoiceCloudManagerFuncPointer = IVoiceCloudManager*();
+    using DestoryVoiceCloudManagerFuncPointer = void(IVoiceCloudManager* manager);
+
+    // Library handle and resolved entry points, cached for the life of the process.
+    void* g_voicecloud = nullptr;
+    CreateVoiceCloudManagerFuncPointer* g_createVoiceCloudManagerFunc = nullptr;
+    DestoryVoiceCloudManagerFuncPointer* g_destoryVoiceCloudManagerFunc = nullptr;
+
+    // Opens libvoicecloud.z.so, trying /system/lib first and /system/lib/module second.
+    // Does nothing once a handle has been cached.
+    void LoadVoiceCloud()
+    {
+        if (g_voicecloud != nullptr) {
+            return;
+        }
+
+        void* handle = dlopen("/system/lib/libvoicecloud.z.so", RTLD_LAZY);
+        if (handle == nullptr) {
+            VOICE_ASSISTANT_LOGI("dlopen /system/lib/libvoicecloud.z.so failed");
+            handle = dlopen("/system/lib/module/libvoicecloud.z.so", RTLD_LAZY);
+        }
+        if (handle == nullptr) {
+            VOICE_ASSISTANT_LOGI("dlopen /system/lib/module/libvoicecloud.z.so failed");
+            return;
+        }
+
+        VOICE_ASSISTANT_LOGI("dlopen libvoicecloud.z.so success");
+        g_voicecloud = handle;
+    }
+
+    // Creates a manager through the plug-in, loading the library and resolving the
+    // symbol on first use. Returns nullptr when either step fails.
+    IVoiceCloudManager* CreateVoiceCloudManager()
+    {
+        if (g_voicecloud == nullptr) {
+            LoadVoiceCloud();
+        }
+        if (g_voicecloud == nullptr) {
+            return nullptr;
+        }
+
+        if (g_createVoiceCloudManagerFunc == nullptr) {
+            auto* resolved = reinterpret_cast<CreateVoiceCloudManagerFuncPointer*>(
+                dlsym(g_voicecloud, "CreateVoiceCloudManager"));
+            if (resolved == nullptr) {
+                VOICE_ASSISTANT_LOGI("CreateVoiceCloudManager dlsym CreateVoiceCloudManager failed");
+                return nullptr;
+            }
+            g_createVoiceCloudManagerFunc = resolved;
+        }
+
+        return g_createVoiceCloudManagerFunc();
+    }
+
+    // Passes `manager` to the plug-in's destroy entry point, loading the library and
+    // resolving the symbol on first use. Silently returns when either step fails.
+    void DestoryVoiceCloudManager(IVoiceCloudManager* manager)
+    {
+        if (g_voicecloud == nullptr) {
+            LoadVoiceCloud();
+        }
+        if (g_voicecloud == nullptr) {
+            return;
+        }
+
+        if (g_destoryVoiceCloudManagerFunc == nullptr) {
+            auto* resolved = reinterpret_cast<DestoryVoiceCloudManagerFuncPointer*>(
+                dlsym(g_voicecloud, "DestoryVoiceCloudManager"));
+            if (resolved == nullptr) {
+                VOICE_ASSISTANT_LOGI("DestoryVoiceCloudManager dlsym DestoryVoiceCloudManager failed");
+                return;
+            }
+            g_destoryVoiceCloudManagerFunc = resolved;
+        }
+
+        g_destoryVoiceCloudManagerFunc(manager);
+    }
+}
+}
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/src/wakeup_manager.cpp b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/src/wakeup_manager.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..af0359a53a133fcd957207747220c6d33d56341c
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/manager/src/wakeup_manager.cpp
@@ -0,0 +1,224 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "wakeup_manager.h"
+#include "voice_assistant_log.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+#define RATE 16000
+#define DURATION_MS 30 // 10, 20 ,30ms
+#define CHUNK_SIZE (RATE * DURATION_MS / 1000)
+#define BUFFER_LENGTH CHUNK_SIZE
+// Parenthesised so the macros stay correct inside larger expressions.
+// NOTE(review): the durations below hold if the unit is samples, but Process() compares
+// these against ftell(), i.e. bytes; with 16-bit samples that halves them - confirm.
+#define MIN_DECODE_LENGTH (BUFFER_LENGTH * 40) // minimum length to recognise: 1200ms
+#define MAX_DECODE_LENGTH (BUFFER_LENGTH * 100) // maximum length to recognise: 3000ms
+
+#define BUFFER_FILE_DIR "/data/asr"
+#define BUFFER_FILE_PATH BUFFER_FILE_DIR "/wakeup_tmp.wav"
+
+namespace OHOS {
+namespace CarVoiceAssistant {
+    WakeUpManager::WakeUpManager()
+    {
+        status_ = WakeUpStatusNotInit;
+        lastVadResult_ = 0;
+        file_ = nullptr;
+        needClearBeforeProcess_ = false;
+
+        pVad_ = nullptr;
+        decoder_ = nullptr;
+        config_ = nullptr;
+    }
+
+    // Releases the VAD, the PocketSphinx configuration and decoder, and the spool file.
+    WakeUpManager::~WakeUpManager()
+    {
+        if (pVad_ != nullptr) {
+            WebRtcVad_Free(pVad_);
+        }
+        if (config_ != nullptr) {
+            cmd_ln_free_r(config_);
+        }
+        if (decoder_ != nullptr) {
+            ps_free(decoder_);
+        }
+
+        if (file_ != nullptr) {
+            fclose(file_);
+        }
+    }
+
+    // Runs the (slow) model loading on a detached thread; Process() ignores audio until
+    // RunInit has switched status_ to WakeUpStatusInitilazed.
+    void WakeUpManager::Init()
+    {
+        std::thread thread(&WakeUpManager::RunInit, this);
+        thread.detach();
+    }
+
+    // Creates the VAD and the PocketSphinx decoder. status_ is advanced only when every
+    // step succeeded, so a failed start-up leaves the manager inert instead of letting
+    // Process()/RunDecode() run against a missing decoder.
+    void WakeUpManager::RunInit()
+    {
+        pVad_ = WebRtcVad_Create();
+        if (pVad_ == NULL) {
+            VOICE_ASSISTANT_LOGE("WebRtcVad_Create failed");
+            return;
+        }
+
+        if (WebRtcVad_Init(pVad_)) {
+            VOICE_ASSISTANT_LOGE("WebRtcVad_Init failed");
+            return;
+        }
+
+        if (WebRtcVad_set_mode(pVad_, 3)) { // 0-3: the higher, the coarser; more buffers count as silence
+            VOICE_ASSISTANT_LOGE("WebRtcVad_set_mode failed");
+            return;
+        }
+        VOICE_ASSISTANT_LOGI("--------init vad success--------");
+
+        VOICE_ASSISTANT_LOGI("--------init pocketsphinx--------");
+        config_ = cmd_ln_init(NULL, ps_args(), TRUE,
+            "-hmm", "/system/etc/pocketsphinx/zh/zh",
+            "-lm", "/system/etc/pocketsphinx/zh/zh_cn.lm.bin",
+            "-dict", "/system/etc/pocketsphinx/zh/zh_cn.dic",
+            NULL);
+        if (config_ == NULL) {
+            VOICE_ASSISTANT_LOGE("Command line parse failed");
+            return;
+        }
+
+        // ps_init(NULL) would hand back an allocated but uninitialised decoder, which is
+        // why the config failure above must not fall through to this call.
+        decoder_ = ps_init(config_);
+        if (decoder_ == NULL) {
+            VOICE_ASSISTANT_LOGE("PocketSphinx decoder init failed");
+            return;
+        }
+
+        VOICE_ASSISTANT_LOGI("--------init pocketsphinx success--------");
+
+        std::lock_guard<std::recursive_mutex> lock(recursive_mutex_);
+        status_ = WakeUpStatusInitilazed;
+    }
+
+    // Consumes one captured buffer.
+    //   silence -> voice : (re)start the spool file and store the buffer
+    //   voice   -> voice : append the buffer
+    //   voice   -> silence : if the spooled length is within the MIN/MAX window, decode
+    //                        it on a detached thread; otherwise discard the utterance
+    // Buffers are ignored while the manager is not initialised or a decode is running.
+    void WakeUpManager::Process(void* data, size_t length)
+    {
+        // RAII lock: released on every return path below.
+        std::lock_guard<std::recursive_mutex> lock(recursive_mutex_);
+
+        if (status_ != WakeUpStatusInitilazed) {
+            return;
+        }
+
+        // NOTE(review): vadLength is handed to the VAD as a frame length while `length`
+        // comes from the capturer; if one is samples and the other bytes this check is
+        // too weak for 16-bit audio - confirm the units against the caller.
+        size_t vadLength = BUFFER_LENGTH;
+        if (length < vadLength) {
+            VOICE_ASSISTANT_LOGI("WakeUpManager::Process: length is too short : length:%{public}zu, vadLength:%{public}zu", length, vadLength);
+            ClearState();
+            return;
+        }
+
+        if (needClearBeforeProcess_) {
+            ClearState();
+        }
+
+        int nRet = WebRtcVad_Process(pVad_, 16000, static_cast(data), vadLength);
+        if (nRet < 0) {
+            // An error result must not be stored as lastVadResult_: it matches no
+            // transition, so a following voiced buffer would skip the file-open branch
+            // and a later append could reach fwrite with file_ still null.
+            VOICE_ASSISTANT_LOGE("WakeUpManager::Process: WebRtcVad_Process failed");
+            ClearState();
+            return;
+        }
+
+        if (lastVadResult_ == 0 && nRet == 1) { // voice starts
+            VOICE_ASSISTANT_LOGI("WakeUpManager::Process: file reset");
+
+            if (file_ == nullptr) {
+                VOICE_ASSISTANT_LOGI("WakeUpManager::Process: file fopen");
+                if (access(BUFFER_FILE_DIR, F_OK) != 0) {
+                    VOICE_ASSISTANT_LOGI("WakeUpManager::Process: create dir:%{public}s", BUFFER_FILE_DIR);
+                    mkdir(BUFFER_FILE_DIR, S_IRWXU | S_IRWXG | S_IRWXO);
+                }
+                file_ = fopen(BUFFER_FILE_PATH, "w+");
+                if (file_ == nullptr) {
+                    VOICE_ASSISTANT_LOGI("WakeUpManager::Process: fopen failed");
+                    ClearState();
+                    return;
+                }
+            }
+
+            // The spool file is reused across utterances: empty it and rewind.
+            ftruncate(fileno(file_), 0);
+            fseek(file_, 0, SEEK_SET);
+
+            fwrite(data, length, 1, file_);
+        } else if (lastVadResult_ == 1 && nRet == 1) {
+            fwrite(data, length, 1, file_);
+        } else if (lastVadResult_ == 1 && nRet == 0) {
+            fseek(file_, 0, SEEK_END);
+            long fileLength = ftell(file_);
+
+            if (fileLength < MIN_DECODE_LENGTH || fileLength > MAX_DECODE_LENGTH) {
+                VOICE_ASSISTANT_LOGI("WakeUpManager::Process: judge process too short or too long:%{public}ld", fileLength);
+                ClearState();
+                return;
+            } else {
+                VOICE_ASSISTANT_LOGI("WakeUpManager::Process: judge process: %{public}ld", fileLength);
+                // Blocks further Process() calls until RunDecode resets the state.
+                status_ = WakeUpStatusRecognizing;
+                std::thread thread(&WakeUpManager::RunDecode, this, file_);
+                thread.detach();
+            }
+        }
+        lastVadResult_ = nRet;
+    }
+
+    // Decode-thread body: runs PocketSphinx over the spooled audio and reports the
+    // hypothesis to the callback when its score is above -3300. Always resets the state
+    // so Process() accepts audio again.
+    void WakeUpManager::RunDecode(FILE* fp)
+    {
+        VOICE_ASSISTANT_LOGI("WakeUpManager:RunDecode start");
+
+        fseek(fp, 0, SEEK_SET);
+        ps_decode_raw(decoder_, fp, -1);
+
+        int32 score;
+        const char* outstr = ps_get_hyp(decoder_, &score);
+        if (outstr != NULL) {
+            VOICE_ASSISTANT_LOGI("WakeUpManager Recognized: %{public}s, score = %{public}d", outstr, score);
+            if (callback_ != nullptr && score > -3300) {
+                callback_->WakeUpCallback(outstr);
+            }
+        } else {
+            VOICE_ASSISTANT_LOGI("WakeUpManager ps_get_hyp failed");
+        }
+        ClearState();
+    }
+
+    // Resets the VAD edge detector and, if a decode was in flight, re-arms Process().
+    // The mutex is recursive, so this is safe to call with the lock already held.
+    void WakeUpManager::ClearState()
+    {
+        std::lock_guard<std::recursive_mutex> lock(recursive_mutex_);
+        VOICE_ASSISTANT_LOGI("WakeUpManager ClearState");
+
+        lastVadResult_ = 0;
+        if (status_ == WakeUpStatusRecognizing) {
+            status_ = WakeUpStatusInitilazed;
+        }
+        needClearBeforeProcess_ = false;
+    }
+
+    // Registers the listener that receives recognised wake-up text.
+    void WakeUpManager::SetCallback(wptr callback)
+    {
+        callback_ = callback;
+    }
+
+    // Asks the next Process() call to reset the VAD state before analysing its buffer.
+    void WakeUpManager::SetNeedClearBeforeProcess()
+    {
+        needClearBeforeProcess_ = true;
+    }
+}
+}
\ No newline at end of file
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/BUILD.gn b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/BUILD.gn
new file mode 100644
index 0000000000000000000000000000000000000000..0fafe45c96649751682f3dc1d988ddb12764c35d
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/BUILD.gn
@@ -0,0 +1,120 @@
+# Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import("//build/ohos.gni") + +config("pocketsphinx_config") { + visibility = [ ":*" ] + include_dirs = [ + "include/android", + "include", + "include/pocketsphinx", + "src" + ] + + cflags = ["-DHAVE_CONFIG_H", "-Wno-unused-variable", "-Wno-unused-function", "-Wno-implicit-function-declaration"] + cflags_cc = ["-DHAVE_CONFIG_H"] +} + +ohos_shared_library("pocketsphinx") { + sources = [ + "src/fe/fe_interface.c", + "src/fe/fe_noise.c", + "src/fe/fe_sigproc.c", + "src/fe/fe_warp.c", + "src/fe/fe_warp_affine.c", + "src/fe/fe_warp_inverse_linear.c", + "src/fe/fe_warp_piecewise_linear.c", + "src/fe/fixlog.c", + "src/feat/agc.c", + "src/feat/cmn.c", + "src/feat/cmn_live.c", + "src/feat/feat.c", + "src/feat/lda.c", + "src/lm/fsg_model.c", + "src/lm/jsgf.c", + "src/lm/jsgf_parser.c", +#jsgf_parser.y +#_jsgf_scanner.l + "src/lm/jsgf_scanner.c", + "src/lm/lm_trie.c", + "src/lm/lm_trie_quant.c", + "src/lm/ngram_model.c", + "src/lm/ngram_model_set.c", + "src/lm/ngram_model_trie.c", + "src/lm/ngrams_raw.c", + "src/util/bio.c", + "src/util/bitarr.c", + "src/util/bitvec.c", + "src/util/blas_lite.c", + "src/util/case.c", + "src/util/ckd_alloc.c", + "src/util/cmd_ln.c", + "src/util/dtoa.c", + "src/util/err.c", + "src/util/errno.c", + "src/util/f2c_lite.c", + "src/util/filename.c", + "src/util/genrand.c", + "src/util/glist.c", + "src/util/hash_table.c", + "src/util/heap.c", + "src/util/listelem_alloc.c", + "src/util/logmath.c", + 
"src/util/matrix.c", + "src/util/mmio.c", + "src/util/pio.c", + "src/util/priority_queue.c", + "src/util/profile.c", + "src/util/sbthread.c", + "src/util/slamch.c", + "src/util/slapack_lite.c", + "src/util/strfuncs.c", + "src/acmod.c", + "src/allphone_search.c", + "src/bin_mdef.c", + "src/blkarray_list.c", + "src/dict.c", + "src/dict2pid.c", + "src/fsg_history.c", + "src/fsg_lextree.c", + "src/fsg_search.c", + "src/hmm.c", + "src/kws_detections.c", + "src/kws_search.c", + "src/mdef.c", + "src/ms_gauden.c", + "src/ms_mgau.c", + "src/ms_senone.c", + "src/ngram_search.c", + "src/ngram_search_fwdflat.c", + "src/ngram_search_fwdtree.c", + "src/phone_loop_search.c", + "src/pocketsphinx.c", + "src/ps_alignment.c", + "src/ps_lattice.c", + "src/ps_mllr.c", + "src/ptm_mgau.c", + "src/s2_semi_mgau.c", + "src/state_align_search.c", + "src/tmat.c", + "src/vector.c" + ] + + configs = [ ":pocketsphinx_config" ] + + deps = [] + + subsystem_name = "miscservices" + part_name = "voiceassistant" +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/CMakeLists.txt b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..7c19c85a07bf188f2707efa318654419d6925b3d --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/CMakeLists.txt @@ -0,0 +1,5 @@ +install(FILES pocketsphinx.h TYPE INCLUDE) +install(DIRECTORY pocketsphinx TYPE INCLUDE) +install(DIRECTORY sphinxbase TYPE INCLUDE) +# Install generated header file(s) +install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/sphinxbase TYPE INCLUDE) diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/android/config.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/android/config.h new file mode 100644 index 
0000000000000000000000000000000000000000..806b8364bb1e5c52d5241d9c81c9f9a7bb8efafb --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/android/config.h @@ -0,0 +1,108 @@ +/* include/config.h. Generated from config.h.in by configure. */ +/* include/config.h.in. Generated from configure.in by autoheader. */ + +/* Default radix point for fixed-point */ +/* #undef DEFAULT_RADIX */ + +/* Enable thread safety */ +#define ENABLE_THREADS /**/ + +/* Use fixed-point computation */ +/* #undef FIXED_POINT */ + +/* Define to 1 if you have the header file. */ +#define HAVE_DLFCN_H 1 + +/* Define if you have the iconv() function. */ +/* #define HAVE_ICONV 1 */ + +/* Define to 1 if you have the header file. */ +#define HAVE_INTTYPES_H 1 + +/* Define to 1 if you have the `asound' library (-lasound). */ +/* #define HAVE_LIBASOUND 1 */ + +/* Define to 1 if you have the `blas' library (-lblas). */ +/* #define HAVE_LIBBLAS 1 */ + +/* Define to 1 if you have the `lapack' library (-llapack). */ +/* #define HAVE_LIBLAPACK 1 */ + +/* Define to 1 if you have the `m' library (-lm). */ +#define HAVE_LIBM 1 + +/* Define to 1 if you have the `pthread' library (-lpthread). */ +#define HAVE_LIBPTHREAD 1 + +/* Define to 1 if the system has the type `long long'. */ +#define HAVE_LONG_LONG 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_MEMORY_H 1 + +/* Define to 1 if you have the `perror' function. */ +#define HAVE_PERROR 1 + +/* Define to 1 if you have the `popen' function. */ +#define HAVE_POPEN 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_PTHREAD_H 1 + +/* Define to 1 if you have the `snprintf' function. */ +#define HAVE_SNPRINTF 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDINT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STDLIB_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_STRINGS_H 1 + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_STRING_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_UNISTD_H 1 + +/* Define as const if the declaration of iconv() needs const. */ +#define ICONV_CONST + +/* Define to the sub-directory in which libtool stores uninstalled libraries. + */ +#define LT_OBJDIR ".libs/" + +/* Define as the return type of signal handlers (`int' or `void'). */ +#define RETSIGTYPE void + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 8 + +/* The size of `long long', as computed by sizeof. */ +#define SIZEOF_LONG_LONG 8 + +/* Define to 1 if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Enable matrix algebra with LAPACK */ +/* #define WITH_LAPACK */ + +/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most + significant byte first (like Motorola and SPARC, unlike Intel). */ +#if defined AC_APPLE_UNIVERSAL_BUILD +# if defined __BIG_ENDIAN__ +# define WORDS_BIGENDIAN 1 +# endif +#else +# ifndef WORDS_BIGENDIAN +/* # undef WORDS_BIGENDIAN */ +# endif +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/android/sphinx_config.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/android/sphinx_config.h new file mode 100644 index 0000000000000000000000000000000000000000..205289a09f547c1c101688dc9cb6f8dfb6a09d44 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/android/sphinx_config.h @@ -0,0 +1,17 @@ +/* include/sphinx_config.h. Generated from sphinx_config.h.in by configure. 
*/ +/* sphinx_config.h: Externally visible configuration parameters */ + +/* Default radix point for fixed-point */ +/* #undef DEFAULT_RADIX */ + +/* Use fixed-point computation */ +/* #undef FIXED_POINT */ + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 8 + +/* Define to 1 if the system has the type `long long'. */ +#define HAVE_LONG_LONG 1 + +/* The size of `long long', as computed by sizeof. */ +#define SIZEOF_LONG_LONG 8 diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx.h new file mode 100644 index 0000000000000000000000000000000000000000..bbe4b8d3fc4b7712993d01dfff73502fd65a3f06 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx.h @@ -0,0 +1,731 @@ +/* -*- c-basic-offset:4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file pocketsphinx.h Main header file for the PocketSphinx decoder. + */ + +#ifndef __POCKETSPHINX_H__ +#define __POCKETSPHINX_H__ + + +/* System headers we need. */ +#include + +/* SphinxBase headers we need. */ +#include +#include +#include +#include + +/* SphinxBase headers you need. */ +#include + +/* PocketSphinx headers */ +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * PocketSphinx speech recognizer object. + */ +typedef struct ps_decoder_s ps_decoder_t; + +#include + +/** + * PocketSphinx N-best hypothesis iterator object. + */ +typedef struct ps_astar_s ps_nbest_t; + +/** + * PocketSphinx segmentation iterator object. + */ +typedef struct ps_seg_s ps_seg_t; + +/** + * Sets default grammar and language model if they are not set explicitly and + * are present in the default search path. + */ +POCKETSPHINX_EXPORT void +ps_default_search_args(cmd_ln_t *); + +/** + * Initialize the decoder from a configuration object. + * + * @note The decoder retains ownership of the pointer + * config, so if you are not going to use it + * elsewhere, you can free it. + * + * @param config a command-line structure, as created by + * cmd_ln_parse_r() or cmd_ln_parse_file_r(). If NULL, the + * decoder will be allocated but not initialized. 
You can + * proceed to initialize it with ps_reinit(). + */ +POCKETSPHINX_EXPORT +ps_decoder_t *ps_init(cmd_ln_t *config); + +/** + * Reinitialize the decoder with updated configuration. + * + * This function allows you to switch the acoustic model, dictionary, + * or other configuration without creating an entirely new decoding + * object. + * + * @note Since the acoustic model will be reloaded, changes made to + * feature extraction parameters may be overridden if a `feat.params` + * file is present. + * @note Any searches created with ps_set_search() or words added to + * the dictionary with ps_add_word() will also be lost. To avoid this + * you can use ps_reinit_feat(). + * @note The decoder retains ownership of the pointer + * config, so you should free it when no longer used. + * + * @param ps Decoder. + * @param config An optional new configuration to use. If this is + * NULL, the previous configuration will be reloaded, + * with any changes applied. + * @return 0 for success, <0 for failure. + */ +POCKETSPHINX_EXPORT +int ps_reinit(ps_decoder_t *ps, cmd_ln_t *config); + +/** + * Reinitialize only the feature computation with updated configuration. + * + * This function allows you to switch the feature computation + * parameters without otherwise affecting the decoder configuration. + * For example, if you change the sample rate or the frame rate, the + * cepstral mean, or the VTLN warping factor, and do not need to + * reconfigure the rest of the decoder. + * + * Note that if your code has modified any internal parameters in the + * \ref acmod_t, these will be overridden by values from the config. + * + * @note The decoder retains ownership of the pointer `config`, so you + * should free it when no longer used. + * + * @param ps Decoder. + * @param config An optional new configuration to use. If this is + * NULL, the previous configuration will be reloaded, + * with any changes to feature computation applied.
+ * @return 0 for success, <0 for failure (usually an invalid parameter) + */ +POCKETSPHINX_EXPORT +int ps_reinit_feat(ps_decoder_t *ps, cmd_ln_t *config); + +/** + * Returns the argument definitions used in ps_init(). + * + * This is here to avoid exporting global data, which is problematic + * on Win32 and Symbian (and possibly other platforms). + */ +POCKETSPHINX_EXPORT +arg_t const *ps_args(void); + +/** + * Retain a pointer to the decoder. + * + * This increments the reference count on the decoder, allowing it to + * be shared between multiple parent objects. In general you will not + * need to use this function, ever. It is mainly here for the + * convenience of scripting language bindings. + * + * @return pointer to retained decoder. + */ +POCKETSPHINX_EXPORT +ps_decoder_t *ps_retain(ps_decoder_t *ps); + +/** + * Finalize the decoder. + * + * This releases all resources associated with the decoder. + * + * @param ps Decoder to be freed. + * @return New reference count (0 if freed). + */ +POCKETSPHINX_EXPORT +int ps_free(ps_decoder_t *ps); + +/** + * Get the configuration object for this decoder. + * + * @return The configuration object for this decoder. The decoder + * owns this pointer, so you should not attempt to free it + * manually. Use cmd_ln_retain() if you wish to reuse it + * elsewhere. + */ +POCKETSPHINX_EXPORT +cmd_ln_t *ps_get_config(ps_decoder_t *ps); + +/** + * Get the log-math computation object for this decoder. + * + * @return The log-math object for this decoder. The decoder owns + * this pointer, so you should not attempt to free it + * manually. Use logmath_retain() if you wish to reuse it + * elsewhere. + */ +POCKETSPHINX_EXPORT +logmath_t *ps_get_logmath(ps_decoder_t *ps); + +/** + * Get the feature extraction object for this decoder. + * + * @return The feature extraction object for this decoder. The + * decoder owns this pointer, so you should not attempt to + * free it manually. Use fe_retain() if you wish to reuse it + * elsewhere. 
+ */ +POCKETSPHINX_EXPORT +fe_t *ps_get_fe(ps_decoder_t *ps); + +/** + * Get the dynamic feature computation object for this decoder. + * + * @return The dynamic feature computation object for this decoder. + * The decoder owns this pointer, so you should not attempt to + * free it manually. Use feat_retain() if you wish to reuse + * it elsewhere. + */ +POCKETSPHINX_EXPORT +feat_t *ps_get_feat(ps_decoder_t *ps); + +/** + * Adapt current acoustic model using a linear transform. + * + * @param mllr The new transform to use, or NULL to update the + * existing transform. The decoder retains ownership of + * this pointer, so you may free it if you no longer need + * it. + * @return The updated transform object for this decoder, or + * NULL on failure. + */ +POCKETSPHINX_EXPORT +ps_mllr_t *ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr); + +/** + * Reload the pronunciation dictionary from a file. + * + * This function replaces the current pronunciation dictionary with + * the one stored in `dictfile`. This also causes the active search + * module(s) to be reinitialized, in the same manner as calling + * ps_add_word() with update=TRUE. + * + * @param dictfile Path to dictionary file to load. + * @param fdictfile Path to filler dictionary to load, or NULL to keep + * the existing filler dictionary. + * @param format Format of the dictionary file, or NULL to determine + * automatically (currently unused, should be NULL) + */ +POCKETSPHINX_EXPORT +int ps_load_dict(ps_decoder_t *ps, char const *dictfile, + char const *fdictfile, char const *format); + +/** + * Dump the current pronunciation dictionary to a file. + * + * This function dumps the current pronunciation dictionary to a text file. + * + * @param dictfile Path to file where dictionary will be written.
+ * @param format Format of the dictionary file, or NULL for the + * default (text) format (currently unused, should be NULL) + */ +POCKETSPHINX_EXPORT +int ps_save_dict(ps_decoder_t *ps, char const *dictfile, char const *format); + +/** + * Add a word to the pronunciation dictionary. + * + * This function adds a word to the pronunciation dictionary and the + * current language model (but, obviously, not to the current FSG if + * FSG mode is enabled). If the word is already present in one or the + * other, it does whatever is necessary to ensure that the word can be + * recognized. + * + * @param word Word string to add. + * @param phones Whitespace-separated list of phoneme strings + * describing pronunciation of word. + * @param update If TRUE, update the search module (whichever one is + * currently active) to recognize the newly added word. + * If adding multiple words, it is more efficient to + * pass FALSE here in all but the last word. + * @return The internal ID (>= 0) of the newly added word, or <0 on + * failure. + */ +POCKETSPHINX_EXPORT +int ps_add_word(ps_decoder_t *ps, + char const *word, + char const *phones, + int update); + +/** + * Look up a word in the dictionary and return phone transcription + * for it. + * + * @param ps Pocketsphinx decoder + * @param word Word to look for + * + * @return Whitespace-separated phone string describing the pronunciation of the word + * or NULL if word is not present in the dictionary. The string is + * allocated and must be freed by the user. + */ +POCKETSPHINX_EXPORT +char *ps_lookup_word(ps_decoder_t *ps, + const char *word); + +/** + * Decode a raw audio stream. + * + * No headers are recognized in this file. The configuration + * parameters -samprate and -input_endian are used + * to determine the sampling rate and endianness of the stream, + * respectively. Audio is always assumed to be 16-bit signed PCM. + * + * @param ps Decoder. + * @param rawfh Previously opened file stream.
+ * @param maxsamps Maximum number of samples to read from rawfh, or -1 + * to read until end-of-file. + * @return Number of samples of audio. + */ +POCKETSPHINX_EXPORT +long ps_decode_raw(ps_decoder_t *ps, FILE *rawfh, + long maxsamps); + +/** + * Decode a senone score dump file. + * + * @param ps Decoder + * @param senfh Previously opened file handle positioned at start of file. + * @return Number of frames read. + */ +POCKETSPHINX_EXPORT +int ps_decode_senscr(ps_decoder_t *ps, FILE *senfh); + +/** + * Start processing of the stream of speech. + * + * @deprecated This function is retained for compatibility, but its + * only effect is to reset the noise removal statistics, which are + * otherwise retained across utterances. You do not need to call it. + * + * @return 0 for success, <0 on error. + */ +POCKETSPHINX_EXPORT +int ps_start_stream(ps_decoder_t *ps); + +/** + * Checks if the last fed audio buffer contained speech. + * + * @deprecated This function is retained for compatibility but should + * not be considered a reliable voice activity detector. It will + * always return 1 between calls to ps_start_utt() and ps_end_utt(). + * + * @param ps Decoder. + * @return 1 if last buffer contained speech, 0 otherwise + */ +POCKETSPHINX_EXPORT +int ps_get_in_speech(ps_decoder_t *ps); + + +/** + * Start utterance processing. + * + * This function should be called before any utterance data is passed + * to the decoder. It marks the start of a new utterance and + * reinitializes internal data structures. + * + * @param ps Decoder to be started. + * @return 0 for success, <0 on error. + */ +POCKETSPHINX_EXPORT +int ps_start_utt(ps_decoder_t *ps); + +/** + * Decode raw audio data. + * + * @param ps Decoder. + * @param no_search If non-zero, perform feature extraction but don't + * do any recognition yet. This may be necessary if + * your processor has trouble doing recognition in + * real-time.
+ * @param full_utt If non-zero, this block of data is a full utterance + * worth of data. This may allow the recognizer to + * produce more accurate results. + * @return Number of frames of data searched, or <0 for error. + */ +POCKETSPHINX_EXPORT +int ps_process_raw(ps_decoder_t *ps, + int16 const *data, + size_t n_samples, + int no_search, + int full_utt); + +/** + * Decode acoustic feature data. + * + * @param ps Decoder. + * @param data Acoustic feature data, as returned by... something :) + * @param no_search If non-zero, perform feature extraction but don't + * do any recognition yet. This may be necessary if + * your processor has trouble doing recognition in + * real-time. + * @param full_utt If non-zero, this block of data is a full utterance + * worth of data. This may allow the recognizer to + * produce more accurate results. + * @return Number of frames of data searched, or <0 for error. + */ +POCKETSPHINX_EXPORT +int ps_process_cep(ps_decoder_t *ps, + mfcc_t **data, + int n_frames, + int no_search, + int full_utt); + +/** + * Get the number of frames of data searched. + * + * Note that there is a delay between this and the number of frames of + * audio which have been input to the system. This is due to the fact + * that acoustic features are computed using a sliding window of + * audio, and dynamic features are computed over a sliding window of + * acoustic features. + * + * @param ps Decoder. + * @return Number of frames of speech data which have been recognized + * so far. + */ +POCKETSPHINX_EXPORT +int ps_get_n_frames(ps_decoder_t *ps); + +/** + * End utterance processing. + * + * @param ps Decoder. + * @return 0 for success, <0 on error + */ +POCKETSPHINX_EXPORT +int ps_end_utt(ps_decoder_t *ps); + +/** + * Get hypothesis string and path score. + * + * @param ps Decoder. + * @param out_best_score Output: path score corresponding to returned string. + * @return String containing best hypothesis at this point in + * decoding. 
NULL if no hypothesis is available. This string is owned + * by the decoder and only valid for the current hypothesis, so you + * should copy it if you need to hold onto it. + */ +POCKETSPHINX_EXPORT +char const *ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score); + +/** + * Get posterior probability. + * + * @note Unless the -bestpath option is enabled, this function will + * always return zero (corresponding to a posterior probability of + * 1.0). Even if -bestpath is enabled, it will also return zero when + * called on a partial result. Ongoing research into effective + * confidence annotation for partial hypotheses may result in these + * restrictions being lifted in future versions. + * + * @param ps Decoder. + * @return Posterior probability of the best hypothesis. + */ +POCKETSPHINX_EXPORT +int32 ps_get_prob(ps_decoder_t *ps); + +/** + * Get word lattice. + * + * There isn't much you can do with this so far, a public API will + * appear in the future. + * + * @param ps Decoder. + * @return Word lattice object containing all hypotheses so far. NULL + * if no hypotheses are available. This pointer is owned by + * the decoder and you should not attempt to free it manually. + * It is only valid until the next utterance, unless you use + * ps_lattice_retain() to retain it. + */ +POCKETSPHINX_EXPORT +ps_lattice_t *ps_get_lattice(ps_decoder_t *ps); + +/** + * Get an iterator over the word segmentation for the best hypothesis. + * + * @param ps Decoder. + * @return Iterator over the best hypothesis at this point in + * decoding. NULL if no hypothesis is available. + */ +POCKETSPHINX_EXPORT +ps_seg_t *ps_seg_iter(ps_decoder_t *ps); + +/** + * Get the next segment in a word segmentation. + * + * @param seg Segment iterator. + * @return Updated iterator with the next segment. NULL at end of + * utterance (the iterator will be freed in this case). + */ +POCKETSPHINX_EXPORT +ps_seg_t *ps_seg_next(ps_seg_t *seg); + +/** + * Get word string from a segmentation iterator. 
+ * + * @param seg Segment iterator. + * @return Read-only string giving string name of this segment. This + * is only valid until the next call to ps_seg_next(). + */ +POCKETSPHINX_EXPORT +char const *ps_seg_word(ps_seg_t *seg); + +/** + * Get inclusive start and end frames from a segmentation iterator. + * + * @note These frame numbers are inclusive, i.e. the end frame refers + * to the last frame in which the given word or other segment was + * active. Therefore, the actual duration is *out_ef - *out_sf + 1. + * + * @param seg Segment iterator. + * @param out_sf Output: First frame index in segment. + * @param out_ef Output: Last frame index in segment. + */ +POCKETSPHINX_EXPORT +void ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef); + +/** + * Get language, acoustic, and posterior probabilities from a + * segmentation iterator. + * + * @note Unless the -bestpath option is enabled, this function will + * always return zero (corresponding to a posterior probability of + * 1.0). Even if -bestpath is enabled, it will also return zero when + * called on a partial result. Ongoing research into effective + * confidence annotation for partial hypotheses may result in these + * restrictions being lifted in future versions. + * + * @param out_ascr Output: acoustic model score for this segment. + * @param out_lscr Output: language model score for this segment. + * @param out_lback Output: language model backoff mode for this + * segment (i.e. the number of words used in + * calculating lscr). This field is, of course, only + * meaningful for N-Gram models. + * @return Log posterior probability of current segment. Log is + * expressed in the log-base used in the decoder. To convert + * to linear floating-point, use logmath_exp(ps_get_logmath(), + * pprob). + */ +POCKETSPHINX_EXPORT +int32 ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback); + +/** + * Finish iterating over a word segmentation early, freeing resources. 
+ */ +POCKETSPHINX_EXPORT +void ps_seg_free(ps_seg_t *seg); + +/** + * Get an iterator over the best hypotheses. The function may also + * return a NULL which means that there is no hypothesis available for this + * utterance. + * + * @param ps Decoder. + * @return Iterator over N-best hypotheses or NULL if no hypothesis is available + */ +POCKETSPHINX_EXPORT +ps_nbest_t *ps_nbest(ps_decoder_t *ps); + +/** + * Move an N-best list iterator forward. + * + * @param nbest N-best iterator. + * @return Updated N-best iterator, or NULL if no more hypotheses are + * available (iterator is freed in this case). + */ +POCKETSPHINX_EXPORT +ps_nbest_t *ps_nbest_next(ps_nbest_t *nbest); + +/** + * Get the hypothesis string from an N-best list iterator. + * + * @param nbest N-best iterator. + * @param out_score Output: Path score for this hypothesis. + * @return String containing next best hypothesis. Note that this + * pointer is only valid for the current iteration. + */ +POCKETSPHINX_EXPORT +char const *ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score); + +/** + * Get the word segmentation from an N-best list iterator. + * + * @param nbest N-best iterator. + * @return Iterator over the next best hypothesis. + */ +POCKETSPHINX_EXPORT +ps_seg_t *ps_nbest_seg(ps_nbest_t *nbest); + +/** + * Finish N-best search early, releasing resources. + * + * @param nbest N-best iterator. + */ +POCKETSPHINX_EXPORT +void ps_nbest_free(ps_nbest_t *nbest); + +/** + * Get performance information for the current utterance. + * + * @param ps Decoder. + * @param out_nspeech Output: Number of seconds of speech. + * @param out_ncpu Output: Number of seconds of CPU time used. + * @param out_nwall Output: Number of seconds of wall time used. + */ +POCKETSPHINX_EXPORT +void ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech, + double *out_ncpu, double *out_nwall); + +/** + * Get overall performance information. + * + * @param ps Decoder. + * @param out_nspeech Output: Number of seconds of speech.
+ * @param out_ncpu Output: Number of seconds of CPU time used. + * @param out_nwall Output: Number of seconds of wall time used. + */ +POCKETSPHINX_EXPORT +void ps_get_all_time(ps_decoder_t *ps, double *out_nspeech, + double *out_ncpu, double *out_nwall); + +/** + * @mainpage PocketSphinx Documentation + * @author David Huggins-Daines + * @version 5.0.0 + * @date July, 2022 + * + * @section intro_sec Introduction + * + * This is the documentation for the PocketSphinx speech recognition + * engine. The main API calls are documented in pocketsphinx.h. + * + * @section install_sec Installation + * + * To install from source, you will need a C compiler and a recent + * version of CMake. If you wish to use an integrated development + * environment, Visual Studio Code will automate most of this process + * for you once you have installed C++ and CMake support as described + * at https://code.visualstudio.com/docs/languages/cpp + * + * @subsection python_install Python module install + * + * The easiest way to program PocketSphinx is with the Python module. + * This can be installed in a + * [VirtualEnv](https://docs.python.org/3/library/venv.html) or + * [Conda](https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/environments.html) + * environment without affecting the rest of your system. For + * example, from the *top-level source directory*: + * + * python3 -m venv ~/ve_pocketsphinx + * . ~/ve_pocketsphinx/bin/activate + * pip install . + * + * There is no need to create a separate build directory as `pip` will + * do this for you. + * + * @subsection unix_install Unix-like systems + * + * From the Unix command line, you will create a separate directory in + * which to build the source code, then run `cmake` with the top-level + * source directory as argument to generate the build files: + * + * mkdir build && cd build + * cmake ..
+ * + * Now you can compile and run the tests, and install the code: + * + * make all test + * make install + * + * By default CMake will try to install things in `/usr/local`, which + * you might not have access to. If you want to install somewhere + * else you need to set `CMAKE_INSTALL_PREFIX` *when running cmake*: + * + * cmake .. -DCMAKE_INSTALL_PREFIX=$HOME/.local + * + * In this case you may also need to set the `LD_LIBRARY_PATH` + * environment variable so that the PocketSphinx library can be found: + * + * export LD_LIBRARY_PATH=$HOME/.local/lib + * + * @subsection windows_install Windows + * + * On Windows, the process is similar, but you will need to tell CMake + * what build tool you are using with the `-G` option, and there are + * many of them. The build is known to work with `nmake` but it is + * easiest just to use Visual Studio Code. Once built, you will find + * the DLL and EXE files in `build\Debug` or `build\Release` depending + * on your build type. If the EXE files do not run, you need to + * ensure that `pocketsphinx.dll` is located in the same directory as + * them. + * + * @section faq_sec Frequently Asked Questions + * + * @subsection faq_faq Why are there no frequently asked questions? + * + * I'm glad you asked! There will be some soon. + * + * @section thanks_sec Acknowledgements + * + * PocketSphinx is largely based on the previous Sphinx-II and + * Sphinx-III systems, developed by a large number of contributors at + * Carnegie Mellon University. For some time afterwards, it was + * maintained by Nickolay Shmyrev and others at Alpha Cephei, Inc. + * See the `AUTHORS` file for a list of contributors.
+ */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __POCKETSPHINX_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx/cmdln_macro.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx/cmdln_macro.h new file mode 100644 index 0000000000000000000000000000000000000000..1237d1fd8c2f5a7e1d85e8efd9e631cb6f4cb5e5 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx/cmdln_macro.h @@ -0,0 +1,388 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* cmdln_macro.h - Command line definitions for PocketSphinx */ + +#ifndef __PS_CMDLN_MACRO_H__ +#define __PS_CMDLN_MACRO_H__ + +#include +#include +#include + +/** Minimal set of command-line options for PocketSphinx. */ +#define POCKETSPHINX_OPTIONS \ + waveform_to_cepstral_command_line_macro(), \ + cepstral_to_feature_command_line_macro(), \ + POCKETSPHINX_ACMOD_OPTIONS, \ + POCKETSPHINX_BEAM_OPTIONS, \ + POCKETSPHINX_SEARCH_OPTIONS, \ + POCKETSPHINX_DICT_OPTIONS, \ + POCKETSPHINX_NGRAM_OPTIONS, \ + POCKETSPHINX_FSG_OPTIONS, \ + POCKETSPHINX_KWS_OPTIONS, \ + POCKETSPHINX_DEBUG_OPTIONS + +/** Options for debugging and logging. */ +#define POCKETSPHINX_DEBUG_OPTIONS \ + { "-logfn", \ + ARG_STRING, \ + NULL, \ + "File to write log messages in" }, \ + { "-loglevel", \ + ARG_STRING, \ + "WARN", \ + "Minimum level of log messages (DEBUG, INFO, WARN, ERROR)" },\ + { "-mfclogdir", \ + ARG_STRING, \ + NULL, \ + "Directory to log feature files to" \ + }, \ + { "-rawlogdir", \ + ARG_STRING, \ + NULL, \ + "Directory to log raw audio files to" }, \ + { "-senlogdir", \ + ARG_STRING, \ + NULL, \ + "Directory to log senone score files to" \ + } + +/** Options defining beam width parameters for tuning the search. 
*/ +#define POCKETSPHINX_BEAM_OPTIONS \ +{ "-beam", \ + ARG_FLOATING, \ + "1e-48", \ + "Beam width applied to every frame in Viterbi search (smaller values mean wider beam)" }, \ +{ "-wbeam", \ + ARG_FLOATING, \ + "7e-29", \ + "Beam width applied to word exits" }, \ +{ "-pbeam", \ + ARG_FLOATING, \ + "1e-48", \ + "Beam width applied to phone transitions" }, \ +{ "-lpbeam", \ + ARG_FLOATING, \ + "1e-40", \ + "Beam width applied to last phone in words" }, \ +{ "-lponlybeam", \ + ARG_FLOATING, \ + "7e-29", \ + "Beam width applied to last phone in single-phone words" }, \ +{ "-fwdflatbeam", \ + ARG_FLOATING, \ + "1e-64", \ + "Beam width applied to every frame in second-pass flat search" }, \ +{ "-fwdflatwbeam", \ + ARG_FLOATING, \ + "7e-29", \ + "Beam width applied to word exits in second-pass flat search" }, \ +{ "-pl_window", \ + ARG_INTEGER, \ + "5", \ + "Phoneme lookahead window size, in frames" }, \ +{ "-pl_beam", \ + ARG_FLOATING, \ + "1e-10", \ + "Beam width applied to phone loop search for lookahead" }, \ +{ "-pl_pbeam", \ + ARG_FLOATING, \ + "1e-10", \ + "Beam width applied to phone loop transitions for lookahead" }, \ +{ "-pl_pip", \ + ARG_FLOATING, \ + "1.0", \ + "Phone insertion penalty for phone loop" }, \ +{ "-pl_weight", \ + ARG_FLOATING, \ + "3.0", \ + "Weight for phoneme lookahead penalties" } \ + +/** Options defining other parameters for tuning the search. */ +#define POCKETSPHINX_SEARCH_OPTIONS \ +{ "-compallsen", \ + ARG_BOOLEAN, \ + "no", \ + "Compute all senone scores in every frame (can be faster when there are many senones)" }, \ +{ "-fwdtree", \ + ARG_BOOLEAN, \ + "yes", \ + "Run forward lexicon-tree search (1st pass)" }, \ +{ "-fwdflat", \ + ARG_BOOLEAN, \ + "yes", \ + "Run forward flat-lexicon search over word lattice (2nd pass)" }, \ +{ "-bestpath", \ + ARG_BOOLEAN, \ + "yes", \ + "Run bestpath (Dijkstra) search over word lattice (3rd pass)" }, \ +{ "-backtrace", \ + ARG_BOOLEAN, \ + "no", \ + "Print results and backtraces to log." 
}, \ +{ "-latsize", \ + ARG_INTEGER, \ + "5000", \ + "Initial backpointer table size" }, \ +{ "-maxwpf", \ + ARG_INTEGER, \ + "-1", \ + "Maximum number of distinct word exits at each frame (or -1 for no pruning)" }, \ +{ "-maxhmmpf", \ + ARG_INTEGER, \ + "30000", \ + "Maximum number of active HMMs to maintain at each frame (or -1 for no pruning)" }, \ +{ "-min_endfr", \ + ARG_INTEGER, \ + "0", \ + "Nodes ignored in lattice construction if they persist for fewer than N frames" }, \ +{ "-fwdflatefwid", \ + ARG_INTEGER, \ + "4", \ + "Minimum number of end frames for a word to be searched in fwdflat search" }, \ +{ "-fwdflatsfwin", \ + ARG_INTEGER, \ + "25", \ + "Window of frames in lattice to search for successor words in fwdflat search " } + +/** Command-line options for keyphrase spotting */ +#define POCKETSPHINX_KWS_OPTIONS \ +{ "-keyphrase", \ + ARG_STRING, \ + NULL, \ + "Keyphrase to spot"}, \ +{ "-kws", \ + ARG_STRING, \ + NULL, \ + "A file with keyphrases to spot, one per line"}, \ +{ "-kws_plp", \ + ARG_FLOATING, \ + "1e-1", \ + "Phone loop probability for keyphrase spotting" }, \ +{ "-kws_delay", \ + ARG_INTEGER, \ + "10", \ + "Delay to wait for best detection score" }, \ +{ "-kws_threshold", \ + ARG_FLOATING, \ + "1e-30", \ + "Threshold for p(hyp)/p(alternatives) ratio" } + +/** Command-line options for finite state grammars. */ +#define POCKETSPHINX_FSG_OPTIONS \ + { "-fsg", \ + ARG_STRING, \ + NULL, \ + "Sphinx format finite state grammar file"}, \ +{ "-jsgf", \ + ARG_STRING, \ + NULL, \ + "JSGF grammar file" }, \ +{ "-toprule", \ + ARG_STRING, \ + NULL, \ + "Start rule for JSGF (first public rule is default)" }, \ +{ "-fsgusealtpron", \ + ARG_BOOLEAN, \ + "yes", \ + "Add alternate pronunciations to FSG"}, \ +{ "-fsgusefiller", \ + ARG_BOOLEAN, \ + "yes", \ + "Insert filler words at each state."} + +/** Command-line options for statistical language models. 
*/ +#define POCKETSPHINX_NGRAM_OPTIONS \ +{ "-allphone", \ + ARG_STRING, \ + NULL, \ + "Perform phoneme decoding with phonetic lm (given here)" }, \ +{ "-allphone_ci", \ + ARG_BOOLEAN, \ + "yes", \ + "Perform phoneme decoding with phonetic lm and context-independent units only" }, \ +{ "-lm", \ + ARG_STRING, \ + NULL, \ + "Word trigram language model input file" }, \ +{ "-lmctl", \ + ARG_STRING, \ + NULL, \ + "Specify a set of language model"}, \ +{ "-lmname", \ + ARG_STRING, \ + NULL, \ + "Which language model in -lmctl to use by default"}, \ +{ "-lw", \ + ARG_FLOATING, \ + "6.5", \ + "Language model probability weight" }, \ +{ "-fwdflatlw", \ + ARG_FLOATING, \ + "8.5", \ + "Language model probability weight for flat lexicon (2nd pass) decoding" }, \ +{ "-bestpathlw", \ + ARG_FLOATING, \ + "9.5", \ + "Language model probability weight for bestpath search" }, \ +{ "-ascale", \ + ARG_FLOATING, \ + "20.0", \ + "Inverse of acoustic model scale for confidence score calculation" }, \ +{ "-wip", \ + ARG_FLOATING, \ + "0.65", \ + "Word insertion penalty" }, \ +{ "-nwpen", \ + ARG_FLOATING, \ + "1.0", \ + "New word transition penalty" }, \ +{ "-pip", \ + ARG_FLOATING, \ + "1.0", \ + "Phone insertion penalty" }, \ +{ "-uw", \ + ARG_FLOATING, \ + "1.0", \ + "Unigram weight" }, \ +{ "-silprob", \ + ARG_FLOATING, \ + "0.005", \ + "Silence word transition probability" }, \ +{ "-fillprob", \ + ARG_FLOATING, \ + "1e-8", \ + "Filler word transition probability" } \ + +/** Command-line options for dictionaries. 
*/ +#define POCKETSPHINX_DICT_OPTIONS \ + { "-dict", \ + ARG_STRING, \ + NULL, \ + "Main pronunciation dictionary (lexicon) input file" }, \ + { "-fdict", \ + ARG_STRING, \ + NULL, \ + "Noise word pronunciation dictionary input file" }, \ + { "-dictcase", \ + ARG_BOOLEAN, \ + "no", \ + "Dictionary is case sensitive (NOTE: case insensitivity applies to ASCII characters only)" } \ + +/** Command-line options for acoustic modeling */ +#define POCKETSPHINX_ACMOD_OPTIONS \ +{ "-hmm", \ + REQARG_STRING, \ + NULL, \ + "Directory containing acoustic model files."}, \ +{ "-featparams", \ + ARG_STRING, \ + NULL, \ + "File containing feature extraction parameters."}, \ +{ "-mdef", \ + ARG_STRING, \ + NULL, \ + "Model definition input file" }, \ +{ "-senmgau", \ + ARG_STRING, \ + NULL, \ + "Senone to codebook mapping input file (usually not needed)" }, \ +{ "-tmat", \ + ARG_STRING, \ + NULL, \ + "HMM state transition matrix input file" }, \ +{ "-tmatfloor", \ + ARG_FLOATING, \ + "0.0001", \ + "HMM state transition probability floor (applied to -tmat file)" }, \ +{ "-mean", \ + ARG_STRING, \ + NULL, \ + "Mixture gaussian means input file" }, \ +{ "-var", \ + ARG_STRING, \ + NULL, \ + "Mixture gaussian variances input file" }, \ +{ "-varfloor", \ + ARG_FLOATING, \ + "0.0001", \ + "Mixture gaussian variance floor (applied to data from -var file)" }, \ +{ "-mixw", \ + ARG_STRING, \ + NULL, \ + "Senone mixture weights input file (uncompressed)" }, \ +{ "-mixwfloor", \ + ARG_FLOATING, \ + "0.0000001", \ + "Senone mixture weights floor (applied to data from -mixw file)" }, \ +{ "-aw", \ + ARG_INTEGER, \ + "1", \ + "Inverse weight applied to acoustic scores." 
}, \ +{ "-sendump", \ + ARG_STRING, \ + NULL, \ + "Senone dump (compressed mixture weights) input file" }, \ +{ "-mllr", \ + ARG_STRING, \ + NULL, \ + "MLLR transformation to apply to means and variances" }, \ +{ "-mmap", \ + ARG_BOOLEAN, \ + "yes", \ + "Use memory-mapped I/O (if possible) for model files" }, \ +{ "-ds", \ + ARG_INTEGER, \ + "1", \ + "Frame GMM computation downsampling ratio" }, \ +{ "-topn", \ + ARG_INTEGER, \ + "4", \ + "Maximum number of top Gaussians to use in scoring." }, \ +{ "-topn_beam", \ + ARG_STRING, \ + "0", \ + "Beam width used to determine top-N Gaussians (or a list, per-feature)" },\ +{ "-logbase", \ + ARG_FLOATING, \ + "1.0001", \ + "Base in which all log-likelihoods calculated" } + +#define CMDLN_EMPTY_OPTION { NULL, 0, NULL, NULL } + +#endif /* __PS_CMDLN_MACRO_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx/export.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx/export.h new file mode 100644 index 0000000000000000000000000000000000000000..4d8fa0f5b6f58cba4f29590a18493cbc5a942a28 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx/export.h @@ -0,0 +1,15 @@ +#ifndef __POCKETSPHINX_EXPORT_H__ +#define __POCKETSPHINX_EXPORT_H__ + +/* Win32 DLL gunk */ +#if defined(_WIN32) && defined(SPHINX_DLL) +#if defined(POCKETSPHINX_EXPORTS) /* DLL itself */ +#define POCKETSPHINX_EXPORT __declspec(dllexport) +#else +#define POCKETSPHINX_EXPORT __declspec(dllimport) +#endif +#else /* No DLL things*/ +#define POCKETSPHINX_EXPORT +#endif + +#endif /* __POCKETSPHINX_EXPORT_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx/ps_lattice.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx/ps_lattice.h new file mode 100644 index 
0000000000000000000000000000000000000000..39eaf9cd5f87f59ad2b4f3f9c1d081146830bce4 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx/ps_lattice.h @@ -0,0 +1,456 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ + +/** + * @file ps_lattice.h Word graph search + */ + +#ifndef __PS_LATTICE_H__ +#define __PS_LATTICE_H__ + +/* SphinxBase headers. */ +#include +#include + +/* PocketSphinx headers. */ +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * Word graph structure used in bestpath/nbest search. + */ +typedef struct ps_lattice_s ps_lattice_t; + +/** + * DAG nodes. + * + * A node corresponds to a number of hypothesized instances of a word + * which all share the same starting point. + */ +typedef struct ps_latnode_s ps_latnode_t; + +/** + * Iterator over DAG nodes. + */ +typedef struct ps_latnode_s ps_latnode_iter_t; /* pay no attention to the man behind the curtain */ + +/** + * Links between DAG nodes. + * + * A link corresponds to a single hypothesized instance of a word with + * a given start and end point. + */ +typedef struct ps_latlink_s ps_latlink_t; + +/** + * Iterator over DAG links. + */ +typedef struct latlink_list_s ps_latlink_iter_t; + +/* Forward declaration needed to avoid circular includes */ +struct ps_decoder_s; + +/** + * Read a lattice from a file on disk. + * + * @param ps Decoder to use for processing this lattice, or NULL. + * @param file Path to lattice file. + * @return Newly created lattice, or NULL for failure. + */ +POCKETSPHINX_EXPORT +ps_lattice_t *ps_lattice_read(struct ps_decoder_s *ps, + char const *file); + +/** + * Retain a lattice. + * + * This function retains ownership of a lattice for the caller, + * preventing it from being freed automatically. You must call + * ps_lattice_free() to free it after having called this function. + * + * @return pointer to the retained lattice. + */ +POCKETSPHINX_EXPORT +ps_lattice_t *ps_lattice_retain(ps_lattice_t *dag); + +/** + * Free a lattice. 
+ * + * @return new reference count (0 if dag was freed) + */ +POCKETSPHINX_EXPORT +int ps_lattice_free(ps_lattice_t *dag); + +/** + * Write a lattice to disk. + * + * @return 0 for success, <0 on failure. + */ +POCKETSPHINX_EXPORT +int ps_lattice_write(ps_lattice_t *dag, char const *filename); + +/** + * Write a lattice to disk in HTK format + * + * @return 0 for success, <0 on failure. + */ +POCKETSPHINX_EXPORT +int ps_lattice_write_htk(ps_lattice_t *dag, char const *filename); + +/** + * Get the log-math computation object for this lattice + * + * @return The log-math object for this lattice. The lattice retains + * ownership of this pointer, so you should not attempt to + * free it manually. Use logmath_retain() if you wish to + * reuse it elsewhere. + */ +POCKETSPHINX_EXPORT +logmath_t *ps_lattice_get_logmath(ps_lattice_t *dag); + + +/** + * Start iterating over nodes in the lattice. + * + * @note No particular order of traversal is guaranteed, and you + * should not depend on this. + * + * @param dag Lattice to iterate over. + * @return Iterator over lattice nodes. + */ +POCKETSPHINX_EXPORT +ps_latnode_iter_t *ps_latnode_iter(ps_lattice_t *dag); + +/** + * Move to next node in iteration. + * @param itor Node iterator. + * @return Updated node iterator, or NULL if finished + */ +POCKETSPHINX_EXPORT +ps_latnode_iter_t *ps_latnode_iter_next(ps_latnode_iter_t *itor); + +/** + * Stop iterating over nodes. + * @param itor Node iterator. + */ +POCKETSPHINX_EXPORT +void ps_latnode_iter_free(ps_latnode_iter_t *itor); + +/** + * Get node from iterator. + */ +POCKETSPHINX_EXPORT +ps_latnode_t *ps_latnode_iter_node(ps_latnode_iter_t *itor); + +/** + * Get start and end time range for a node. + * + * @param node Node inquired about. + * @param out_fef Output: End frame of first exit from this node. + * @param out_lef Output: End frame of last exit from this node. + * @return Start frame for all edges exiting this node. 
+ */ +POCKETSPHINX_EXPORT +int ps_latnode_times(ps_latnode_t *node, int16 *out_fef, int16 *out_lef); + +/** + * Get word string for this node. + * + * @param dag Lattice to which node belongs. + * @param node Node inquired about. + * @return Word string for this node (possibly a pronunciation variant). + */ +POCKETSPHINX_EXPORT +char const *ps_latnode_word(ps_lattice_t *dag, ps_latnode_t *node); + +/** + * Get base word string for this node. + * + * @param dag Lattice to which node belongs. + * @param node Node inquired about. + * @return Base word string for this node. + */ +POCKETSPHINX_EXPORT +char const *ps_latnode_baseword(ps_lattice_t *dag, ps_latnode_t *node); + +/** + * Iterate over exits from this node. + * + * @param node Node inquired about. + * @return Iterator over exit links from this node. + */ +POCKETSPHINX_EXPORT +ps_latlink_iter_t *ps_latnode_exits(ps_latnode_t *node); + +/** + * Iterate over entries to this node. + * + * @param node Node inquired about. + * @return Iterator over entry links to this node. + */ +POCKETSPHINX_EXPORT +ps_latlink_iter_t *ps_latnode_entries(ps_latnode_t *node); + +/** + * Get best posterior probability and associated acoustic score from a lattice node. + * + * @param dag Lattice to which node belongs. + * @param node Node inquired about. + * @param out_link Output: exit link with highest posterior probability + * @return Posterior probability of the best link exiting this node. + * Log is expressed in the log-base used in the decoder. To + * convert to linear floating-point, use + * logmath_exp(ps_lattice_get_logmath(), pprob). + */ +POCKETSPHINX_EXPORT +int32 ps_latnode_prob(ps_lattice_t *dag, ps_latnode_t *node, + ps_latlink_t **out_link); + +/** + * Get next link from a lattice link iterator. + * + * @param itor Iterator. + * @return Updated iterator, or NULL if finished. + */ +POCKETSPHINX_EXPORT +ps_latlink_iter_t *ps_latlink_iter_next(ps_latlink_iter_t *itor); + +/** + * Stop iterating over links. 
+ * @param itor Link iterator. + */ +POCKETSPHINX_EXPORT +void ps_latlink_iter_free(ps_latlink_iter_t *itor); + +/** + * Get link from iterator. + */ +POCKETSPHINX_EXPORT +ps_latlink_t *ps_latlink_iter_link(ps_latlink_iter_t *itor); + +/** + * Get start and end times from a lattice link. + * + * @note these are inclusive - i.e. the last frame of + * this word is ef, not ef-1. + * + * @param link Link inquired about. + * @param out_sf Output: (optional) start frame of this link. + * @return End frame of this link. + */ +POCKETSPHINX_EXPORT +int ps_latlink_times(ps_latlink_t *link, int16 *out_sf); + +/** + * Get destination and source nodes from a lattice link + * + * @param link Link inquired about + * @param out_src Output: (optional) source node. + * @return destination node + */ +POCKETSPHINX_EXPORT +ps_latnode_t *ps_latlink_nodes(ps_latlink_t *link, ps_latnode_t **out_src); + +/** + * Get word string from a lattice link. + * + * @param dag Lattice to which node belongs. + * @param link Link inquired about + * @return Word string for this link (possibly a pronunciation variant). + */ +POCKETSPHINX_EXPORT +char const *ps_latlink_word(ps_lattice_t *dag, ps_latlink_t *link); + +/** + * Get base word string from a lattice link. + * + * @param dag Lattice to which node belongs. + * @param link Link inquired about + * @return Base word string for this link + */ +POCKETSPHINX_EXPORT +char const *ps_latlink_baseword(ps_lattice_t *dag, ps_latlink_t *link); + +/** + * Get predecessor link in best path. + * + * @param link Link inquired about + * @return Best previous link from bestpath search, if any. Otherwise NULL + */ +POCKETSPHINX_EXPORT +ps_latlink_t *ps_latlink_pred(ps_latlink_t *link); + +/** + * Get acoustic score and posterior probability from a lattice link. + * + * @param dag Lattice to which node belongs. + * @param link Link inquired about + * @param out_ascr Output: (optional) acoustic score. + * @return Posterior probability for this link. 
Log is expressed in + * the log-base used in the decoder. To convert to linear + * floating-point, use logmath_exp(ps_lattice_get_logmath(), pprob). + */ +POCKETSPHINX_EXPORT +int32 ps_latlink_prob(ps_lattice_t *dag, ps_latlink_t *link, int32 *out_ascr); + +/** + * Create a directed link between "from" and "to" nodes, but if a link already exists, + * choose one with the best link_scr. + */ +POCKETSPHINX_EXPORT +void ps_lattice_link(ps_lattice_t *dag, ps_latnode_t *from, ps_latnode_t *to, + int32 score, int32 ef); + +/** + * Start a forward traversal of edges in a word graph. + * + * @note A keen eye will notice an inconsistency in this API versus + * other types of iterators in PocketSphinx. The reason for this is + * that the traversal algorithm is much more efficient when it is able + * to modify the lattice structure. Therefore, to avoid giving the + * impression that multiple traversals are possible at once, no + * separate iterator structure is provided. + * + * @param dag Lattice to be traversed. + * @param start Start node (source) of traversal. + * @param end End node (goal) of traversal. + * @return First link in traversal. + */ +POCKETSPHINX_EXPORT +ps_latlink_t *ps_lattice_traverse_edges(ps_lattice_t *dag, ps_latnode_t *start, ps_latnode_t *end); + +/** + * Get the next link in forward traversal. + * + * @param dag Lattice to be traversed. + * @param end End node (goal) of traversal. + * @return Next link in traversal. + */ +POCKETSPHINX_EXPORT +ps_latlink_t *ps_lattice_traverse_next(ps_lattice_t *dag, ps_latnode_t *end); + +/** + * Start a reverse traversal of edges in a word graph. + * + * @note See ps_lattice_traverse_edges() for why this API is the way it is. + * + * @param dag Lattice to be traversed. + * @param start Start node (goal) of traversal. + * @param end End node (source) of traversal. + * @return First link in traversal. 
+ */ +POCKETSPHINX_EXPORT +ps_latlink_t *ps_lattice_reverse_edges(ps_lattice_t *dag, ps_latnode_t *start, ps_latnode_t *end); + +/** + * Get the next link in reverse traversal. + * + * @param dag Lattice to be traversed. + * @param start Start node (goal) of traversal. + * @return Next link in traversal. + */ +POCKETSPHINX_EXPORT +ps_latlink_t *ps_lattice_reverse_next(ps_lattice_t *dag, ps_latnode_t *start); + +/** + * Do N-Gram based best-path search on a word graph. + * + * This function calculates both the best path as well as the forward + * probability used in confidence estimation. + * + * @return Final link in best path, NULL on error. + */ +POCKETSPHINX_EXPORT +ps_latlink_t *ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset, + float32 lwf, float32 ascale); + +/** + * Calculate link posterior probabilities on a word graph. + * + * This function assumes that bestpath search has already been done. + * + * @return Posterior probability of the utterance as a whole. + */ +POCKETSPHINX_EXPORT +int32 ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset, + float32 ascale); + +/** + * Prune all links (and associated nodes) below a certain posterior probability. + * + * This function assumes that ps_lattice_posterior() has already been called. + * + * @param beam Minimum posterior probability for links. This is + * expressed in the log-base used in the decoder. To convert + * from linear floating-point, use + * logmath_log(ps_lattice_get_logmath(), prob). + * @return number of arcs removed. + */ +POCKETSPHINX_EXPORT +int32 ps_lattice_posterior_prune(ps_lattice_t *dag, int32 beam); + +#ifdef NOT_IMPLEMENTED_YET +/** + * Expand lattice using an N-gram language model. + * + * This function expands the lattice such that each node represents a + * unique N-gram history, and adds language model scores to the links. 
+ */ +POCKETSPHINX_EXPORT +int32 ps_lattice_ngram_expand(ps_lattice_t *dag, ngram_model_t *lm); +#endif + +/** + * Get the number of frames in the lattice. + * + * @param dag The lattice in question. + * @return Number of frames in this lattice. + */ +POCKETSPHINX_EXPORT +int ps_lattice_n_frames(ps_lattice_t *dag); + +#ifdef __cplusplus +} +#endif + +#endif /* __PS_LATTICE_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx/ps_mllr.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx/ps_mllr.h new file mode 100644 index 0000000000000000000000000000000000000000..81d5a4677f0122394519a3185b4a83ae6eea93a2 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx/ps_mllr.h @@ -0,0 +1,86 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file ps_mllr.h Model-space linear transforms for speaker adaptation + */ + +#ifndef __PS_MLLR_H__ +#define __PS_MLLR_H__ + +/* SphinxBase headers. */ +#include +#include + +/* PocketSphinx headers. */ +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * Feature space linear transform object. + */ +typedef struct ps_mllr_s ps_mllr_t; + +/** + * Read a speaker-adaptive linear transform from a file. + */ +POCKETSPHINX_EXPORT +ps_mllr_t *ps_mllr_read(char const *file); + +/** + * Retain a pointer to a linear transform. + */ +POCKETSPHINX_EXPORT +ps_mllr_t *ps_mllr_retain(ps_mllr_t *mllr); + +/** + * Release a pointer to a linear transform. 
+ */ +POCKETSPHINX_EXPORT +int ps_mllr_free(ps_mllr_t *mllr); + +#ifdef __cplusplus +} +#endif + +#endif /* __PS_MLLR_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx/ps_search.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx/ps_search.h new file mode 100644 index 0000000000000000000000000000000000000000..23109a69e9c380b262210cc09ee56c3e7b0ffb67 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/pocketsphinx/ps_search.h @@ -0,0 +1,319 @@ +/* -*- c-basic-offset:4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2014 Alpha Cephei Inc.. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY ALPHA CEPHEI INC. ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file ps_search.h User can configure several "search" objects with + * different grammars and language models and switch them in runtime to + * provide interactive experience for the user. + * + * There are different possible search modes: + * + *
    + *
  • keyphrase - efficiently looks for a keyphrase and ignores other speech. Allows configuring the detection threshold.
  • + *
  • grammar - recognizes speech according to a JSGF grammar. Unlike keyphrase search, grammar search doesn't ignore words which are not in the grammar but tries to recognize them.
  • + *
  • ngram/lm - recognizes natural speech with a language model.
  • + *
  • allphone - recognizes phonemes with a phonetic language model.
  • + *
  • align - creates time alignments for a fixed word sequence.
  • + *
+ * + * Each search has a name and can be referenced by a name, names are + * application-specific. The function ps_set_search allows to activate + * the search previously added by a name. Only single search can be + * activated at time. + * + * To add the search one needs to point to the grammar/language model + * describing the search. The location of the grammar is specific to the + * application. + * + * The exact design of a searches depends on your application. For + * example, you might want to listen for activation keyphrase first and once + * keyphrase is recognized switch to ngram search to recognize actual + * command. Once you recognized the command you can switch to grammar + * search to recognize the confirmation and then switch back to keyphrase listening + * mode to wait for another command. + * + * If only a simple recognition is required it is sufficient to add a single search or + * just configure the required mode with configuration options. + */ + +#ifndef __PS_SEARCH_H__ +#define __PS_SEARCH_H__ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * PocketSphinx search iterator. + */ +typedef struct ps_search_iter_s ps_search_iter_t; + + +/** + * Activates search with the provided name. + * + * Activates search with the provided name. The search must be added before + * using either ps_set_fsg(), ps_set_lm() or ps_set_kws(). + * + * @return 0 on success, -1 on failure + */ +POCKETSPHINX_EXPORT +int ps_set_search(ps_decoder_t *ps, const char *name); + +/** + * Returns name of current search in decoder + * + * @see ps_set_search + */ +POCKETSPHINX_EXPORT +const char* ps_get_search(ps_decoder_t *ps); + +/** + * Unsets the search and releases related resources. + * + * Unsets the search previously added with + * using either ps_set_fsg(), ps_set_lm() or ps_set_kws().
+ * + * @see ps_set_fsg + * @see ps_set_lm + * @see ps_set_kws + */ +POCKETSPHINX_EXPORT +int ps_unset_search(ps_decoder_t *ps, const char *name); + +/** + * Returns iterator over current searches + * + * @see ps_set_search + */ +POCKETSPHINX_EXPORT +ps_search_iter_t *ps_search_iter(ps_decoder_t *ps); + +/** + * Updates search iterator to point to the next position. + * + * This function automatically frees the iterator object upon reaching + * the final entry. + * @see ps_set_search + */ +POCKETSPHINX_EXPORT +ps_search_iter_t *ps_search_iter_next(ps_search_iter_t *itor); + +/** + * Retrieves the name of the search the iterator points to. + * + * @see ps_set_search + */ +POCKETSPHINX_EXPORT +const char* ps_search_iter_val(ps_search_iter_t *itor); + +/** + * Delete an unfinished search iterator + * + * @see ps_set_search + */ +POCKETSPHINX_EXPORT +void ps_search_iter_free(ps_search_iter_t *itor); + +/** + * Retrieves the name of the search the iterator points to. + * + * NOTE: this is a redundant re-declaration of ps_search_iter_val(); an + * identical prototype is already declared earlier in this header. + * @see ps_set_search + */ +POCKETSPHINX_EXPORT +const char* ps_search_iter_val(ps_search_iter_t *itor); + + +/** + * Get the language model set object for this decoder. + * + * If N-Gram decoding is not enabled, this will return NULL. You will + * need to enable it using ps_set_lmset() (NOTE(review): not declared in this header; presumably ps_set_lm() - confirm). + * + * @return The language model set object for this decoder. The + * decoder retains ownership of this pointer, so you should + * not attempt to free it manually. Use ngram_model_retain() + * if you wish to reuse it elsewhere. + */ +POCKETSPHINX_EXPORT +ngram_model_t *ps_get_lm(ps_decoder_t *ps, const char *name); + +/** + * Adds new search based on N-gram language model. + * + * Associates N-gram search with the provided name. The search can be activated + * using ps_set_search(). + * + * @see ps_set_search.
+ */ +POCKETSPHINX_EXPORT +int ps_set_lm(ps_decoder_t *ps, const char *name, ngram_model_t *lm); + +/** + * Adds new search based on N-gram language model. + * + * Convenience method to load N-gram model and create a search. + * + * @see ps_set_lm + */ +POCKETSPHINX_EXPORT +int ps_set_lm_file(ps_decoder_t *ps, const char *name, const char *path); + +/** + * Get the finite-state grammar set object for this decoder. + * + * If FSG decoding is not enabled, this returns NULL. Call ps_set_fsgset() + * to enable it (NOTE(review): not declared in this header; presumably ps_set_fsg() - confirm). + * + * @return The current FSG set object for this decoder, or + * NULL if none is available. + */ +POCKETSPHINX_EXPORT +fsg_model_t *ps_get_fsg(ps_decoder_t *ps, const char *name); + +/** + * Adds new search based on finite state grammar. + * + * Associates FSG search with the provided name. The search can be activated + * using ps_set_search(). + * + * @see ps_set_search + */ +POCKETSPHINX_EXPORT +int ps_set_fsg(ps_decoder_t *ps, const char *name, fsg_model_t *fsg); + +/** + * Adds new search using JSGF model. + * + * Convenience method to load JSGF model and create a search. + * + * @see ps_set_fsg + */ +POCKETSPHINX_EXPORT +int ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path); + +/** + * Adds new search using JSGF model. + * + * Convenience method to parse JSGF model from string and create a search. + * + * @see ps_set_fsg + */ +POCKETSPHINX_EXPORT +int ps_set_jsgf_string(ps_decoder_t *ps, const char *name, const char *jsgf_string); + +/** + * Get the current Key phrase to spot + * + * If KWS is not enabled, this returns NULL. Call ps_update_kws() + * to enable it (NOTE(review): not declared in this header; presumably ps_set_kws() or ps_set_keyphrase() - confirm). + * + * @return The current keyphrase to spot + */ +POCKETSPHINX_EXPORT +const char* ps_get_kws(ps_decoder_t *ps, const char *name); + +/** + * Adds keyphrases from a file to spotting + * + * Associates KWS search with the provided name. The search can be activated + * using ps_set_search().
+ * + * @see ps_set_search + */ +POCKETSPHINX_EXPORT +int ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyfile); + +/** + * Adds new keyphrase to spot + * + * Associates KWS search with the provided name. The search can be activated + * using ps_set_search(). + * + * @see ps_set_search + */ +POCKETSPHINX_EXPORT +int ps_set_keyphrase(ps_decoder_t *ps, const char *name, const char *keyphrase); + +/** + * Adds new search based on phone N-gram language model. + * + * Associates N-gram search with the provided name. The search can be activated + * using ps_set_search(). + * + * @see ps_set_search. + */ +POCKETSPHINX_EXPORT +int ps_set_allphone(ps_decoder_t *ps, const char *name, ngram_model_t *lm); + +/** + * Adds new search based on phone N-gram language model. + * + * Convenient method to load N-gram model and create a search. + * + * @see ps_set_allphone + */ +POCKETSPHINX_EXPORT +int ps_set_allphone_file(ps_decoder_t *ps, const char *name, const char *path); + +/** + * Adds new search based on forced alignment. + * + * Convenient method to and create a forced aligner for a piece of + * text. Note that this is currently less than useful, as it depends + * on the word sequence exactly matching the input, including + * alternate pronunciations and silences. + * + * @param ps Decoder + * @param name Name for this search (could be anything, such as an utterance + * label or the name of the input file) + * @param words String containing whitespace-separated words for alignment. + * These words are assumed to exist in the current dictionary. 
+ * + */ +POCKETSPHINX_EXPORT +int ps_set_align(ps_decoder_t *ps, const char *name, const char *words); + +#ifdef __cplusplus +} +#endif + +#endif /* __PS_SEARCH_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/agc.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/agc.h new file mode 100644 index 0000000000000000000000000000000000000000..67d74f6936fa1c749bfa0e95f156033ef88af909 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/agc.h @@ -0,0 +1,202 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * agc.h -- Various forms of automatic gain control (AGC) + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log$ + * Revision 1.1 2006/04/05 20:27:30 dhdfu + * A Great Reorganzation of header files and executables + * + * Revision 1.8 2005/06/21 19:25:41 arthchan2003 + * 1, Fixed doxygen documentation. 2, Added $ keyword. + * + * Revision 1.4 2005/06/13 04:02:56 archan + * Fixed most doxygen-style documentation under libs3decoder. + * + * Revision 1.3 2005/03/30 01:22:46 archan + * Fixed mistakes in last updates. Add + * + * + * 28-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Copied from previous version. + */ + + +#ifndef _S3_AGC_H_ +#define _S3_AGC_H_ + +/* Win32/WinCE DLL gunk */ +#include + +#include +#include + +/** \file agc.h + * \brief routine that implements automatic gain control + * + * \warning This function may not be fully compatible with + * SphinxTrain's family of AGC. + * + * This implements AGC. + */ +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Types of acoustic gain control to apply to the features. 
+ */ +typedef enum agc_type_e { + AGC_NONE = 0, + AGC_MAX, + AGC_EMAX, + AGC_NOISE +} agc_type_t; + +/** Convert string representation (from command-line) to agc_type_t */ +SPHINXBASE_EXPORT +agc_type_t agc_type_from_str(const char *str); + +/** String representations of agc_type_t values. */ +SPHINXBASE_EXPORT +extern const char *agc_type_str[]; + +/** + * Structure holding data for doing AGC. + **/ +typedef struct agc_s { + mfcc_t max; /**< Estimated max for current utterance (for AGC_EMAX) */ + mfcc_t obs_max; /**< Observed max in current utterance */ + int32 obs_frame; /**< Whether any data was observed after prev update */ + int32 obs_utt; /**< Whether any utterances have been observed */ + mfcc_t obs_max_sum; + mfcc_t noise_thresh; /**< Noise threshold (for AGC_NOISE only) */ +} agc_t; + +/** + * Initialize AGC structure with default values. + */ +SPHINXBASE_EXPORT +agc_t *agc_init(void); + +/** + * Free AGC structure. + */ +SPHINXBASE_EXPORT +void agc_free(agc_t *agc); + +/** + * Apply AGC to the given mfc vectors (normalize all C0 mfc coefficients in the given + * input such that the max C0 value is 0, by subtracting the input max C0 from all). + * This function operates on an entire utterance at a time. Hence, the entire utterance + * must be available beforehand (batchmode). + */ +SPHINXBASE_EXPORT +void agc_max(agc_t *agc, /**< In: AGC structure (not used) */ + mfcc_t **mfc, /**< In/Out: mfc[f] = cepstrum vector in frame f */ + int32 n_frame /**< In: number of frames of cepstrum vectors supplied */ + ); + +/** + * Apply AGC to the given block of MFC vectors. + * Unlike agc_max() this does not require the entire utterance to be + * available. Call agc_emax_update() at the end of each utterance to + * update the AGC parameters. 
*/ +SPHINXBASE_EXPORT +void agc_emax(agc_t *agc, /**< In: AGC structure */ + mfcc_t **mfc, /**< In/Out: mfc[f] = cepstrum vector in frame f */ + int32 n_frame /**< In: number of frames of cepstrum vectors supplied */ + ); + +/** + * Update AGC parameters for next utterance. + **/ +SPHINXBASE_EXPORT +void agc_emax_update(agc_t *agc /**< In: AGC structure */ + ); + +/** + * Get the current AGC maximum estimate. + **/ +SPHINXBASE_EXPORT +float32 agc_emax_get(agc_t *agc); + +/** + * Set the current AGC maximum estimate. + **/ +SPHINXBASE_EXPORT +void agc_emax_set(agc_t *agc, float32 m); + +/** + * Apply AGC using noise threshold to the given block of MFC vectors. + **/ +SPHINXBASE_EXPORT +void agc_noise(agc_t *agc, /**< In: AGC structure */ + mfcc_t **mfc, /**< In/Out: mfc[f] = cepstrum vector in frame f */ + int32 n_frame /**< In: number of frames of cepstrum vectors supplied */ + ); + +/** + * Get the current AGC noise threshold. + **/ +SPHINXBASE_EXPORT +float32 agc_get_threshold(agc_t *agc); + +/** + * Set the current AGC noise threshold. + **/ +SPHINXBASE_EXPORT +void agc_set_threshold(agc_t *agc, float32 threshold); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/bio.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/bio.h new file mode 100644 index 0000000000000000000000000000000000000000..343617737655a96d7cf9beb5e6e545dbebaeac42 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/bio.h @@ -0,0 +1,316 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * bio.h -- Sphinx-3 binary file I/O functions. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1996 Carnegie Mellon University. + * ALL RIGHTS RESERVED. 
+ * ********************************************** + * + * HISTORY + * $Log: bio.h,v $ + * Revision 1.8 2005/06/21 20:40:46 arthchan2003 + * 1, Fixed doxygen documentation, 2, Add the $ keyword. + * + * Revision 1.5 2005/06/13 04:02:57 archan + * Fixed most doxygen-style documentation under libs3decoder. + * + * Revision 1.4 2005/05/10 21:21:52 archan + * Three functionalities added but not tested. Code on 1) addition/deletion of LM in mode 4. 2) reading text-based LM 3) Converting txt-based LM to dmp-based LM. + * + * Revision 1.3 2005/03/30 01:22:46 archan + * Fixed mistakes in last updates. Add + * + * + * 28-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created. + */ + + +#ifndef _S3_BIO_H_ +#define _S3_BIO_H_ + +#include +#include + +/* Win32/WinCE DLL gunk */ +#include +#include +#include + +/** \file bio.h + * \brief Cross platform binary IO to process files in sphinx3 format. + * + * + */ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +#define BYTE_ORDER_MAGIC (0x11223344) + +/** "reversed senses" SWAP, ARCHAN: This is still incorporated in + Sphinx 3 because lm3g2dmp used it. Don't think that I am very + happy with it. */ + +#if (__BIG_ENDIAN__) +#define REVERSE_SENSE_SWAP_INT16(x) x = ( (((x)<<8)&0x0000ff00) | (((x)>>8)&0x00ff) ) +#define REVERSE_SENSE_SWAP_INT32(x) x = ( (((x)<<24)&0xff000000) | (((x)<<8)&0x00ff0000) | \ + (((x)>>8)&0x0000ff00) | (((x)>>24)&0x000000ff) ) +#else +#define REVERSE_SENSE_SWAP_INT16(x) +#define REVERSE_SENSE_SWAP_INT32(x) + +#endif + + + +/** + * Read binary file format header: has the following format + *
+ *     s3
+ *     <argument-name> <argument-value>
+ *     <argument-name> <argument-value>
+ *     ...
+ *     endhdr
+ *     4-byte byte-order word used to find file byte ordering relative to host machine.
+ * 
+ * Lines beginning with # are ignored. + * Memory for name and val allocated by this function; use bio_hdrarg_free to free them. + * @return 0 if successful, -1 otherwise. + */ +SPHINXBASE_EXPORT +int32 bio_readhdr (FILE *fp, /**< In: File to read */ + char ***name, /**< Out: array of argument name strings read */ + char ***val, /**< Out: corresponding value strings read */ + int32 *swap /**< Out: file needs byteswapping iff (*swap) */ + ); +/** + * Write a simple binary file header, containing only the version string. Also write + * the byte order magic word. + * @return 0 if successful, -1 otherwise. + */ +SPHINXBASE_EXPORT +int32 bio_writehdr_version (FILE *fp, /**< Output: File to write */ + char *version /**< Input: A string of version */ + ); + + +/** + * Write a simple binary file header with only byte order magic word. + * @return 0 if successful, -1 otherwise. + */ +SPHINXBASE_EXPORT +int32 bio_writehdr(FILE *fp, ...); + +/** + * Free name and value strings previously allocated and returned by bio_readhdr. + */ +SPHINXBASE_EXPORT +void bio_hdrarg_free (char **name, /**< In: Array previously returned by bio_readhdr */ + char **val /**< In: Array previously returned by bio_readhdr */ + ); + +/** + * Like fread but perform byteswapping and accumulate checksum (the 2 extra arguments). + * + * @return unlike fread, returns -1 if required number of elements (n_el) not read; also, + * no byteswapping or checksum accumulation is performed in that case. + */ +SPHINXBASE_EXPORT +int32 bio_fread (void *buf, /**< In: buffer to write */ + int32 el_sz, /**< In: element size */ + int32 n_el, /**< In: number of elements */ + FILE *fp, /**< In: An input file pointer */ + int32 swap, /**< In: Byteswap iff (swap != 0) */ + uint32 *chksum /**< In/Out: Accumulated checksum */ + ); + +/** + * Like fwrite but perform byteswapping and accumulate checksum (the 2 extra arguments). + * + * @return the number of elemens written (like fwrite). 
+ */ +SPHINXBASE_EXPORT +int32 bio_fwrite(const void *buf, /**< In: buffer to write */ + int32 el_sz, /**< In: element size */ + int32 n_el, /**< In: number of elements */ + FILE *fp, /**< In: An output file pointer */ + int32 swap, /**< In: Byteswap iff (swap != 0) */ + uint32 *chksum /**< In/Out: Accumulated checksum */ + ); + +/** + * Read a 1-d array (fashioned after fread): + * + * - 4-byte array size (returned in n_el) + * - memory allocated for the array and read (returned in buf) + * + * Byteswapping and checksum accumulation performed as necessary. + * Fails fatally if expected data not read. + * @return number of array elements allocated and read; -1 if error. + */ +SPHINXBASE_EXPORT +int32 bio_fread_1d (void **buf, /**< Out: contains array data; allocated by this + function; can be freed using ckd_free */ + size_t el_sz, /**< In: Array element size */ + uint32 *n_el, /**< Out: Number of array elements allocated/read */ + FILE *fp, /**< In: File to read */ + int32 sw, /**< In: Byteswap iff (sw != 0) */ + uint32 *ck /**< In/Out: Accumulated checksum */ + ); + +/** + * Read a 2-d matrix: + * + * - 4-byte # rows, # columns (returned in d1, d2) + * - memory allocated for the array and read (returned in arr) + * + * Byteswapping and checksum accumulation performed as necessary. + * Fails fatally if expected data not read. + * @return number of array elements allocated and read; -1 if error. + */ +SPHINXBASE_EXPORT +int32 bio_fread_2d(void ***arr, + size_t e_sz, + uint32 *d1, + uint32 *d2, + FILE *fp, + uint32 swap, + uint32 *chksum); + +/** + * Read a 3-d array (set of matrices) + * + * - 4-byte # matrices, # rows, # columns (returned in d1, d2, d3) + * - memory allocated for the array and read (returned in arr) + * + * Byteswapping and checksum accumulation performed as necessary. + * Fails fatally if expected data not read. + * @return number of array elements allocated and read; -1 if error.
+ */ +SPHINXBASE_EXPORT +int32 bio_fread_3d(void ****arr, + size_t e_sz, + uint32 *d1, + uint32 *d2, + uint32 *d3, + FILE *fp, + uint32 swap, + uint32 *chksum); + +int +bio_fread_intv_3d(void ****arr, + size_t e_sz, + uint32 s, + uint32 e, + uint32 *d1, + uint32 *d2, + uint32 *d3, + FILE *fp, + uint32 swap, + uint32 *chksum); + +/** + * Read and verify checksum at the end of binary file. Fails fatally if there is + * a mismatch. + */ +SPHINXBASE_EXPORT +void bio_verify_chksum (FILE *fp, /**< In: File to read */ + int32 byteswap, /**< In: Byteswap iff (swap != 0) */ + uint32 chksum /**< In: Value to compare with checksum in file */ + ); + + + +/** + * Write a 1-d array. + * Checksum accumulation performed as necessary. + * + * @return number of array elements successfully written or -1 if error. + */ +SPHINXBASE_EXPORT +int bio_fwrite_1d(void *arr, /**< In: Data to write */ + size_t e_sz, /**< In: Size of the elements in bytes */ + uint32 d1, /**< In: First dimension */ + FILE *fp, /**< In: File to write to */ + uint32 *chksum /**< In/Out: Checksum accumulator */ + ); + +/** + * Write a 3-d array (set of matrices). + * Checksum accumulation performed as necessary. + * + * @return number of array elements successfully written or -1 if error. + */ +SPHINXBASE_EXPORT +int bio_fwrite_3d(void ***arr, /**< In: Data to write */ + size_t e_sz, /**< In: Size of the elements in bytes */ + uint32 d1, /**< In: First dimension */ + uint32 d2, /**< In: Second dimension */ + uint32 d3, /**< In: Third dimension */ + FILE *fp, /**< In: File to write to */ + uint32 *chksum /**< In/Out: Checksum accumulator */ + ); + +/** + * Read raw data from the wav file. + * + * @return pointer to the data. 
+ */ +SPHINXBASE_EXPORT +int16* bio_read_wavfile(char const *directory, /**< In: the folder where the file is located */ + char const *filename, /**< In: the name of the file */ + char const *extension, /**< In: file extension */ + int32 header, /**< In: the size of the header to skip usually 44 bytes */ + int32 endian, /**< In: endian of the data */ + size_t *nsamps /**< Out: number of samples read */ + ); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/bitarr.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/bitarr.h new file mode 100644 index 0000000000000000000000000000000000000000..029544e8cf9a61a450721a172938ae529c4c91d6 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/bitarr.h @@ -0,0 +1,152 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2015 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef _LIBUTIL_BITARR_H_ +#define _LIBUTIL_BITARR_H_ + +#include + +#include +/* Win32/WinCE DLL gunk */ +#include + +/** + * @file bitarr.h + * @brief An implementation bit array - memory + * efficient storage for digit int and float data. (FIXME: NO) + * + * Implementation of basic operations of read/write digits consuming + * as little space as possible. + * + * I HAVE QUESTIONS. Why 25 and 57 bits? What are the other 7 bits + * *doing*?!? Why didn't you stop to think about architectures with + * big-endian byte ordering or strictly aligned memory access when you + * wrote this? Does it really store floats BECAUSE NO IT DOESN'T + * + * Note that because of the problems noted above data is canonically + * stored in little-endian order in memory. + */ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. 
*/ +} +#endif + +/** + * Structure that specifies bits required to efficiently store certain data + */ +typedef struct bitarr_mask_s { + uint8 bits; + uint32 mask; +} bitarr_mask_t; + +/** + * Structure that stores address of certain value in bit array + */ +typedef struct bitarr_address_s { + void *base; + uint32 offset; +} bitarr_address_t; + +/** + * Read uint64 value from bit array. + * Assumes mask == (1 << length) - 1 where length <= 57 + * @param address to read from + * @param length number of bits for value + * @param mask of read value + * @return uint64 value that was read + */ +SPHINXBASE_EXPORT +uint64 bitarr_read_int57(bitarr_address_t address, uint8 length, uint64 mask); + +/** + * Write specified value into bit array. + * Assumes value < (1 << length) and length <= 57. + * Assumes the memory is zero initially. + * @param address to write to + * @param length amount of active bytes in value to write + * @param value integer to write + */ +SPHINXBASE_EXPORT +void bitarr_write_int57(bitarr_address_t address, uint8 length, uint64 value); + +/** + * Read uint32 value from bit array. + * Assumes mask == (1 << length) - 1 where length <= 25 + * @param address to read from + * @param length number of bits for value + * @param mask of read value + * @return uint32 value that was read + */ +SPHINXBASE_EXPORT +uint32 bitarr_read_int25(bitarr_address_t address, uint8 length, uint32 mask); + +/** + * Write specified value into bit array. + * Assumes value < (1 << length) and length <= 25. + * Assumes the memory is zero initially. 
+ * @param address in bit array to write to + * @param length amount of active bytes in value to write + * @param value integer to write + */ +SPHINXBASE_EXPORT +void bitarr_write_int25(bitarr_address_t address, uint8 length, uint32 value); + +/** + * Fills mask for certain int range according to provided max value + * @param bit_mask mask that is filled + * @param max_value biggest integer that is going to be stored using this mask + */ +SPHINXBASE_EXPORT +void bitarr_mask_from_max(bitarr_mask_t *bit_mask, uint32 max_value); + +/** + * Computes amount of bits required to store integers up to value provided. + * @param max_value biggest integer that is going to be stored using this amount of bits + * @return amount of bits required to store integers from range with maximum provided + */ +SPHINXBASE_EXPORT +uint8 bitarr_required_bits(uint32 max_value); + +#ifdef __cplusplus +} +#endif + +#endif /* _LIBUTIL_BITARR_H_ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/bitvec.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/bitvec.h new file mode 100644 index 0000000000000000000000000000000000000000..d5644df8986736969e3c7d5feee5977291fd630c --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/bitvec.h @@ -0,0 +1,155 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef _LIBUTIL_BITVEC_H_ +#define _LIBUTIL_BITVEC_H_ + +#include + +/* Win32/WinCE DLL gunk */ +#include + +#include +#include + +/** + * @file bitvec.h + * @brief An implementation of bit vectors. + * + * Implementation of basic operations of bit vectors. + */ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. 
*/ +} +#endif + +#define BITVEC_BITS 32 +typedef uint32 bitvec_t; + +/** + * Number of bitvec_t in a bit vector + */ +#define bitvec_size(n) (((n)+BITVEC_BITS-1)/BITVEC_BITS) + +/** + * Allocate a bit vector, all bits are clear + */ +#define bitvec_alloc(n) ckd_calloc(bitvec_size(n), sizeof(bitvec_t)) + +/** + * Resize a bit vector, clear the remaining bits + */ +SPHINXBASE_EXPORT +bitvec_t *bitvec_realloc(bitvec_t *vec, /* In: Bit vector to search */ + size_t old_len, /* In: Old length */ + size_t new_len); /* In: New lenght of above bit vector */ +/** + * Free a bit vector. + */ +#define bitvec_free(v) ckd_free(v) + +/** + * Set the b-th bit of bit vector v + * @param v is a vector + * @param b is the bit which will be set + */ + +#define bitvec_set(v,b) (v[(b)/BITVEC_BITS] |= (1UL << ((b) & (BITVEC_BITS-1)))) + +/** + * Set all n bits in bit vector v + * @param v is a vector + * @param n is the number of bits + */ + +#define bitvec_set_all(v,n) memset(v, (bitvec_t)-1, \ + (((n)+BITVEC_BITS-1)/BITVEC_BITS) * \ + sizeof(bitvec_t)) +/** + * Clear the b-th bit of bit vector v + * @param v is a vector + * @param b is the bit which will be set + */ + +#define bitvec_clear(v,b) (v[(b)/BITVEC_BITS] &= ~(1UL << ((b) & (BITVEC_BITS-1)))) + +/** + * Clear all n bits in bit vector v + * @param v is a vector + * @param n is the number of bits + */ + +#define bitvec_clear_all(v,n) memset(v, 0, (((n)+BITVEC_BITS-1)/BITVEC_BITS) * \ + sizeof(bitvec_t)) + +/** + * Check whether the b-th bit is set in vector v + * @param v is a vector + * @param b is the bit which will be checked + */ + +#define bitvec_is_set(v,b) (v[(b)/BITVEC_BITS] & (1UL << ((b) & (BITVEC_BITS-1)))) + +/** + * Check whether the b-th bit is cleared in vector v + * @param v is a vector + * @param b is the bit which will be checked + */ + +#define bitvec_is_clear(v,b) (! (bitvec_is_set(v,b))) + + +/** + * Return the number of bits set in the given bitvector. 
+ * + * @param vec is the bit vector + * @param len is the length of bit vector vec + * @return the number of bits being set in vector vec + */ +SPHINXBASE_EXPORT +size_t bitvec_count_set(bitvec_t *vec, /* In: Bit vector to search */ + size_t len); /* In: Lenght of above bit vector */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/byteorder.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/byteorder.h new file mode 100644 index 0000000000000000000000000000000000000000..692ce60a03aaa8cf3ec5a6cf3548c5b9c4297fb6 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/byteorder.h @@ -0,0 +1,98 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2001 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * byteorder.h -- Byte swapping ordering macros. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1996 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * + * $Log: byteorder.h,v $ + * Revision 1.8 2005/09/01 21:09:54 dhdfu + * Really, actually, truly consolidate byteswapping operations into + * byteorder.h. Where unconditional byteswapping is needed, SWAP_INT32() + * and SWAP_INT16() are to be used. The WORDS_BIGENDIAN macro from + * autoconf controls the functioning of the conditional swap macros + * (SWAP_?[LW]) whose names and semantics have been regularized. + * Private, adhoc macros have been removed. + * + */ + +#ifndef __S2_BYTEORDER_H__ +#define __S2_BYTEORDER_H__ 1 + +/* Macro to byteswap an int16 variable. x = ptr to variable */ +#define SWAP_INT16(x) *(x) = ((0x00ff & (*(x))>>8) | (0xff00 & (*(x))<<8)) + +/* Macro to byteswap an int32 variable. 
x = ptr to variable */ +#define SWAP_INT32(x) *(x) = ((0x000000ff & (*(x))>>24) | \ + (0x0000ff00 & (*(x))>>8) | \ + (0x00ff0000 & (*(x))<<8) | \ + (0xff000000 & (*(x))<<24)) + +/* Macro to byteswap a float32 variable. x = ptr to variable */ +#define SWAP_FLOAT32(x) SWAP_INT32((int32 *) x) + +/* Macro to byteswap a float64 variable. x = ptr to variable */ +#define SWAP_FLOAT64(x) { int *low = (int *) (x), *high = (int *) (x) + 1,\ + temp;\ + SWAP_INT32(low); SWAP_INT32(high);\ + temp = *low; *low = *high; *high = temp;} + +#ifdef WORDS_BIGENDIAN +#define SWAP_BE_64(x) +#define SWAP_BE_32(x) +#define SWAP_BE_16(x) +#define SWAP_LE_64(x) SWAP_FLOAT64(x) +#define SWAP_LE_32(x) SWAP_INT32(x) +#define SWAP_LE_16(x) SWAP_INT16(x) +#else +#define SWAP_LE_64(x) +#define SWAP_LE_32(x) +#define SWAP_LE_16(x) +#define SWAP_BE_64(x) SWAP_FLOAT64(x) +#define SWAP_BE_32(x) SWAP_INT32(x) +#define SWAP_BE_16(x) SWAP_INT16(x) +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/case.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/case.h new file mode 100644 index 0000000000000000000000000000000000000000..bd1f62e024cb0730cb9525659f4f3b0e1b38db06 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/case.h @@ -0,0 +1,135 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * case.h -- Upper/lower case conversion routines + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: case.h,v $ + * Revision 1.7 2005/06/22 02:58:54 arthchan2003 + * Added keyword + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 18-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Added strcmp_nocase, UPPER_CASE and LOWER_CASE definitions. 
+ * + * 16-Feb-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Created. + */ + + +/** + * @file case.h + * @brief Locale-independent implementation of case swapping operation. + * + * This file implements ASCII-only case switching and comparison + * related operations, which do not depend on the locale and are + * guaranteed to exist on all versions of Windows. + */ + +#ifndef _LIBUTIL_CASE_H_ +#define _LIBUTIL_CASE_H_ + +#include + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + + /** + * Return upper case form for c (ASCII only; c is expanded more than once, so it must have no side effects) + */ +#define UPPER_CASE(c) ((((c) >= 'a') && ((c) <= 'z')) ? ((c)-32) : (c)) + + /** + * Return lower case form for c (ASCII only; c is expanded more than once, so it must have no side effects) + */ +#define LOWER_CASE(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c)+32) : (c)) + + + /** + * Convert str to all upper case. + * @param str is a string. + */ +SPHINXBASE_EXPORT +void ucase(char *str); + + /** + * Convert str to all lower case + * @param str is a string. + */ +SPHINXBASE_EXPORT +void lcase(char *str); + + /** + * (FIXME! The implementation is incorrect!) + * Case insensitive string compare. Return the usual -1, 0, +1, depending on + * str1 <, =, > str2 (case insensitive, of course). + * @param str1 is the first string. + * @param str2 is the second string. + */ +SPHINXBASE_EXPORT +int32 strcmp_nocase(const char *str1, const char *str2); + +/** + * Like strcmp_nocase() but with a maximum length. 
+ */ +SPHINXBASE_EXPORT +int32 strncmp_nocase(const char *str1, const char *str2, size_t len); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/ckd_alloc.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/ckd_alloc.h new file mode 100644 index 0000000000000000000000000000000000000000..5bb60d499cc8a70939c23031e689f1ddf7c9a5ea --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/ckd_alloc.h @@ -0,0 +1,311 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * ckd_alloc.h -- Memory allocation package. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: ckd_alloc.h,v $ + * Revision 1.10 2005/06/22 02:59:25 arthchan2003 + * Added keyword + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 19-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Removed file,line arguments from free functions. + * + * 01-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created. + */ + + +/********************************************************************* + * + * $Header: /cvsroot/cmusphinx/sphinx3/src/libutil/ckd_alloc.h,v 1.10 2005/06/22 02:59:25 arthchan2003 Exp $ + * + * Carnegie Mellon ARPA Speech Group + * + * Copyright (c) 1994 Carnegie Mellon University. + * All rights reserved. 
+ * + ********************************************************************* + * + * file: ckd_alloc.h + * + * traceability: + * + * description: + * + * author: + * + *********************************************************************/ + + +#ifndef _LIBUTIL_CKD_ALLOC_H_ +#define _LIBUTIL_CKD_ALLOC_H_ + +#include +#include + +/* Win32/WinCE DLL gunk */ +#include +#include + +/** \file ckd_alloc.h + *\brief Sphinx's memory allocation/deallocation routines. + * + *Implementation of efficient memory allocation deallocation for + *multiple dimensional arrays. + * + */ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Control behaviour of the program when allocation fails. + * + * Although your program is probably toast when memory allocation + * fails, it is also probably a good idea to be able to catch these + * errors and alert the user in some way. Either that, or you might + * want the program to call abort() so that you can debug the failed + * code. This function allows you to control that behaviour. + * + * @param env Pointer to a jmp_buf initialized with + * setjmp(), or NULL to remove a previously set jump target. + * @param abort If non-zero, the program will call abort() when + * allocation fails rather than exiting or calling longjmp(). + * @return Pointer to a previously set jmp_buf, if any. + */ +SPHINXBASE_EXPORT +jmp_buf *ckd_set_jump(jmp_buf *env, int abort); + +/** + * Fail (with a message) according to behaviour specified by ckd_set_jump(). + */ +void ckd_fail(char *format, ...); + +/* + * The following functions are similar to the malloc family, except + * that they have two additional parameters, caller_file and + * caller_line, for error reporting. All functions print a diagnostic + * message if any error occurs, with any other behaviour determined by + * ckd_set_jump(), above. 
+ */ + +SPHINXBASE_EXPORT +void *__ckd_calloc__(size_t n_elem, size_t elem_size, + const char *caller_file, int caller_line); + +SPHINXBASE_EXPORT +void *__ckd_malloc__(size_t size, + const char *caller_file, int caller_line); + +SPHINXBASE_EXPORT +void *__ckd_realloc__(void *ptr, size_t new_size, + const char *caller_file, int caller_line); + +/** + * Like strdup, except that if an error occurs it prints a diagnostic message and + * exits. If origstr is NULL the function also returns NULL. + */ +SPHINXBASE_EXPORT +char *__ckd_salloc__(const char *origstr, + const char *caller_file, int caller_line); + +/** + * Allocate a 2-D array and return ptr to it (ie, ptr to vector of ptrs). + * The data area is allocated in one block so it can also be treated as a 1-D array. + */ +SPHINXBASE_EXPORT +void *__ckd_calloc_2d__(size_t d1, size_t d2, /* In: #elements in the 2 dimensions */ + size_t elemsize, /* In: Size (#bytes) of each element */ + const char *caller_file, int caller_line); /* In */ + +/** + * Allocate a 3-D array and return ptr to it. + * The data area is allocated in one block so it can also be treated as a 1-D array. + */ +SPHINXBASE_EXPORT +void *__ckd_calloc_3d__(size_t d1, size_t d2, size_t d3, /* In: #elems in the dims */ + size_t elemsize, /* In: Size (#bytes) per element */ + const char *caller_file, int caller_line); /* In */ + +/** + * Allocate a 4-D array and return ptr to it. + * The data area is allocated in one block so it can also be treated as a 1-D array. + */ +SPHINXBASE_EXPORT +void ****__ckd_calloc_4d__(size_t d1, + size_t d2, + size_t d3, + size_t d4, + size_t elem_size, + char *caller_file, + int caller_line); + +/** + * Overlay a 3-D array over a previously allocated storage area. + **/ +SPHINXBASE_EXPORT +void * __ckd_alloc_3d_ptr(size_t d1, + size_t d2, + size_t d3, + void *store, + size_t elem_size, + char *caller_file, + int caller_line); + +/** + * Overlay a 2-D array over a previously allocated storage area. 
+ **/ +SPHINXBASE_EXPORT +void *__ckd_alloc_2d_ptr(size_t d1, + size_t d2, + void *store, + size_t elem_size, + char *caller_file, + int caller_line); + +/** + * Test and free a 1-D array + */ +SPHINXBASE_EXPORT +void ckd_free(void *ptr); + +/** + * Free a 2-D array (ptr) previously allocated by ckd_calloc_2d + */ +SPHINXBASE_EXPORT +void ckd_free_2d(void *ptr); + +/** + * Free a 3-D array (ptr) previously allocated by ckd_calloc_3d + */ +SPHINXBASE_EXPORT +void ckd_free_3d(void *ptr); + +/** + * Free a 4-D array (ptr) previously allocated by ckd_calloc_4d + */ +SPHINXBASE_EXPORT +void ckd_free_4d(void *ptr); + +/** + * Macros to simplify the use of above functions. + * One should use these, rather than target functions directly. + */ + +/** + * Macro for __ckd_calloc__ + */ +#define ckd_calloc(n,sz) __ckd_calloc__((n),(sz),__FILE__,__LINE__) + +/** + * Macro for __ckd_malloc__ + */ +#define ckd_malloc(sz) __ckd_malloc__((sz),__FILE__,__LINE__) + +/** + * Macro for __ckd_realloc__ + */ +#define ckd_realloc(ptr,sz) __ckd_realloc__(ptr,(sz),__FILE__,__LINE__) + +/** + * Macro for __ckd_salloc__ + */ + +#define ckd_salloc(ptr) __ckd_salloc__(ptr,__FILE__,__LINE__) + +/** + * Macro for __ckd_calloc_2d__ + */ + +#define ckd_calloc_2d(d1,d2,sz) __ckd_calloc_2d__((d1),(d2),(sz),__FILE__,__LINE__) + +/** + * Macro for __ckd_calloc_3d__ + */ + +#define ckd_calloc_3d(d1,d2,d3,sz) __ckd_calloc_3d__((d1),(d2),(d3),(sz),__FILE__,__LINE__) + +/** + * Macro for __ckd_calloc_4d__ + */ +#define ckd_calloc_4d(d1, d2, d3, d4, s) __ckd_calloc_4d__((d1), (d2), (d3), (d4), (s), __FILE__, __LINE__) + +/** + * Macro for __ckd_alloc_2d_ptr__ + */ + +#define ckd_alloc_2d_ptr(d1, d2, bf, sz) __ckd_alloc_2d_ptr((d1), (d2), (bf), (sz), __FILE__, __LINE__) + +/** + * Free only the pointer arrays allocated with ckd_alloc_2d_ptr(). 
+ */ +#define ckd_free_2d_ptr(bf) ckd_free(bf) + +/** + * Macro for __ckd_alloc_3d_ptr__ + */ + +#define ckd_alloc_3d_ptr(d1, d2, d3, bf, sz) __ckd_alloc_3d_ptr((d1), (d2), (d3), (bf), (sz), __FILE__, __LINE__) + +/** + * Free only the pointer arrays allocated with ckd_alloc_3d_ptr(). + */ +#define ckd_free_3d_ptr(bf) ckd_free_2d(bf) + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/clapack_lite.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/clapack_lite.h new file mode 100644 index 0000000000000000000000000000000000000000..0f5a1f4bdcaca1f22487c0a47bed1d277d240e82 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/clapack_lite.h @@ -0,0 +1,36 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +#ifndef __CLAPACK_LITE_H +#define __CLAPACK_LITE_H + +#include "f2c.h" + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. 
*/ +} +#endif + +/* Subroutine */ int sgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, real *alpha, real *a, integer *lda, real *b, integer * + ldb, real *beta, real *c__, integer *ldc); +/* Subroutine */ int sgemv_(char *trans, integer *m, integer *n, real *alpha, + real *a, integer *lda, real *x, integer *incx, real *beta, real *y, + integer *incy); +/* Subroutine */ int ssymm_(char *side, char *uplo, integer *m, integer *n, + real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, + real *c__, integer *ldc); + +/* Subroutine */ int sposv_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *info); +/* Subroutine */ int spotrf_(char *uplo, integer *n, real *a, integer *lda, + integer *info); + +#ifdef __cplusplus +} +#endif + + +#endif /* __CLAPACK_LITE_H */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/cmd_ln.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/cmd_ln.h new file mode 100644 index 0000000000000000000000000000000000000000..cb371c55efd250349705526cb7a6fed08651c622 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/cmd_ln.h @@ -0,0 +1,447 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * cmd_ln.h -- Command line argument parsing. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * + * 15-Jul-1997 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added required arguments types. + * + * 07-Dec-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created, based on Eric's implementation. Basically, combined several + * functions into one, eliminated validation, and simplified the interface. 
+ */ + + +#ifndef _LIBUTIL_CMD_LN_H_ +#define _LIBUTIL_CMD_LN_H_ + +#include +#include + +/* Win32/WinCE DLL gunk */ +#include +#include +#include + +/** + * @file cmd_ln.h + * @brief Command-line and other configuration parsing and handling. + * + * Configuration parameters, optionally parsed from the command line. + */ + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * @struct arg_t + * Argument definition structure. + */ +typedef struct arg_s { + char const *name; /**< Name of the command line switch */ + int type; /**< Type of the argument in question */ + char const *deflt; /**< Default value (as a character string), or NULL if none */ + char const *doc; /**< Documentation/description string */ +} arg_t; + +/** + * @struct cmd_ln_val_t + * Configuration parameter structure. + */ +typedef struct cmd_ln_val_s { + anytype_t val; + int type; + char *name; +} cmd_ln_val_t; + +/** + * @name Values for arg_t::type + */ +/* @{ */ +/** + * Bit indicating a required argument. + */ +#define ARG_REQUIRED (1<<0) +/** + * Integer argument (optional). + */ +#define ARG_INTEGER (1<<1) +/** + * Floating point argument (optional). + */ +#define ARG_FLOATING (1<<2) +/** + * String argument (optional). + */ +#define ARG_STRING (1<<3) +/** + * Boolean (true/false) argument (optional). + */ +#define ARG_BOOLEAN (1<<4) +/** + * String array argument (optional). + */ +#define ARG_STRING_LIST (1<<5) + +/** + * Required integer argument. + */ +#define REQARG_INTEGER (ARG_INTEGER | ARG_REQUIRED) +/** + * Required floating point argument. + */ +#define REQARG_FLOATING (ARG_FLOATING | ARG_REQUIRED) +/** + * Required string argument. + */ +#define REQARG_STRING (ARG_STRING | ARG_REQUIRED) +/** + * Required boolean argument. + */ +#define REQARG_BOOLEAN (ARG_BOOLEAN | ARG_REQUIRED) + +/* @} */ + + +/** + * Helper macro to stringify enums and other non-string values for + * default arguments. 
+ **/ +#define ARG_STRINGIFY(s) ARG_STRINGIFY1(s) +#define ARG_STRINGIFY1(s) #s + +/** + * @struct cmd_ln_t + * Structure (no longer opaque) used to hold the results of command-line parsing. + */ +typedef struct cmd_ln_s { + int refcount; + hash_table_t *ht; + char **f_argv; + uint32 f_argc; + arg_t const *defn; +} cmd_ln_t; + +/** + * Create a cmd_ln_t from a NULL-terminated list of arguments. + * + * This function creates a cmd_ln_t from a NULL-terminated list of + * argument strings. For example, to create the equivalent of passing + * "-hmm foodir -dsratio 2 -lm bar.lm" on the command-line: + * + * config = cmd_ln_init(NULL, defs, TRUE, "-hmm", "foodir", "-dsratio", "2", + * "-lm", "bar.lm", NULL); + * + * Note that for simplicity, all arguments are passed + * as strings, regardless of the actual underlying type. + * + * @param inout_cmdln Previous command-line to update, or NULL to create a new one. + * @param defn Array of argument name definitions, or NULL to allow any arguments. + * @param strict Whether to fail on duplicate or unknown arguments. + * @return A cmd_ln_t* containing the results of command line parsing, or NULL on failure. + */ +SPHINXBASE_EXPORT +cmd_ln_t *cmd_ln_init(cmd_ln_t *inout_cmdln, arg_t const *defn, int32 strict, ...); + +/** + * Retain ownership of a command-line argument set. + * + * @return pointer to retained command-line argument set. + */ +SPHINXBASE_EXPORT +cmd_ln_t *cmd_ln_retain(cmd_ln_t *cmdln); + +/** + * Release a command-line argument set and all associated strings. + * + * @return new reference count (0 if freed completely) + */ +SPHINXBASE_EXPORT +int cmd_ln_free_r(cmd_ln_t *cmdln); + +/** + * Parse a list of strings into arguments. + * + * Parse the given list of arguments (name-value pairs) according to + * the given definitions. Argument values can be retrieved in future + * using cmd_ln_access_r(). argv[0] is assumed to be the program name + * and skipped. Any unknown argument name causes a fatal error. 
The + * routine also prints the prevailing argument values (to stderr) + * after parsing. + * + * @note It is currently assumed that the strings in argv are + * allocated statically, or at least that they will be valid as + * long as the cmd_ln_t returned from this function. + * Unpredictable behaviour will result if they are freed or + * otherwise become invalidated. + * + * @return A cmd_ln_t containing the results of command line parsing, + * or NULL on failure. + **/ +SPHINXBASE_EXPORT +cmd_ln_t *cmd_ln_parse_r(cmd_ln_t *inout_cmdln, /**< In/Out: Previous command-line to update, + or NULL to create a new one. */ + arg_t const *defn, /**< In: Array of argument name definitions */ + int32 argc, /**< In: Number of actual arguments */ + char *argv[], /**< In: Actual arguments */ + int32 strict /**< In: Fail on duplicate or unknown + arguments, or no arguments? */ + ); + +/** + * Parse an arguments file by delimiting on " \r\t\n" and putting each token + * into an argv[] for cmd_ln_parse_r(). + * + * @return A cmd_ln_t containing the results of command line parsing, or NULL on failure. + */ +SPHINXBASE_EXPORT +cmd_ln_t *cmd_ln_parse_file_r(cmd_ln_t *inout_cmdln, /**< In/Out: Previous command-line to update, + or NULL to create a new one. */ + arg_t const *defn, /**< In: Array of argument name definitions*/ + char const *filename,/**< In: A file that contains all + the arguments */ + int32 strict /**< In: Fail on duplicate or unknown + arguments, or no arguments? */ + ); + +/** + * Access the value and metadata for a configuration parameter. + * + * This structure is owned by the cmd_ln_t, assume that you must copy + * anything inside it, including strings, if you wish to retain it, + * and should never free it manually. + * + * @param cmdln Command-line object. + * @param name the command-line flag to retrieve. + * @return the value and metadata associated with name, or + * NULL if name does not exist. 
You must use + * cmd_ln_exists_r() to distinguish between cases where a + * value is legitimately NULL and where the corresponding flag + * is unknown. + */ +SPHINXBASE_EXPORT +cmd_ln_val_t *cmd_ln_access_r(cmd_ln_t *cmdln, char const *name); + +/** + * Access the type of a configuration parameter. + * + * This function is provided as a convenience for dynamically typed + * language bindings. + * + * @param cmdln Command-line object. + * @param name the command-line flag to retrieve. + * @return the type of the parameter (as a combination of the ARG_* + * bits), or 0 if no such parameter exists. + */ +SPHINXBASE_EXPORT +int cmd_ln_type_r(cmd_ln_t *cmdln, char const *name); + +/** + * Retrieve a string from a command-line object. + * + * The command-line object retains ownership of this string, so you + * should not attempt to free it manually. + * + * @param cmdln Command-line object. + * @param name the command-line flag to retrieve. + * @return the string value associated with name, or NULL if + * name does not exist. You must use + * cmd_ln_exists_r() to distinguish between cases where a + * value is legitimately NULL and where the corresponding flag + * is unknown. + */ +SPHINXBASE_EXPORT +char const *cmd_ln_str_r(cmd_ln_t *cmdln, char const *name); + +/** + * Retrieve an array of strings from a command-line object. + * + * The command-line object retains ownership of this array, so you + * should not attempt to free it manually. + * + * @param cmdln Command-line object. + * @param name the command-line flag to retrieve. + * @return the array of strings associated with name, or NULL if + * name does not exist. You must use + * cmd_ln_exists_r() to distinguish between cases where a + * value is legitimately NULL and where the corresponding flag + * is unknown. + */ +SPHINXBASE_EXPORT +char const **cmd_ln_str_list_r(cmd_ln_t *cmdln, char const *name); + +/** + * Retrieve an integer from a command-line object. + * + * @param cmdln Command-line object. 
+ * @param name the command-line flag to retrieve. + * @return the integer value associated with name, or 0 if + * name does not exist. You must use + * cmd_ln_exists_r() to distinguish between cases where a + * value is legitimately zero and where the corresponding flag + * is unknown. + */ +SPHINXBASE_EXPORT +long cmd_ln_int_r(cmd_ln_t *cmdln, char const *name); + +/** + * Retrieve a floating-point number from a command-line object. + * + * @param cmdln Command-line object. + * @param name the command-line flag to retrieve. + * @return the float value associated with name, or 0.0 if + * name does not exist. You must use + * cmd_ln_exists_r() to distinguish between cases where a + * value is legitimately zero and where the corresponding flag + * is unknown. + */ +SPHINXBASE_EXPORT +double cmd_ln_float_r(cmd_ln_t *cmdln, char const *name); + +/** + * Retrieve a boolean value from a command-line object. + */ +#define cmd_ln_boolean_r(c,n) (cmd_ln_int_r(c,n) != 0) + +/** + * Set a string in a command-line object. + * + * @param cmdln Command-line object. + * @param name The command-line flag to set. + * @param str String value to set. The command-line object does not + * retain ownership of this pointer. + */ +SPHINXBASE_EXPORT +void cmd_ln_set_str_r(cmd_ln_t *cmdln, char const *name, char const *str); + +/** + * Set a string in a command-line object even if it is not present in argument + * description. Useful for setting extra values computed from configuration, propagated + * to other parts. + * + * @param cmdln Command-line object. + * @param name The command-line flag to set. + * @param str String value to set. The command-line object does not + * retain ownership of this pointer. + */ +SPHINXBASE_EXPORT +void cmd_ln_set_str_extra_r(cmd_ln_t *cmdln, char const *name, char const *str); + +/** + * Set an integer in a command-line object. + * + * @param cmdln Command-line object. + * @param name The command-line flag to set. + * @param iv Integer value to set. 
+ */ +SPHINXBASE_EXPORT +void cmd_ln_set_int_r(cmd_ln_t *cmdln, char const *name, long iv); + +/** + * Set a floating-point number in a command-line object. + * + * @param cmdln Command-line object. + * @param name The command-line flag to set. + * @param fv Floating-point value to set. + */ +SPHINXBASE_EXPORT +void cmd_ln_set_float_r(cmd_ln_t *cmdln, char const *name, double fv); + +/** + * Set a boolean value in a command-line object. + */ +#define cmd_ln_set_boolean_r(c,n,b) (cmd_ln_set_int_r(c,n,(b)!=0)) + +/* + * Compatibility macros (casts are parenthesized so they apply to the whole argument or result) + */ +#define cmd_ln_int32_r(c,n) ((int32)cmd_ln_int_r(c,n)) +#define cmd_ln_float32_r(c,n) ((float32)cmd_ln_float_r(c,n)) +#define cmd_ln_float64_r(c,n) ((float64)cmd_ln_float_r(c,n)) +#define cmd_ln_set_int32_r(c,n,i) cmd_ln_set_int_r(c,n,i) +#define cmd_ln_set_float32_r(c,n,f) cmd_ln_set_float_r(c,n,(double)(f)) +#define cmd_ln_set_float64_r(c,n,f) cmd_ln_set_float_r(c,n,(double)(f)) + +/** + * Re-entrant version of cmd_ln_exists(). + * + * @return True if the command line argument exists (i.e. it + * was one of the arguments defined in the call to cmd_ln_parse_r()). + */ +SPHINXBASE_EXPORT +int cmd_ln_exists_r(cmd_ln_t *cmdln, char const *name); + +/** + * Print a help message listing the valid argument names, and the associated + * attributes as given in defn. + * + * @param cmdln command-line object + * @param defn array of argument name definitions. + */ +SPHINXBASE_EXPORT +void cmd_ln_log_help_r (cmd_ln_t *cmdln, const arg_t *defn); + +/** + * Print current configuration values and defaults. + * + * @param cmdln command-line object + * @param defn array of argument name definitions. 
+ */ +SPHINXBASE_EXPORT +void cmd_ln_log_values_r (cmd_ln_t *cmdln, const arg_t *defn); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/cmn.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/cmn.h new file mode 100644 index 0000000000000000000000000000000000000000..9dae45943104b18594c59c22a3422ca4274421a0 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/cmn.h @@ -0,0 +1,185 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * cmn.h -- Various forms of cepstral mean normalization + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log$ + * Revision 1.1 2006/04/05 20:27:30 dhdfu + * A Great Reorganzation of header files and executables + * + * Revision 1.13 2006/02/23 03:48:27 arthchan2003 + * Resolved conflict in cmn.h + * + * + * Revision 1.12 2006/02/22 23:43:55 arthchan2003 + * Merged from the branch SPHINX3_5_2_RCI_IRII_BRANCH: Put data structure into the cmn_t structure. + * + * Revision 1.11.4.2 2005/10/17 04:45:57 arthchan2003 + * Free stuffs in cmn and feat corectly. + * + * Revision 1.11.4.1 2005/07/05 06:25:08 arthchan2003 + * Fixed dox-doc. + * + * Revision 1.11 2005/06/21 19:28:00 arthchan2003 + * 1, Fixed doxygen documentation. 2, Added $ keyword. + * + * Revision 1.4 2005/06/13 04:02:56 archan + * Fixed most doxygen-style documentation under libs3decoder. + * + * Revision 1.3 2005/03/30 01:22:46 archan + * Fixed mistakes in last updates. 
Add + * + * + * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu) + * Added cmn_free() and moved *mean and *var out global space and named them cmn_mean and cmn_var + * + * 28-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Copied from previous version. + */ + + +#ifndef _S3_CMN_H_ +#define _S3_CMN_H_ + +/* Win32/WinCE DLL gunk */ +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** \file cmn.h + * \brief Apply Cepstral Mean Normalization (CMN) to the set of input mfc frames. + * + * By subtractingthe mean of the input from each frame. C0 is also included in this process. + * This function operates on an entire utterance at a time. Hence, the entire utterance + * must be available beforehand (batchmode). + */ + +/** + * Types of cepstral mean normalization to apply to the features. + */ +typedef enum cmn_type_e { + CMN_NONE = 0, + CMN_BATCH, + CMN_LIVE +} cmn_type_t; + +/** String representations of cmn_type_t values. */ +SPHINXBASE_EXPORT +extern const char *cmn_type_str[]; + +/** Convert string representation (from command-line) to cmn_type_t */ +SPHINXBASE_EXPORT +cmn_type_t cmn_type_from_str(const char *str); + +/** \struct cmn_t + * \brief wrapper of operation of the cepstral mean normalization. 
+ */ + +typedef struct { + mfcc_t *cmn_mean; /**< Current means */ + mfcc_t *cmn_var; /**< Stored cmn variance */ + mfcc_t *sum; /**< Accumulated cepstra for computing mean */ + int32 nframe; /**< Number of frames */ + int32 veclen; /**< Length of cepstral vector */ +} cmn_t; + +SPHINXBASE_EXPORT +cmn_t* cmn_init(int32 veclen); + +/** + * CMN for the whole sentence +*/ +SPHINXBASE_EXPORT +void cmn (cmn_t *cmn, /**< In/Out: cmn normalization, which contains the cmn_mean and cmn_var */ + mfcc_t **mfc, /**< In/Out: mfc[f] = mfc vector in frame f */ + int32 varnorm,/**< In: if not FALSE, variance normalize the input vectors + to have unit variance (along each dimension independently); + Irrelevant if no cmn is performed */ + int32 n_frame /**< In: Number of frames of mfc vectors */ + ); + +#define CMN_WIN_HWM 800 /* #frames after which window shifted */ +#define CMN_WIN 500 + +/** + * CMN for one block of data, using live mean + */ +SPHINXBASE_EXPORT +void cmn_live(cmn_t *cmn, /**< In/Out: cmn normalization, which contains + the cmn_mean and cmn_var */ + mfcc_t **incep, /**< In/Out: incep[f] = mfc vector in frame f */ + int32 varnorm, /**< This flag should always be 0 for live */ + int32 nfr /**< Number of incoming frames */ + ); + +/** + * Update live mean based on observed data + */ +SPHINXBASE_EXPORT +void cmn_live_update(cmn_t *cmn); + +/** + * Set the live mean. 
+ */ +SPHINXBASE_EXPORT +void cmn_live_set(cmn_t *cmn, mfcc_t const *vec); + +/* RAH, free previously allocated memory */ +SPHINXBASE_EXPORT +void cmn_free (cmn_t *cmn); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/err.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/err.h new file mode 100644 index 0000000000000000000000000000000000000000..9fd0efc43d5a4195718c23dccb44fb643062b456 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/err.h @@ -0,0 +1,221 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef _LIBUTIL_ERR_H_ +#define _LIBUTIL_ERR_H_ + +#include +#include +#include +#include + +/* Win32/WinCE DLL gunk */ +#include + +/** + * @file err.h + * @brief Implementation of logging routines. + * + * Logging, warning, debug and error message output functionality is provided in this file. + * Sphinxbase defines several levels of logging messages - INFO, WARNING, ERROR, FATAL. By + * default output goes to standard error output. + * + * Logging is implemented through macros. They take same arguments as printf: format string and + * values. By default source file name and source line are prepended to the message. Log output + * could be redirected to any file using err_set_logfp() and err_set_logfile() functions. To disable + * logging in your application, call err_set_logfp(NULL). + * + * It's possible to log multiline info messages, to do that you need to start message with + * E_INFO and output other lines with E_INFOCONT. + */ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +#define E_SYSCALL(stmt, ...) if (stmt) E_FATAL_SYSTEM(__VA_ARGS__); + +#define FILELINE __FILE__ , __LINE__ + +/** + * Exit with non-zero status after error message + */ +#define E_FATAL(...) 
\ + do { \ + err_msg(ERR_FATAL, FILELINE, __VA_ARGS__); \ + exit(EXIT_FAILURE); \ + } while (0) + +/** + * Print error text; Call perror(""); exit(EXIT_FAILURE); + */ +#define E_FATAL_SYSTEM(...) \ + do { \ + err_msg_system(ERR_FATAL, FILELINE, __VA_ARGS__); \ + exit(EXIT_FAILURE); \ + } while (0) + +/** + * Print error text; Call perror(""); + */ +#define E_ERROR_SYSTEM(...) err_msg_system(ERR_ERROR, FILELINE, __VA_ARGS__) + +/** + * Print error message to error log + */ +#define E_ERROR(...) err_msg(ERR_ERROR, FILELINE, __VA_ARGS__) + +/** + * Print warning message to error log + */ +#define E_WARN(...) err_msg(ERR_WARN, FILELINE, __VA_ARGS__) + +/** + * Print logging information to standard error stream + */ +#define E_INFO(...) err_msg(ERR_INFO, FILELINE, __VA_ARGS__) + +/** + * Continue printing the information to standard error stream + */ +#define E_INFOCONT(...) err_msg(ERR_INFO, NULL, 0, __VA_ARGS__) + +/** + * Print logging information without filename. + */ +#define E_INFO_NOFN(...) err_msg(ERR_INFO, NULL, 0, __VA_ARGS__) + +/** + * Debug is disabled by default + */ +#ifdef SPHINX_DEBUG +#define E_DEBUG(...) err_msg(ERR_DEBUG, NULL, 0, __VA_ARGS__) +#else +#define E_DEBUG(...) +#endif + +typedef enum err_e { + ERR_DEBUG, + ERR_INFO, + ERR_WARN, + ERR_ERROR, + ERR_FATAL, + ERR_MAX +} err_lvl_t; + +SPHINXBASE_EXPORT void +err_msg(err_lvl_t lvl, const char *path, long ln, const char *fmt, ...); + +SPHINXBASE_EXPORT void +err_msg_system(err_lvl_t lvl, const char *path, long ln, const char *fmt, ...); + +SPHINXBASE_EXPORT void +err_logfp_cb(void * user_data, err_lvl_t level, const char *fmt, ...); + +typedef void (*err_cb_f)(void* user_data, err_lvl_t, const char *, ...); + +/** + * Set minimum logging level. + * + * @param lvl Level below which messages will not be logged (note + * ERR_DEBUG messages are not logged unless compiled in debugging + * mode) + * @return previous log level. 
+ */ +SPHINXBASE_EXPORT +int err_set_loglevel(err_lvl_t lvl); + +/** + * Set minimum logging level from a string + * + * @param lvl Level below which messages will not be logged (note + * ERR_DEBUG messages are not logged unless compiled in debugging + * mode). A string corresponding to the names in enum err_e, but + * without the leading "ERR_" prefix. + * @return previous log level string, or NULL for invalid argument. + */ +SPHINXBASE_EXPORT +const char *err_set_loglevel_str(const char *lvl); + +/** + * Sets function to output error messages. Use it to redirect the logging + * to your application. By default the handler which dumps messages to + * stderr is set. + * + * @param callback callback to pass messages to + * @param user_data data to pass to callback + */ +SPHINXBASE_EXPORT +void err_set_callback(err_cb_f callback, void *user_data); + +/** + * Direct all logging to a given filehandle if default logfp callback is set. + * + * @param stream Filehandle to send log messages to, or NULL to disable logging. + */ +SPHINXBASE_EXPORT +void err_set_logfp(FILE *stream); + +/** + * Get the current logging filehandle. + * + * @return Current logging filehandle, NULL if logging is disabled. Initially + * it returns stderr + */ +SPHINXBASE_EXPORT +FILE *err_get_logfp(void); + +/** + * Append all log messages to a given file. + * + * Previous logging filehandle is closed (unless it was stdout or stderr). + * + * @param path File path to send log messages to + * @return 0 for success, <0 for failure (e.g. 
if file does not exist) + */ +SPHINXBASE_EXPORT +int err_set_logfile(const char *path); + +#ifdef __cplusplus +} +#endif + +#endif /* !_LIBUTIL_ERR_H_ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/f2c.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/f2c.h new file mode 100644 index 0000000000000000000000000000000000000000..a50d1c730f9aa48f5df258cbec203e94bccab5da --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/f2c.h @@ -0,0 +1,218 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* f2c.h -- Standard Fortran to C header file */ + +/** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." + + - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ + +#ifndef F2C_INCLUDE +#define F2C_INCLUDE + +typedef int integer; +typedef char *address; +typedef short int shortint; +typedef float real; +typedef double doublereal; +typedef struct { real r, i; } complex; +typedef struct { doublereal r, i; } doublecomplex; +typedef int logical; +typedef short int shortlogical; +typedef char logical1; +typedef char integer1; + +#define TRUE_ (1) +#define FALSE_ (0) + +/* Extern is for use with -E */ +#ifndef Extern +#define Extern extern +#endif + +/* I/O stuff */ + +#ifdef f2c_i2 +/* for -i2 */ +typedef short flag; +typedef short ftnlen; +typedef short ftnint; +#else +typedef int flag; +typedef int ftnlen; +typedef int ftnint; +#endif + +/*external read, write*/ +typedef struct +{ flag cierr; + ftnint ciunit; + flag ciend; + char *cifmt; + ftnint cirec; +} cilist; + +/*internal read, write*/ +typedef struct +{ flag icierr; + char *iciunit; + flag iciend; + char *icifmt; + ftnint icirlen; + ftnint icirnum; +} icilist; + +/*open*/ +typedef struct +{ flag oerr; + ftnint ounit; + char *ofnm; + ftnlen ofnmlen; + char *osta; + char *oacc; + char *ofm; + ftnint orl; + char *oblnk; +} olist; + 
+/*close*/ +typedef struct +{ flag cerr; + ftnint cunit; + char *csta; +} cllist; + +/*rewind, backspace, endfile*/ +typedef struct +{ flag aerr; + ftnint aunit; +} alist; + +/* inquire */ +typedef struct +{ flag inerr; + ftnint inunit; + char *infile; + ftnlen infilen; + ftnint *inex; /*parameters in standard's order*/ + ftnint *inopen; + ftnint *innum; + ftnint *innamed; + char *inname; + ftnlen innamlen; + char *inacc; + ftnlen inacclen; + char *inseq; + ftnlen inseqlen; + char *indir; + ftnlen indirlen; + char *infmt; + ftnlen infmtlen; + char *inform; + ftnint informlen; + char *inunf; + ftnlen inunflen; + ftnint *inrecl; + ftnint *innrec; + char *inblank; + ftnlen inblanklen; +} inlist; + +#define VOID void + +union Multitype { /* for multiple entry points */ + shortint h; + integer i; + real r; + doublereal d; + complex c; + doublecomplex z; + }; + +typedef union Multitype Multitype; + +typedef long Long; /* No longer used; formerly in Namelist */ + +struct Vardesc { /* for Namelist */ + char *name; + char *addr; + ftnlen *dims; + int type; + }; +typedef struct Vardesc Vardesc; + +struct Namelist { + char *name; + Vardesc **vars; + int nvars; + }; +typedef struct Namelist Namelist; + +#ifndef abs +#define abs(x) ((x) >= 0 ? (x) : -(x)) +#endif +#define dabs(x) (doublereal)abs(x) +#ifndef min +#define min(a,b) ((a) <= (b) ? (a) : (b)) +#endif +#ifndef max +#define max(a,b) ((a) >= (b) ? 
(a) : (b)) +#endif +#define dmin(a,b) (doublereal)min(a,b) +#define dmax(a,b) (doublereal)max(a,b) + +/* procedure parameter types for -A and -C++ */ + +#define F2C_proc_par_types 1 +#ifdef __cplusplus +typedef int /* Unknown procedure type */ (*U_fp)(...); +typedef shortint (*J_fp)(...); +typedef integer (*I_fp)(...); +typedef real (*R_fp)(...); +typedef doublereal (*D_fp)(...), (*E_fp)(...); +typedef /* Complex */ VOID (*C_fp)(...); +typedef /* Double Complex */ VOID (*Z_fp)(...); +typedef logical (*L_fp)(...); +typedef shortlogical (*K_fp)(...); +typedef /* Character */ VOID (*H_fp)(...); +typedef /* Subroutine */ int (*S_fp)(...); +#else +typedef int /* Unknown procedure type */ (*U_fp)(void); +typedef shortint (*J_fp)(void); +typedef integer (*I_fp)(void); +typedef real (*R_fp)(void); +typedef doublereal (*D_fp)(void), (*E_fp)(void); +typedef /* Complex */ VOID (*C_fp)(void); +typedef /* Double Complex */ VOID (*Z_fp)(void); +typedef logical (*L_fp)(void); +typedef shortlogical (*K_fp)(void); +typedef /* Character */ VOID (*H_fp)(void); +typedef /* Subroutine */ int (*S_fp)(void); +#endif +/* E_fp is for real functions when -R is not specified */ +typedef VOID C_f; /* complex function */ +typedef VOID H_f; /* character function */ +typedef VOID Z_f; /* double complex function */ +typedef doublereal E_f; /* real function with -R not specified */ + +/* undef any lower-case symbols that your C compiler predefines, e.g.: */ + +#ifndef Skip_f2c_Undefs +#undef cray +#undef gcos +#undef mc68010 +#undef mc68020 +#undef mips +#undef pdp11 +#undef sgi +#undef sparc +#undef sun +#undef sun2 +#undef sun3 +#undef sun4 +#undef u370 +#undef u3b +#undef u3b2 +#undef u3b5 +#undef unix +#undef vax +#endif +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/fe.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/fe.h new file mode 100644 index 
0000000000000000000000000000000000000000..ca03cef6d30d0b4d8489a1262128769218d3caa0 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/fe.h @@ -0,0 +1,561 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1996-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ + +/* + * fe.h + * + * $Log: fe.h,v $ + * Revision 1.11 2005/02/05 02:15:02 egouvea + * Removed fe_process(), never used + * + * Revision 1.10 2004/12/10 16:48:55 rkm + * Added continuous density acoustic model handling + * + * + */ + +#if defined(_WIN32) && !defined(GNUWINCE) +#define srand48(x) srand(x) +#define lrand48() rand() +#endif + +#ifndef _NEW_FE_H_ +#define _NEW_FE_H_ + +/* Win32/WinCE DLL gunk */ +#include + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +#ifdef WORDS_BIGENDIAN +#define NATIVE_ENDIAN "big" +#else +#define NATIVE_ENDIAN "little" +#endif + +/** Default number of samples per second. */ +#define DEFAULT_SAMPLING_RATE 16000 +/** Default number of frames per second. */ +#define DEFAULT_FRAME_RATE 100 +/** Default spacing between frame starts (equal to + * DEFAULT_SAMPLING_RATE/DEFAULT_FRAME_RATE) */ +#define DEFAULT_FRAME_SHIFT 160 +/** Default size of each frame (410 samples @ 16000Hz). */ +#define DEFAULT_WINDOW_LENGTH 0.025625 +/** Default number of FFT points. */ +#define DEFAULT_FFT_SIZE 512 +/** Default number of MFCC coefficients in output. */ +#define DEFAULT_NUM_CEPSTRA 13 +/** Default number of filter bands used to generate MFCCs. */ +#define DEFAULT_NUM_FILTERS 40 + +/** Default prespeech length */ +#define DEFAULT_PRE_SPEECH 20 +/** Default postspeech length */ +#define DEFAULT_POST_SPEECH 50 +/** Default startspeech length */ +#define DEFAULT_START_SPEECH 10 + +/** Default lower edge of mel filter bank. */ +#define DEFAULT_LOWER_FILT_FREQ 133.33334 +/** Default upper edge of mel filter bank. */ +#define DEFAULT_UPPER_FILT_FREQ 6855.4976 +/** Default pre-emphasis filter coefficient. */ +#define DEFAULT_PRE_EMPHASIS_ALPHA 0.97 +/** Default type of frequency warping to use for VTLN. */ +#define DEFAULT_WARP_TYPE "inverse_linear" +/** Default random number seed to use for dithering. 
*/ +#define SEED -1 + +#define waveform_to_cepstral_command_line_macro() \ + { "-logspec", \ + ARG_BOOLEAN, \ + "no", \ + "Write out logspectral files instead of cepstra" }, \ + \ + { "-smoothspec", \ + ARG_BOOLEAN, \ + "no", \ + "Write out cepstral-smoothed logspectral files" }, \ + \ + { "-transform", \ + ARG_STRING, \ + "legacy", \ + "Which type of transform to use to calculate cepstra (legacy, dct, or htk)" }, \ + \ + { "-alpha", \ + ARG_FLOATING, \ + ARG_STRINGIFY(DEFAULT_PRE_EMPHASIS_ALPHA), \ + "Preemphasis parameter" }, \ + \ + { "-samprate", \ + ARG_FLOATING, \ + ARG_STRINGIFY(DEFAULT_SAMPLING_RATE), \ + "Sampling rate" }, \ + \ + { "-frate", \ + ARG_INTEGER, \ + ARG_STRINGIFY(DEFAULT_FRAME_RATE), \ + "Frame rate" }, \ + \ + { "-wlen", \ + ARG_FLOATING, \ + ARG_STRINGIFY(DEFAULT_WINDOW_LENGTH), \ + "Hamming window length" }, \ + \ + { "-nfft", \ + ARG_INTEGER, \ + "0", \ + "Size of FFT, or 0 to set automatically (recommended)" }, \ + \ + { "-nfilt", \ + ARG_INTEGER, \ + ARG_STRINGIFY(DEFAULT_NUM_FILTERS), \ + "Number of filter banks" }, \ + \ + { "-lowerf", \ + ARG_FLOATING, \ + ARG_STRINGIFY(DEFAULT_LOWER_FILT_FREQ), \ + "Lower edge of filters" }, \ + \ + { "-upperf", \ + ARG_FLOATING, \ + ARG_STRINGIFY(DEFAULT_UPPER_FILT_FREQ), \ + "Upper edge of filters" }, \ + \ + { "-unit_area", \ + ARG_BOOLEAN, \ + "yes", \ + "Normalize mel filters to unit area" }, \ + \ + { "-round_filters", \ + ARG_BOOLEAN, \ + "yes", \ + "Round mel filter frequencies to DFT points" }, \ + \ + { "-ncep", \ + ARG_INTEGER, \ + ARG_STRINGIFY(DEFAULT_NUM_CEPSTRA), \ + "Number of cep coefficients" }, \ + \ + { "-doublebw", \ + ARG_BOOLEAN, \ + "no", \ + "Use double bandwidth filters (same center freq)" }, \ + \ + { "-lifter", \ + ARG_INTEGER, \ + "0", \ + "Length of sin-curve for liftering, or 0 for no liftering." 
}, \ + \ + { "-input_endian", \ + ARG_STRING, \ + NATIVE_ENDIAN, \ + "Endianness of input data, big or little, ignored if NIST or MS Wav" }, \ + \ + { "-warp_type", \ + ARG_STRING, \ + DEFAULT_WARP_TYPE, \ + "Warping function type (or shape)" }, \ + \ + { "-warp_params", \ + ARG_STRING, \ + NULL, \ + "Parameters defining the warping function" }, \ + \ + { "-dither", \ + ARG_BOOLEAN, \ + "no", \ + "Add 1/2-bit noise" }, \ + \ + { "-seed", \ + ARG_INTEGER, \ + ARG_STRINGIFY(SEED), \ + "Seed for random number generator; if less than zero, pick our own" }, \ + \ + { "-remove_dc", \ + ARG_BOOLEAN, \ + "no", \ + "Remove DC offset from each frame" }, \ + { "-remove_noise", \ + ARG_BOOLEAN, \ + "no", \ + "Remove noise using spectral subtraction" }, \ + { "-verbose", \ + ARG_BOOLEAN, \ + "no", \ + "Show input filenames" } + + +#ifdef FIXED_POINT +/** MFCC computation type. */ +typedef fixed32 mfcc_t; + +/** Convert a floating-point value to mfcc_t. */ +#define FLOAT2MFCC(x) FLOAT2FIX(x) +/** Convert a mfcc_t value to floating-point. */ +#define MFCC2FLOAT(x) FIX2FLOAT(x) +/** Multiply two mfcc_t values. */ +#define MFCCMUL(a,b) FIXMUL(a,b) +#define MFCCLN(x,in,out) FIXLN_ANY(x,in,out) +#else /* !FIXED_POINT */ + +/** MFCC computation type. */ +typedef float32 mfcc_t; +/** Convert a floating-point value to mfcc_t. */ +#define FLOAT2MFCC(x) (x) +/** Convert a mfcc_t value to floating-point. */ +#define MFCC2FLOAT(x) (x) +/** Multiply two mfcc_t values. */ +#define MFCCMUL(a,b) ((a)*(b)) +#define MFCCLN(x,in,out) log(x) +#endif /* !FIXED_POINT */ + +/** + * Structure for the front-end computation. + */ +typedef struct fe_s fe_t; + +/** + * Error codes returned by stuff. 
+ */ +enum fe_error_e { + FE_SUCCESS = 0, + FE_OUTPUT_FILE_SUCCESS = 0, + FE_CONTROL_FILE_ERROR = -1, + FE_START_ERROR = -2, + FE_UNKNOWN_SINGLE_OR_BATCH = -3, + FE_INPUT_FILE_OPEN_ERROR = -4, + FE_INPUT_FILE_READ_ERROR = -5, + FE_MEM_ALLOC_ERROR = -6, + FE_OUTPUT_FILE_WRITE_ERROR = -7, + FE_OUTPUT_FILE_OPEN_ERROR = -8, + FE_ZERO_ENERGY_ERROR = -9, + FE_INVALID_PARAM_ERROR = -10 +}; + +/** + * Get the default set of arguments for fe_init_auto_r(). + * + * @return Pointer to an argument structure which can be passed to + * cmd_ln_init() and friends to create argument structures for + * fe_init_auto_r(). + */ +SPHINXBASE_EXPORT +arg_t const *fe_get_args(void); + +/** + * Initialize a front-end object from a command-line parse. + * + * @param config Command-line object, as returned by cmd_ln_parse_r() + * or cmd_ln_parse_file(). Ownership is retained by the + * fe_t, so you may free this if you no longer need it. + * @return Newly created front-end object. + */ +SPHINXBASE_EXPORT +fe_t *fe_init_auto_r(cmd_ln_t *config); + +/** + * Retrieve the command-line object used to initialize this front-end. + * + * @return command-line object for this front-end. This pointer is + * owned by the fe_t, so you should not attempt to free it + * manually. + */ +SPHINXBASE_EXPORT +cmd_ln_t *fe_get_config(fe_t *fe); + +/** + * Start processing an utterance. + * @return 0 for success, <0 for error (see enum fe_error_e) + */ +SPHINXBASE_EXPORT +int fe_start_utt(fe_t *fe); + +/** + * Get the dimensionality of the output of this front-end object. + * + * This is guaranteed to be the number of values in one frame of + * output from fe_end_utt(), fe_process_frame(), and + * fe_process_frames(). It is usually the number of MFCC + * coefficients, but it might be the number of log-spectrum bins, if + * the -logspec or -smoothspec options to + * fe_init_auto_r() were true. + * + * @param fe Front-end object + * @return Dimensionality of front-end output. 
+ */ +SPHINXBASE_EXPORT +int fe_get_output_size(fe_t *fe); + +/** + * Get the dimensionality of the input to this front-end object. + * + * This function retrieves the number of input samples consumed by one + * frame of processing. To obtain one frame of output, you must have + * at least *out_frame_size samples. To obtain N + * frames of output, you must have at least (N-1) * + * *out_frame_shift + *out_frame_size input samples. + * + * @param fe Front-end object + * @param out_frame_shift Output: Number of samples between each frame start. + * @param out_frame_size Output: Number of samples in each frame. + */ +SPHINXBASE_EXPORT +void fe_get_input_size(fe_t *fe, int *out_frame_shift, + int *out_frame_size); + +/** + * Finish processing an utterance. + * + * This function also collects any remaining samples and calculates a + * final cepstral vector. If there are overflow samples remaining, it + * will pad with zeros to make a complete frame. + * + * @param fe Front-end object. + * @param out_cepvector Buffer to hold a residual cepstral vector, or NULL + * if you wish to ignore it. Must be large enough + * @param out_nframes Number of frames of residual cepstra created + * (either 0 or 1). + * @return 0 for success, <0 for error (see enum fe_error_e) + */ +SPHINXBASE_EXPORT +int fe_end_utt(fe_t *fe, mfcc_t *out_cepvector, int32 *out_nframes); + +/** + * Retain ownership of a front end object. + * + * @return pointer to the retained front end. + */ +SPHINXBASE_EXPORT +fe_t *fe_retain(fe_t *fe); + +/** + * Free the front end. + * + * Releases resources associated with the front-end object. + * + * @return new reference count (0 if freed completely) + */ +SPHINXBASE_EXPORT +int fe_free(fe_t *fe); + +/** + * Process one frame of samples. + * + * @param spch Speech samples (signed 16-bit linear PCM) + * @param nsamps Number of samples in spch + * @param out_cep Buffer which will receive one frame of features. 
+ * @return 0 for success, <0 for error (see enum fe_error_e) + */ +SPHINXBASE_EXPORT +int fe_process_frame(fe_t *fe, int16 const *spch, + int32 nsamps, mfcc_t *out_cep); + +/** + * Process a block of samples. + * + * This function generates up to *inout_nframes of + * features, or as many as can be generated from + * *inout_nsamps samples. + * + * On exit, the inout_spch, inout_nsamps, + * and inout_nframes parameters are updated to point to + * the remaining sample data, the number of remaining samples, and the + * number of frames processed, respectively. This allows you to call + * this repeatedly to process a large block of audio in small (say, + * 5-frame) chunks: + * + * int16 *bigbuf, *p; + * mfcc_t **cepstra; + * size_t nsamps; + * int32 nframes = 5; + * + * cepstra = (mfcc_t **) + * ckd_calloc_2d(nframes, fe_get_output_size(fe), sizeof(**cepstra)); + * p = bigbuf; + * while (nsamps) { + * nframes = 5; + * fe_process_frames(fe, &p, &nsamps, cepstra, &nframes); + * // Now do something with these frames... + * if (nframes) + * do_some_stuff(cepstra, nframes); + * } + * + * @param inout_spch Input: Pointer to pointer to speech samples + * (signed 16-bit linear PCM). + * Output: Pointer to remaining samples. + * @param inout_nsamps Input: Pointer to maximum number of samples to + * process. + * Output: Number of samples remaining in input buffer. + * @param buf_cep Two-dimensional buffer (allocated with + * ckd_calloc_2d()) which will receive frames of output + * data. If NULL, no actual processing will be done, + * and the maximum number of output frames which would + * be generated is returned in + * *inout_nframes. + * @param inout_nframes Input: Pointer to maximum number of frames to + * generate. + * Output: Number of frames actually generated. 
+ * @return 0 for success, <0 for failure (see enum fe_error_e) + */ +SPHINXBASE_EXPORT +int fe_process_frames(fe_t *fe, + int16 const **inout_spch, + size_t *inout_nsamps, + mfcc_t **buf_cep, + int32 *inout_nframes); + +/** + * Process a block of samples, returning as many frames as possible. + * + * This function processes all the samples in a block of data and + * returns a newly allocated block of feature vectors. This block + * needs to be freed with fe_free_2d() after use. + * + * It is possible for there to be some left-over data which could not + * fit in a complete frame. This data can be processed with + * fe_end_utt(). + * + * This function is deprecated in favor of fe_process_frames(). + * + * @return 0 for success, <0 for failure (see enum fe_error_e) + */ +SPHINXBASE_EXPORT +int fe_process_utt(fe_t *fe, /**< A front end object */ + int16 const *spch, /**< The speech samples */ + size_t nsamps, /**< number of samples*/ + mfcc_t ***cep_block, /**< Output pointer to cepstra */ + int32 *nframes /**< Number of frames processed */ + ); + +/** + * Free the output pointer returned by fe_process_utt(). + **/ +SPHINXBASE_EXPORT +void fe_free_2d(void *arr); + +/** + * Convert a block of mfcc_t to float32 (can be done in-place) + **/ +SPHINXBASE_EXPORT +int fe_mfcc_to_float(fe_t *fe, + mfcc_t **input, + float32 **output, + int32 nframes); + +/** + * Convert a block of float32 to mfcc_t (can be done in-place) + **/ +SPHINXBASE_EXPORT +int fe_float_to_mfcc(fe_t *fe, + float32 **input, + mfcc_t **output, + int32 nframes); + +/** + * Process one frame of log spectra into MFCC using discrete cosine + * transform. + * + * This uses a variant of the DCT-II where the first frequency bin is + * scaled by 0.5. 
Unless somebody misunderstood the DCT-III equations + * and thought that's what they were implementing here, this is + * ostensibly done to account for the symmetry properties of the + * DCT-II versus the DFT - the first coefficient of the input is + * assumed to be repeated in the negative frequencies, which is not + * the case for the DFT. (This begs the question, why not just use + * the DCT-I, since it has the appropriate symmetry properties...) + * Moreover, this is bogus since the mel-frequency bins on which we + * are doing the DCT don't extend to the edge of the DFT anyway. + * + * This also means that the matrix used in computing this DCT can not + * be made orthogonal, and thus inverting the transform is difficult. + * Therefore if you want to do cepstral smoothing or have some other + * reason to invert your MFCCs, use fe_logspec_dct2() and its inverse + * fe_logspec_dct3() instead. + * + * Also, it normalizes by 1/nfilt rather than 2/nfilt, for some reason. + **/ +SPHINXBASE_EXPORT +int fe_logspec_to_mfcc(fe_t *fe, /**< A fe structure */ + const mfcc_t *fr_spec, /**< One frame of spectrum */ + mfcc_t *fr_cep /**< One frame of cepstrum */ + ); + +/** + * Convert log spectra to MFCC using DCT-II. + * + * This uses the "unitary" form of the DCT-II, i.e. with a scaling + * factor of sqrt(2/N) and a "beta" factor of sqrt(1/2) applied to the + * cos(0) basis vector (i.e. the one corresponding to the DC + * coefficient in the output). + **/ +SPHINXBASE_EXPORT +int fe_logspec_dct2(fe_t *fe, /**< A fe structure */ + const mfcc_t *fr_spec, /**< One frame of spectrum */ + mfcc_t *fr_cep /**< One frame of cepstrum */ + ); + +/** + * Convert MFCC to log spectra using DCT-III. + * + * This uses the "unitary" form of the DCT-III, i.e. with a scaling + * factor of sqrt(2/N) and a "beta" factor of sqrt(1/2) applied to the + * cos(0) basis vector (i.e. the one corresponding to the DC + * coefficient in the input). 
+ **/ +SPHINXBASE_EXPORT +int fe_mfcc_dct3(fe_t *fe, /**< A fe structure */ + const mfcc_t *fr_cep, /**< One frame of cepstrum */ + mfcc_t *fr_spec /**< One frame of spectrum */ + ); + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/feat.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/feat.h new file mode 100644 index 0000000000000000000000000000000000000000..421852de6f972662a0f42c9c941694c0bd24f10d --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/feat.h @@ -0,0 +1,469 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * feat.h -- Cepstral features computation. + */ + +#ifndef _S3_FEAT_H_ +#define _S3_FEAT_H_ + +#include + +/* Win32/WinCE DLL gunk */ +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** \file feat.h + * \brief compute the dynamic coefficients from the cepstral vector. + */ +#define LIVEBUFBLOCKSIZE 256 /** Blocks of 256 vectors allocated + for livemode decoder */ +#define S3_MAX_FRAMES 15000 /* RAH, I believe this is still too large, but better than before */ + +#define cepstral_to_feature_command_line_macro() \ +{ "-feat", \ + ARG_STRING, \ + "1s_c_d_dd", \ + "Feature stream type, depends on the acoustic model" }, \ +{ "-ceplen", \ + ARG_INTEGER, \ + "13", \ + "Number of components in the input feature vector" }, \ +{ "-cmn", \ + ARG_STRING, \ + "live", \ + "Cepstral mean normalization scheme ('live', 'batch', or 'none')" }, \ +{ "-cmninit", \ + ARG_STRING, \ + "40,3,-1", \ + "Initial values (comma-separated) for cepstral mean when 'live' is used" }, \ +{ "-varnorm", \ + ARG_BOOLEAN, \ + "no", \ + "Variance normalize each utterance (only if CMN == current)" }, \ +{ "-agc", \ + ARG_STRING, \ + "none", \ + "Automatic gain control for c0 ('max', 'emax', 'noise', or 'none')" }, \ +{ "-agcthresh", \ + ARG_FLOATING, \ + "2.0", \ + "Initial threshold for 
automatic gain control" }, \ +{ "-lda", \ + ARG_STRING, \ + NULL, \ + "File containing transformation matrix to be applied to features (single-stream features only)" }, \ +{ "-ldadim", \ + ARG_INTEGER, \ + "0", \ + "Dimensionality of output of feature transformation (0 to use entire matrix)" }, \ +{"-svspec", \ + ARG_STRING, \ + NULL, \ + "Subvector specification (e.g., 24,0-11/25,12-23/26-38 or 0-12/13-25/26-38)"} + +/** + * \struct feat_t + * \brief Structure for describing a speech feature type + * Structure for describing a speech feature type (no. of streams and stream widths), + * as well as the computation for converting the input speech (e.g., Sphinx-II format + * MFC cepstra) into this type of feature vectors. + */ +typedef struct feat_s { + int refcount; /**< Reference count. */ + char *name; /**< Printable name for this feature type */ + int32 cepsize; /**< Size of input speech vector (typically, a cepstrum vector) */ + int32 n_stream; /**< Number of feature streams; e.g., 4 in Sphinx-II */ + uint32 *stream_len; /**< Vector length of each feature stream */ + int32 window_size; /**< Number of extra frames around given input frame needed to compute + corresponding output feature (so total = window_size*2 + 1) */ + int32 n_sv; /**< Number of subvectors */ + uint32 *sv_len; /**< Vector length of each subvector */ + int32 **subvecs; /**< Subvector specification (or NULL for none) */ + mfcc_t *sv_buf; /**< Temporary copy buffer for subvector projection */ + int32 sv_dim; /**< Total dimensionality of subvector (length of sv_buf) */ + + cmn_type_t cmn; /**< Type of CMN to be performed on each utterance */ + int32 varnorm; /**< Whether variance normalization is to be performed on each utt; + Irrelevant if no CMN is performed */ + agc_type_t agc; /**< Type of AGC to be performed on each utterance */ + + /** + * Feature computation function. 
+ * @param fcb the feat_t describing this feature type + * @param input pointer into the input cepstra + * @param feat a 2-d array of output features (n_stream x stream_len) + * @return nothing; the function pointer type is declared void. + * + * Function for converting window of input speech vector + * (input[-window_size..window_size]) to output feature vector + * (feat[stream][]). If NULL, no conversion available, the + * speech input must be feature vector itself. + **/ + void (*compute_feat)(struct feat_s *fcb, mfcc_t **input, mfcc_t **feat); + cmn_t *cmn_struct; /**< Structure that stores the temporary variables for cepstral + means normalization*/ + agc_t *agc_struct; /**< Structure that stores the temporary variables for automatic + gain control*/ + + mfcc_t **cepbuf; /**< Circular buffer of MFCC frames for live feature computation. */ + mfcc_t **tmpcepbuf; /**< Array of pointers into cepbuf to handle border cases. */ + int32 bufpos; /**< Write index in cepbuf. */ + int32 curpos; /**< Read index in cepbuf. */ + + mfcc_t ***lda; /**< Array of linear transformations (for LDA, MLLT, or whatever) */ + uint32 n_lda; /**< Number of linear transformations in lda. */ + uint32 out_dim; /**< Output dimensionality */ +} feat_t; + +/** + * Name of feature type. + */ +#define feat_name(f) ((f)->name) +/** + * Input dimensionality of feature. + */ +#define feat_cepsize(f) ((f)->cepsize) +/** + * Size of dynamic feature window. + */ +#define feat_window_size(f) ((f)->window_size) +/** + * Number of feature streams. + * + * @deprecated Do not use this, use feat_dimension1() instead. + */ +#define feat_n_stream(f) ((f)->n_stream) +/** + * Length of feature stream i. + * + * @deprecated Do not use this, use feat_dimension2() instead. + */ +#define feat_stream_len(f,i) ((f)->stream_len[i]) +/** + * Number of streams or subvectors in feature output. + * + * Evaluates to n_sv when it is non-zero, otherwise to n_stream. + * The argument is parenthesized at every use so that expressions + * such as feat_dimension1(p + 1) expand correctly. + */ +#define feat_dimension1(f) ((f)->n_sv ? (f)->n_sv : (f)->n_stream) +/** + * Dimensionality of stream/subvector i in feature output.
+ */ +#define feat_dimension2(f,i) ((f)->lda ? (f)->out_dim : ((f)->sv_len ? (f)->sv_len[i] : f->stream_len[i])) +/** + * Total dimensionality of feature output. + */ +#define feat_dimension(f) ((f)->out_dim) +/** + * Array with stream/subvector lengths + */ +#define feat_stream_lengths(f) ((f)->lda ? (&(f)->out_dim) : (f)->sv_len ? (f)->sv_len : f->stream_len) + +/** + * Parse subvector specification string. + * + * Format of specification: + * \li '/' separated list of subvectors + * \li each subvector is a ',' separated list of subranges + * \li each subrange is a single \verbatim \endverbatim or + * \verbatim - \endverbatim (inclusive), where + * \verbatim \endverbatim is a feature vector dimension + * specifier. + * + * E.g., "24,0-11/25,12-23/26,27-38" has: + * \li 3 subvectors + * \li the 1st subvector has feature dims: 24, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, and 11. + * \li etc. + * + * @param str subvector specification string. + * @return allocated 2-D array of subvector specs (free with + * subvecs_free()). If there are N subvectors specified, subvec[N] = + * NULL; and each subvec[0]..subvec[N-1] is -1 terminated vector of + * feature dims. + */ +SPHINXBASE_EXPORT +int32 **parse_subvecs(char const *str); + +/** + * Free array of subvector specs. + */ +SPHINXBASE_EXPORT +void subvecs_free(int32 **subvecs); + + +/** + * Allocate an array to hold several frames worth of feature vectors. The returned value + * is the mfcc_t ***data array, organized as follows: + * + * - data[0][0] = frame 0 stream 0 vector, data[0][1] = frame 0 stream 1 vector, ... + * - data[1][0] = frame 1 stream 0 vector, data[0][1] = frame 1 stream 1 vector, ... + * - data[2][0] = frame 2 stream 0 vector, data[0][1] = frame 2 stream 1 vector, ... + * - ... + * + * NOTE: For I/O convenience, the entire data area is allocated as one contiguous block. + * @return pointer to the allocated space if successful, NULL if any error. 
+ */ +SPHINXBASE_EXPORT +mfcc_t ***feat_array_alloc(feat_t *fcb, /**< In: Descriptor from feat_init(), used + to obtain number of streams and stream sizes */ + int32 nfr /**< In: Number of frames for which to allocate */ + ); + +/** + * Reallocate the array of features. Requires us to know the old size. + */ +SPHINXBASE_EXPORT +mfcc_t ***feat_array_realloc(feat_t *fcb, /**< In: Descriptor from feat_init(), used + to obtain number of streams and stream sizes */ + mfcc_t ***old_feat, /**< Feature array. Freed */ + int32 ofr, /**< In: Previous number of frames */ + int32 nfr /**< In: Number of frames for which to allocate */ + ); + +/** + * Free a buffer allocated with feat_array_alloc() + */ +SPHINXBASE_EXPORT +void feat_array_free(mfcc_t ***feat); + + +/** + * Initialize feature module to use the selected type of feature stream. + * One-time only initialization at the beginning of the program. Input type + * is a string defining the kind of input->feature conversion desired: + * + * - "s2_4x": s2mfc->Sphinx-II 4-feature stream, + * - "1s_c_d_dd": s2mfc->Sphinx 3.x single feature stream, + * - "s3_1x39": s2mfc->Sphinx 3.0 single feature stream, + * - "n1,n2,n3,...": Explicit feature vector layout spec. with comma-separated + * feature stream lengths. In this case, the input data is already in the + * feature format and there is no conversion necessary. + * + * @return (feat_t *) descriptor if successful, NULL if error. Caller + * must not directly modify the contents of the returned value.
+ */ +SPHINXBASE_EXPORT +feat_t *feat_init(char const *type,/**< In: Type of feature stream */ + cmn_type_t cmn, /**< In: Type of cepstral mean normalization to + be done before feature computation; can be + CMN_NONE (for none) */ + int32 varnorm, /**< In: (boolean) Whether variance + normalization done on each utt; only + applicable if CMN also done */ + agc_type_t agc, /**< In: Type of automatic gain control to be + done before feature computation */ + int32 breport, /**< In: Whether to show a report for feat_t */ + int32 cepsize /**< In: Number of components in the input vector + (or 0 for the default for this feature type, + which is usually 13) */ + ); + +/** + * Add an LDA transformation to the feature module from a file. + * @return 0 for success or -1 if reading the LDA file failed. + **/ +SPHINXBASE_EXPORT +int32 feat_read_lda(feat_t *feat, /**< In: Descriptor from feat_init() */ + const char *ldafile, /**< In: File to read the LDA matrix from. */ + int32 dim /**< In: Dimensionality of LDA output. */ + ); + +/** + * Transform a block of features using the feature module's LDA transform. + **/ +SPHINXBASE_EXPORT +void feat_lda_transform(feat_t *fcb, /**< In: Descriptor from feat_init() */ + mfcc_t ***inout_feat, /**< Feature block to transform. */ + uint32 nfr /**< In: Number of frames in inout_feat. */ + ); + +/** + * Add a subvector specification to the feature module. + * + * The subvector splitting will be performed after dynamic feature + * computation, CMN, AGC, and any LDA transformation. The number of + * streams in the dynamic feature type must be one, as with LDA. + * + * After adding a subvector specification, the output of feature + * computation will be split into multiple subvectors, and + * feat_array_alloc() will allocate pointers accordingly. The number + * of streams will remain the + * + * @param fcb the feature descriptor. + * @param subvecs subvector specification. This pointer is retained + * by the feat_t and should not be freed manually.
+ * @return 0 for success or -1 if the subvector specification was + * invalid. + */ +SPHINXBASE_EXPORT +int feat_set_subvecs(feat_t *fcb, int32 **subvecs); + +/** + * Print the given block of feature vectors to the given FILE. + */ +SPHINXBASE_EXPORT +void feat_print(feat_t *fcb, /**< In: Descriptor from feat_init() */ + mfcc_t ***feat, /**< In: Feature data to be printed */ + int32 nfr, /**< In: Number of frames of feature data above */ + FILE *fp /**< In: Output file pointer */ + ); + + +/** + * Read a specified MFC file (or given segment within it), perform + * CMN/AGC as indicated by fcb, and compute feature + * vectors. Feature vectors are computed for the entire segment + * specified, by including additional surrounding or padding frames to + * accommodate the feature windows. + * + * @return Number of frames of feature vectors computed if successful; + * -1 if any error. If feat is NULL, then no actual + * computation will be done, and the number of frames which must be + * allocated will be returned. + * + * A note on how the file path is constructed: If the control file + * already specifies extension or absolute path, then these are not + * applied. The default extension is defined by the application. + */ +SPHINXBASE_EXPORT +int32 feat_s2mfc2feat(feat_t *fcb, /**< In: Descriptor from feat_init() */ + const char *file, /**< In: File to be read */ + const char *dir, /**< In: Directory prefix for file, + if needed; can be NULL */ + const char *cepext,/**< In: Extension of the + cepstrum file. It cannot be + NULL */ + int32 sf, int32 ef, /**< In: Start/End frames + within file to be read. Use + 0,-1 to process entire + file */ + mfcc_t ***feat, /**< Out: Computed feature vectors; + caller must allocate this space */ + int32 maxfr /**< In: Available space (number of frames) in + above feat array; it must be + sufficient to hold the result. + Pass -1 for no limit. */ + ); + + +/** + * Feature computation routine for live mode decoder.
+ * + * This function computes features for blocks of incoming data. It + * retains an internal buffer for computing deltas, which means that + * the number of output frames will not necessarily equal the number + * of input frames. + * + * It is very important to realize that the number of + * output frames can be greater than the number of + * input frames, specifically when endutt is true. It is + * guaranteed to never exceed *inout_ncep + + * feat_window_size(fcb). You MUST have + * allocated at least that many frames in ofeat, or you + * will experience a buffer overflow. + * + * If beginutt and endutt are both true, CMN_CURRENT and AGC_MAX will + * be done. Otherwise only CMN_PRIOR and AGC_EMAX will be done. + * + * If beginutt is false, endutt is true, and the number of input + * frames exceeds the input size, then end-of-utterance processing + * won't actually be done. This condition can easily be checked, + * because *inout_ncep will equal the return value on + * exit, and will also be smaller than the value of + * *inout_ncep on entry. + * + * @return The number of output frames actually computed. + **/ +SPHINXBASE_EXPORT +int32 feat_s2mfc2feat_live(feat_t *fcb, /**< In: Descriptor from feat_init() */ + mfcc_t **uttcep, /**< In: Incoming cepstral buffer */ + int32 *inout_ncep,/**< In: Size of incoming buffer. + Out: Number of incoming frames consumed. */ + int32 beginutt, /**< In: Beginning of utterance flag */ + int32 endutt, /**< In: End of utterance flag */ + mfcc_t ***ofeat /**< In: Output feature buffer. See + VERY IMPORTANT note + about the size of this buffer above. */ + ); + + +/** + * Update the normalization stats, possibly at the end of utterance + * + */ +SPHINXBASE_EXPORT +void feat_update_stats(feat_t *fcb); + + +/** + * Retain ownership of feat_t. + * + * @return pointer to retained feat_t.
+ */ +SPHINXBASE_EXPORT +feat_t *feat_retain(feat_t *f); + +/** + * Release resource associated with feat_t + * + * @return new reference count (0 if freed) + */ +SPHINXBASE_EXPORT +int feat_free(feat_t *f /**< In: feat_t */ + ); + +/** + * Report the feat_t data structure + */ +SPHINXBASE_EXPORT +void feat_report(feat_t *f /**< In: feat_t */ + ); +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/filename.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/filename.h new file mode 100644 index 0000000000000000000000000000000000000000..b69dcde57332ab3a70581ab8b6dadf6140bb0755 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/filename.h @@ -0,0 +1,112 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * filename.h -- File and path name operations. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: filename.h,v $ + * Revision 1.7 2005/06/22 03:01:07 arthchan2003 + * Added keyword + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 30-Oct-1997 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. + * Started. + */ + + +#ifndef _LIBUTIL_FILENAME_H_ +#define _LIBUTIL_FILENAME_H_ + +/* Win32/WinCE DLL gunk */ +#include +#include + +/**\file filename.h + *\brief File names related operation + */ +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Returns the last part of the path, without modifying anything in memory. 
+ */ +SPHINXBASE_EXPORT +const char *path2basename(const char *path); + +/** + * Strip off filename from the given path and copy the directory name into dir + * Caller must have allocated dir (hint: it's always shorter than path). + */ +SPHINXBASE_EXPORT +void path2dirname(const char *path, char *dir); + + +/** + * Strip off the smallest trailing file-extension suffix and copy + * the rest into the given root argument. Caller must have + * allocated root. + */ +SPHINXBASE_EXPORT +void strip_fileext(const char *file, char *root); + +/** + * Test whether a pathname is absolute for the current OS. + */ +SPHINXBASE_EXPORT +int path_is_absolute(const char *file); + +#ifdef __cplusplus +} +#endif + + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/fixpoint.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/fixpoint.h new file mode 100644 index 0000000000000000000000000000000000000000..8f8f0ad621c3f9e8f6ee53ddb43d0c4f1bf460eb --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/fixpoint.h @@ -0,0 +1,145 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2005 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== */ + +/* Fixed-point arithmetic macros. + * + * Author: David Huggins-Daines + */ + +#ifndef _FIXPOINT_H_ +#define _FIXPOINT_H_ + +#include + +/* Win32/WinCE DLL gunk */ +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +#ifndef DEFAULT_RADIX +#define DEFAULT_RADIX 12 +#endif + +/** Fixed-point computation type. */ +typedef int32 fixed32; + +/** Convert floating point to fixed point. */ +#define FLOAT2FIX_ANY(x,radix) \ + (((x)<0.0) ? \ + ((fixed32)((x)*(float32)(1<<(radix)) - 0.5)) \ + : ((fixed32)((x)*(float32)(1<<(radix)) + 0.5))) +#define FLOAT2FIX(x) FLOAT2FIX_ANY(x,DEFAULT_RADIX) +/** Convert fixed point to floating point. */ +#define FIX2FLOAT_ANY(x,radix) ((float32)(x)/(1<<(radix))) +#define FIX2FLOAT(x) FIX2FLOAT_ANY(x,DEFAULT_RADIX) + +/** + * Multiply two fixed point numbers with an arbitrary radix point. 
+ * + * A veritable multiplicity of implementations exist, starting with + * the fastest ones... + * + * In every variant the macro arguments are parenthesized at each use, + * so expression arguments (e.g. FIXMUL_ANY(x + y, z, r + 1)) expand + * with the intended precedence. + */ + +#if defined(__arm__) && !defined(__thumb__) +/* + * This works on most modern ARMs but *only* in ARM mode (for obvious + * reasons), so don't use it in Thumb mode (but why are you building + * signal processing code in Thumb mode?!) + */ +#define FIXMUL(a,b) FIXMUL_ANY(a,b,DEFAULT_RADIX) +#define FIXMUL_ANY(a,b,r) ({ \ + int cl, ch, _a = (a), _b = (b); \ + __asm__ ("smull %0, %1, %2, %3\n" \ + "mov %0, %0, lsr %4\n" \ + "orr %0, %0, %1, lsl %5\n" \ + : "=&r" (cl), "=&r" (ch) \ + : "r" (_a), "r" (_b), "i" (r), "i" (32-(r)));\ + cl; }) + +#elif defined(_MSC_VER) || (defined(HAVE_LONG_LONG) && SIZEOF_LONG_LONG == 8) +/* Standard systems*/ +#define FIXMUL(a,b) FIXMUL_ANY(a,b,DEFAULT_RADIX) +#define FIXMUL_ANY(a,b,radix) ((fixed32)(((int64)(a)*(b))>>(radix))) + +#else +/* Most general case where 'long long' doesn't exist or is slow. */ +#define FIXMUL(a,b) FIXMUL_ANY(a,b,DEFAULT_RADIX) +#define FIXMUL_ANY(a,b,radix) ({ \ + int32 _ah, _bh; \ + uint32 _al, _bl, _t, c; \ + _ah = ((int32)(a)) >> 16; \ + _bh = ((int32)(b)) >> 16; \ + _al = ((uint32)(a)) & 0xffff; \ + _bl = ((uint32)(b)) & 0xffff; \ + _t = _ah * _bl + _al * _bh; \ + c = (fixed32)(((_al * _bl) >> (radix)) \ + + ((_ah * _bh) << (32 - (radix))) \ + + ((radix) > 16 ? (_t >> ((radix) - 16)) : (_t << (16 - (radix))))); \ + c;}) +#endif + +/* Various fixed-point logarithmic functions that we need. */ +/** Minimum value representable in log format. */ +#define MIN_FIXLOG -2829416 /* log(1e-300) * (1< +#include + +/* SphinxBase headers. */ +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/* + * A single transition in the FSG.
+ */ +typedef struct fsg_link_s { + int32 from_state; + int32 to_state; + int32 logs2prob; /**< log(transition probability)*lw */ + int32 wid; /**< Word-ID; <0 if epsilon or null transition */ +} fsg_link_t; + +/* Access macros */ +#define fsg_link_from_state(l) ((l)->from_state) +#define fsg_link_to_state(l) ((l)->to_state) +#define fsg_link_wid(l) ((l)->wid) +#define fsg_link_logs2prob(l) ((l)->logs2prob) + +/** + * Adjacency list (opaque) for a state in an FSG. + */ +typedef struct trans_list_s trans_list_t; + +/** + * Word level FSG definition. + * States are simply integers 0..n_state-1. + * A transition emits a word and has a given probability of being taken. + * There can also be null or epsilon transitions, with no associated emitted + * word. + */ +typedef struct fsg_model_s { + int refcount; /**< Reference count. */ + char *name; /**< A unique string identifier for this FSG */ + int32 n_word; /**< Number of unique words in this FSG */ + int32 n_word_alloc; /**< Number of words allocated in vocab */ + char **vocab; /**< Vocabulary for this FSG. */ + bitvec_t *silwords; /**< Indicates which words are silence/fillers. */ + bitvec_t *altwords; /**< Indicates which words are pronunciation alternates. */ + logmath_t *lmath; /**< Pointer to log math computation object. */ + int32 n_state; /**< number of states in FSG */ + int32 start_state; /**< Must be in the range [0..n_state-1] */ + int32 final_state; /**< Must be in the range [0..n_state-1] */ + float32 lw; /**< Language weight that's been applied to transition + logprobs */ + trans_list_t *trans; /**< Transitions out of each state, if any. */ + listelem_alloc_t *link_alloc; /**< Allocator for FSG links. 
*/ +} fsg_model_t; + +/* Access macros */ +#define fsg_model_name(f) ((f)->name) +#define fsg_model_n_state(f) ((f)->n_state) +#define fsg_model_start_state(f) ((f)->start_state) +#define fsg_model_final_state(f) ((f)->final_state) +#define fsg_model_log(f,p) logmath_log((f)->lmath, p) +#define fsg_model_lw(f) ((f)->lw) +#define fsg_model_n_word(f) ((f)->n_word) +#define fsg_model_word_str(f,wid) (wid == -1 ? "(NULL)" : (f)->vocab[wid]) + +/** + * Iterator over arcs. + */ +typedef struct fsg_arciter_s fsg_arciter_t; + +/** + * Have silence transitions been added? + */ +#define fsg_model_has_sil(f) ((f)->silwords != NULL) + +/** + * Have alternate word transitions been added? + */ +#define fsg_model_has_alt(f) ((f)->altwords != NULL) + +#define fsg_model_is_filler(f,wid) \ + (fsg_model_has_sil(f) ? bitvec_is_set((f)->silwords, wid) : FALSE) +#define fsg_model_is_alt(f,wid) \ + (fsg_model_has_alt(f) ? bitvec_is_set((f)->altwords, wid) : FALSE) + +/** + * Create a new FSG. + */ +SPHINXBASE_EXPORT +fsg_model_t *fsg_model_init(char const *name, logmath_t *lmath, + float32 lw, int32 n_state); + +/** + * Read a word FSG from the given file and return a pointer to the structure + * created. Return NULL if any error occurred. + * + * File format: + * + *
+ *   Any number of comment lines; ignored
+ *   FSG_BEGIN [<fsg-name>]
+ *   N <#states>
+ *   S <start-state ID>
+ *   F <final-state ID>
+ *   T <from-state ID> <to-state ID> <prob> [<word-string>]
+ *   T ...
+ *   ... (any number of state transitions)
+ *   FSG_END
+ *   Any number of comment lines; ignored
+ * 
+ * + * The FSG spec begins with the line containing the keyword FSG_BEGIN. + * It has an optional fsg name string. If not present, the FSG has the empty + * string as its name. + * + * Following the FSG_BEGIN declaration is the number of states, the start + * state, and the final state, each on a separate line. States are numbered + * in the range [0 .. <#states>-1]. + * + * These are followed by all the state transitions, each on a separate line, + * and terminated by the FSG_END line. A state transition has the given + * probability of being taken, and emits the given word. The word emission + * is optional; if word-string omitted, it is an epsilon or null transition. + * + * Comments can also be embedded within the FSG body proper (i.e. between + * FSG_BEGIN and FSG_END): any line with a # character in col 1 is treated + * as a comment line. + * + * Return value: a new fsg_model_t structure if the file is successfully + * read, NULL otherwise. + */ +SPHINXBASE_EXPORT +fsg_model_t *fsg_model_readfile(const char *file, logmath_t *lmath, float32 lw); + +/** + * Like fsg_model_readfile(), but from an already open stream. + */ +SPHINXBASE_EXPORT +fsg_model_t *fsg_model_read(FILE *fp, logmath_t *lmath, float32 lw); + +/** + * Retain ownership of an FSG. + * + * @return Pointer to retained FSG. + */ +SPHINXBASE_EXPORT +fsg_model_t *fsg_model_retain(fsg_model_t *fsg); + +/** + * Free the given word FSG. + * + * @return new reference count (0 if freed completely) + */ +SPHINXBASE_EXPORT +int fsg_model_free(fsg_model_t *fsg); + +/** + * Add a word to the FSG vocabulary. + * + * @return Word ID for this new word. + */ +SPHINXBASE_EXPORT +int fsg_model_word_add(fsg_model_t *fsg, char const *word); + +/** + * Look up a word in the FSG vocabulary. + * + * @return Word ID for this word + */ +SPHINXBASE_EXPORT +int fsg_model_word_id(fsg_model_t *fsg, char const *word); + +/** + * Add the given transition to the FSG transition matrix.
+ * + * Duplicates (i.e., two transitions between the same states, with the + * same word label) are flagged and only the highest prob retained. + */ +SPHINXBASE_EXPORT +void fsg_model_trans_add(fsg_model_t * fsg, + int32 from, int32 to, int32 logp, int32 wid); + +/** + * Add a null transition between the given states. + * + * There can be at most one null transition between the given states; + * duplicates are flagged and only the best prob retained. Transition + * probs must be <= 1 (i.e., logprob <= 0). + * + * @return 1 if a new transition was added, 0 if the prob of an existing + * transition was upgraded; -1 if nothing was changed. + */ +SPHINXBASE_EXPORT +int32 fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to, int32 logp); + +/** + * Add a "tag" transition between the given states. + * + * A "tag" transition is a null transition with a non-null word ID, + * which corresponds to a semantic tag or other symbol to be output + * when this transition is taken. + * + * As above, there can be at most one null or tag transition between + * the given states; duplicates are flagged and only the best prob + * retained. Transition probs must be <= 1 (i.e., logprob <= 0). + * + * @return 1 if a new transition was added, 0 if the prob of an existing + * transition was upgraded; -1 if nothing was changed. + */ +SPHINXBASE_EXPORT +int32 fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to, + int32 logp, int32 wid); + +/** + * Obtain transitive closure of null transitions in the given FSG. + * + * @param nulls List of null transitions, or NULL to find them automatically. + * @return Updated list of null transitions. + */ +SPHINXBASE_EXPORT +glist_t fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls); + +/** + * Get the list of transitions (if any) from state i to j. + */ +SPHINXBASE_EXPORT +glist_t fsg_model_trans(fsg_model_t *fsg, int32 i, int32 j); + +/** + * Get an iterator over the outgoing transitions from state i. 
+ */ +SPHINXBASE_EXPORT +fsg_arciter_t *fsg_model_arcs(fsg_model_t *fsg, int32 i); + +/** + * Get the current arc from the arc iterator. + */ +SPHINXBASE_EXPORT +fsg_link_t *fsg_arciter_get(fsg_arciter_t *itor); + +/** + * Move the arc iterator forward. + */ +SPHINXBASE_EXPORT +fsg_arciter_t *fsg_arciter_next(fsg_arciter_t *itor); + +/** + * Free the arc iterator (early termination) + */ +SPHINXBASE_EXPORT +void fsg_arciter_free(fsg_arciter_t *itor); +/** + * Get the null transition (if any) from state i to j. + */ +SPHINXBASE_EXPORT +fsg_link_t *fsg_model_null_trans(fsg_model_t *fsg, int32 i, int32 j); + +/** + * Add silence word transitions to each state in given FSG. + * + * @param silword the silence word for the added transitions (presumably its vocabulary string; confirm against the implementation). + * @param state state to add a self-loop to, or -1 for all states. + * @param silprob probability of silence transition. + */ +SPHINXBASE_EXPORT +int fsg_model_add_silence(fsg_model_t * fsg, char const *silword, + int state, float32 silprob); + +/** + * Add alternate pronunciation transitions for a word in given FSG. + */ +SPHINXBASE_EXPORT +int fsg_model_add_alt(fsg_model_t * fsg, char const *baseword, + char const *altword); + +/** + * Write FSG to an already open stream. + */ +SPHINXBASE_EXPORT +void fsg_model_write(fsg_model_t *fsg, FILE *fp); + +/** + * Like fsg_model_write(), but to the given file. + */ +SPHINXBASE_EXPORT +void fsg_model_writefile(fsg_model_t *fsg, char const *file); + +/** + * Write FSG to an already open stream in AT&T FSM format. + */ +SPHINXBASE_EXPORT +void fsg_model_write_fsm(fsg_model_t *fsg, FILE *fp); + +/** + * Write FSG to a file in AT&T FSM format.
+ */ +SPHINXBASE_EXPORT +void fsg_model_writefile_fsm(fsg_model_t *fsg, char const *file); + +/** + * Write FSG symbol table to a file (for AT&T FSM) + */ +SPHINXBASE_EXPORT +void fsg_model_write_symtab(fsg_model_t *fsg, FILE *file); + +/** + * Write FSG symbol table to a file (for AT&T FSM) + */ +SPHINXBASE_EXPORT +void fsg_model_writefile_symtab(fsg_model_t *fsg, char const *file); + +#ifdef __cplusplus +} +#endif + +#endif /* __FSG_MODEL_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/genrand.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/genrand.h new file mode 100644 index 0000000000000000000000000000000000000000..5123c9524a77f76f73a89b6f772418ee947de51a --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/genrand.h @@ -0,0 +1,177 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright +` notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. + http://www.math.keio.ac.jp/matumoto/emt.html + email: matumoto@math.keio.ac.jp +*/ + +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * randgen.c : a portable random generator + * + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: genrand.h,v $ + * Revision 1.3 2005/06/22 03:01:50 arthchan2003 + * Added keyword + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 18-Nov-04 ARCHAN (archan@cs.cmu.edu) at Carnegie Mellon University + * First incorporated from the Mersenne Twister Random + * Number Generator package. It was chosen because it is + * in BSD-license and its performance is quite + * reasonable. Of course if you look at the inventors's + * page. This random generator can actually gives + * 19937-bits period. This is already far from we need. + * This will possibly good enough for the next 10 years. + * + * I also downgrade the code a little bit to avoid Sphinx's + * developers misused it. 
+ */ +#ifndef _LIBUTIL_GENRAND_H_ +#define _LIBUTIL_GENRAND_H_ + +#define S3_RAND_MAX_INT32 0x7fffffff +#include + +/* Win32/WinCE DLL gunk */ +#include + +/** \file genrand.h + *\brief High performance portable random generator created by Takuji + *Nishimura and Makoto Matsumoto. + * + * A high performance which applied Mersenne twister primes to generate + * random number. If properly seeded, the random generator can achieve + * 19937-bits period. For technical detail. Please take a look at + * (FIXME! Need to search for the web site.) http://www. + */ +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Macros to simplify calling of random generator function. + * + */ +#define s3_rand_seed(s) genrand_seed(s); +#define s3_rand_int31() genrand_int31() +#define s3_rand_real() genrand_real3() +#define s3_rand_res53() genrand_res53() + +/** + *Initialize the seed of the random generator. + */ +SPHINXBASE_EXPORT +void genrand_seed(unsigned long s); + +/** + *generates a random number on [0,0x7fffffff]-interval + */ +SPHINXBASE_EXPORT +long genrand_int31(void); + +/** + *generates a random number on (0,1)-real-interval + */ +SPHINXBASE_EXPORT +double genrand_real3(void); + +/** + *generates a random number on [0,1) with 53-bit resolution + */ +SPHINXBASE_EXPORT +double genrand_res53(void); + +#ifdef __cplusplus +} +#endif + +#endif /*_LIBUTIL_GENRAND_H_*/ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/glist.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/glist.h new file mode 100644 index 0000000000000000000000000000000000000000..cdb18be3b6382048f454ac7041d8001687a667aa --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/glist.h @@ -0,0 +1,242 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* 
==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * glist.h -- Module for maintaining a generic, linear linked-list structure. 
+ * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: glist.h,v $ + * Revision 1.9 2005/06/22 03:02:51 arthchan2003 + * 1, Fixed doxygen documentation, 2, add keyword. + * + * Revision 1.4 2005/05/03 04:09:11 archan + * Implemented the heart of word copy search. For every ci-phone, every word end, a tree will be allocated to preserve its pathscore. This is different from 3.5 or below, only the best score for a particular ci-phone, regardless of the word-ends will be preserved at every frame. The graph propagation will not collect unused word tree at this point. srch_WST_propagate_wd_lv2 is also as the most stupid in the century. But well, after all, everything needs a start. I will then really get the results from the search and see how it looks. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 09-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added glist_chkdup_*(). + * + * 13-Feb-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created from earlier version. + */ + + +/** + * \file glist.h + * \brief Generic linked-lists maintenance. + * + * Only insert at the head of the list. A convenient little + * linked-list package, but a double-edged sword: the user must keep + * track of the data type within the linked list elements. When it + * was first written, there was no selective deletions except to + * destroy the entire list. This is modified in later version. + * + * + * (C++ would be good for this, but that's a double-edged sword as well.) + */ + + +#ifndef _LIBUTIL_GLIST_H_ +#define _LIBUTIL_GLIST_H_ + +#include +/* Win32/WinCE DLL gunk */ +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. 
*/ +} +#endif + +/** A node in a generic list + */ +typedef struct gnode_s { + anytype_t data; /**< See prim_type.h */ + struct gnode_s *next; /**< Next node in list */ +} gnode_t; +typedef gnode_t *glist_t; /**< Head of a list of gnodes */ + + +/** Access macros, for convenience + */ +#define gnode_ptr(g) ((g)->data.ptr) +#define gnode_int32(g) ((g)->data.i) +#define gnode_uint32(g) ((g)->data.ui) +#define gnode_float32(g) ((float32)(g)->data.fl) +#define gnode_float64(g) ((g)->data.fl) +#define gnode_next(g) ((g)->next) + + +/** + * Create and prepend a new list node, with the given user-defined data, at the HEAD + * of the given generic list. Return the new list thus formed. + * g may be NULL to indicate an initially empty list. + */ +SPHINXBASE_EXPORT +glist_t glist_add_ptr (glist_t g, /**< a link list */ + void *ptr /**< a pointer */ + ); + +/** + * Create and prepend a new list node containing an integer. + */ +SPHINXBASE_EXPORT +glist_t glist_add_int32 (glist_t g, /**< a link list */ + int32 val /**< an integer value */ + ); +/** + * Create and prepend a new list node containing an unsigned integer. + */ +SPHINXBASE_EXPORT +glist_t glist_add_uint32 (glist_t g, /**< a link list */ + uint32 val /**< an unsigned integer value */ + ); +/** + * Create and prepend a new list node containing a single-precision float. + */ +SPHINXBASE_EXPORT +glist_t glist_add_float32 (glist_t g, /**< a link list */ + float32 val /**< a float32 value */ + ); +/** + * Create and prepend a new list node containing a double-precision float. + */ +SPHINXBASE_EXPORT +glist_t glist_add_float64 (glist_t g, /**< a link list */ + float64 val /**< a float64 value */ + ); + + + +/** + * Create and insert a new list node, with the given user-defined data, after + * the given generic node gn. gn cannot be NULL. + * Return ptr to the newly created gnode_t. 
+ */ +SPHINXBASE_EXPORT +gnode_t *glist_insert_ptr (gnode_t *gn, /**< a generic node which ptr will be inserted after it*/ + void *ptr /**< pointer inserted */ + ); +/** + * Create and insert a new list node containing an integer. + */ +SPHINXBASE_EXPORT +gnode_t *glist_insert_int32 (gnode_t *gn, /**< a generic node which a value will be inserted after it*/ + int32 val /**< int32 inserted */ + ); +/** + * Create and insert a new list node containing an unsigned integer. + */ +SPHINXBASE_EXPORT +gnode_t *glist_insert_uint32 (gnode_t *gn, /**< a generic node which a value will be inserted after it*/ + uint32 val /**< uint32 inserted */ + ); +/** + * Create and insert a new list node containing a single-precision float. + */ +SPHINXBASE_EXPORT +gnode_t *glist_insert_float32 (gnode_t *gn, /**< a generic node which a value will be inserted after it*/ + float32 val /**< float32 inserted */ + ); +/** + * Create and insert a new list node containing a double-precision float. + */ +SPHINXBASE_EXPORT +gnode_t *glist_insert_float64 (gnode_t *gn, /**< a generic node which a value will be inserted after it*/ + float64 val /**< float64 inserted */ + ); + +/** + * Reverse the order of the given glist. (glist_add() adds to the head; one might + * ultimately want the reverse of that.) + * NOTE: The list is reversed "in place"; i.e., no new memory is allocated. + * @return: The head of the new list. + */ +SPHINXBASE_EXPORT +glist_t glist_reverse (glist_t g /**< input link list */ + ); + + +/** + Count the number of element in a given link list + @return the number of elements in the given glist_t +*/ +SPHINXBASE_EXPORT +int32 glist_count (glist_t g /**< input link list */ + ); + +/** + * Free the given generic list; user-defined data contained within is not + * automatically freed. The caller must have done that already. + */ +SPHINXBASE_EXPORT +void glist_free (glist_t g); + + +/** + * Free the given node, gn, of a glist, pred being its predecessor in the list. 
+ * Return ptr to the next node in the list after the freed node. + */ +SPHINXBASE_EXPORT +gnode_t *gnode_free(gnode_t *gn, + gnode_t *pred + ); + +/** + * Return the last node in the given list. + */ +SPHINXBASE_EXPORT +gnode_t *glist_tail (glist_t g); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/hash_table.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/hash_table.h new file mode 100644 index 0000000000000000000000000000000000000000..945628f0302e82ebf82f71e880b096b43e7f63c8 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/hash_table.h @@ -0,0 +1,443 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * hash.h -- Hash table module. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: hash.h,v $ + * Revision 1.7 2005/06/22 03:04:01 arthchan2003 + * 1, Implemented hash_delete and hash_display, 2, Fixed doxygen documentation, 3, Added keyword. + * + * Revision 1.8 2005/05/24 01:10:54 archan + * Fix a bug when the value only appear in the hash but there is no chain. Also make sure that prev was initialized to NULL. All success cases were tested, but not tested with the deletion is tested. + * + * Revision 1.7 2005/05/24 00:12:31 archan + * Also add function prototype for hash_display in hash.h + * + * Revision 1.4 2005/05/03 04:09:11 archan + * Implemented the heart of word copy search. For every ci-phone, every word end, a tree will be allocated to preserve its pathscore. This is different from 3.5 or below, only the best score for a particular ci-phone, regardless of the word-ends will be preserved at every frame. The graph propagation will not collect unused word tree at this point. srch_WST_propagate_wd_lv2 is also as the most stupid in the century. But well, after all, everything needs a start. 
I will then really get the results from the search and see how it looks. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 05-May-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Removed hash_key2hash(). Added hash_enter_bkey() and hash_lookup_bkey(), + * and len attribute to hash_entry_t. + * + * 30-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Added hash_key2hash(). + * + * 18-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Included case sensitive/insensitive option. + * + * 08-31-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Created. + */ + + +/** + * @file hash_table.h + * @brief Hash table implementation + * + * This hash tables are intended for associating a pointer/integer + * "value" with a char string "key", (e.g., an ID with a word string). + * Subsequently, one can retrieve the value by providing the string + * key. (The reverse functionality--obtaining the string given the + * value--is not provided with the hash table module.) + */ + +/** + * A note by ARCHAN at 20050510: Technically what we use is so-called + * "hash table with buckets" which is very nice way to deal with + * external hashing. There are definitely better ways to do internal + * hashing (i.e. when everything is stored in the memory.) In Sphinx + * 3, this is a reasonable practice because hash table is only used in + * lookup in initialization or in lookups which is not critical for + * speed. + */ + +/** + * Another note by ARCHAN at 20050703: To use this data structure + * properly, it is very important to realize that the users are + * required to handle memory allocation of the C-style keys. The hash + * table will not make a copy of the memory allocated for any of the + * C-style key. It will not allocate memory for it. It will not delete + * memory for it. As a result, the following code sniplet will cause + * memory leak. 
+ * + * while (1){ + * str=(char*)ckd_calloc(str_length,sizeof(char*)) + * if(hash_enter(ht,str,id)!=id){ printf("fail to add key str %s with val id %d\n",str,id)} + * } + * + */ + +/** + * A note by dhuggins on 20061010: Changed this to use void * instead + * of int32 as the value type, so that arbitrary objects can be + * inserted into a hash table (in a way that won't crash on 64-bit + * machines ;) + */ + +#ifndef _LIBUTIL_HASH_H_ +#define _LIBUTIL_HASH_H_ + +/* Win32/WinCE DLL gunk */ +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * The hash table structures. + * Each hash table is identified by a hash_table_t structure. hash_table_t.table is + * pre-allocated for a user-controlled max size, and is initially empty. As new + * entries are created (using hash_enter()), the empty entries get filled. If multiple + * keys hash to the same entry, new entries are allocated and linked together in a + * linear list. + */ + +typedef struct hash_entry_s { + const char *key; /** Key string, NULL if this is an empty slot. + NOTE that the key must not be changed once the entry + has been made. */ + size_t len; /** Key-length; the key string does not have to be a C-style NULL + terminated string; it can have arbitrary binary bytes */ + void *val; /** Value associated with above key */ + struct hash_entry_s *next; /** For collision resolution */ +} hash_entry_t; + +typedef struct hash_table_s { + hash_entry_t *table; /**Primary hash table, excluding entries that collide */ + int32 size; /** Primary hash table size, (is a prime#); NOTE: This is the + number of primary entries ALLOCATED, NOT the number of valid + entries in the table */ + int32 inuse; /** Number of valid entries in the table. */ + int32 nocase; /** Whether case insensitive for key comparisons */ +} hash_table_t; + +typedef struct hash_iter_s { + hash_table_t *ht; /**< Hash table we are iterating over. 
*/ + hash_entry_t *ent; /**< Current entry in that table. */ + size_t idx; /**< Index of next bucket to search. */ +} hash_iter_t; + +/** Access macros */ +#define hash_entry_val(e) ((e)->val) +#define hash_entry_key(e) ((e)->key) +#define hash_entry_len(e) ((e)->len) +#define hash_table_inuse(h) ((h)->inuse) +#define hash_table_size(h) ((h)->size) + + +/** + * Allocate a new hash table for a given expected size. + * + * @note Case sensitivity of hash keys applies to 7-bit ASCII + * characters only, and is not locale-dependent. + * + * @return handle to allocated hash table. + */ +SPHINXBASE_EXPORT +hash_table_t * hash_table_new(int32 size, /**< In: Expected number of entries in the table */ + int32 casearg /**< In: Whether case insensitive for key + comparisons. When 1, case is insensitive, + 0, case is sensitive. */ + ); + +#define HASH_CASE_YES 0 +#define HASH_CASE_NO 1 + +/** + * Free the specified hash table; the caller is responsible for freeing the key strings + * pointed to by the table entries. + */ +SPHINXBASE_EXPORT +void hash_table_free(hash_table_t *h /**< In: Handle of hash table to free */ + ); + + +/** + * Try to add a new entry with given key and associated value to hash table h. If key doesn't + * already exist in hash table, the addition is successful, and the return value is val. But + * if key already exists, return its existing associated value. (The hash table is unchanged; + * it is up to the caller to resolve the conflict.) + */ +SPHINXBASE_EXPORT +void *hash_table_enter(hash_table_t *h, /**< In: Handle of hash table in which to create entry */ + const char *key, /**< In: C-style NULL-terminated key string + for the new entry */ + void *val /**< In: Value to be associated with above key */ + ); + +/** + * Add a 32-bit integer value to a hash table. + * + * This macro is the clean way to do this and avoid compiler warnings + * on 64-bit platforms. 
+ */ +#define hash_table_enter_int32(h,k,v) \ + ((int32)(long)hash_table_enter((h),(k),(void *)(long)(v))) + +/** + * Add a new entry with given key and value to hash table h. If the + * key already exists, its value is replaced with the given value, and + * the previous value is returned, otherwise val is returned. + * + * A very important but subtle point: The key pointer in the hash + * table is replaced with the pointer passed to this function. + * In general you should always pass a pointer to hash_table_enter() + * whose lifetime matches or exceeds that of the hash table. In some + * rare cases it is convenient to initially enter a value with a + * short-lived key, then later replace that with a long-lived one. + * This behaviour allows this to happen. + */ +SPHINXBASE_EXPORT +void *hash_table_replace(hash_table_t *h, /**< In: Handle of hash table in which to create entry */ + const char *key, /**< In: C-style NULL-terminated key string + for the new entry */ + void *val /**< In: Value to be associated with above key */ + ); + +/** + * Replace a 32-bit integer value in a hash table. + * + * This macro is the clean way to do this and avoid compiler warnings + * on 64-bit platforms. + */ +#define hash_table_replace_int32(h,k,v) \ + ((int32)(long)hash_table_replace((h),(k),(void *)(long)(v))) + +/** + * Delete an entry with given key and associated value to hash table + * h. Return the value associated with the key (NULL if it did not exist) + */ + +SPHINXBASE_EXPORT +void *hash_table_delete(hash_table_t *h, /**< In: Handle of hash table in + which a key will be deleted */ + const char *key /**< In: C-style NULL-terminated + key string for the new entry */ + ); + +/** + * Like hash_table_delete, but with an explicitly specified key length, + * instead of a NULL-terminated, C-style key string. So the key + * string is a binary key (or bkey). Hash tables containing such keys + * should be created with the HASH_CASE_YES option. 
Otherwise, the + * results are unpredictable. + */ +SPHINXBASE_EXPORT +void *hash_table_delete_bkey(hash_table_t *h, /**< In: Handle of hash table in + which a key will be deleted */ + const char *key, /**< In: Key buffer of the entry + to be deleted */ + size_t len /**< In: Length of above key buffer */ + ); + +/** + * Delete all entries from a hash_table. + */ +SPHINXBASE_EXPORT +void hash_table_empty(hash_table_t *h /**< In: Handle of hash table */ + ); + +/** + * Like hash_table_enter, but with an explicitly specified key length, + * instead of a NULL-terminated, C-style key string. So the key + * string is a binary key (or bkey). Hash tables containing such keys + * should be created with the HASH_CASE_YES option. Otherwise, the + * results are unpredictable. + */ +SPHINXBASE_EXPORT +void *hash_table_enter_bkey(hash_table_t *h, /**< In: Handle of hash table + in which to create entry */ + const char *key, /**< In: Key buffer */ + size_t len, /**< In: Length of above key buffer */ + void *val /**< In: Value to be associated with above key */ + ); + +/** + * Enter a 32-bit integer value in a hash table. + * + * This macro is the clean way to do this and avoid compiler warnings + * on 64-bit platforms. + */ +#define hash_table_enter_bkey_int32(h,k,l,v) \ + ((int32)(long)hash_table_enter_bkey((h),(k),(l),(void *)(long)(v))) + +/** + * Like hash_table_replace, but with an explicitly specified key length, + * instead of a NULL-terminated, C-style key string. So the key + * string is a binary key (or bkey). Hash tables containing such keys + * should be created with the HASH_CASE_YES option. Otherwise, the + * results are unpredictable. + */ +SPHINXBASE_EXPORT +void *hash_table_replace_bkey(hash_table_t *h, /**< In: Handle of hash table in which to create entry */ + const char *key, /**< In: Key buffer */ + size_t len, /**< In: Length of above key buffer */ + void *val /**< In: Value to be associated with above key */ + ); + +/** + * Replace a 32-bit integer value in a hash table. 
+ * + * This macro is the clean way to do this and avoid compiler warnings + * on 64-bit platforms. + */ +#define hash_table_replace_bkey_int32(h,k,l,v) \ + ((int32)(long)hash_table_replace_bkey((h),(k),(l),(void *)(long)(v))) + +/** + * Look up a key in a hash table and optionally return the associated + * value. + * @return 0 if key found in hash table, else -1. + */ +SPHINXBASE_EXPORT +int32 hash_table_lookup(hash_table_t *h, /**< In: Handle of hash table being searched */ + const char *key, /**< In: C-style NULL-terminated string whose value is sought */ + void **val /**< Out: *val = value associated with key. + If this is NULL, no value will be returned. */ + ); + +/** + * Look up a 32-bit integer value in a hash table. + * + * This function is the clean way to do this and avoid compiler warnings + * on 64-bit platforms. + */ +SPHINXBASE_EXPORT +int32 hash_table_lookup_int32(hash_table_t *h, /**< In: Handle of hash table being searched */ + const char *key, /**< In: C-style NULL-terminated string whose value is sought */ + int32 *val /**< Out: *val = value associated with key. + If this is NULL, no value will be returned. */ + ); + +/** + * Like hash_lookup, but with an explicitly specified key length, instead of a NULL-terminated, + * C-style key string. So the key string is a binary key (or bkey). Hash tables containing + * such keys should be created with the HASH_CASE_YES option. Otherwise, the results are + * unpredictable. + */ +SPHINXBASE_EXPORT +int32 hash_table_lookup_bkey(hash_table_t *h, /**< In: Handle of hash table being searched */ + const char *key, /**< In: Key buffer */ + size_t len, /**< In: Length of above key buffer */ + void **val /**< Out: *val = value associated with key. + If this is NULL, no value will be returned. */ + ); + +/** + * Look up a 32-bit integer value in a hash table. + * + * This function is the clean way to do this and avoid compiler warnings + * on 64-bit platforms. 
+ */ +SPHINXBASE_EXPORT +int32 hash_table_lookup_bkey_int32(hash_table_t *h,/**< In: Handle of hash table being searched */ + const char *key,/**< In: Key buffer */ + size_t len, /**< In: Length of above key buffer */ + int32 *val /**< Out: *val = value associated with key. + If this is NULL, no value will be returned. */ + ); + +/** + * Start iterating over key-value pairs in a hash table. + */ +SPHINXBASE_EXPORT +hash_iter_t *hash_table_iter(hash_table_t *h); + +/** + * Get the next key-value pair in iteration. + * + * This function automatically frees the iterator object upon reaching + * the final entry. + * + * @return the next entry in the hash table, or NULL if done. + */ +SPHINXBASE_EXPORT +hash_iter_t *hash_table_iter_next(hash_iter_t *itor); + +/** + * Delete an unfinished iterator. + */ +SPHINXBASE_EXPORT +void hash_table_iter_free(hash_iter_t *itor); + +/** + * Build a glist of valid hash_entry_t pointers from the given hash table. Return the list. + */ +SPHINXBASE_EXPORT +glist_t hash_table_tolist(hash_table_t *h, /**< In: Hash table from which list is to be generated */ + int32 *count /**< Out: Number of entries in the list. + If this is NULL, no count will be returned. */ + + ); + +/** + * Display a hash-with-chaining representation on the screen. + * Currently, it will only works for situation where hash_enter was + * used to enter the keys. + */ +SPHINXBASE_EXPORT +void hash_table_display(hash_table_t *h, /**< In: Hash table to display */ + int32 showkey /**< In: Show the string or not, + Use 0 if hash_enter_bkey was + used. 
*/ + ); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/heap.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/heap.h new file mode 100644 index 0000000000000000000000000000000000000000..ad3756aafe4a45f73d821e95eea927e21af3a396 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/heap.h @@ -0,0 +1,153 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * heap.h -- Generic heap structure for inserting in any order and popping in sorted + * order. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: heap.h,v $ + * Revision 1.7 2005/06/22 03:05:49 arthchan2003 + * 1, Fixed doxygen documentation, 2, Add keyword. + * + * Revision 1.4 2005/06/15 04:21:46 archan + * 1, Fixed doxygen-documentation, 2, Add keyword such that changes will be logged into a file. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 23-Dec-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Started. + */ + + +#ifndef _LIBUTIL_HEAP_H_ +#define _LIBUTIL_HEAP_H_ + +#include + +/* Win32/WinCE DLL gunk */ +#include +#include + + /** \file heap.h + * \brief Heap Implementation. + * + * General Comment: Sorted heap structure with three main operations: + * + * 1. Insert a data item (with two attributes: an application supplied pointer and an + * integer value; the heap is maintained in ascending order of the integer value). + * 2. Return the currently topmost item (i.e., item with smallest associated value). + * 3. Return the currently topmost item and pop it off the heap. 
+ */ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + + +typedef struct heap_s heap_t; + + +/** + * Allocate a new heap and return handle to it. + */ +SPHINXBASE_EXPORT +heap_t *heap_new(void); + + +/** + * Insert a new item into the given heap. + * Return value: 0 if successful, -1 otherwise. + */ +SPHINXBASE_EXPORT +int heap_insert(heap_t *heap, /**< In: Heap into which item is to be inserted */ + void *data, /**< In: Application-determined data pointer */ + int32 val /**< In: According to item entered in sorted heap */ + ); +/** + * Return the topmost item in the heap. + * Return value: 1 if heap is not empty and the topmost value is returned; + * 0 if heap is empty; -1 if some error occurred. + */ +SPHINXBASE_EXPORT +int heap_top(heap_t *heap, /**< In: Heap whose topmost item is to be returned */ + void **data, /**< Out: Data pointer associated with the topmost item */ + int32 *val /**< Out: Value associated with the topmost item */ + ); +/** + * Like heap_top but also pop the top item off the heap. + */ +SPHINXBASE_EXPORT +int heap_pop(heap_t *heap, void **data, int32 *val); + +/** + * Remove an item from the heap. + */ +SPHINXBASE_EXPORT +int heap_remove(heap_t *heap, void *data); + +/** + * Return the number of items in the heap. + */ +SPHINXBASE_EXPORT +size_t heap_size(heap_t *heap); + +/** + * Destroy the given heap; free the heap nodes. NOTE: Data pointers in the nodes are NOT freed. + * Return value: 0 if successful, -1 otherwise. 
+ */ + +SPHINXBASE_EXPORT +int heap_destroy(heap_t *heap); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/jsgf.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/jsgf.h new file mode 100644 index 0000000000000000000000000000000000000000..3c3de1de6ee42b2c8900836969f76cfb807c1c90 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/jsgf.h @@ -0,0 +1,209 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef __JSGF_H__ +#define __JSGF_H__ + +/** + * @file jsgf.h JSGF grammar compiler + * + * This file defines the data structures for parsing JSGF grammars + * into Sphinx finite-state grammars. + **/ + +#include + +/* Win32/WinCE DLL gunk */ +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +typedef struct jsgf_s jsgf_t; +typedef struct jsgf_rule_s jsgf_rule_t; + +/** + * Create a new JSGF grammar. + * + * @param parent optional parent grammar for this one (NULL, usually). + * @return new JSGF grammar object, or NULL on failure. + */ +SPHINXBASE_EXPORT +jsgf_t *jsgf_grammar_new(jsgf_t *parent); + +/** + * Parse a JSGF grammar from a file. + * + * @param filename the name of the file to parse. + * @param parent optional parent grammar for this one (NULL, usually). + * @return new JSGF grammar object, or NULL on failure. + */ +SPHINXBASE_EXPORT +jsgf_t *jsgf_parse_file(const char *filename, jsgf_t *parent); + +/** + * Parse a JSGF grammar from a string. + * + * @param string 0-terminated string with grammar. + * @param parent optional parent grammar for this one (NULL, usually). + * @return new JSGF grammar object, or NULL on failure. + */ +SPHINXBASE_EXPORT +jsgf_t *jsgf_parse_string(const char *string, jsgf_t *parent); + +/** + * Get the grammar name from the file. 
+ */ +SPHINXBASE_EXPORT +char const *jsgf_grammar_name(jsgf_t *jsgf); + +/** + * Free a JSGF grammar. + */ +SPHINXBASE_EXPORT +void jsgf_grammar_free(jsgf_t *jsgf); + +/** + * Iterator over rules in a grammar. + */ +typedef hash_iter_t jsgf_rule_iter_t; + +/** + * Get an iterator over all rules in a grammar. + */ +SPHINXBASE_EXPORT +jsgf_rule_iter_t *jsgf_rule_iter(jsgf_t *grammar); + +/** + * Advance an iterator to the next rule in the grammar. + */ +#define jsgf_rule_iter_next(itor) hash_table_iter_next(itor) + +/** + * Get the current rule in a rule iterator. + */ +#define jsgf_rule_iter_rule(itor) ((jsgf_rule_t *)(itor)->ent->val) + +/** + * Free a rule iterator (if the end hasn't been reached). + */ +#define jsgf_rule_iter_free(itor) hash_table_iter_free(itor) + +/** + * Get a rule by name from a grammar. Name should not contain brackets. + */ +SPHINXBASE_EXPORT +jsgf_rule_t *jsgf_get_rule(jsgf_t *grammar, const char *name); + +/** + * Returns the first public rule of the grammar + */ +SPHINXBASE_EXPORT +jsgf_rule_t *jsgf_get_public_rule(jsgf_t *grammar); + +/** + * Get the rule name from a rule. + */ +SPHINXBASE_EXPORT +char const *jsgf_rule_name(jsgf_rule_t *rule); + +/** + * Test if a rule is public or not. + */ +SPHINXBASE_EXPORT +int jsgf_rule_public(jsgf_rule_t *rule); + +/** + * Build a Sphinx FSG object from a JSGF rule. + */ +SPHINXBASE_EXPORT +fsg_model_t *jsgf_build_fsg(jsgf_t *grammar, jsgf_rule_t *rule, + logmath_t *lmath, float32 lw); + +/** + * Build a Sphinx FSG object from a JSGF rule. + * + * This differs from jsgf_build_fsg() in that it does not do closure + * on epsilon transitions or any other postprocessing. For the time + * being this is necessary in order to write it to a file - the FSG + * code will be fixed soon. + */ +SPHINXBASE_EXPORT +fsg_model_t *jsgf_build_fsg_raw(jsgf_t *grammar, jsgf_rule_t *rule, + logmath_t *lmath, float32 lw); + + +/** + * Read JSGF from file and return FSG object from it. 
+ * + * This function looks for the first public rule in the JSGF grammar and constructs an FSG from it. + */ +SPHINXBASE_EXPORT +fsg_model_t *jsgf_read_file(const char *file, logmath_t * lmath, float32 lw); + +/** + * Read JSGF from string and return FSG object from it. + * + * This function looks for the first public rule in the JSGF grammar and constructs an FSG from it. + */ +SPHINXBASE_EXPORT +fsg_model_t *jsgf_read_string(const char *string, logmath_t * lmath, float32 lw); + + +/** + * Convert a JSGF rule to Sphinx FSG text form. + * + * This does a direct conversion without doing transitive closure on + * null transitions and so forth. + */ +SPHINXBASE_EXPORT +int jsgf_write_fsg(jsgf_t *grammar, jsgf_rule_t *rule, FILE *outfh); + +#ifdef __cplusplus +} +#endif + + +#endif /* __JSGF_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/listelem_alloc.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/listelem_alloc.h new file mode 100644 index 0000000000000000000000000000000000000000..91b21d17714e87a37443ff5e81b1d7a9bef9b0dd --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/listelem_alloc.h @@ -0,0 +1,125 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef __LISTELEM_ALLOC_H__ +#define __LISTELEM_ALLOC_H__ + +/** @file listelem_alloc.h + * @brief Fast memory allocator for uniformly sized objects + * @author M K Ravishankar + */ +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +#include +#ifdef S60 +#include +#endif + +/* Win32/WinCE DLL gunk */ +#include +#include + +/** + * List element allocator object. + */ +typedef struct listelem_alloc_s listelem_alloc_t; + +/** + * Initialize and return a list element allocator. 
+ */ +SPHINXBASE_EXPORT +listelem_alloc_t * listelem_alloc_init(size_t elemsize); + +/** + * Finalize and release all memory associated with a list element allocator. + */ +SPHINXBASE_EXPORT +void listelem_alloc_free(listelem_alloc_t *le); + + +SPHINXBASE_EXPORT +void *__listelem_malloc__(listelem_alloc_t *le, char *file, int line); + +/** + * Allocate a list element and return pointer to it. + */ +#define listelem_malloc(le) __listelem_malloc__((le),__FILE__,__LINE__) + +SPHINXBASE_EXPORT +void *__listelem_malloc_id__(listelem_alloc_t *le, char *file, int line, + int32 *out_id); + +/** + * Allocate a list element, returning a unique identifier. + */ +#define listelem_malloc_id(le, oid) __listelem_malloc_id__((le),__FILE__,__LINE__,(oid)) + +/** + * Retrieve a list element by its identifier. + */ +SPHINXBASE_EXPORT +void *listelem_get_item(listelem_alloc_t *le, int32 id); + +/** + * Free list element of given size + */ +SPHINXBASE_EXPORT +void __listelem_free__(listelem_alloc_t *le, void *elem, char *file, int line); + +/** + * Macro of __listelem_free__ + */ +#define listelem_free(le,el) __listelem_free__((le),(el),__FILE__,__LINE__) + +/** + Print stats: number of allocations and number of free operations +*/ +SPHINXBASE_EXPORT +void listelem_stats(listelem_alloc_t *le); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/logmath.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/logmath.h new file mode 100644 index 0000000000000000000000000000000000000000..1e555c24584c84d0b934a6452dee7f0087857d0b --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/logmath.h @@ -0,0 +1,249 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. 
All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file logmath.h + * @brief Fast integer logarithmic addition operations. + * + * In evaluating HMM models, probability values are often kept in log + * domain, to avoid overflow. 
To enable these logprob values to be + * held in int32 variables without significant loss of precision, a + * logbase of (1+epsilon) (where epsilon < 0.01 or so) is used. This + * module maintains this logbase (B). + * + * However, maintaining probabilities in log domain creates a problem + * when adding two probability values. This problem can be solved by + * table lookup. Note that: + * + * - \f$ b^z = b^x + b^y \f$ + * - \f$ b^z = b^x(1 + b^{y-x}) = b^y(1 + b^{x-y}) \f$ + * - \f$ z = x + log_b(1 + b^{y-x}) = y + log_b(1 + b^{x-y}) \f$ + * + * So: + * + * - when \f$ y > x, z = y + logadd\_table[-(x-y)] \f$ + * - when \f$ x > y, z = x + logadd\_table[-(y-x)] \f$ + * - where \f$ logadd\_table[n] = log_b(1 + b^{-n}) \f$ + * + * The first entry in logadd_table is + * simply \f$ log_b(2.0) \f$, for + * the case where \f$ y = x \f$ and thus + * \f$ z = log_b(2 b^x) = log_b(2) + x \f$. The last entry is zero, + * where \f$ log_b(b^x + b^y) = \max(x, y) \f$ due to loss of precision. + * + * Since this table can be quite large particularly for small + * logbases, an option is provided to compress it by dropping the + * least significant bits of the table. + */ + +#ifndef __LOGMATH_H__ +#define __LOGMATH_H__ + +#include +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Integer log math computation table. + * + * This is exposed here to allow log-add computations to be inlined. + */ +typedef struct logadd_s logadd_t; +struct logadd_s { + /** Table, in unsigned integers of (width) bytes. */ + void *table; + /** Number of elements in (table). This is never smaller than 256 (important!) */ + uint32 table_size; + /** Width of elements of (table). */ + uint8 width; + /** Right shift applied to elements in (table). */ + int8 shift; +}; + +/** + * Integer log math computation class. 
+ */ +typedef struct logmath_s logmath_t; + +/** + * Obtain the log-add table from a logmath_t * + */ +#define LOGMATH_TABLE(lm) ((logadd_t *)lm) + +/** + * Initialize a log math computation table. + * @param base The base B in which computation is to be done. + * @param shift Log values are shifted right by this many bits. + * @param use_table Whether to use an add table or not + * @return The newly created log math table. + */ +SPHINXBASE_EXPORT +logmath_t *logmath_init(float64 base, int shift, int use_table); + +/** + * Memory-map (or read) a log table from a file. + */ +SPHINXBASE_EXPORT +logmath_t *logmath_read(const char *filename); + +/** + * Write a log table to a file. + */ +SPHINXBASE_EXPORT +int32 logmath_write(logmath_t *lmath, const char *filename); + +/** + * Get the log table size and dimensions. + */ +SPHINXBASE_EXPORT +int32 logmath_get_table_shape(logmath_t *lmath, uint32 *out_size, + uint32 *out_width, uint32 *out_shift); + +/** + * Get the log base. + */ +SPHINXBASE_EXPORT +float64 logmath_get_base(logmath_t *lmath); + +/** + * Get the smallest possible value represented in this base. + */ +SPHINXBASE_EXPORT +int logmath_get_zero(logmath_t *lmath); + +/** + * Get the width of the values in a log table. + */ +SPHINXBASE_EXPORT +int logmath_get_width(logmath_t *lmath); + +/** + * Get the shift of the values in a log table. + */ +SPHINXBASE_EXPORT +int logmath_get_shift(logmath_t *lmath); + +/** + * Retain ownership of a log table. + * + * @return pointer to retained log table. + */ +SPHINXBASE_EXPORT +logmath_t *logmath_retain(logmath_t *lmath); + +/** + * Free a log table. + * + * @return new reference count (0 if freed completely) + */ +SPHINXBASE_EXPORT +int logmath_free(logmath_t *lmath); + +/** + * Add two values in log space exactly and slowly (without using add table). + */ +SPHINXBASE_EXPORT +int logmath_add_exact(logmath_t *lmath, int logb_p, int logb_q); + +/** + * Add two values in log space (i.e. 
return log(exp(p)+exp(q))) + */ +SPHINXBASE_EXPORT +int logmath_add(logmath_t *lmath, int logb_p, int logb_q); + +/** + * Convert linear floating point number to integer log in base B. + */ +SPHINXBASE_EXPORT +int logmath_log(logmath_t *lmath, float64 p); + +/** + * Convert integer log in base B to linear floating point. + */ +SPHINXBASE_EXPORT +float64 logmath_exp(logmath_t *lmath, int logb_p); + +/** + * Convert natural log (in floating point) to integer log in base B. + */ +SPHINXBASE_EXPORT +int logmath_ln_to_log(logmath_t *lmath, float64 log_p); + +/** + * Convert integer log in base B to natural log (in floating point). + */ +SPHINXBASE_EXPORT +float64 logmath_log_to_ln(logmath_t *lmath, int logb_p); + +/** + * Convert base 10 log (in floating point) to integer log in base B. + */ +SPHINXBASE_EXPORT +int logmath_log10_to_log(logmath_t *lmath, float64 log_p); + +/** + * Convert base 10 log (in floating point) to float log in base B. + */ +SPHINXBASE_EXPORT +float logmath_log10_to_log_float(logmath_t *lmath, float64 log_p); + +/** + * Convert integer log in base B to base 10 log (in floating point). + */ +SPHINXBASE_EXPORT +float64 logmath_log_to_log10(logmath_t *lmath, int logb_p); + +/** + * Convert float log in base B to base 10 log. 
+ */ +SPHINXBASE_EXPORT +float64 logmath_log_float_to_log10(logmath_t *lmath, float log_p); + +#ifdef __cplusplus +} +#endif + + +#endif /* __LOGMATH_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/matrix.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/matrix.h new file mode 100644 index 0000000000000000000000000000000000000000..b77cab1c96d33d8ed4290232c5fad5cefc679881 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/matrix.h @@ -0,0 +1,214 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1997-2000 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: matrix.h + * + * Description: Matrix and linear algebra functions + * + * Author: + * + *********************************************************************/ + +#ifndef MATRIX_H +#define MATRIX_H + +/** \file matrix.h + * \brief Matrix and linear algebra functions. + * + * This file contains some basic matrix and linear algebra operations. + * In general these operate on positive definite matrices ONLY, + * because all matrices we're likely to encounter are either + * covariance matrices or are derived from them, and therefore a + * non-positive-definite matrix indicates some kind of pathological + * condition. + */ +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. 
*/ +} +#endif + +/* Win32/WinCE DLL gunk */ +#include +#include + + +/** + * Norm an array + * @param arr array + * @param d1 dimension + * @param d2 dimension + * @param d3 dimension + **/ +SPHINXBASE_EXPORT void norm_3d(float32 ***arr, uint32 d1, uint32 d2, uint32 d3); + +/** + * Floor 3-d array + * @param out output array + * @param in input array + * @param d1 dimension + * @param d2 dimension + * @param d3 dimension + **/ +SPHINXBASE_EXPORT void +accum_3d(float32 ***out, float32 ***in, uint32 d1, uint32 d2, uint32 d3); + +/** Ensures that non-zero values x such that -band < x < band, band > 0 are set to -band if x < 0 and band if x > 0. + * @param v array + * @param d1 array size + * @param band band value + */ +SPHINXBASE_EXPORT void band_nz_1d(float32 *v, uint32 d1, float32 band); + +/** + * Floor 3-d array + * @param m array + * @param d1 dimension + * @param d2 dimension + * @param d3 dimension + * @param floor floor value + **/ +SPHINXBASE_EXPORT void floor_nz_3d(float32 ***m, uint32 d1, uint32 d2, uint32 d3, float32 floor); + +/** + * Floor 1-d array + * @param v array + * @param d1 dimension + * @param floor floor value + **/ +SPHINXBASE_EXPORT void floor_nz_1d(float32 *v, uint32 d1, float32 floor); + +/** + * Calculate the determinant of a positive definite matrix. + * @param a The input matrix, must be positive definite. + * @param len The dimension of the input matrix. + * @return The determinant of the input matrix, or -1.0 if the matrix is + * not positive definite. + * + * \note These can be vanishingly small hence the float64 return type. + * Also note that only the upper triangular portion of a is + * considered, therefore the check for positive-definiteness is not + * reliable. + **/ +SPHINXBASE_EXPORT +float64 determinant(float32 **a, int32 len); + +/** + * Invert (if possible) a positive definite matrix with QR + * algorithm. + * + * @param out_ainv The inverse of a will be stored here. 
+ * @param a The input matrix, must be positive definite. + * @param len The dimension of the input matrix. + * @return 0 for success or -1 for a non-positive-definite matrix. + * + * \note Only the upper triangular portion of a is considered, + * therefore the check for positive-definiteness is not reliable. + * + * \note The inversion can be done in-place, so you can use the same matrix + * if you do not need to keep a. + **/ +SPHINXBASE_EXPORT +int32 invert(float32 **out_ainv, float32 **a, int32 len); + +/** + * Solve (if possible) a positive-definite system of linear equations AX=B for X. + * @param a The A matrix on the left-hand side of the equation, must be positive-definite. + * @param b The B vector on the right-hand side of the equation. + * @param out_x The X vector will be stored here. + * @param n The dimension of the A matrix (n by n) and the B and X vectors. + * @return 0 for success or -1 for a non-positive-definite matrix. + * + * \note Only the upper triangular portion of a is considered, + * therefore the check for positive-definiteness is not reliable. + **/ +SPHINXBASE_EXPORT +int32 solve(float32 **a, float32 *b, + float32 *out_x, int32 n); + +/** + * Calculate the outer product of two vectors. + * @param out_a A (pre-allocated) len x len array. The outer product + * will be stored here. + * @param x A vector of length len. + * @param y A vector of length len. + * @param len The length of the input vectors. + **/ +SPHINXBASE_EXPORT +void outerproduct(float32 **out_a, float32 *x, float32 *y, int32 len); + +/** + * Multiply C=AB where A and B are symmetric matrices. + * @param out_c The output matrix C. + * @param a The input matrix A. + * @param b The input matrix B. + * @param n Dimensionality of A and B. + **/ +SPHINXBASE_EXPORT +void matrixmultiply(float32 **out_c, /* = */ + float32 **a, /* * */ float32 **b, + int32 n); + +/** + * Multiply a symmetric matrix by a constant in-place. + * @param inout_a The matrix to multiply. 
+ * @param x The constant to multiply it by. + * @param n dimension of a. + **/ +SPHINXBASE_EXPORT +void scalarmultiply(float32 **inout_a, float32 x, int32 n); + +/** + * Add A += B. + * @param inout_a The A matrix to add. + * @param b The B matrix to add to A. + * @param n dimension of a and b. + **/ +SPHINXBASE_EXPORT +void matrixadd(float32 **inout_a, float32 **b, int32 n); + +#if 0 +{ /* Fool indent. */ +#endif +#ifdef __cplusplus +} +#endif + +#endif /* MATRIX_H */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/mmio.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/mmio.h new file mode 100644 index 0000000000000000000000000000000000000000..fcae74d8338c252834ad497fba7a25644d83258a --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/mmio.h @@ -0,0 +1,85 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006-2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file mmio.h + * @brief Memory-mapped I/O wrappers for files. + * @author David Huggins-Daines + **/ + +#ifndef __MMIO_H__ +#define __MMIO_H__ + +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Abstract structure representing a memory-mapped file. + **/ +typedef struct mmio_file_s mmio_file_t; + +/** + * Memory-map a file for reading. + * @return a mmio_file_t * or NULL for failure. + **/ +SPHINXBASE_EXPORT +mmio_file_t *mmio_file_read(const char *filename); + +/** + * Get a pointer to the memory mapped for a file. + **/ +SPHINXBASE_EXPORT +void *mmio_file_ptr(mmio_file_t *mf); + +/** + * Unmap a file, releasing memory associated with it. 
+ **/ +SPHINXBASE_EXPORT +void mmio_file_unmap(mmio_file_t *mf); + +#ifdef __cplusplus +} +#endif + + +#endif /* __MMIO_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/ngram_model.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/ngram_model.h new file mode 100644 index 0000000000000000000000000000000000000000..5be6d5ecf8ba59cb7f8d7e3f28d8a8565e686741 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/ngram_model.h @@ -0,0 +1,703 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file ngram_model.h + * @brief N-Gram language models + * @author David Huggins-Daines + */ + +#ifndef __NGRAM_MODEL_H__ +#define __NGRAM_MODEL_H__ + +#include + +/* Win32/WinCE DLL gunk */ +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Abstract type representing an N-Gram based language model. + */ +typedef struct ngram_model_s ngram_model_t; + +/** + * Abstract type representing a word class in an N-Gram model. + */ +typedef struct ngram_class_s ngram_class_t; + +/** + * File types for N-Gram files + */ +typedef enum ngram_file_type_e { + NGRAM_INVALID = -1, /**< Not a valid file type. */ + NGRAM_AUTO, /**< Determine file type automatically. */ + NGRAM_ARPA, /**< ARPABO text format (the standard). */ + NGRAM_BIN /**< Sphinx .DMP format. */ +} ngram_file_type_t; + +#define NGRAM_INVALID_WID -1 /**< Impossible word ID */ + +/** + * Read an N-Gram model from a file on disk. + * + * @param config Optional pointer to a set of command-line arguments. + * Recognized arguments are: + * + * - -mmap (boolean) whether to use memory-mapped I/O + * - -lw (float32) language weight to apply to the model + * - -wip (float32) word insertion penalty to apply to the model + * + * @param file_name path to the file to read. 
+ * @param file_type type of the file, or NGRAM_AUTO to determine automatically. + * @param lmath Log-math parameters to use for probability + * calculations. Ownership of this object is assumed by + * the newly created ngram_model_t, and you should not + * attempt to free it manually. If you wish to reuse it + * elsewhere, you must retain it with logmath_retain(). + * @return newly created ngram_model_t. + */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_read(cmd_ln_t *config, + const char *file_name, + ngram_file_type_t file_type, + logmath_t *lmath); + +/** + * Write an N-Gram model to disk. + * + * @return 0 for success, <0 on error + */ +SPHINXBASE_EXPORT +int ngram_model_write(ngram_model_t *model, const char *file_name, + ngram_file_type_t format); + +/** + * Guess the file type for an N-Gram model from the filename. + * + * @return the guessed file type, or NGRAM_INVALID if none could be guessed. + */ +SPHINXBASE_EXPORT +ngram_file_type_t ngram_file_name_to_type(const char *file_name); + +/** + * Get the N-Gram file type from a string. + * + * @return file type, or NGRAM_INVALID if no such file type exists. + */ +SPHINXBASE_EXPORT +ngram_file_type_t ngram_str_to_type(const char *str_name); + +/** + * Get the canonical name for an N-Gram file type. + * + * @return read-only string with the name for this file type, or NULL + * if no such type exists. + */ +SPHINXBASE_EXPORT +char const *ngram_type_to_str(int type); + +/** + * Retain ownership of an N-Gram model. + * + * @return Pointer to retained model. + */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_retain(ngram_model_t *model); + +/** + * Release memory associated with an N-Gram model. + * + * @return new reference count (0 if freed completely) + */ +SPHINXBASE_EXPORT +int ngram_model_free(ngram_model_t *model); + +/** + * Constants for case folding. + */ +typedef enum ngram_case_e { + NGRAM_UPPER, + NGRAM_LOWER +} ngram_case_t; + +/** + * Case-fold word strings in an N-Gram model. 
+ * + * WARNING: This is not Unicode aware, so any non-ASCII characters + * will not be converted. + */ +SPHINXBASE_EXPORT +int ngram_model_casefold(ngram_model_t *model, int kase); + +/** + * Apply a language weight, insertion penalty, and unigram weight to a + * language model. + * + * This will change the values output by ngram_score() and friends. + * This is done for efficiency since in decoding, these are the only + * values we actually need. Call ngram_prob() if you want the "raw" + * N-Gram probability estimate. + * + * To remove all weighting, call ngram_model_apply_weights(model, 1.0, 1.0). + */ +SPHINXBASE_EXPORT +int ngram_model_apply_weights(ngram_model_t *model, + float32 lw, float32 wip); + +/** + * Get the current weights from a language model. + * + * @param model The model in question. + * @param out_log_wip Output: (optional) logarithm of word insertion penalty. + * @return language weight. + */ +SPHINXBASE_EXPORT +float32 ngram_model_get_weights(ngram_model_t *model, int32 *out_log_wip); + +/** + * Get the score (scaled, interpolated log-probability) for a general + * N-Gram. + * + * The argument list consists of the history words (as null-terminated + * strings) of the N-Gram, in reverse order, followed by NULL. + * Therefore, if you wanted to get the N-Gram score for "a whole joy", + * you would call: + *
+ *  score = ngram_score(model, "joy", "whole", "a", NULL);
+ * 
+ * + * This is not the function to use in decoding, because it has some + * overhead for looking up words. Use ngram_ng_score(), + * ngram_tg_score(), or ngram_bg_score() instead. In the future there + * will probably be a version that takes a general language model + * state object, to support suffix-array LM and things like that. + * + * If one of the words is not in the LM's vocabulary, the result will + * depend on whether this is an open or closed vocabulary language + * model. For an open-vocabulary model, unknown words are all mapped + * to the unigram <UNK> which has a non-zero probability and also + * participates in higher-order N-Grams. Therefore, you will get a + * score of some sort in this case. + * + * For a closed-vocabulary model, unknown words are impossible and + * thus have zero probability. Therefore, if word is + * unknown, this function will return a "zero" log-probability, i.e. a + * large negative number. To obtain this number for comparison, call + * ngram_zero(). + */ +SPHINXBASE_EXPORT +int32 ngram_score(ngram_model_t *model, const char *word, ...); + +/** + * Quick trigram score lookup. + */ +SPHINXBASE_EXPORT +int32 ngram_tg_score(ngram_model_t *model, + int32 w3, int32 w2, int32 w1, + int32 *n_used); + +/** + * Quick bigram score lookup. + */ +SPHINXBASE_EXPORT +int32 ngram_bg_score(ngram_model_t *model, + int32 w2, int32 w1, + int32 *n_used); + +/** + * Quick general N-Gram score lookup. + */ +SPHINXBASE_EXPORT +int32 ngram_ng_score(ngram_model_t *model, int32 wid, int32 *history, + int32 n_hist, int32 *n_used); + +/** + * Get the "raw" log-probability for a general N-Gram. + * + * This returns the log-probability of an N-Gram, as defined in the + * language model file, before any language weighting, interpolation, + * or insertion penalty has been applied. + * + * @note When backing off to a unigram from a bigram or trigram, the + * unigram weight (interpolation with uniform) is not removed. 
+ */ +SPHINXBASE_EXPORT +int32 ngram_probv(ngram_model_t *model, const char *word, ...); + +/** + * Get the "raw" log-probability for a general N-Gram. + * + * This returns the log-probability of an N-Gram, as defined in the + * language model file, before any language weighting, interpolation, + * or insertion penalty has been applied. + * + * @note When backing off to a unigram from a bigram or trigram, the + * unigram weight (interpolation with uniform) is not removed. + */ +SPHINXBASE_EXPORT +int32 ngram_prob(ngram_model_t *model, const char* const *words, int32 n); + +/** + * Quick "raw" probability lookup for a general N-Gram. + * + * See documentation for ngram_ng_score() and ngram_apply_weights() + * for an explanation of this. + */ +SPHINXBASE_EXPORT +int32 ngram_ng_prob(ngram_model_t *model, int32 wid, int32 *history, + int32 n_hist, int32 *n_used); + +/** + * Convert score to "raw" log-probability. + * + * @note The unigram weight (interpolation with uniform) is not + * removed, since there is no way to know which order of N-Gram + * generated score. + * + * @param model The N-Gram model from which score was obtained. + * @param score The N-Gram score to convert + * @return The raw log-probability value. + */ +SPHINXBASE_EXPORT +int32 ngram_score_to_prob(ngram_model_t *model, int32 score); + +/** + * Look up numerical word ID. + */ +SPHINXBASE_EXPORT +int32 ngram_wid(ngram_model_t *model, const char *word); + +/** + * Look up word string for numerical word ID. + */ +SPHINXBASE_EXPORT +const char *ngram_word(ngram_model_t *model, int32 wid); + +/** + * Get the unknown word ID for a language model. + * + * Language models can be either "open vocabulary" or "closed + * vocabulary". The difference is that the former assigns a fixed + * non-zero unigram probability to unknown words, while the latter + * does not allow unknown words (or, equivalently, it assigns them + * zero probability). 
If this is a closed vocabulary model, this + * function will return NGRAM_INVALID_WID. + * + * @return The ID for the unknown word, or NGRAM_INVALID_WID if none + * exists. + */ +SPHINXBASE_EXPORT +int32 ngram_unknown_wid(ngram_model_t *model); + +/** + * Get the "zero" log-probability value for a language model. + */ +SPHINXBASE_EXPORT +int32 ngram_zero(ngram_model_t *model); + +/** + * Get the order of the N-gram model (i.e. the "N" in "N-gram") + */ +SPHINXBASE_EXPORT +int32 ngram_model_get_size(ngram_model_t *model); + +/** + * Get the counts of the various N-grams in the model. + */ +SPHINXBASE_EXPORT +uint32 const *ngram_model_get_counts(ngram_model_t *model); + +/** + * M-gram iterator object. + */ +typedef struct ngram_iter_s ngram_iter_t; + +/** + * Iterate over all M-grams. + * + * @param model Language model to query. + * @param m Order of the M-Grams requested minus one (i.e. order of the history) + * @return An iterator over the requested M, or NULL if no N-grams of + * order M+1 exist. + */ +SPHINXBASE_EXPORT +ngram_iter_t *ngram_model_mgrams(ngram_model_t *model, int m); + +/** + * Get an iterator over M-grams pointing to the specified M-gram. + */ +SPHINXBASE_EXPORT +ngram_iter_t *ngram_iter(ngram_model_t *model, const char *word, ...); + +/** + * Get an iterator over M-grams pointing to the specified M-gram. + */ +SPHINXBASE_EXPORT +ngram_iter_t *ngram_ng_iter(ngram_model_t *model, int32 wid, int32 *history, int32 n_hist); + +/** + * Get information from the current M-gram in an iterator. + * + * @param out_score Output: Score for this M-gram (including any word + * penalty and language weight). + * @param out_bowt Output: Backoff weight for this M-gram. + * @return read-only array of word IDs. + */ +SPHINXBASE_EXPORT +int32 const *ngram_iter_get(ngram_iter_t *itor, + int32 *out_score, + int32 *out_bowt); + +/** + * Iterate over all M-gram successors of an M-1-gram. + * + * @param itor Iterator pointing to the M-1-gram to get successors of. 
+ */ +SPHINXBASE_EXPORT +ngram_iter_t *ngram_iter_successors(ngram_iter_t *itor); + +/** + * Advance an M-gram iterator. + */ +SPHINXBASE_EXPORT +ngram_iter_t *ngram_iter_next(ngram_iter_t *itor); + +/** + * Terminate an M-gram iterator. + */ +SPHINXBASE_EXPORT +void ngram_iter_free(ngram_iter_t *itor); + +/** + * Add a word (unigram) to the language model. + * + * @note The semantics of this are not particularly well-defined for + * model sets, and may be subject to change. Currently this will add + * the word to all of the submodels + * + * @param model The model to add a word to. + * @param word Text of the word to add. + * @param weight Weight of this word relative to the uniform distribution. + * @return The word ID for the new word. + */ +SPHINXBASE_EXPORT +int32 ngram_model_add_word(ngram_model_t *model, + const char *word, float32 weight); + +/** + * Read a class definition file and add classes to a language model. + * + * This function assumes that the class tags have already been defined + * as unigrams in the language model. All words in the class + * definition will be added to the vocabulary as special in-class words. + * For this reason it is necessary that they not have the same names + * as any words in the general unigram distribution. The convention + * is to suffix them with ":class_tag", where class_tag is the class + * tag minus the enclosing square brackets. + * + * @return 0 for success, <0 for error + */ +SPHINXBASE_EXPORT +int32 ngram_model_read_classdef(ngram_model_t *model, + const char *file_name); + +/** + * Add a new class to a language model. + * + * If classname already exists in the unigram set for + * model, then it will be converted to a class tag, and + * classweight will be ignored. Otherwise, a new unigram + * will be created as in ngram_model_add_word().
+ */ +SPHINXBASE_EXPORT +int32 ngram_model_add_class(ngram_model_t *model, + const char *classname, + float32 classweight, + char **words, + const float32 *weights, + int32 n_words); + +/** + * Add a word to a class in a language model. + * + * @param model The model to add a word to. + * @param classname Name of the class to add this word to. + * @param word Text of the word to add. + * @param weight Weight of this word relative to the within-class uniform distribution. + * @return The word ID for the new word. + */ +SPHINXBASE_EXPORT +int32 ngram_model_add_class_word(ngram_model_t *model, + const char *classname, + const char *word, + float32 weight); + +/** + * Create a set of language models sharing a common space of word IDs. + * + * This function creates a meta-language model which groups together a + * set of language models, synchronizing word IDs between them. To + * use this language model, you can either select a submodel to use + * exclusively using ngram_model_set_select(), or interpolate + * between scores from all models. To do the latter, you can either + * pass a non-NULL value of the weights parameter, or + * re-activate interpolation later on by calling + * ngram_model_set_interp(). + * + * In order to make this efficient, there are some restrictions on the + * models that can be grouped together. The most important (and + * currently the only) one is that they must all + * share the same log-math parameters. + * + * @param config Any configuration parameters to be shared between models. + * @param models Array of pointers to previously created language models. + * @param names Array of strings to use as unique identifiers for LMs. + * @param weights Array of weights to use in interpolating LMs, or NULL + * for no interpolation. + * @param n_models Number of elements in the arrays passed to this function. 
+ */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_set_init(cmd_ln_t *config, + ngram_model_t **models, + char **names, + const float32 *weights, + int32 n_models); + +/** + * Read a set of language models from a control file. + * + * This file creates a language model set from a "control file" of + * the type used in Sphinx-II and Sphinx-III. + * File format (optional stuff is indicated by enclosing in []): + * + *
+ *   [{ LMClassFileName LMClassFilename ... }]
+ *   TrigramLMFileName LMName [{ LMClassName LMClassName ... }]
+ *   TrigramLMFileName LMName [{ LMClassName LMClassName ... }]
+ *   ...
+ * (There should be whitespace around the { and } delimiters.)
+ * 
+ * + * This is an extension of the older format that had only TrigramLMFileName + * and LMName pairs. The new format allows a set of LMClass files to be read + * in and referred to by the trigram LMs. + * + * No "comments" allowed in this file. + * + * @param config Configuration parameters. + * @param lmctlfile Path to the language model control file. + * @param lmath Log-math parameters to use for probability + * calculations. Ownership of this object is assumed by + * the newly created ngram_model_t, and you should not + * attempt to free it manually. If you wish to reuse it + * elsewhere, you must retain it with logmath_retain(). + * @return newly created language model set. + */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_set_read(cmd_ln_t *config, + const char *lmctlfile, + logmath_t *lmath); + +/** + * Returns the number of language models in a set. + */ +SPHINXBASE_EXPORT +int32 ngram_model_set_count(ngram_model_t *set); + +/** + * Iterator over language models in a set. + */ +typedef struct ngram_model_set_iter_s ngram_model_set_iter_t; + +/** + * Begin iterating over language models in a set. + * + * @return iterator pointing to the first language model, or NULL if no models remain. + */ +SPHINXBASE_EXPORT +ngram_model_set_iter_t *ngram_model_set_iter(ngram_model_t *set); + +/** + * Move to the next language model in a set. + * + * @return iterator pointing to the next language model, or NULL if no models remain. + */ +SPHINXBASE_EXPORT +ngram_model_set_iter_t *ngram_model_set_iter_next(ngram_model_set_iter_t *itor); + +/** + * Finish iteration over a language model set. + */ +SPHINXBASE_EXPORT +void ngram_model_set_iter_free(ngram_model_set_iter_t *itor); + +/** + * Get language model and associated name from an iterator. + * + * @param itor the iterator + * @param lmname Output: string name associated with this language model. + * @return Language model pointed to by this iterator.
+ */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_set_iter_model(ngram_model_set_iter_t *itor, + char const **lmname); + +/** + * Select a single language model from a set for scoring. + * + * @return the newly selected language model, or NULL if no language + * model by that name exists. + */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_set_select(ngram_model_t *set, + const char *name); + +/** + * Look up a language model by name from a set. + * + * @return language model corresponding to name, or NULL + * if no language model by that name exists. + */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_set_lookup(ngram_model_t *set, + const char *name); + +/** + * Get the current language model name, if any. + */ +SPHINXBASE_EXPORT +const char *ngram_model_set_current(ngram_model_t *set); + +/** + * Set interpolation weights for a set and enables interpolation. + * + * If weights is NULL, any previously initialized set of + * weights will be used. If no weights were specified to + * ngram_model_set_init(), then a uniform distribution will be used. + */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_set_interp(ngram_model_t *set, + const char **names, + const float32 *weights); + +/** + * Add a language model to a set. + * + * @param set The language model set to add to. + * @param model The language model to add. + * @param name The name to associate with this model. + * @param weight Interpolation weight for this model, relative to the + * uniform distribution. 1.0 is a safe value. + * @param reuse_widmap Reuse the existing word-ID mapping in + * set. Any new words present in model + * will not be added to the word-ID mapping in this case. + */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_set_add(ngram_model_t *set, + ngram_model_t *model, + const char *name, + float32 weight, + int reuse_widmap); + +/** + * Remove a language model from a set. + * + * @param set The language model set to remove from. 
+ * @param name The name associated with the model to remove. + * @param reuse_widmap Reuse the existing word-ID mapping in + * set. + */ +SPHINXBASE_EXPORT +ngram_model_t *ngram_model_set_remove(ngram_model_t *set, + const char *name, + int reuse_widmap); + +/** + * Set the word-to-ID mapping for this model set. + */ +SPHINXBASE_EXPORT +void ngram_model_set_map_words(ngram_model_t *set, + const char **words, + int32 n_words); + +/** + * Query the word-ID mapping for the current language model. + * + * @return the local word ID in the current language model, or + * NGRAM_INVALID_WID if set_wid is invalid or + * interpolation is enabled. + */ +SPHINXBASE_EXPORT +int32 ngram_model_set_current_wid(ngram_model_t *set, + int32 set_wid); + +/** + * Test whether a word ID corresponds to a known word in the current + * state of the language model set. + * + * @return If there is a current language model, returns non-zero if + * set_wid corresponds to a known word in that language + * model. Otherwise, returns non-zero if set_wid + * corresponds to a known word in any language model. 
+ */ +SPHINXBASE_EXPORT +int32 ngram_model_set_known_wid(ngram_model_t *set, int32 set_wid); + +/** + * Flush any cached N-Gram information + */ +SPHINXBASE_EXPORT +void ngram_model_flush(ngram_model_t *lm); + +#ifdef __cplusplus +} +#endif + + +#endif /* __NGRAM_MODEL_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/pio.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/pio.h new file mode 100644 index 0000000000000000000000000000000000000000..b9a658ae9daad0cf861043dcfb815368d77a9c71 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/pio.h @@ -0,0 +1,308 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * pio.h -- Packaged I/O routines. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: pio.h,v $ + * Revision 1.3 2005/06/22 08:00:09 arthchan2003 + * Completed all doxygen documentation on file description for libs3decoder/libutil/libs3audio and programs. + * + * Revision 1.2 2005/06/22 03:09:52 arthchan2003 + * 1, Fixed doxygen documentation, 2, Added keyword. + * + * Revision 1.2 2005/06/16 00:14:08 archan + * Added const keyword to file argument for file_open + * + * Revision 1.1 2005/06/15 06:11:03 archan + * sphinx3 to s3.generic: change io.[ch] to pio.[ch] + * + * Revision 1.5 2005/06/15 04:21:46 archan + * 1, Fixed doxygen-documentation, 2, Add keyword such that changes will be logged into a file. + * + * Revision 1.4 2005/04/20 03:49:32 archan + * Add const to string argument of myfopen. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 08-Dec-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added stat_mtime(). + * + * 11-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added _myfopen() and myfopen macro. 
+ * + * 05-Sep-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Started. + */ + + +#ifndef _LIBUTIL_IO_H_ +#define _LIBUTIL_IO_H_ + +#include +#if !defined(_WIN32_WCE) && !(defined(__ADSPBLACKFIN__) && !defined(__linux__)) +#include +#endif + +/* Win32/WinCE DLL gunk */ +#include +#include + +/** \file pio.h + * \brief file IO related operations. + * + * Custom fopen with error checking is implemented. fopen_comp can + * open a file with .z, .Z, .gz or .GZ extension + * + * WARNING: Usage of stat_retry will result in 100s of waiting time + * if the file doesn't exist. +*/ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Like fopen, but use popen and zcat if it is determined that "file" is compressed + * (i.e., has a .z, .Z, .gz, or .GZ extension). + */ +SPHINXBASE_EXPORT +FILE *fopen_comp (const char *file, /**< In: File to be opened */ + const char *mode, /**< In: "r" or "w", as with normal fopen */ + int32 *ispipe /**< Out: On return *ispipe is TRUE iff file + was opened via a pipe */ + ); + +/** + * Close a file opened using fopen_comp. + */ +SPHINXBASE_EXPORT +void fclose_comp (FILE *fp, /**< In: File pointer to be closed */ + int32 ispipe /**< In: ispipe argument that was returned by the + corresponding fopen_comp() call */ + ); + +/** + * Open a file for reading, but if file not present try to open compressed version (if + * file is uncompressed, and vice versa). + */ +SPHINXBASE_EXPORT +FILE *fopen_compchk (const char *file, /**< In: File to be opened */ + int32 *ispipe /**< Out: On return *ispipe is TRUE iff file + was opened via a pipe */ + ); + +/** + * Wrapper around fopen to check for failure and E_FATAL if failed. + */ +SPHINXBASE_EXPORT +FILE *_myfopen(const char *file, const char *mode, + const char *pgm, int32 line); /* In: __FILE__, __LINE__ from where called */ +#define myfopen(file,mode) _myfopen((file),(mode),__FILE__,__LINE__) + + +/** + * NFS file reads seem to fail now and then.
Use the following functions in place of + * the regular fread. It retries failed freads several times and quits only if all of + * them fail. Be aware, however, that even normal failures such as attempting to read + * beyond EOF will trigger such retries, wasting about a minute in retries. + * Arguments identical to regular fread. + */ +SPHINXBASE_EXPORT +int32 fread_retry(void *pointer, int32 size, int32 num_items, FILE *stream); + +/** + * Read a line of arbitrary length from a file and return it as a + * newly allocated string. + * + * @deprecated Use line iterators instead. + * + * @param stream The file handle to read from. + * @param out_len Output: if not NULL, length of the string read. + * @return allocated string containing the line, or NULL on error or EOF. + */ +SPHINXBASE_EXPORT +char *fread_line(FILE *stream, size_t *out_len); + +/** + * Line iterator for files. + */ +typedef struct lineiter_t { + char *buf; + FILE *fh; + int32 bsiz; + int32 len; + int32 clean; + int32 lineno; +} lineiter_t; + +/** + * Start reading lines from a file. + */ +SPHINXBASE_EXPORT +lineiter_t *lineiter_start(FILE *fh); + +/** + * Start reading lines from a file, skip comments and trim lines. + */ +SPHINXBASE_EXPORT +lineiter_t *lineiter_start_clean(FILE *fh); + +/** + * Move to the next line in the file. + */ +SPHINXBASE_EXPORT +lineiter_t *lineiter_next(lineiter_t *li); + +/** + * Stop reading lines from a file. + */ +SPHINXBASE_EXPORT +void lineiter_free(lineiter_t *li); + +/** + * Returns current line number. + */ +SPHINXBASE_EXPORT +int lineiter_lineno(lineiter_t *li); + + +#ifdef _WIN32_WCE +/* Fake this for WinCE which has no stat() */ +#include +struct stat { + DWORD st_mtime; + DWORD st_size; +}; +#endif /* _WIN32_WCE */ + +#if defined(__ADSPBLACKFIN__) && !defined(__linux__) +struct stat { + int32 st_mtime; + int32 st_size; +}; + +#endif + +/** + * Bitstream encoder - for writing compressed files. 
+ */ +typedef struct bit_encode_s bit_encode_t; + +/** + * Attach bitstream encoder to a file. + */ +SPHINXBASE_EXPORT +bit_encode_t *bit_encode_attach(FILE *outfh); + +/** + * Retain pointer to a bit encoder. + */ +SPHINXBASE_EXPORT +bit_encode_t *bit_encode_retain(bit_encode_t *be); + +/** + * Release pointer to a bit encoder. + * + * Note that this does NOT flush any leftover bits. + */ +SPHINXBASE_EXPORT +int bit_encode_free(bit_encode_t *be); + +/** + * Write bits to encoder. + */ +SPHINXBASE_EXPORT +int bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits); + +/** + * Write lowest-order bits of codeword to encoder. + */ +SPHINXBASE_EXPORT +int bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits); + +/** + * Flush any unwritten bits, zero-padding if necessary. + */ +SPHINXBASE_EXPORT +int bit_encode_flush(bit_encode_t *be); + +/** + * There is no bitstream decoder, because a stream abstraction is too + * slow. Instead we read blocks of bits and treat them as bitvectors. + */ + +/** + * Like fread_retry, but for stat. Arguments identical to regular stat. + * Return value: 0 if successful, -1 if stat failed several attempts. + */ +SPHINXBASE_EXPORT +int32 stat_retry (const char *file, struct stat *statbuf); + +/** + * Return time of last modification for the given file, or -1 if stat fails. + */ + +SPHINXBASE_EXPORT +int32 stat_mtime (const char *file); + +/** + * Create a directory and all of its parent directories, as needed. + * + * @return 0 on success, <0 on failure. 
+ */ +SPHINXBASE_EXPORT +int build_directory(const char *path); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/prim_type.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/prim_type.h new file mode 100644 index 0000000000000000000000000000000000000000..4351257e5c49b467a150a87175aed38fcd301d6c --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/prim_type.h @@ -0,0 +1,195 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * prim_type.h -- Primitive types; more machine-independent. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: prim_type.h,v $ + * Revision 1.12 2005/10/05 00:31:14 dhdfu + * Make int8 be explicitly signed (signedness of 'char' is + * architecture-dependent). Then make a bunch of things use uint8 where + * signedness is unimportant, because on the architecture where 'char' is + * unsigned, it is that way for a reason (signed chars are slower). + * + * Revision 1.11 2005/06/22 03:10:23 arthchan2003 + * Added keyword. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 12-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Added arraysize_t, point_t, fpoint_t. + * + * 01-Feb-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Added anytype_t. + * + * 08-31-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Created. + */ + + +#ifndef _LIBUTIL_PRIM_TYPE_H_ +#define _LIBUTIL_PRIM_TYPE_H_ + +/** + * @file prim_type.h + * @brief Basic type definitions used in Sphinx. + */ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} /* Fool Emacs into not indenting things. 
*/ +#endif + +#include + +/* Define some things for VisualDSP++ */ +#if defined(__ADSPBLACKFIN__) && !defined(__GNUC__) +# ifndef HAVE_LONG_LONG +# define HAVE_LONG_LONG +# endif +# ifndef ssize_t +typedef signed int ssize_t; +# endif +# define SIZEOF_LONG_LONG 8 +# define __BIGSTACKVARIABLE__ static +#else /* Not VisualDSP++ */ +# define __BIGSTACKVARIABLE__ +#endif + +/** + * Union of basic types. + */ +typedef union anytype_s { + void *ptr; + long i; + unsigned long ui; + double fl; +} anytype_t; + +/* Use C99 types if available */ +#if defined(HAVE_STDINT_H) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) +#include +typedef int32_t int32; +typedef int16_t int16; +typedef int8_t int8; +typedef uint32_t uint32; +typedef uint16_t uint16; +typedef uint8_t uint8; +typedef int64_t int64; +typedef uint64_t uint64; +/* Take a wild guess otherwise */ +#else +typedef int int32; +typedef short int16; +typedef signed char int8; +typedef unsigned int uint32; +typedef unsigned short uint16; +typedef unsigned char uint8; +# if defined(_MSC_VER) +typedef __int64 int64; +typedef unsigned __int64 uint64; +# else +typedef long long int64; +typedef unsigned long long uint64; +# endif +#endif /* not C99 or POSIX */ + +/* We should maybe stop using these as there isn't any good way to + know their exact size, but it's 99% certain they are 32 and 64 + bits. */ +typedef float float32; +typedef double float64; + +#ifndef TRUE +#define TRUE 1 +#endif +#ifndef FALSE +#define FALSE 0 +#endif + +#ifndef NULL +#define NULL (void *)0 +#endif + +/* These really ought to come from <limits.h>, but not everybody has that.
*/ +/* Useful constants */ +#define MAX_INT32 ((int32) 0x7fffffff) +#define MAX_INT16 ((int16) 0x00007fff) +#define MAX_INT8 ((int8) 0x0000007f) + +#define MAX_NEG_INT32 ((int32) 0x80000000) +#define MAX_NEG_INT16 ((int16) 0xffff8000) +#define MAX_NEG_INT8 ((int8) 0xffffff80) + +#define MAX_UINT32 ((uint32) 0xffffffff) +#define MAX_UINT16 ((uint16) 0x0000ffff) +#define MAX_UINT8 ((uint8) 0x000000ff) + +/* The following are approximate; IEEE floating point standards might quibble! */ +#define MAX_POS_FLOAT32 3.4e+38f +#define MIN_POS_FLOAT32 1.2e-38f /* But not 0 */ +#define MAX_POS_FLOAT64 1.8e+307 +#define MIN_POS_FLOAT64 2.2e-308 + +#define MAX_IEEE_NORM_POS_FLOAT32 3.4e+38f +#define MIN_IEEE_NORM_POS_FLOAT32 1.2e-38f +#define MIN_IEEE_NORM_NEG_FLOAT32 -3.4e+38f +#define MAX_IEEE_NORM_POS_FLOAT64 1.8e+307 +#define MIN_IEEE_NORM_POS_FLOAT64 2.2e-308 +#define MIN_IEEE_NORM_NEG_FLOAT64 -1.8e+307 + +/* Will the following really work?? */ +#define MIN_NEG_FLOAT32 ((float32) (-MIN_POS_FLOAT32)) +#define MIN_NEG_FLOAT64 ((float64) (-MIN_POS_FLOAT64)) + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/priority_queue.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/priority_queue.h new file mode 100644 index 0000000000000000000000000000000000000000..335bcda1e0fb1481600cff30124b0253064a35d7 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/priority_queue.h @@ -0,0 +1,45 @@ +#ifndef __PRIORITY_QUEUE_H__ +#define __PRIORITY_QUEUE_H__ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Priority queue for max element tracking. + * The one expects heap here, but for current application + * (sorting of ngram entries one per order, i.e. 
maximum 10) + * i'll put just and array here, so each operation takes linear time. + * I swear to rework it some day! + * TODOTODOTODOTODOTODOTODOTODOTODOTODOTODOTODOTODOTODOTODO!!!!! + */ + +typedef struct priority_queue_s priority_queue_t; + +SPHINXBASE_EXPORT +priority_queue_t* priority_queue_create(size_t len, int (*compare)(const void *a, const void *b)); + +SPHINXBASE_EXPORT +void* priority_queue_poll(priority_queue_t *queue); + +SPHINXBASE_EXPORT +void priority_queue_add(priority_queue_t *queue, void *element); + +SPHINXBASE_EXPORT +size_t priority_queue_size(priority_queue_t *queue); + +SPHINXBASE_EXPORT +void priority_queue_free(priority_queue_t *queue, void (*free_ptr)(void *a)); + +#ifdef __cplusplus +} +#endif + +#endif /* __PRIORITY_QUEUE_H__ */ \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/profile.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/profile.h new file mode 100644 index 0000000000000000000000000000000000000000..ddecfb6e886e40ec4daf312933dbd5c7b2256f85 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/profile.h @@ -0,0 +1,231 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2001 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * profile.h -- For timing and event counting. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: profile.h,v $ + * Revision 1.10 2005/06/22 03:10:59 arthchan2003 + * 1, Fixed doxygen documentation, 2, Added keyword. + * + * Revision 1.5 2005/06/15 04:21:47 archan + * 1, Fixed doxygen-documentation, 2, Add keyword such that changes will be logged into a file. + * + * Revision 1.4 2005/04/25 19:22:48 archan + * Refactor out the code of rescoring from lexical tree. 
Potentially we want to turn off the rescoring if we need. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 11-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added ptmr_init(). + * + * 19-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created from earlier Sphinx-3 version. + */ + + +#ifndef _LIBUTIL_PROFILE_H_ +#define _LIBUTIL_PROFILE_H_ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} /* Fool Emacs into not indenting things. */ +#endif + +/** \file profile.h + * \brief Implementation of profiling, include counting , timing, cpu clock checking + * + * Currently, function host_endian is also in this function. It is + * not documented. + */ + +#include + +/* Win32/WinCE DLL gunk */ +#include +#include + + +/** + * \struct pctr_t + * + * Generic event counter for profiling. User is responsible for allocating an array + * of the desired number. There should be a sentinel with name = NULL. 
+ */ +typedef struct { + char *name; /**< Counter print name; NULL + terminates array of counters + Used by pctr_print_all */ + int32 count; /**< Counter value */ +} pctr_t; + +/** + * operations of pctr_t + */ + +/** + * Initialize a counter + * @return an initialized counter + */ +SPHINXBASE_EXPORT +pctr_t* pctr_new ( + char *name /**< The name of the counter */ + ); + +/** + * Reset a counter + */ + +SPHINXBASE_EXPORT +void pctr_reset (pctr_t *ctr /**< A pointer of a counter */ + ); + +/** + * Print a counter + */ +SPHINXBASE_EXPORT +void pctr_print(FILE *fp, /**< A file pointer */ + pctr_t *ctr /**< A pointer of a counter */ + ); + +/** + * Increment a counter + */ +SPHINXBASE_EXPORT +void pctr_increment (pctr_t *ctr, /**< A pointer of a counter */ + int32 inc /**< The increment of the counter */ + ); + +/** + Free the counter +*/ +SPHINXBASE_EXPORT +void pctr_free(pctr_t* ctr /**< A pointer of a counter */ + ); + + +/** + * \struct ptmr_t + * Generic timer structures and functions for coarse-grained performance measurements + * using standard system calls. + */ +typedef struct { + const char *name; /**< Timer print name; NULL terminates an array of timers. 
+ Used by ptmr_print_all */ + float64 t_cpu; /**< CPU time accumulated since most recent reset op */ + float64 t_elapsed; /**< Elapsed time accumulated since most recent reset */ + float64 t_tot_cpu; /**< Total CPU time since creation */ + float64 t_tot_elapsed; /**< Total elapsed time since creation */ + float64 start_cpu; /**< ---- FOR INTERNAL USE ONLY ---- */ + float64 start_elapsed; /**< ---- FOR INTERNAL USE ONLY ---- */ +} ptmr_t; + + + +/** Start timing using tmr */ +SPHINXBASE_EXPORT +void ptmr_start (ptmr_t *tmr /**< The timer*/ + ); + +/** Stop timing and accumulate tmr->{t_cpu, t_elapsed, t_tot_cpu, t_tot_elapsed} */ +SPHINXBASE_EXPORT +void ptmr_stop (ptmr_t *tmr /**< The timer*/ + ); + +/** Reset tmr->{t_cpu, t_elapsed} to 0.0 */ +SPHINXBASE_EXPORT +void ptmr_reset (ptmr_t *tmr /**< The timer*/ + ); + +/** Reset tmr->{t_cpu, t_elapsed, t_tot_cpu, t_tot_elapsed} to 0.0 + */ +SPHINXBASE_EXPORT +void ptmr_init (ptmr_t *tmr /**< The timer*/ + ); + + +/** + * Reset t_cpu, t_elapsed of all timer modules in array tmr[] to 0.0. + * The array should be terminated with a sentinel with .name = NULL. + */ +SPHINXBASE_EXPORT +void ptmr_reset_all (ptmr_t *tmr /**< The timer*/ + ); + +/** + * Print t_cpu for all timer modules in tmr[], normalized by norm (i.e., t_cpu/norm). + * The array should be terminated with a sentinel with .name = NULL. + */ +SPHINXBASE_EXPORT +void ptmr_print_all (FILE *fp, /**< The file pointer */ + ptmr_t *tmr, /**< The timer*/ + float64 norm + ); + + +/** + * Return the processor clock speed (in MHz); only available on some machines (Alphas). + * The dummy argument can be any integer value. + */ +SPHINXBASE_EXPORT +int32 host_pclk (int32 dummy); + + +/* + * Check the native byte-ordering of the machine by writing a magic + * number to a temporary file and reading it back. * Return value: + * 0 if BIG-ENDIAN, 1 if LITTLE-ENDIAN, -1 if error. 
+ */ +SPHINXBASE_EXPORT +int32 host_endian ( void ); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/sbthread.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/sbthread.h new file mode 100644 index 0000000000000000000000000000000000000000..f5a3b668070b534371580672975603c4cf0dd149 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/sbthread.h @@ -0,0 +1,220 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file sbthread.h + * @brief Simple portable thread functions. + * @author David Huggins-Daines + **/ + +#ifndef __SBTHREAD_H__ +#define __SBTHREAD_H__ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Thread object. + */ +typedef struct sbthread_s sbthread_t; + +/** + * Asynchronous message queue object. + */ +typedef struct sbmsgq_s sbmsgq_t; + +/** + * Mutex (critical section) object. + */ +typedef struct sbmtx_s sbmtx_t; + +/** + * Event object. + */ +typedef struct sbevent_s sbevent_t; + +/** + * Entry point for a thread. + */ +typedef int (*sbthread_main)(sbthread_t *th); + +/** + * Start a new thread. + */ +SPHINXBASE_EXPORT +sbthread_t *sbthread_start(cmd_ln_t *config, sbthread_main func, void *arg); + +/** + * Wait for a thread to complete. + */ +SPHINXBASE_EXPORT +int sbthread_wait(sbthread_t *th); + +/** + * Free a thread object. + */ +SPHINXBASE_EXPORT +void sbthread_free(sbthread_t *th); + +/** + * Get configuration object from a thread. + */ +SPHINXBASE_EXPORT +cmd_ln_t *sbthread_config(sbthread_t *th); + +/** + * Get argument pointer from a thread. + */ +SPHINXBASE_EXPORT +void *sbthread_arg(sbthread_t *th); + +/** + * Get message queue from a thread. 
+ */ +SPHINXBASE_EXPORT +sbmsgq_t *sbthread_msgq(sbthread_t *th); + +/** + * Wait for a thread to complete. + */ +SPHINXBASE_EXPORT +int sbthread_wait(sbthread_t *th); + +/** + * Send an asynchronous message to a thread. + * + * Each thread gets a message queue by default, so this is just a + * wrapper around sbmsgq_send(). + */ +SPHINXBASE_EXPORT +int sbthread_send(sbthread_t *th, size_t len, void const *data); + +/** + * Create a message queue. + * + * @param depth Depth of the queue. + */ +SPHINXBASE_EXPORT +sbmsgq_t *sbmsgq_init(size_t depth); + +/** + * Free a message queue. + */ +SPHINXBASE_EXPORT +void sbmsgq_free(sbmsgq_t *q); + +/** + * Post a message to a queue. + */ +SPHINXBASE_EXPORT +int sbmsgq_send(sbmsgq_t *q, size_t len, void const *data); + +/** + * Wait for a message from a queue. + */ +SPHINXBASE_EXPORT +void *sbmsgq_wait(sbmsgq_t *q, size_t *out_len, int sec, int nsec); + +/** + * Create a mutex. + */ +SPHINXBASE_EXPORT +sbmtx_t *sbmtx_init(void); + +/** + * Try to acquire a mutex. + */ +SPHINXBASE_EXPORT +int sbmtx_trylock(sbmtx_t *mtx); + +/** + * Acquire a mutex. + */ +SPHINXBASE_EXPORT +int sbmtx_lock(sbmtx_t *mtx); + +/** + * Release a mutex. + */ +SPHINXBASE_EXPORT +int sbmtx_unlock(sbmtx_t *mtx); + +/** + * Dispose of a mutex. + */ +SPHINXBASE_EXPORT +void sbmtx_free(sbmtx_t *mtx); + +/** + * Initialize an event. + */ +SPHINXBASE_EXPORT +sbevent_t *sbevent_init(void); + +/** + * Free an event. + */ +SPHINXBASE_EXPORT +void sbevent_free(sbevent_t *evt); + +/** + * Signal an event. + */ +SPHINXBASE_EXPORT +int sbevent_signal(sbevent_t *evt); + +/** + * Wait for an event to be signalled. 
+ */ +SPHINXBASE_EXPORT +int sbevent_wait(sbevent_t *evt, int sec, int nsec); + + +#ifdef __cplusplus +} +#endif + + +#endif /* __SBTHREAD_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/sphinx_config.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/sphinx_config.h new file mode 100644 index 0000000000000000000000000000000000000000..205289a09f547c1c101688dc9cb6f8dfb6a09d44 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/sphinx_config.h @@ -0,0 +1,17 @@ +/* include/sphinx_config.h. Generated from sphinx_config.h.in by configure. */ +/* sphinx_config.h: Externally visible configuration parameters */ + +/* Default radix point for fixed-point */ +/* #undef DEFAULT_RADIX */ + +/* Use fixed-point computation */ +/* #undef FIXED_POINT */ + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 8 + +/* Define to 1 if the system has the type `long long'. */ +#define HAVE_LONG_LONG 1 + +/* The size of `long long', as computed by sizeof. 
*/ +#define SIZEOF_LONG_LONG 8 diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/sphinxbase_export.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/sphinxbase_export.h new file mode 100644 index 0000000000000000000000000000000000000000..4022ed7fde228b6dfcec03e8a23786e1e6368a3a --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/sphinxbase_export.h @@ -0,0 +1,15 @@ +#ifndef __SPHINXBASE_EXPORT_H__ +#define __SPHINXBASE_EXPORT_H__ + +/* Win32 DLL gunk */ +#if defined(_WIN32) && defined(SPHINX_DLL) +#if defined(SPHINXBASE_EXPORTS) /* DLL itself */ +#define SPHINXBASE_EXPORT __declspec(dllexport) +#else +#define SPHINXBASE_EXPORT __declspec(dllimport) +#endif +#else /* No DLL things*/ +#define SPHINXBASE_EXPORT +#endif + +#endif /* __SPHINXBASE_EXPORT_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/strfuncs.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/strfuncs.h new file mode 100644 index 0000000000000000000000000000000000000000..392f1ca2247fa308887f5e295469c8881d846ee0 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/strfuncs.h @@ -0,0 +1,158 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1995-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file strfuncs.h + * @brief Miscellaneous useful string functions + */ + +#ifndef __SB_STRFUNCS_H__ +#define __SB_STRFUNCS_H__ + +#include + +/* Win32/WinCE DLL gunk */ +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/** + * Concatenate a NULL-terminated argument list of strings, returning a + * newly allocated string. + **/ +SPHINXBASE_EXPORT +char *string_join(const char *base, ...); + +/** + * Which end of a string to operate on for string_trim(). + */ +enum string_edge_e { + STRING_START, /**< Beginning of string. */ + STRING_END, /**< End of string. */ + STRING_BOTH /**< Both ends of string. 
*/ +}; + +/** + * Remove whitespace from a string, modifying it in-place. + * + * @param string string to trim, contents will be modified. + * @param which one of STRING_START, STRING_END, or STRING_BOTH. + */ +SPHINXBASE_EXPORT +char *string_trim(char *string, enum string_edge_e which); + +/** + * Locale independent version of atof(). + * + * This function behaves like atof() in the "C" locale. Switching + * locale in a threaded program is extremely uncool, therefore we need + * this since we pass floats as strings in 1000 different places. + */ +SPHINXBASE_EXPORT +double atof_c(char const *str); + +/* FIXME: Both of these string splitting functions basically suck. I + have attempted to fix them as best I can. (dhuggins@cs, 20070808) */ + +/** + * Convert a line to an array of "words", based on whitespace separators. A word + * is a string with no whitespace chars in it. + * Note that the string line is modified as a result: NUL chars are placed after + * every word in the line. + * Return value: No. of words found; -1 if no. of words in line exceeds n_wptr. + */ +SPHINXBASE_EXPORT +int32 str2words (char *line, /**< In/Out: line to be parsed. This + string will be modified! (NUL + characters inserted at word + boundaries) */ + char **wptr, /**< In/Out: Array of pointers to + words found in line. The array + must be allocated by the caller. + It may be NULL in which case the + number of words will be counted. + This allows you to allocate it to + the proper size, e.g.: + + n = str2words(line, NULL, 0); + wptr = ckd_calloc(n, sizeof(*wptr)); + str2words(line, wptr, n); + */ + int32 n_wptr /**< In: Size of wptr array, ignored + if wptr == NULL */ + ); + +/** + * Yet another attempt at a clean "next-word-in-string" function. See arguments below. + * @return Length of word returned, or -1 if nothing found. + * This allows you to scan through a line: + * + *
+ * while ((n = nextword(line, delim, &word, &delimfound)) >= 0) {
+ *     ... do something with word ..
+ *     word[n] = delimfound;
+ *     line = word + n;
+ * }
+ * 
+ */ +SPHINXBASE_EXPORT +int32 nextword (char *line, /**< Input: String being searched for next word. + Will be modified by this function (NUL characters inserted) */ + const char *delim, /**< Input: A word, if found, must be delimited at either + end by a character from this string (or at the end + by the NULL char) */ + char **word,/**< Output: *word = ptr within line to beginning of first + word, if found. Delimiter at the end of word replaced + with the NULL char. */ + char *delimfound /**< Output: *delimfound = original delimiter found at the end + of the word. (This way, the caller can restore the + delimiter, preserving the original string.) */ + ); + +#ifdef __cplusplus +} +#endif + + +#endif /* __SB_STRFUNCS_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/yin.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/yin.h new file mode 100644 index 0000000000000000000000000000000000000000..b1e5639d0c899d84374cc03cd3d7fe9fbab6471e --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/sphinxbase/yin.h @@ -0,0 +1,116 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* + * Copyright (c) 2008 Beyond Access, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY BEYOND ACCESS, INC. 
``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL BEYOND ACCESS, INC. NOR + * ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file yin.h + * @brief Implementation of pitch estimation + * @author David Huggins-Daines + * + * This implements part of the YIN algorithm: + * + * "YIN, a fundamental frequency estimator for speech and music". + * Alain de Cheveigné and Hideki Kawahara. Journal of the Acoustical + * Society of America, 111 (4), April 2002. + */ + +#ifndef __YIN_H__ +#define __YIN_H__ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} /* Fool Emacs. */ +#endif + +/* Win32/WinCE DLL gunk */ +#include +#include + +/** + * Frame-based moving-window pitch estimator. + */ +typedef struct yin_s yin_t; + +/** + * Initialize moving-window pitch estimation. + */ +SPHINXBASE_EXPORT +yin_t *yin_init(int frame_size, float search_threshold, + float search_range, int smooth_window); + +/** + * Free a moving-window pitch estimator. + */ +SPHINXBASE_EXPORT +void yin_free(yin_t *pe); + +/** + * Start processing an utterance. + */ +SPHINXBASE_EXPORT +void yin_start(yin_t *pe); + +/** + * Mark the end of an utterance. + */ +SPHINXBASE_EXPORT +void yin_end(yin_t *pe); + +/** + * Feed a frame of data to the pitch estimator. + * + * @param pe Pitch estimator. + * @param frame Frame of frame_size (see + * yin_init()) samples of audio data. 
+ */ +SPHINXBASE_EXPORT +void yin_write(yin_t *pe, int16 const *frame); + +/** + * Read a raw estimated pitch value from the pitch estimator. + * + * @param pe Pitch estimator. + * @param out_period Output: an estimate of the period (*not* the pitch) + * of the signal in samples. + * @param out_bestdiff Output: the minimum normalized difference value + * associated with *out_period, in Q15 + * format (i.e. scaled by 32768). This can be + * interpreted as one minus the probability of voicing. + * @return Non-zero if enough data was available to return a pitch + * estimate, zero otherwise. + */ +SPHINXBASE_EXPORT +int yin_read(yin_t *pe, uint16 *out_period, uint16 *out_bestdiff); + +#ifdef __cplusplus +} +#endif + +#endif /* __YIN_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/win32/config.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/win32/config.h new file mode 100644 index 0000000000000000000000000000000000000000..0266542d00d057b64266c7160445907ac92455f2 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/win32/config.h @@ -0,0 +1,31 @@ +/* include/sphinx_config.h, defaults for Win32 */ +/* sphinx_config.h: Externally visible configuration parameters for + * SphinxBase. + */ + +/* Default radix point for fixed-point */ +/* #undef DEFAULT_RADIX */ + +/* Enable thread safety */ +#define ENABLE_THREADS + +/* Use fixed-point computation */ +/* #undef FIXED_POINT */ + +/* Enable matrix algebra with LAPACK */ +#define WITH_LAPACK 1 + +/* The size of `long', as computed by sizeof.
*/ +#define SIZEOF_LONG 4 + +/* We don't have popen, but we do have _popen */ +/* #define HAVE_POPEN 1 */ + +/* We do have perror */ +#define HAVE_PERROR 1 + +/* We have sys/stat.h */ +#define HAVE_SYS_STAT_H 1 + +/* Extension for executables */ +#define EXEEXT ".exe" diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/win32/sphinx_config.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/win32/sphinx_config.h new file mode 100644 index 0000000000000000000000000000000000000000..730a14ac4850892f4238c310dac326dd625c7bd6 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/win32/sphinx_config.h @@ -0,0 +1,13 @@ +/* include/sphinx_config.h, defaults for Win32 */ +/* sphinx_config.h: Externally visible configuration parameters for + * SphinxBase. + */ + +/* Default radix point for fixed-point */ +/* #undef DEFAULT_RADIX */ + +/* Use fixed-point computation */ +/* #undef FIXED_POINT */ + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 4 diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/wince/assert.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/wince/assert.h new file mode 100644 index 0000000000000000000000000000000000000000..0a035893b1c3e9511825676e747be06ab3b40978 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/wince/assert.h @@ -0,0 +1,6 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* A bogus <assert.h> for WinCE which sometimes doesn't have it.
*/ +#ifndef __ASSERT_H__ +#define __ASSERT_H__ +#define assert(x) ((x) ? (void)0 : (void)(*(volatile int *)0 = 0)) /* Void expression, so it is safe in if/else and comma contexts; a failed check still faults through a null store, and volatile keeps that store from being optimized away. */ +#endif /* __ASSERT_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/wince/config.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/wince/config.h new file mode 100644 index 0000000000000000000000000000000000000000..777b967d6ead8b7076aa4acd9263453955c85692 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/wince/config.h @@ -0,0 +1,3 @@ +/* Keep only one configuration file */ + +#include "sphinx_config.h" diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/wince/errno.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/wince/errno.h new file mode 100644 index 0000000000000000000000000000000000000000..3a06b1506b914f482fe368f13e4db286609bef69 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/wince/errno.h @@ -0,0 +1,94 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== */ +/********************************************************************* + * + * File: errno.h + * + * Description: functions and variables missing from Windows CE standard + * library + * + * Author: Silvio Moioli + * + *********************************************************************/ + +#ifndef __ERRNO_H__ +#define __ERRNO_H__ + +#ifdef __cplusplus +extern "C" int errno; /* Declaration only: a name declared directly in a linkage specification is implicitly extern, and an explicit storage class there is ill-formed C++. */ +#else +extern int errno; +#endif + +#define EPERM 1 +#define ENOENT 2 +#define ESRCH 3 +#define EINTR 4 +#define EIO 5 +#define ENXIO 6 +#define E2BIG 7 +#define ENOEXEC 8 +#define EBADF 9 +#define ECHILD 10 +#define EAGAIN 11 +#define ENOMEM 12 +#define EACCES 13 +#define EFAULT 14 +#define EBUSY 16 +#define EEXIST 17 +#define EXDEV 18 +#define ENODEV 19 +#define ENOTDIR 20 +#define EISDIR 21 +#define EINVAL 22 +#define ENFILE 23 +#define EMFILE 24 +#define ENOTTY 25 +#define EFBIG 27 +#define ENOSPC 28 +#define ESPIPE 29 +#define EROFS 30 +#define EMLINK 31 +#define EPIPE 32 +#define EDOM 33 +#define ERANGE 34 +#define EDEADLK 36 +#define ENAMETOOLONG 38 +#define ENOLCK 39 +#define ENOSYS 40 +#define ENOTEMPTY 41 +#define EILSEQ 42 + +#endif /* __ERRNO_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/wince/sphinx_config.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/wince/sphinx_config.h new file mode 100644 index 0000000000000000000000000000000000000000..181baf5368cf2c9ea64d2f2e376d766fda4ba6ba --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/include/wince/sphinx_config.h @@ -0,0 +1,16 @@ +/* include/sphinx_config.h, defaults for Windows CE. */ +/* sphinx_config.h: Externally visible configuration parameters for + * SphinxBase. 
+ */ + +/* Default radix point for fixed-point */ +/* #undef DEFAULT_RADIX */ + +/* Use fixed-point computation */ +/* #define FIXED_POINT 1 */ + +/* The size of `long', as computed by sizeof. */ +#define SIZEOF_LONG 4 + +/* Extension for executables */ +#define EXEEXT ".exe" diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/CMakeLists.txt b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..2b45676b55a462b89dfcaada99f4c575bdc9d9c0 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/CMakeLists.txt @@ -0,0 +1,109 @@ +add_library(pocketsphinx + tmat.c + fsg_history.c + allphone_search.c + s2_semi_mgau.c + ps_alignment.c + ms_gauden.c + state_align_search.c + mdef.c + blkarray_list.c + dict2pid.c + vector.c + ms_senone.c + ptm_mgau.c + ngram_search_fwdflat.c + kws_detections.c + hmm.c + bin_mdef.c + phone_loop_search.c + ngram_search.c + ngram_search_fwdtree.c + ms_mgau.c + fsg_search.c + ps_lattice.c + fsg_lextree.c + ps_mllr.c + pocketsphinx.c + kws_search.c + acmod.c + dict.c + fe/fe_sigproc.c + fe/fixlog.c + fe/fe_warp_inverse_linear.c + fe/fe_warp.c + fe/fe_interface.c + fe/fe_noise.c + fe/fe_warp_affine.c + fe/fe_warp_piecewise_linear.c + fe/yin.c + feat/cmn.c + feat/agc.c + feat/cmn_live.c + feat/feat.c + feat/lda.c + lm/lm_trie_quant.c + lm/ngram_model_trie.c + lm/fsg_model.c + lm/jsgf.c + lm/ngram_model_set.c + lm/ngrams_raw.c + lm/jsgf_scanner.c + lm/ngram_model.c + lm/lm_trie.c + lm/jsgf_parser.c + util/strfuncs.c + util/dtoa.c + util/case.c + util/filename.c + util/slamch.c + util/cmd_ln.c + util/blas_lite.c + util/mmio.c + util/hash_table.c + util/err.c + util/ckd_alloc.c + util/slapack_lite.c + util/matrix.c + util/sbthread.c + util/bio.c + util/heap.c + util/priority_queue.c + util/bitvec.c + util/profile.c + util/errno.c + util/logmath.c + util/glist.c + 
util/f2c_lite.c + util/listelem_alloc.c + util/bitarr.c + util/pio.c + util/genrand.c + ) +target_include_directories( + pocketsphinx PRIVATE ${CMAKE_BINARY_DIR} # config.h + pocketsphinx PRIVATE ${CMAKE_SOURCE_DIR}/include/pocketsphinx + pocketsphinx PUBLIC ${CMAKE_SOURCE_DIR}/include + pocketsphinx PUBLIC ${CMAKE_BINARY_DIR}/include + pocketsphinx INTERFACE ${CMAKE_SOURCE_DIR}/include + ) +if(APPLE) + # Things we might need are here + target_link_directories(pocketsphinx PUBLIC /usr/local/lib) +endif() +find_library(MATH_LIBRARY m) +if(MATH_LIBRARY) + target_link_libraries(pocketsphinx PUBLIC ${MATH_LIBRARY}) +endif() +find_package(Threads) +target_link_libraries(pocketsphinx PUBLIC Threads::Threads) +# Shared library version != package version, but we will make it the +# same for now to avoid confusion +set_target_properties(pocketsphinx PROPERTIES + VERSION 5.0.0 + SOVERSION 5 + ) +# No idea why this can't just go in the above list but oh well +set_property(TARGET pocketsphinx PROPERTY + COMPILE_DEFINITIONS POCKETSPHINX_EXPORTS;SPHINXBASE_EXPORTS +) diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/acmod.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/acmod.c new file mode 100644 index 0000000000000000000000000000000000000000..5986159caf4e652952860d91e1b424fc7f46fd38 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/acmod.c @@ -0,0 +1,1331 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + + +/** + * @file acmod.c Acoustic model structures for PocketSphinx. + * @author David Huggins-Daines + */ + +/* System headers. */ +#include +#include +#include + +/* SphinxBase headers. */ +#include +#include +#include +#include +#include +#include +#include + +/* Local headers. */ +#include "cmdln_macro.h" +#include "acmod.h" +#include "s2_semi_mgau.h" +#include "ptm_mgau.h" +#include "ms_mgau.h" + +static int32 acmod_process_mfcbuf(acmod_t *acmod); +static const char *acmod_update_cmninit(acmod_t *acmod); + +static int +acmod_init_am(acmod_t *acmod) +{ + char const *mdeffn, *tmatfn, *mllrfn, *hmmdir; + + /* Read model definition. 
*/ + if ((mdeffn = cmd_ln_str_r(acmod->config, "_mdef")) == NULL) { + if ((hmmdir = cmd_ln_str_r(acmod->config, "-hmm")) == NULL) + E_ERROR("Acoustic model definition is not specified either " + "with -mdef option or with -hmm\n"); + else + E_ERROR("Folder '%s' does not contain acoustic model " + "definition 'mdef'\n", hmmdir); + + return -1; + } + + if ((acmod->mdef = bin_mdef_read(acmod->config, mdeffn)) == NULL) { + E_ERROR("Failed to read acoustic model definition from %s\n", mdeffn); + return -1; + } + + /* Read transition matrices. */ + if ((tmatfn = cmd_ln_str_r(acmod->config, "_tmat")) == NULL) { + E_ERROR("No tmat file specified\n"); + return -1; + } + acmod->tmat = tmat_init(tmatfn, acmod->lmath, + cmd_ln_float32_r(acmod->config, "-tmatfloor"), + TRUE); + + /* Read the acoustic models. */ + if ((cmd_ln_str_r(acmod->config, "_mean") == NULL) + || (cmd_ln_str_r(acmod->config, "_var") == NULL) + || (cmd_ln_str_r(acmod->config, "_tmat") == NULL)) { + E_ERROR("No mean/var/tmat files specified\n"); + return -1; + } + + if (cmd_ln_str_r(acmod->config, "_senmgau")) { + E_INFO("Using general multi-stream GMM computation\n"); + acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef); + if (acmod->mgau == NULL) + return -1; + } + else { + E_INFO("Attempting to use PTM computation module\n"); + if ((acmod->mgau = ptm_mgau_init(acmod, acmod->mdef)) == NULL) { + E_INFO("Attempting to use semi-continuous computation module\n"); + if ((acmod->mgau = s2_semi_mgau_init(acmod)) == NULL) { + E_INFO("Falling back to general multi-stream GMM computation\n"); + acmod->mgau = ms_mgau_init(acmod, acmod->lmath, acmod->mdef); + if (acmod->mgau == NULL) { + E_ERROR("Failed to read acoustic model\n"); + return -1; + } + } + } + } + + /* If there is an MLLR transform, apply it. 
*/ + if ((mllrfn = cmd_ln_str_r(acmod->config, "-mllr"))) { + ps_mllr_t *mllr = ps_mllr_read(mllrfn); + if (mllr == NULL) + return -1; + acmod_update_mllr(acmod, mllr); ps_mllr_free(mllr); /* acmod_update_mllr() retained its own reference; release the one returned by ps_mllr_read() so the transform is not leaked. */ + } + + return 0; +} +/* (Re)initialize acmod->fe and acmod->fcb: retain the objects passed in, or build them from acmod->config when NULL, then resize the MFCC and feature buffers. Returns 0 on success, -1 on failure; a locally created fcb is released on every error path. */ +int +acmod_reinit_feat(acmod_t *acmod, fe_t *fe, feat_t *fcb) +{ + if (fe) + fe = fe_retain(fe); + else { + fe = fe_init_auto_r(acmod->config); + if (fe == NULL) + return -1; + } + if (acmod_fe_mismatch(acmod, fe)) { + fe_free(fe); + return -1; + } + if (acmod->fe) + fe_free(acmod->fe); + acmod->fe = fe; + + if (fcb) + fcb = feat_retain(fcb); + else { + fcb = + feat_init(cmd_ln_str_r(acmod->config, "-feat"), + cmn_type_from_str(cmd_ln_str_r(acmod->config,"-cmn")), + cmd_ln_boolean_r(acmod->config, "-varnorm"), + agc_type_from_str(cmd_ln_str_r(acmod->config, "-agc")), + 1, cmd_ln_int32_r(acmod->config, "-ceplen")); + if (fcb == NULL) + return -1; + + if (cmd_ln_str_r(acmod->config, "_lda")) { + E_INFO("Reading linear feature transformation from %s\n", + cmd_ln_str_r(acmod->config, "_lda")); + if (feat_read_lda(fcb, + cmd_ln_str_r(acmod->config, "_lda"), + cmd_ln_int32_r(acmod->config, "-ldadim")) < 0) + { feat_free(fcb); return -1; } /* fcb was created above and is not yet owned by acmod */ + } + + if (cmd_ln_str_r(acmod->config, "-svspec")) { + int32 **subvecs; + E_INFO("Using subvector specification %s\n", + cmd_ln_str_r(acmod->config, "-svspec")); + if ((subvecs = parse_subvecs(cmd_ln_str_r(acmod->config, "-svspec"))) == NULL) + { feat_free(fcb); return -1; } + if ((feat_set_subvecs(fcb, subvecs)) < 0) + { feat_free(fcb); return -1; } + } + + if (cmd_ln_exists_r(acmod->config, "-agcthresh") + && 0 != strcmp(cmd_ln_str_r(acmod->config, "-agc"), "none")) { + agc_set_threshold(fcb->agc_struct, + cmd_ln_float32_r(acmod->config, "-agcthresh")); + } + + if (fcb->cmn_struct + && cmd_ln_exists_r(acmod->config, "-cmninit") + && cmd_ln_str_r(acmod->config, "-cmninit")) { + char *c, *cc, *vallist; + int32 nvals; + + vallist = ckd_salloc(cmd_ln_str_r(acmod->config, "-cmninit")); + c = vallist; + nvals = 0; + while (nvals < fcb->cmn_struct->veclen + && (cc = strchr(c, ',')) != NULL) { + *cc = '\0'; + 
fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c)); + c = cc + 1; + ++nvals; + } + if (nvals < fcb->cmn_struct->veclen && *c != '\0') { + fcb->cmn_struct->cmn_mean[nvals] = FLOAT2MFCC(atof_c(c)); + } + ckd_free(vallist); + } + } + if (acmod_feat_mismatch(acmod, fcb)) { + feat_free(fcb); + return -1; + } + if (acmod->fcb) + feat_free(acmod->fcb); + acmod->fcb = fcb; + + /* The MFCC buffer needs to be at least as large as the dynamic + * feature window. */ + acmod->n_mfc_alloc = acmod->fcb->window_size * 2 + 1; + if (acmod->mfc_buf) + ckd_free_2d(acmod->mfc_buf); + acmod->mfc_buf = (mfcc_t **) + ckd_calloc_2d(acmod->n_mfc_alloc, acmod->fcb->cepsize, + sizeof(**acmod->mfc_buf)); + + /* Feature buffer has to be at least as large as MFCC buffer. */ + acmod->n_feat_alloc = acmod->n_mfc_alloc + cmd_ln_int32_r(acmod->config, "-pl_window"); + if (acmod->feat_buf) + feat_array_free(acmod->feat_buf); + acmod->feat_buf = feat_array_alloc(acmod->fcb, acmod->n_feat_alloc); + if (acmod->framepos) + ckd_free(acmod->framepos); + acmod->framepos = ckd_calloc(acmod->n_feat_alloc, sizeof(*acmod->framepos)); + + return 0; +} + +int +acmod_fe_mismatch(acmod_t *acmod, fe_t *fe) +{ + /* Output vector dimension needs to be the same. */ + if (cmd_ln_int32_r(acmod->config, "-ceplen") != fe_get_output_size(fe)) { + E_ERROR("Configured feature length %d doesn't match feature " + "extraction output size %d\n", + cmd_ln_int32_r(acmod->config, "-ceplen"), + fe_get_output_size(fe)); + return TRUE; + } + /* Feature parameters need to be the same. */ + /* ... */ + return FALSE; +} + +int +acmod_feat_mismatch(acmod_t *acmod, feat_t *fcb) +{ + /* Feature type needs to be the same. */ + if (0 != strcmp(cmd_ln_str_r(acmod->config, "-feat"), feat_name(fcb))) + return TRUE; + /* Input vector dimension needs to be the same. */ + if (cmd_ln_int32_r(acmod->config, "-ceplen") != feat_cepsize(fcb)) + return TRUE; + /* FIXME: Need to check LDA and stuff too. 
*/ + return FALSE; +} + +acmod_t * +acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb) +{ + acmod_t *acmod; + + acmod = ckd_calloc(1, sizeof(*acmod)); + acmod->config = cmd_ln_retain(config); + acmod->lmath = logmath_retain(lmath); + acmod->state = ACMOD_IDLE; + + /* Initialize or retain fe and fcb. */ + if (acmod_reinit_feat(acmod, fe, fcb) < 0) + goto error_out; + + /* Load acoustic model parameters. */ + if (acmod_init_am(acmod) < 0) + goto error_out; + + /* Senone computation stuff. */ + acmod->senone_scores = ckd_calloc(bin_mdef_n_sen(acmod->mdef), + sizeof(*acmod->senone_scores)); + acmod->senone_active_vec = bitvec_alloc(bin_mdef_n_sen(acmod->mdef)); + acmod->senone_active = ckd_calloc(bin_mdef_n_sen(acmod->mdef), + sizeof(*acmod->senone_active)); + acmod->log_zero = logmath_get_zero(acmod->lmath); + acmod->compallsen = cmd_ln_boolean_r(config, "-compallsen"); + return acmod; + +error_out: + acmod_free(acmod); + return NULL; +} + +void +acmod_free(acmod_t *acmod) +{ + if (acmod == NULL) + return; + + feat_free(acmod->fcb); + fe_free(acmod->fe); + cmd_ln_free_r(acmod->config); + + if (acmod->mfc_buf) + ckd_free_2d((void **)acmod->mfc_buf); + if (acmod->feat_buf) + feat_array_free(acmod->feat_buf); + + if (acmod->mfcfh) + fclose(acmod->mfcfh); + if (acmod->rawfh) + fclose(acmod->rawfh); + if (acmod->senfh) + fclose(acmod->senfh); + + ckd_free(acmod->framepos); + ckd_free(acmod->senone_scores); + ckd_free(acmod->senone_active_vec); + ckd_free(acmod->senone_active); + + if (acmod->mdef) + bin_mdef_free(acmod->mdef); + if (acmod->tmat) + tmat_free(acmod->tmat); + if (acmod->mgau) + ps_mgau_free(acmod->mgau); + if (acmod->mllr) + ps_mllr_free(acmod->mllr); + logmath_free(acmod->lmath); + + ckd_free(acmod); +} + +ps_mllr_t * +acmod_update_mllr(acmod_t *acmod, ps_mllr_t *mllr) +{ + if (acmod->mllr) + ps_mllr_free(acmod->mllr); + acmod->mllr = ps_mllr_retain(mllr); + ps_mgau_transform(acmod->mgau, mllr); + + return mllr; +} + +int 
+acmod_write_senfh_header(acmod_t *acmod, FILE *logfh) +{ + char nsenstr[64], logbasestr[64]; + + snprintf(nsenstr, sizeof(nsenstr), "%d", bin_mdef_n_sen(acmod->mdef)); /* bounded formatting: never write past the 64-byte buffers */ + snprintf(logbasestr, sizeof(logbasestr), "%f", logmath_get_base(acmod->lmath)); + return bio_writehdr(logfh, + "version", "0.1", + "mdef_file", cmd_ln_str_r(acmod->config, "_mdef"), + "n_sen", nsenstr, + "logbase", logbasestr, NULL); +} + +int +acmod_set_senfh(acmod_t *acmod, FILE *logfh) +{ + if (acmod->senfh) + fclose(acmod->senfh); + acmod->senfh = logfh; + if (logfh == NULL) + return 0; + return acmod_write_senfh_header(acmod, logfh); +} +/* Install logfh as the MFCC log file, closing any previous one. A NULL logfh just disables logging. Otherwise a 4-byte length placeholder is written, which acmod_end_utt() later overwrites. Returns 0 on success, -1 if the placeholder could not be written. */ +int +acmod_set_mfcfh(acmod_t *acmod, FILE *logfh) +{ + int rv = 0; + + if (acmod->mfcfh) + fclose(acmod->mfcfh); + acmod->mfcfh = logfh; + if (logfh != NULL && fwrite(&rv, 4, 1, logfh) != 1) rv = -1; /* never fwrite() to a NULL stream; report a failed header write */ + return rv; +} + +int +acmod_set_rawfh(acmod_t *acmod, FILE *logfh) +{ + if (acmod->rawfh) + fclose(acmod->rawfh); + acmod->rawfh = logfh; + return 0; +} + +void +acmod_grow_feat_buf(acmod_t *acmod, int nfr) +{ + if (nfr > MAX_N_FRAMES) + E_FATAL("Decoder can not process more than %d frames at once, " + "requested %d\n", MAX_N_FRAMES, nfr); + + acmod->feat_buf = feat_array_realloc(acmod->fcb, acmod->feat_buf, + acmod->n_feat_alloc, nfr); + acmod->framepos = ckd_realloc(acmod->framepos, + nfr * sizeof(*acmod->framepos)); + acmod->n_feat_alloc = nfr; +} + +int +acmod_set_grow(acmod_t *acmod, int grow_feat) +{ + int tmp = acmod->grow_feat; + acmod->grow_feat = grow_feat; + + /* Expand feat_buf to a reasonable size to start with. 
*/ + if (grow_feat && acmod->n_feat_alloc < 128) + acmod_grow_feat_buf(acmod, 128); + + return tmp; +} + +int +acmod_start_utt(acmod_t *acmod) +{ + fe_start_utt(acmod->fe); + acmod->state = ACMOD_STARTED; + acmod->n_mfc_frame = 0; + acmod->n_feat_frame = 0; + acmod->mfc_outidx = 0; + acmod->feat_outidx = 0; + acmod->output_frame = 0; + acmod->senscr_frame = -1; + acmod->n_senone_active = 0; + acmod->mgau->frame_idx = 0; + return 0; +} + +int +acmod_end_utt(acmod_t *acmod) +{ + int32 nfr = 0; + + acmod->state = ACMOD_ENDED; + if (acmod->n_mfc_frame < acmod->n_mfc_alloc) { + int inptr; + /* Where to start writing them (circular buffer) */ + inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc; + /* nfr is always either zero or one. */ + fe_end_utt(acmod->fe, acmod->mfc_buf[inptr], &nfr); + acmod->n_mfc_frame += nfr; + /* Process whatever's left, and any leadout. */ + if (nfr) + nfr = acmod_process_mfcbuf(acmod); + } + if (acmod->mfcfh) { + int32 outlen, rv; + outlen = (ftell(acmod->mfcfh) - 4) / 4; + SWAP_BE_32(&outlen); + /* Try to seek and write */ + if ((rv = fseek(acmod->mfcfh, 0, SEEK_SET)) == 0) { + fwrite(&outlen, 4, 1, acmod->mfcfh); + } + fclose(acmod->mfcfh); + acmod->mfcfh = NULL; + } + if (acmod->rawfh) { + fclose(acmod->rawfh); + acmod->rawfh = NULL; + } + + if (acmod->senfh) { + fclose(acmod->senfh); + acmod->senfh = NULL; + } + + acmod_update_cmninit(acmod); + + return nfr; +} + +static const char * +acmod_update_cmninit(acmod_t *acmod) +{ + char *cmninit, *ptr; + cmn_t *cmn; + int i, len; + + if (acmod->fcb == NULL) + return NULL; + if ((cmn = acmod->fcb->cmn_struct) == NULL) + return NULL; + len = 0; + for (i = 0; i < cmn->veclen; ++i) { + int nbytes = snprintf(NULL, 0, "%g,", cmn->cmn_mean[i]); + if (nbytes <= 0) { + E_ERROR_SYSTEM("Failed to format %g for cmninit", cmn->cmn_mean[i]); + return NULL; + } + len += nbytes; + } + len++; + ptr = cmninit = ckd_malloc(len); + if (ptr == NULL) { + E_ERROR_SYSTEM("Failed to allocate %d bytes 
for cmninit", len); + return NULL; + } + for (i = 0; i < cmn->veclen; ++i) + ptr += snprintf(ptr, cmninit + len - ptr, "%g,", cmn->cmn_mean[i]); + if (ptr > cmninit) --ptr; *ptr = '\0'; /* drop the trailing comma; with veclen == 0 nothing was written, so do not step before the buffer */ + cmd_ln_set_str_r(acmod->config, "-cmninit", cmninit); + ckd_free(cmninit); + return cmd_ln_str_r(acmod->config, "-cmninit"); +} +/* Append n_frames cepstral vectors to acmod->mfcfh, treating cep[0] as one contiguous block of n_frames * cepsize values. On little-endian hosts the data is byte-swapped in place for the write and swapped back afterwards. Always returns 0; a short write is only logged. */ +static int +acmod_log_mfc(acmod_t *acmod, + mfcc_t **cep, int n_frames) +{ + size_t i, n; + int32 *ptr = (int32 *)cep[0]; + + n = n_frames * feat_cepsize(acmod->fcb); + /* Swap bytes. */ +#if !WORDS_BIGENDIAN + for (i = 0; i < (n * sizeof(mfcc_t) / sizeof(int32)); ++i) { + SWAP_INT32(ptr + i); + } +#endif + /* Write features. */ + if (fwrite(cep[0], sizeof(mfcc_t), n, acmod->mfcfh) != n) { + E_ERROR_SYSTEM("Failed to write %d values to log file", (int)n); /* n is size_t; cast to match the %d conversion */ + } + + /* Swap them back. */ +#if !WORDS_BIGENDIAN + for (i = 0; i < (n * sizeof(mfcc_t) / sizeof(int32)); ++i) { + SWAP_INT32(ptr + i); + } +#endif + return 0; +} + +static int +acmod_process_full_cep(acmod_t *acmod, + mfcc_t ***inout_cep, + int *inout_n_frames) +{ + int32 nfr; + + /* Write to log file. */ + if (acmod->mfcfh) + acmod_log_mfc(acmod, *inout_cep, *inout_n_frames); + + /* Resize feat_buf to fit. */ + if (acmod->n_feat_alloc < *inout_n_frames) { + + if (*inout_n_frames > MAX_N_FRAMES) + E_FATAL("Batch processing can not process more than %d frames " + "at once, requested %d\n", MAX_N_FRAMES, *inout_n_frames); + + feat_array_free(acmod->feat_buf); + acmod->feat_buf = feat_array_alloc(acmod->fcb, *inout_n_frames); + acmod->n_feat_alloc = *inout_n_frames; + acmod->n_feat_frame = 0; + acmod->feat_outidx = 0; + } + /* Make dynamic features. 
*/ + nfr = feat_s2mfc2feat_live(acmod->fcb, *inout_cep, inout_n_frames, + TRUE, TRUE, acmod->feat_buf); + acmod->n_feat_frame = nfr; + assert(acmod->n_feat_frame <= acmod->n_feat_alloc); + *inout_cep += *inout_n_frames; + *inout_n_frames = 0; + return nfr; +} + +static int +acmod_process_full_raw(acmod_t *acmod, + int16 const **inout_raw, + size_t *inout_n_samps) +{ + int32 nfr, ntail; + mfcc_t **cepptr; + + /* Write to logging file if any. */ + if (acmod->rawfh) + fwrite(*inout_raw, 2, *inout_n_samps, acmod->rawfh); + /* Resize mfc_buf to fit. */ + if (fe_process_frames(acmod->fe, NULL, inout_n_samps, NULL, &nfr) < 0) + return -1; + if (acmod->n_mfc_alloc < nfr + 1) { + ckd_free_2d(acmod->mfc_buf); + acmod->mfc_buf = ckd_calloc_2d(nfr + 1, fe_get_output_size(acmod->fe), + sizeof(**acmod->mfc_buf)); + acmod->n_mfc_alloc = nfr + 1; + } + acmod->n_mfc_frame = 0; + acmod->mfc_outidx = 0; + fe_start_utt(acmod->fe); + if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps, + acmod->mfc_buf, &nfr) < 0) + return -1; + fe_end_utt(acmod->fe, acmod->mfc_buf[nfr], &ntail); + nfr += ntail; + + cepptr = acmod->mfc_buf; + nfr = acmod_process_full_cep(acmod, &cepptr, &nfr); + acmod->n_mfc_frame = 0; + return nfr; +} + +/** + * Process MFCCs that are in the internal buffer into features. + */ +static int32 +acmod_process_mfcbuf(acmod_t *acmod) +{ + mfcc_t **mfcptr; + int32 ncep; + + ncep = acmod->n_mfc_frame; + /* Also do this in two parts because of the circular mfc_buf. */ + if (acmod->mfc_outidx + ncep > acmod->n_mfc_alloc) { + int32 ncep1 = acmod->n_mfc_alloc - acmod->mfc_outidx; + int saved_state = acmod->state; + + /* Make sure we don't end the utterance here. */ + if (acmod->state == ACMOD_ENDED) + acmod->state = ACMOD_PROCESSING; + mfcptr = acmod->mfc_buf + acmod->mfc_outidx; + ncep1 = acmod_process_cep(acmod, &mfcptr, &ncep1, FALSE); + /* It's possible that not all available frames were filled. 
*/ + ncep -= ncep1; + acmod->n_mfc_frame -= ncep1; + acmod->mfc_outidx += ncep1; + acmod->mfc_outidx %= acmod->n_mfc_alloc; + /* Restore original state (could this really be the end) */ + acmod->state = saved_state; + } + mfcptr = acmod->mfc_buf + acmod->mfc_outidx; + ncep = acmod_process_cep(acmod, &mfcptr, &ncep, FALSE); + acmod->n_mfc_frame -= ncep; + acmod->mfc_outidx += ncep; + acmod->mfc_outidx %= acmod->n_mfc_alloc; + return ncep; +} + +int +acmod_process_raw(acmod_t *acmod, + int16 const **inout_raw, + size_t *inout_n_samps, + int full_utt) +{ + int32 ncep; + + /* If this is a full utterance, process it all at once. */ + if (full_utt) + return acmod_process_full_raw(acmod, inout_raw, inout_n_samps); + + /* Append MFCCs to the end of any that are previously in there + * (in practice, there will probably be none) */ + if (inout_n_samps && *inout_n_samps) { + int16 const *prev_audio_inptr = *inout_raw; + int inptr; + + /* Total number of frames available. */ + ncep = acmod->n_mfc_alloc - acmod->n_mfc_frame; + /* Where to start writing them (circular buffer) */ + inptr = (acmod->mfc_outidx + acmod->n_mfc_frame) % acmod->n_mfc_alloc; + + /* Write them in two (or more) parts if there is wraparound. */ + while (inptr + ncep > acmod->n_mfc_alloc) { + int32 ncep1 = acmod->n_mfc_alloc - inptr; + if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps, + acmod->mfc_buf + inptr, &ncep1) < 0) + return -1; + /* Write to logging file if any. */ + if (acmod->rawfh) { + fwrite(prev_audio_inptr, 2, + *inout_raw - prev_audio_inptr, + acmod->rawfh); + prev_audio_inptr = *inout_raw; + } + /* ncep1 now contains the number of frames actually + * processed. This is a good thing, but it means we + * actually still might have some room left at the end of + * the buffer, hence the while loop. Unfortunately it + * also means that in the case where we are really + * actually done, we need to get out totally, hence the + * goto. 
*/ + acmod->n_mfc_frame += ncep1; + ncep -= ncep1; + inptr += ncep1; + inptr %= acmod->n_mfc_alloc; + if (ncep1 == 0) + goto alldone; + } + assert(inptr + ncep <= acmod->n_mfc_alloc); + if (fe_process_frames(acmod->fe, inout_raw, inout_n_samps, + acmod->mfc_buf + inptr, &ncep) < 0) + return -1; + /* Write to logging file if any. */ + if (acmod->rawfh) { + fwrite(prev_audio_inptr, 2, + *inout_raw - prev_audio_inptr, acmod->rawfh); + prev_audio_inptr = *inout_raw; + } + acmod->n_mfc_frame += ncep; + alldone: + ; + } + + /* Hand things off to acmod_process_cep. */ + return acmod_process_mfcbuf(acmod); +} + +int +acmod_process_cep(acmod_t *acmod, + mfcc_t ***inout_cep, + int *inout_n_frames, + int full_utt) +{ + int32 nfeat, ncep, inptr; + int orig_n_frames; + + /* If this is a full utterance, process it all at once. */ + if (full_utt) + return acmod_process_full_cep(acmod, inout_cep, inout_n_frames); + + /* Write to log file. */ + if (acmod->mfcfh) + acmod_log_mfc(acmod, *inout_cep, *inout_n_frames); + + /* Maximum number of frames we're going to generate. */ + orig_n_frames = ncep = nfeat = *inout_n_frames; + + /* FIXME: This behaviour isn't guaranteed... */ + if (acmod->state == ACMOD_ENDED) + nfeat += feat_window_size(acmod->fcb); + else if (acmod->state == ACMOD_STARTED) + nfeat -= feat_window_size(acmod->fcb); + + /* Clamp number of features to fit available space. */ + if (nfeat > acmod->n_feat_alloc - acmod->n_feat_frame) { + /* Grow it as needed - we have to grow it at the end of an + * utterance because we can't return a short read there. */ + if (acmod->grow_feat || acmod->state == ACMOD_ENDED) + acmod_grow_feat_buf(acmod, acmod->n_feat_alloc + nfeat); + else + ncep -= (nfeat - (acmod->n_feat_alloc - acmod->n_feat_frame)); + } + + /* Where to start writing in the feature buffer. */ + if (acmod->grow_feat) { + /* Grow to avoid wraparound if grow_feat == TRUE. 
*/ + inptr = acmod->feat_outidx + acmod->n_feat_frame; + while (inptr + nfeat >= acmod->n_feat_alloc) + acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2); + } + else { + inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc; + } + + + /* FIXME: we can't split the last frame drop properly to be on the bounary, so just return */ + if (inptr + nfeat > acmod->n_feat_alloc && acmod->state == ACMOD_ENDED) { + *inout_n_frames -= ncep; + *inout_cep += ncep; + return 0; + } + + /* Write them in two parts if there is wraparound. */ + if (inptr + nfeat > acmod->n_feat_alloc) { + int32 ncep1 = acmod->n_feat_alloc - inptr; + + /* Make sure we don't end the utterance here. */ + nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep, + &ncep1, + (acmod->state == ACMOD_STARTED), + FALSE, + acmod->feat_buf + inptr); + if (nfeat < 0) + return -1; + /* Move the output feature pointer forward. */ + acmod->n_feat_frame += nfeat; + assert(acmod->n_feat_frame <= acmod->n_feat_alloc); + inptr += nfeat; + inptr %= acmod->n_feat_alloc; + /* Move the input feature pointers forward. */ + *inout_n_frames -= ncep1; + *inout_cep += ncep1; + ncep -= ncep1; + } + + nfeat = feat_s2mfc2feat_live(acmod->fcb, *inout_cep, + &ncep, + (acmod->state == ACMOD_STARTED), + (acmod->state == ACMOD_ENDED), + acmod->feat_buf + inptr); + if (nfeat < 0) + return -1; + acmod->n_feat_frame += nfeat; + assert(acmod->n_feat_frame <= acmod->n_feat_alloc); + /* Move the input feature pointers forward. */ + *inout_n_frames -= ncep; + *inout_cep += ncep; + if (acmod->state == ACMOD_STARTED) + acmod->state = ACMOD_PROCESSING; + return orig_n_frames - *inout_n_frames; +} + +int +acmod_process_feat(acmod_t *acmod, + mfcc_t **feat) +{ + int i, inptr; + + if (acmod->n_feat_frame == acmod->n_feat_alloc) { + if (acmod->grow_feat) + acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2); + else + return 0; + } + + if (acmod->grow_feat) { + /* Grow to avoid wraparound if grow_feat == TRUE. 
*/ + inptr = acmod->feat_outidx + acmod->n_feat_frame; + while (inptr + 1 >= acmod->n_feat_alloc) + acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2); + } + else { + inptr = (acmod->feat_outidx + acmod->n_feat_frame) % acmod->n_feat_alloc; + } + for (i = 0; i < feat_dimension1(acmod->fcb); ++i) + memcpy(acmod->feat_buf[inptr][i], + feat[i], feat_dimension2(acmod->fcb, i) * sizeof(**feat)); + ++acmod->n_feat_frame; + assert(acmod->n_feat_frame <= acmod->n_feat_alloc); + + return 1; +} + +static int +acmod_read_senfh_header(acmod_t *acmod) +{ + char **name, **val; + int32 swap; + int i; + + if (bio_readhdr(acmod->insenfh, &name, &val, &swap) < 0) + goto error_out; + for (i = 0; name[i] != NULL; ++i) { + if (!strcmp(name[i], "n_sen")) { + if (atoi(val[i]) != bin_mdef_n_sen(acmod->mdef)) { + E_ERROR("Number of senones in senone file (%d) does not " + "match mdef (%d)\n", atoi(val[i]), + bin_mdef_n_sen(acmod->mdef)); + goto error_out; + } + } + + if (!strcmp(name[i], "logbase")) { + if (fabs(atof_c(val[i]) - logmath_get_base(acmod->lmath)) > 0.001) { + E_ERROR("Logbase in senone file (%f) does not match acmod " + "(%f)\n", atof_c(val[i]), + logmath_get_base(acmod->lmath)); + goto error_out; + } + } + } + acmod->insen_swap = swap; + bio_hdrarg_free(name, val); + return 0; +error_out: + bio_hdrarg_free(name, val); + return -1; +} + +int +acmod_set_insenfh(acmod_t *acmod, FILE *senfh) +{ + acmod->insenfh = senfh; + if (senfh == NULL) { + acmod->n_feat_frame = 0; + acmod->compallsen = cmd_ln_boolean_r(acmod->config, "-compallsen"); + return 0; + } + acmod->compallsen = TRUE; + return acmod_read_senfh_header(acmod); +} + +int +acmod_rewind(acmod_t *acmod) +{ + /* If the feature buffer is circular, this is not possible. 
*/ + if (acmod->output_frame > acmod->n_feat_alloc) { + E_ERROR("Circular feature buffer cannot be rewound (output frame %d, " + "alloc %d)\n", acmod->output_frame, acmod->n_feat_alloc); + return -1; + } + + /* Frames consumed + frames available */ + acmod->n_feat_frame = acmod->output_frame + acmod->n_feat_frame; + + /* Reset output pointers. */ + acmod->feat_outidx = 0; + acmod->output_frame = 0; + acmod->senscr_frame = -1; + acmod->mgau->frame_idx = 0; + + return 0; +} + +int +acmod_advance(acmod_t *acmod) +{ + /* Advance the output pointers. */ + if (++acmod->feat_outidx == acmod->n_feat_alloc) + acmod->feat_outidx = 0; + --acmod->n_feat_frame; + ++acmod->mgau->frame_idx; + + return ++acmod->output_frame; +} + +int +acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active, + int16 const *senscr, FILE *senfh) +{ + int16 n_active2; + + /* Uncompressed frame format: + * + * (2 bytes) n_active: Number of active senones + * If all senones active: + * (n_active * 2 bytes) scores of active senones + * + * Otherwise: + * (2 bytes) n_active: Number of active senones + * (n_active bytes) deltas to active senones + * (n_active * 2 bytes) scores of active senones + */ + n_active2 = n_active; + if (fwrite(&n_active2, 2, 1, senfh) != 1) + goto error_out; + if (n_active == bin_mdef_n_sen(acmod->mdef)) { + if (fwrite(senscr, 2, n_active, senfh) != (size_t) n_active) + goto error_out; + } + else { + int i, n; + if (fwrite(active, 1, n_active, senfh) != (size_t) n_active) + goto error_out; + for (i = n = 0; i < n_active; ++i) { + n += active[i]; + if (fwrite(senscr + n, 2, 1, senfh) != 1) + goto error_out; + } + } + return 0; +error_out: + E_ERROR_SYSTEM("Failed to write frame to senone file"); + return -1; +} + +/** + * Internal version, used for reading previous frames in acmod_score() + */ +static int +acmod_read_scores_internal(acmod_t *acmod) +{ + FILE *senfh = acmod->insenfh; + int16 n_active; + size_t rv; + + if (acmod->n_feat_frame == acmod->n_feat_alloc) { + 
if (acmod->grow_feat) + acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2); + else + return 0; + } + + if (senfh == NULL) + return -1; + + if ((rv = fread(&n_active, 2, 1, senfh)) != 1) + goto error_out; + + acmod->n_senone_active = n_active; + if (acmod->n_senone_active == bin_mdef_n_sen(acmod->mdef)) { + if ((rv = fread(acmod->senone_scores, 2, + acmod->n_senone_active, senfh)) + != (size_t) acmod->n_senone_active) + goto error_out; + } + else { + int i, n; + + if ((rv = fread(acmod->senone_active, 1, + acmod->n_senone_active, senfh)) + != (size_t) acmod->n_senone_active) + goto error_out; + + for (i = 0, n = 0; i < acmod->n_senone_active; ++i) { + int j, sen = n + acmod->senone_active[i]; + for (j = n + 1; j < sen; ++j) + acmod->senone_scores[j] = SENSCR_DUMMY; + + if ((rv = fread(acmod->senone_scores + sen, 2, 1, senfh)) != 1) + goto error_out; + + n = sen; + } + + n++; + while (n < bin_mdef_n_sen(acmod->mdef)) + acmod->senone_scores[n++] = SENSCR_DUMMY; + } + return 1; + +error_out: + if (ferror(senfh)) { + E_ERROR_SYSTEM("Failed to read frame from senone file"); + return -1; + } + return 0; +} + +int +acmod_read_scores(acmod_t *acmod) +{ + int inptr, rv; + + if (acmod->grow_feat) { + /* Grow to avoid wraparound if grow_feat == TRUE. */ + inptr = acmod->feat_outidx + acmod->n_feat_frame; + /* Has to be +1, otherwise, next time acmod_advance() is + * called, this will wrap around. */ + while (inptr + 1 >= acmod->n_feat_alloc) + acmod_grow_feat_buf(acmod, acmod->n_feat_alloc * 2); + } + else { + inptr = (acmod->feat_outidx + acmod->n_feat_frame) % + acmod->n_feat_alloc; + } + + if ((rv = acmod_read_scores_internal(acmod)) != 1) + return rv; + + /* Set acmod->senscr_frame appropriately so that these scores + get reused below in acmod_score(). 
*/ + acmod->senscr_frame = acmod->output_frame + acmod->n_feat_frame; + + E_DEBUG("Frame %d has %d active states\n", + acmod->senscr_frame, acmod->n_senone_active); + + /* Increment the "feature frame counter" and record the file + * position for the relevant frame in the (possibly circular) + * buffer. */ + ++acmod->n_feat_frame; + acmod->framepos[inptr] = ftell(acmod->insenfh); + + return 1; +} + +static int +calc_frame_idx(acmod_t *acmod, int *inout_frame_idx) +{ + int frame_idx; + + /* Calculate the absolute frame index to be scored. */ + if (inout_frame_idx == NULL) + frame_idx = acmod->output_frame; + else if (*inout_frame_idx < 0) + frame_idx = acmod->output_frame + 1 + *inout_frame_idx; + else + frame_idx = *inout_frame_idx; + + return frame_idx; +} + +static int +calc_feat_idx(acmod_t *acmod, int frame_idx) +{ + int n_backfr, feat_idx; + + n_backfr = acmod->n_feat_alloc - acmod->n_feat_frame; + if (frame_idx < 0 || acmod->output_frame - frame_idx > n_backfr) { + E_ERROR("Frame %d outside queue of %d frames, %d alloc (%d > %d), " + "cannot score\n", frame_idx, acmod->n_feat_frame, + acmod->n_feat_alloc, acmod->output_frame - frame_idx, + n_backfr); + return -1; + } + + /* Get the index in feat_buf/framepos of the frame to be scored. */ + feat_idx = (acmod->feat_outidx + frame_idx - acmod->output_frame) % + acmod->n_feat_alloc; + if (feat_idx < 0) + feat_idx += acmod->n_feat_alloc; + + return feat_idx; +} + +mfcc_t ** +acmod_get_frame(acmod_t *acmod, int *inout_frame_idx) +{ + int frame_idx, feat_idx; + + /* Calculate the absolute frame index requested. */ + frame_idx = calc_frame_idx(acmod, inout_frame_idx); + + /* Calculate position of requested frame in circular buffer. 
*/ + if ((feat_idx = calc_feat_idx(acmod, frame_idx)) < 0) + return NULL; + + if (inout_frame_idx) + *inout_frame_idx = frame_idx; + + return acmod->feat_buf[feat_idx]; +} + +int16 const * +acmod_score(acmod_t *acmod, int *inout_frame_idx) +{ + int frame_idx, feat_idx; + + /* Calculate the absolute frame index to be scored. */ + frame_idx = calc_frame_idx(acmod, inout_frame_idx); + + /* If all senones are being computed, or we are using a senone file, + then we can reuse existing scores. */ + if ((acmod->compallsen || acmod->insenfh) + && frame_idx == acmod->senscr_frame) { + if (inout_frame_idx) + *inout_frame_idx = frame_idx; + return acmod->senone_scores; + } + + /* Calculate position of requested frame in circular buffer. */ + if ((feat_idx = calc_feat_idx(acmod, frame_idx)) < 0) + return NULL; + + /* If there is an input senone file locate the appropriate frame and read it. */ + if (acmod->insenfh) { + fseek(acmod->insenfh, acmod->framepos[feat_idx], SEEK_SET); + if (acmod_read_scores_internal(acmod) < 0) + return NULL; + } + else { + /* Build active senone list. */ + acmod_flags2list(acmod); + + /* Generate scores for the next available frame */ + ps_mgau_frame_eval(acmod->mgau, + acmod->senone_scores, + acmod->senone_active, + acmod->n_senone_active, + acmod->feat_buf[feat_idx], + frame_idx, + acmod->compallsen); + } + + if (inout_frame_idx) + *inout_frame_idx = frame_idx; + acmod->senscr_frame = frame_idx; + + /* Dump scores to the senone dump file if one exists. 
*/ + if (acmod->senfh) { + if (acmod_write_scores(acmod, acmod->n_senone_active, + acmod->senone_active, + acmod->senone_scores, + acmod->senfh) < 0) + return NULL; + E_DEBUG("Frame %d has %d active states\n", frame_idx, + acmod->n_senone_active); + } + + return acmod->senone_scores; +} + +int +acmod_best_score(acmod_t *acmod, int *out_best_senid) +{ + int i, best; + + best = SENSCR_DUMMY; + if (acmod->compallsen) { + for (i = 0; i < bin_mdef_n_sen(acmod->mdef); ++i) { + if (acmod->senone_scores[i] < best) { + best = acmod->senone_scores[i]; + *out_best_senid = i; + } + } + } + else { + int16 *senscr; + senscr = acmod->senone_scores; + for (i = 0; i < acmod->n_senone_active; ++i) { + senscr += acmod->senone_active[i]; + if (*senscr < best) { + best = *senscr; + *out_best_senid = i; + } + } + } + return best; +} + + +void +acmod_clear_active(acmod_t *acmod) +{ + if (acmod->compallsen) + return; + bitvec_clear_all(acmod->senone_active_vec, bin_mdef_n_sen(acmod->mdef)); + acmod->n_senone_active = 0; +} + +#define MPX_BITVEC_SET(a,h,i) \ + if (hmm_mpx_ssid(h,i) != BAD_SSID) \ + bitvec_set((a)->senone_active_vec, hmm_mpx_senid(h,i)) +#define NONMPX_BITVEC_SET(a,h,i) \ + bitvec_set((a)->senone_active_vec, \ + hmm_nonmpx_senid(h,i)) + +void +acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm) +{ + int i; + + if (acmod->compallsen) + return; + if (hmm_is_mpx(hmm)) { + switch (hmm_n_emit_state(hmm)) { + case 5: + MPX_BITVEC_SET(acmod, hmm, 4); + MPX_BITVEC_SET(acmod, hmm, 3); + /* FALLTHRU */ + case 3: + MPX_BITVEC_SET(acmod, hmm, 2); + MPX_BITVEC_SET(acmod, hmm, 1); + MPX_BITVEC_SET(acmod, hmm, 0); + break; + default: + for (i = 0; i < hmm_n_emit_state(hmm); ++i) { + MPX_BITVEC_SET(acmod, hmm, i); + } + } + } + else { + switch (hmm_n_emit_state(hmm)) { + case 5: + NONMPX_BITVEC_SET(acmod, hmm, 4); + NONMPX_BITVEC_SET(acmod, hmm, 3); + /* FALLTHRU */ + case 3: + NONMPX_BITVEC_SET(acmod, hmm, 2); + NONMPX_BITVEC_SET(acmod, hmm, 1); + NONMPX_BITVEC_SET(acmod, hmm, 0); + break; + 
default: + for (i = 0; i < hmm_n_emit_state(hmm); ++i) { + NONMPX_BITVEC_SET(acmod, hmm, i); + } + } + } +} + +int32 +acmod_flags2list(acmod_t *acmod) +{ + int32 w, l, n, b, total_dists, total_words, extra_bits; + bitvec_t *flagptr; + + total_dists = bin_mdef_n_sen(acmod->mdef); + if (acmod->compallsen) { + acmod->n_senone_active = total_dists; + return total_dists; + } + total_words = total_dists / BITVEC_BITS; + extra_bits = total_dists % BITVEC_BITS; + w = n = l = 0; + for (flagptr = acmod->senone_active_vec; w < total_words; ++w, ++flagptr) { + if (*flagptr == 0) + continue; + for (b = 0; b < BITVEC_BITS; ++b) { + if (*flagptr & (1UL << b)) { + int32 sen = w * BITVEC_BITS + b; + int32 delta = sen - l; + /* Handle excessive deltas "lossily" by adding a few + extra senones to bridge the gap. */ + while (delta > 255) { + acmod->senone_active[n++] = 255; + delta -= 255; + } + acmod->senone_active[n++] = delta; + l = sen; + } + } + } + + for (b = 0; b < extra_bits; ++b) { + if (*flagptr & (1UL << b)) { + int32 sen = w * BITVEC_BITS + b; + int32 delta = sen - l; + /* Handle excessive deltas "lossily" by adding a few + extra senones to bridge the gap. 
*/ + while (delta > 255) { + acmod->senone_active[n++] = 255; + delta -= 255; + } + acmod->senone_active[n++] = delta; + l = sen; + } + } + + acmod->n_senone_active = n; + E_DEBUG("acmod_flags2list: %d active in frame %d\n", + acmod->n_senone_active, acmod->output_frame); + return n; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/acmod.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/acmod.h new file mode 100644 index 0000000000000000000000000000000000000000..00067f3b1711990b8790812a328ff0eb5a9b2015 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/acmod.h @@ -0,0 +1,500 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file acmod.h Acoustic model structures for PocketSphinx. + * @author David Huggins-Daines + */ + +#ifndef __ACMOD_H__ +#define __ACMOD_H__ + +/* System headers. */ +#include + +/* SphinxBase headers. */ +#include +#include +#include +#include +#include +#include +#include + +/* Local headers. */ +#include +#include +#include "bin_mdef.h" +#include "tmat.h" +#include "hmm.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * States in utterance processing. + */ +typedef enum acmod_state_e { + ACMOD_IDLE, /**< Not in an utterance. */ + ACMOD_STARTED, /**< Utterance started, no data yet. */ + ACMOD_PROCESSING, /**< Utterance in progress. */ + ACMOD_ENDED /**< Utterance ended, still buffering. */ +} acmod_state_t; + +/** + * Dummy senone score value for unintentionally active states. + */ +#define SENSCR_DUMMY 0x7fff + +/** + * Feature space linear transform structure. + */ +struct ps_mllr_s { + int refcnt; /**< Reference count. */ + int n_class; /**< Number of MLLR classes. */ + int n_feat; /**< Number of feature streams. */ + int *veclen; /**< Length of input vectors for each stream. */ + float32 ****A; /**< Rotation part of mean transformations. */ + float32 ***b; /**< Bias part of mean transformations. */ + float32 ***h; /**< Diagonal transformation of variances. 
*/ + int32 *cb2mllr; /**< Mapping from codebooks to transformations. */ +}; + +/** + * Acoustic model parameter structure. + */ +typedef struct ps_mgau_s ps_mgau_t; + +typedef struct ps_mgaufuncs_s { + char const *name; + + int (*frame_eval)(ps_mgau_t *mgau, + int16 *senscr, + uint8 *senone_active, + int32 n_senone_active, + mfcc_t ** feat, + int32 frame, + int32 compallsen); + int (*transform)(ps_mgau_t *mgau, + ps_mllr_t *mllr); + void (*free)(ps_mgau_t *mgau); +} ps_mgaufuncs_t; + +struct ps_mgau_s { + ps_mgaufuncs_t *vt; /**< vtable of mgau functions. */ + int frame_idx; /**< frame counter. */ +}; + +#define ps_mgau_base(mg) ((ps_mgau_t *)(mg)) +#define ps_mgau_frame_eval(mg,senscr,senone_active,n_senone_active,feat,frame,compallsen) \ + (*ps_mgau_base(mg)->vt->frame_eval) \ + (mg, senscr, senone_active, n_senone_active, feat, frame, compallsen) +#define ps_mgau_transform(mg, mllr) \ + (*ps_mgau_base(mg)->vt->transform)(mg, mllr) +#define ps_mgau_free(mg) \ + (*ps_mgau_base(mg)->vt->free)(mg) + +/** + * Acoustic model structure. + * + * This object encapsulates all stages of acoustic processing, from + * raw audio input to acoustic score output. The reason for grouping + * all of these modules together is that they all have to "agree" in + * their parameterizations, and the configuration of the acoustic and + * dynamic feature computation is completely dependent on the + * parameters used to build the original acoustic model (which should + * by now always be specified in a feat.params file). + * + * Because there is not a one-to-one correspondence from blocks of + * input audio or frames of input features to frames of acoustic + * scores (due to dynamic feature calculation), results may not be + * immediately available after input, and the output results will not + * correspond to the last piece of data input. 
+ * + * TODO: In addition, this structure serves the purpose of queueing + * frames of features (and potentially also scores in the future) for + * asynchronous passes of recognition operating in parallel. + */ +struct acmod_s { + /* Global objects, not retained. */ + cmd_ln_t *config; /**< Configuration. */ + logmath_t *lmath; /**< Log-math computation. */ + glist_t strings; /**< Temporary acoustic model filenames. */ + + /* Feature computation: */ + fe_t *fe; /**< Acoustic feature computation. */ + feat_t *fcb; /**< Dynamic feature computation. */ + + /* Model parameters: */ + bin_mdef_t *mdef; /**< Model definition. */ + tmat_t *tmat; /**< Transition matrices. */ + ps_mgau_t *mgau; /**< Model parameters. */ + ps_mllr_t *mllr; /**< Speaker transformation. */ + + /* Senone scoring: */ + int16 *senone_scores; /**< GMM scores for current frame. */ + bitvec_t *senone_active_vec; /**< Active GMMs in current frame. */ + uint8 *senone_active; /**< Array of deltas to active GMMs. */ + int senscr_frame; /**< Frame index for senone_scores. */ + int n_senone_active; /**< Number of active GMMs. */ + int log_zero; /**< Zero log-probability value. */ + + /* Utterance processing: */ + mfcc_t **mfc_buf; /**< Temporary buffer of acoustic features. */ + mfcc_t ***feat_buf; /**< Temporary buffer of dynamic features. */ + FILE *rawfh; /**< File for writing raw audio data. */ + FILE *mfcfh; /**< File for writing acoustic feature data. */ + FILE *senfh; /**< File for writing senone score data. */ + FILE *insenfh; /**< Input senone score file. */ + long *framepos; /**< File positions of recent frames in senone file. */ + + /* A whole bunch of flags and counters: */ + uint8 state; /**< State of utterance processing. */ + uint8 compallsen; /**< Compute all senones? */ + uint8 grow_feat; /**< Whether to grow feat_buf. */ + uint8 insen_swap; /**< Whether to swap input senone score. */ + + frame_idx_t output_frame; /**< Index of next frame of dynamic features. 
*/ + frame_idx_t n_mfc_alloc; /**< Number of frames allocated in mfc_buf */ + frame_idx_t n_mfc_frame; /**< Number of frames active in mfc_buf */ + frame_idx_t mfc_outidx; /**< Start of active frames in mfc_buf */ + frame_idx_t n_feat_alloc; /**< Number of frames allocated in feat_buf */ + frame_idx_t n_feat_frame; /**< Number of frames active in feat_buf */ + frame_idx_t feat_outidx; /**< Start of active frames in feat_buf */ +}; +typedef struct acmod_s acmod_t; + +/** + * Initialize an acoustic model. + * + * @param config a command-line object containing parameters. + * Ownership of this pointer is retained by this object, + * so you may free it if you no longer need it. + * @param lmath global log-math parameters. + * @param fe a previously-initialized acoustic feature module to use, + * or NULL to create one automatically. If this is supplied + * and its parameters do not match those in the acoustic + * model, this function will fail. This pointer is retained. + * @param fcb a previously-initialized dynamic feature module to use, + * or NULL to create one automatically. If this is supplied + * and its parameters do not match those in the acoustic + * model, this function will fail. This pointer is retained. + * @return a newly initialized acmod_t, or NULL on failure. + */ +POCKETSPHINX_EXPORT +acmod_t *acmod_init(cmd_ln_t *config, logmath_t *lmath, fe_t *fe, feat_t *fcb); + +/** + * Reinitialize feature computation modules. + */ +POCKETSPHINX_EXPORT +int acmod_reinit_feat(acmod_t *acmod, fe_t *fe, feat_t *fcb); + +/** + * Verify that feature extraction parameters are compatible with + * acoustic model. + * + * @param fe acoustic feature extraction module to verify. + * @return TRUE if compatible, FALSE otherwise + */ +POCKETSPHINX_EXPORT +int acmod_fe_mismatch(acmod_t *acmod, fe_t *fe); + +/** + * Verify that dynamic feature computation parameters are compatible + * with acoustic model. + * + * @param fcb dynamic feature computation module to verify. 
/**
 * Start logging senone scores to a filehandle.
 *
 * @param acmod Acoustic model object.
 * @param senfh Filehandle to log to.
 * @return 0 for success, <0 on error.
 */
POCKETSPHINX_EXPORT
int acmod_set_senfh(acmod_t *acmod, FILE *senfh);
In the future, senone scores may be cached instead. + * + * @return 0 for success, <0 for failure (if the utterance can't be + * rewound due to no feature or score data available) + */ +POCKETSPHINX_EXPORT +int acmod_rewind(acmod_t *acmod); + +/** + * Advance the frame index. + * + * This function moves to the next frame of input data. Subsequent + * calls to acmod_score() will return scores for that frame, until the + * next call to acmod_advance(). + * + * @return New frame index. + */ +POCKETSPHINX_EXPORT +int acmod_advance(acmod_t *acmod); + +/** + * Set memory allocation policy for utterance processing. + * + * @param grow_feat If non-zero, the internal dynamic feature buffer + * will expand as necessary to encompass any amount of data fed to the + * model. + * @return previous allocation policy. + */ +POCKETSPHINX_EXPORT +int acmod_set_grow(acmod_t *acmod, int grow_feat); + +/** + * TODO: Set queue length for utterance processing. + * + * This function allows multiple concurrent passes of search to + * operate on different parts of the utterance. + */ + +/** + * Feed raw audio data to the acoustic model for scoring. + * + * @param inout_raw In: Pointer to buffer of raw samples + * Out: Pointer to next sample to be read + * @param inout_n_samps In: Number of samples available + * Out: Number of samples remaining + * @param full_utt If non-zero, this block represents a full + * utterance and should be processed as such. + * @return Number of frames of data processed. + */ +POCKETSPHINX_EXPORT +int acmod_process_raw(acmod_t *acmod, + int16 const **inout_raw, + size_t *inout_n_samps, + int full_utt); + + +/** + * Feed acoustic feature data into the acoustic model for scoring. 
+ * + * @param inout_cep In: Pointer to buffer of features + * Out: Pointer to next frame to be read + * @param inout_n_frames In: Number of frames available + * Out: Number of frames remaining + * @param full_utt If non-zero, this block represents a full + * utterance and should be processed as such. + * @return Number of frames of data processed. + */ +POCKETSPHINX_EXPORT +int acmod_process_cep(acmod_t *acmod, + mfcc_t ***inout_cep, + int *inout_n_frames, + int full_utt); + +/** + * Feed dynamic feature data into the acoustic model for scoring. + * + * Unlike acmod_process_raw() and acmod_process_cep(), this function + * accepts a single frame at a time. This is because there is no need + * to do buffering when using dynamic features as input. However, if + * the dynamic feature buffer is full, this function will fail, so you + * should either always check the return value, or always pair a call + * to it with a call to acmod_score(). + * + * @param feat Pointer to one frame of dynamic features. + * @return Number of frames processed (either 0 or 1). + */ +POCKETSPHINX_EXPORT +int acmod_process_feat(acmod_t *acmod, + mfcc_t **feat); + +/** + * Set up a senone score dump file for input. + * + * @param insenfh File handle of dump file + * @return 0 for success, <0 for failure + */ +POCKETSPHINX_EXPORT +int acmod_set_insenfh(acmod_t *acmod, FILE *insenfh); + +/** + * Read one frame of scores from senone score dump file. + * + * @return Number of frames read or <0 on error. + */ +POCKETSPHINX_EXPORT +int acmod_read_scores(acmod_t *acmod); + +/** + * Get a frame of dynamic feature data. + * + * @param inout_frame_idx Input: frame index to get, or NULL + * to obtain features for the most recent frame. + * Output: frame index corresponding to this + * set of features. + * @return Feature array, or NULL if requested frame is not available. + */ +POCKETSPHINX_EXPORT +mfcc_t **acmod_get_frame(acmod_t *acmod, int *inout_frame_idx); + +/** + * Score one frame of data. 
+ * + * @param inout_frame_idx Input: frame index to score, or NULL + * to obtain scores for the most recent frame. + * Output: frame index corresponding to this + * set of scores. + * @return Array of senone scores for this frame, or NULL if no frame + * is available for scoring (such as if a frame index is + * requested that is not yet or no longer available). The + * data pointed to persists only until the next call to + * acmod_score() or acmod_advance(). + */ +POCKETSPHINX_EXPORT +int16 const *acmod_score(acmod_t *acmod, + int *inout_frame_idx); + +/** + * Write senone dump file header. + */ +POCKETSPHINX_EXPORT +int acmod_write_senfh_header(acmod_t *acmod, FILE *logfh); + +/** + * Write a frame of senone scores to a dump file. + */ +POCKETSPHINX_EXPORT +int acmod_write_scores(acmod_t *acmod, int n_active, uint8 const *active, + int16 const *senscr, FILE *senfh); + + +/** + * Get best score and senone index for current frame. + */ +POCKETSPHINX_EXPORT +int acmod_best_score(acmod_t *acmod, int *out_best_senid); + +/** + * Clear set of active senones. + */ +void acmod_clear_active(acmod_t *acmod); + +/** + * Activate senones associated with an HMM. + */ +void acmod_activate_hmm(acmod_t *acmod, hmm_t *hmm); + +/** + * Activate a single senone. + */ +#define acmod_activate_sen(acmod, sen) bitvec_set((acmod)->senone_active_vec, sen) + +/** + * Build active list. 
+ */ +POCKETSPHINX_EXPORT +int32 acmod_flags2list(acmod_t *acmod); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __ACMOD_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/allphone_search.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/allphone_search.c new file mode 100644 index 0000000000000000000000000000000000000000..3ee9ea11640b30296214c3abafbc802b357bbda5 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/allphone_search.c @@ -0,0 +1,915 @@ +/* -*- c-basic-offset: 4 -*- */ +/* ==================================================================== + * Copyright (c) 2014 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* +* allphone_search.c -- Search for phonetic decoding. +*/ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "pocketsphinx_internal.h" +#include "allphone_search.h" + +static ps_lattice_t * +allphone_search_lattice(ps_search_t * search) +{ + (void) search; + return NULL; +} + +static int +allphone_search_prob(ps_search_t * search) +{ + (void) search; + return 0; +} + +static void +allphone_backtrace(allphone_search_t * allphs, int32 f, int32 *out_score); +static void +allphone_clear_segments(allphone_search_t * allphs); + +static void +allphone_search_seg_free(ps_seg_t * seg) +{ + ckd_free(seg); +} + +static void +allphone_search_fill_iter(ps_seg_t *seg, phseg_t *phseg) +{ + seg->sf = phseg->sf; + seg->ef = phseg->ef; + seg->ascr = phseg->score; + seg->lscr = phseg->tscore; + seg->word = bin_mdef_ciphone_str(ps_search_acmod(seg->search)->mdef, phseg->ci); +} + +static ps_seg_t * +allphone_search_seg_next(ps_seg_t * seg) +{ + phseg_iter_t *itor = (phseg_iter_t *) seg; + phseg_t *phseg; + + itor->seg = itor->seg->next; + + if (itor->seg == NULL) { + allphone_search_seg_free(seg); + return NULL; + } + phseg = gnode_ptr(itor->seg); + allphone_search_fill_iter(seg, phseg); + + return seg; +} + +static ps_segfuncs_t fsg_segfuncs = { + /* seg_next */ allphone_search_seg_next, + /* seg_free */ 
allphone_search_seg_free +}; + + +static ps_seg_t * +allphone_search_seg_iter(ps_search_t * search) +{ + allphone_search_t *allphs = (allphone_search_t *) search; + phseg_iter_t *iter; + + allphone_backtrace(allphs, allphs->frame - 1, NULL); + if (allphs->segments == NULL) + return NULL; + + iter = ckd_calloc(1, sizeof(phseg_iter_t)); + + iter->base.vt = &fsg_segfuncs; + iter->base.search = search; + iter->seg = allphs->segments; + allphone_search_fill_iter((ps_seg_t *)iter, gnode_ptr(iter->seg)); + + return (ps_seg_t *) iter; +} + +static ps_searchfuncs_t allphone_funcs = { + /* start: */ allphone_search_start, + /* step: */ allphone_search_step, + /* finish: */ allphone_search_finish, + /* reinit: */ allphone_search_reinit, + /* free: */ allphone_search_free, + /* lattice: */ allphone_search_lattice, + /* hyp: */ allphone_search_hyp, + /* prob: */ allphone_search_prob, + /* seg_iter: */ allphone_search_seg_iter, +}; + +/** + * Find PHMM node with same senone sequence and tmat id as the given triphone. + * Return ptr to PHMM node if found, NULL otherwise. 
+ */ +static phmm_t * +phmm_lookup(allphone_search_t * allphs, s3pid_t pid) +{ + phmm_t *p; + bin_mdef_t *mdef; + phmm_t **ci_phmm; + + mdef = ((ps_search_t *) allphs)->acmod->mdef; + ci_phmm = allphs->ci_phmm; + + for (p = ci_phmm[bin_mdef_pid2ci(mdef, pid)]; p; p = p->next) { + if (mdef_pid2tmatid(mdef, p->pid) == mdef_pid2tmatid(mdef, pid)) + if (mdef_pid2ssid(mdef, p->pid) == mdef_pid2ssid(mdef, pid)) + return p; + } + + return NULL; +} + +static int32 +phmm_link(allphone_search_t * allphs) +{ + s3cipid_t ci, rc; + phmm_t *p, *p2; + int32 *rclist; + int32 i, n_link; + plink_t *l; + bin_mdef_t *mdef; + phmm_t **ci_phmm; + + mdef = ((ps_search_t *) allphs)->acmod->mdef; + ci_phmm = allphs->ci_phmm; + + rclist = (int32 *) ckd_calloc(mdef->n_ciphone + 1, sizeof(int32)); + + /* Create successor links between PHMM nodes */ + n_link = 0; + for (ci = 0; ci < mdef->n_ciphone; ci++) { + for (p = ci_phmm[ci]; p; p = p->next) { + /* Build rclist for p */ + i = 0; + for (rc = 0; rc < mdef->n_ciphone; rc++) { + if (bitvec_is_set(p->rc, rc)) + rclist[i++] = rc; + } + rclist[i] = BAD_S3CIPID; + + /* For each rc in rclist, transition to PHMMs for rc if left context = ci */ + for (i = 0; IS_S3CIPID(rclist[i]); i++) { + for (p2 = ci_phmm[rclist[i]]; p2; p2 = p2->next) { + if (bitvec_is_set(p2->lc, ci)) { + /* transition from p to p2 */ + l = (plink_t *) ckd_calloc(1, sizeof(*l)); + l->phmm = p2; + l->next = p->succlist; + p->succlist = l; + + n_link++; + } + } + } + } + } + + ckd_free(rclist); + + return n_link; +} + +/** + * Build net from phone HMMs + */ +static int +phmm_build(allphone_search_t * allphs) +{ + phmm_t *p, **pid2phmm; + bin_mdef_t *mdef; + int32 lrc_size; + uint32 *lc, *rc; + s3pid_t pid; + s3cipid_t ci; + s3cipid_t *filler; + int n_phmm, n_link; + int i, nphone; + + mdef = ((ps_search_t *) allphs)->acmod->mdef; + allphs->ci_phmm = + (phmm_t **) ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(phmm_t *)); + pid2phmm = + (phmm_t **) ckd_calloc(bin_mdef_n_phone(mdef), 
sizeof(phmm_t *)); + + /* For each unique ciphone/triphone entry in mdef, create a PHMM node */ + n_phmm = 0; + nphone = allphs->ci_only ? bin_mdef_n_ciphone(mdef) : bin_mdef_n_phone(mdef); + E_INFO("Building PHMM net of %d phones\n", nphone); + for (pid = 0; pid < nphone; pid++) { + if ((p = phmm_lookup(allphs, pid)) == NULL) { + /* not found, should be created */ + p = (phmm_t *) ckd_calloc(1, sizeof(*p)); + hmm_init(allphs->hmmctx, &(p->hmm), FALSE, + mdef_pid2ssid(mdef, pid), mdef->phone[pid].tmat); + p->pid = pid; + p->ci = bin_mdef_pid2ci(mdef, pid); + p->succlist = NULL; + p->next = allphs->ci_phmm[p->ci]; + allphs->ci_phmm[p->ci] = p; + n_phmm++; + } + pid2phmm[pid] = p; + } + + /* Fill out bitvecs of each PHMM node, alloc continuous memory chunk for context bitvectors */ + lrc_size = bitvec_size(bin_mdef_n_ciphone(mdef)); + lc = ckd_calloc(n_phmm * 2 * lrc_size, sizeof(bitvec_t)); + rc = lc + (n_phmm * lrc_size); + for (ci = 0; ci < mdef->n_ciphone; ci++) { + for (p = allphs->ci_phmm[ci]; p; p = p->next) { + p->lc = lc; + lc += lrc_size; + p->rc = rc; + rc += lrc_size; + } + } + + /* Fill out lc and rc bitmaps (remember to map all fillers to each other!!) 
*/ + filler = + (s3cipid_t *) ckd_calloc(bin_mdef_n_ciphone(mdef) + 1, + sizeof(s3cipid_t)); + + /* Connect fillers */ + i = 0; + for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) { + p = pid2phmm[ci]; + bitvec_set_all(p->lc, bin_mdef_n_ciphone(mdef)); + bitvec_set_all(p->rc, bin_mdef_n_ciphone(mdef)); + if (mdef->phone[ci].info.ci.filler) { + filler[i++] = ci; + } + } + filler[i] = BAD_S3CIPID; + + + /* Loop over cdphones only if ci_only is not set */ + for (pid = bin_mdef_n_ciphone(mdef); pid < nphone; + pid++) { + p = pid2phmm[pid]; + + if (mdef->phone[mdef->phone[pid].info.cd.ctx[1]].info.ci.filler) { + for (i = 0; IS_S3CIPID(filler[i]); i++) + bitvec_set(p->lc, filler[i]); + } + else + bitvec_set(p->lc, mdef->phone[pid].info.cd.ctx[1]); + + if (mdef->phone[mdef->phone[pid].info.cd.ctx[2]].info.ci.filler) { + for (i = 0; IS_S3CIPID(filler[i]); i++) + bitvec_set(p->rc, filler[i]); + } + else + bitvec_set(p->rc, mdef->phone[pid].info.cd.ctx[2]); + } + ckd_free(pid2phmm); + ckd_free(filler); + + /* Create links between PHMM nodes */ + n_link = phmm_link(allphs); + + E_INFO("%d nodes, %d links\n", n_phmm, n_link); + return 0; +} + +static void +phmm_free(allphone_search_t * allphs) +{ + s3cipid_t ci; + bin_mdef_t *mdef; + + if (!allphs->ci_phmm) + return; + ckd_free(allphs->ci_phmm[0]->lc); + mdef = ((ps_search_t *) allphs)->acmod->mdef; + for (ci = 0; ci < mdef_n_ciphone(mdef); ++ci) { + phmm_t *p, *next; + + for (p = allphs->ci_phmm[ci]; p; p = next) { + plink_t *l, *lnext; + + next = p->next; + for (l = p->succlist; l; l = lnext) { + lnext = l->next; + ckd_free(l); + } + hmm_deinit(&(p->hmm)); + ckd_free(p); + } + } + ckd_free(allphs->ci_phmm); +} + +/** Evaluate active PHMMs */ +static int32 +phmm_eval_all(allphone_search_t * allphs, const int16 * senscr) +{ + s3cipid_t ci; + phmm_t *p; + int32 best; + bin_mdef_t *mdef; + phmm_t **ci_phmm; + + mdef = ((ps_search_t *) allphs)->acmod->mdef; + ci_phmm = allphs->ci_phmm; + + best = WORST_SCORE; + + 
hmm_context_set_senscore(allphs->hmmctx, senscr); + for (ci = 0; ci < mdef->n_ciphone; ci++) { + for (p = ci_phmm[(unsigned) ci]; p; p = p->next) { + if (hmm_frame(&(p->hmm)) == allphs->frame) { + int32 score; + allphs->n_hmm_eval++; + score = hmm_vit_eval((hmm_t *) p); + if (score > best) + best = score; + } + } + } + + return best; +} + +static void +phmm_exit(allphone_search_t * allphs, int32 best) +{ + s3cipid_t ci; + phmm_t *p; + int32 th, nf; + history_t *h; + blkarray_list_t *history; + bin_mdef_t *mdef; + int32 curfrm; + phmm_t **ci_phmm; + int32 *ci2lmwid; + + th = best + allphs->pbeam; + + history = allphs->history; + mdef = ps_search_acmod(allphs)->mdef; + curfrm = allphs->frame; + ci_phmm = allphs->ci_phmm; + ci2lmwid = allphs->ci2lmwid; + + nf = curfrm + 1; + + for (ci = 0; ci < mdef->n_ciphone; ci++) { + for (p = ci_phmm[(unsigned) ci]; p; p = p->next) { + if (hmm_frame(&(p->hmm)) == curfrm) { + + if (hmm_bestscore(&(p->hmm)) >= th) { + + h = (history_t *) ckd_calloc(1, sizeof(*h)); + h->ef = curfrm; + h->phmm = p; + h->hist = hmm_out_history(&(p->hmm)); + h->score = hmm_out_score(&(p->hmm)); + + if (!allphs->lm) { + h->tscore = allphs->inspen; + } + else { + if (h->hist > 0) { + int32 n_used; + history_t *pred = + blkarray_list_get(history, h->hist); + + if (pred->hist > 0) { + history_t *pred_pred = + blkarray_list_get(history, + h->hist); + h->tscore = + ngram_tg_score(allphs->lm, + ci2lmwid + [pred_pred->phmm->ci], + ci2lmwid[pred-> + phmm->ci], + ci2lmwid[p->ci], + &n_used) >> + SENSCR_SHIFT; + } + else { + h->tscore = + ngram_bg_score(allphs->lm, + ci2lmwid + [pred->phmm->ci], + ci2lmwid[p->ci], + &n_used) >> + SENSCR_SHIFT; + } + } + else { + /* + * This is the beginning SIL and in srch_allphone_begin() + * it's inscore is set to 0. 
+ */ + h->tscore = 0; + } + } + + blkarray_list_append(history, h); + + /* Mark PHMM active in next frame */ + hmm_frame(&(p->hmm)) = nf; + } + else { + /* Reset state scores */ + hmm_clear(&(p->hmm)); + } + } + } + } +} + +static void +phmm_trans(allphone_search_t * allphs, int32 best, + int32 frame_history_start) +{ + history_t *h; + phmm_t *from, *to; + plink_t *l; + int32 newscore, nf, curfrm; + int32 *ci2lmwid; + int32 hist_idx; + + curfrm = allphs->frame; + nf = curfrm + 1; + ci2lmwid = allphs->ci2lmwid; + + /* Transition from exited nodes to initial states of HMMs */ + for (hist_idx = frame_history_start; + hist_idx < blkarray_list_n_valid(allphs->history); hist_idx++) { + h = blkarray_list_get(allphs->history, hist_idx); + from = h->phmm; + for (l = from->succlist; l; l = l->next) { + int32 tscore; + to = l->phmm; + + /* No LM, just use uniform (insertion penalty). */ + if (!allphs->lm) + tscore = allphs->inspen; + else { + int32 n_used; + if (h->hist > 0) { + history_t *pred = + blkarray_list_get(allphs->history, h->hist); + tscore = + ngram_tg_score(allphs->lm, + ci2lmwid[pred->phmm->ci], + ci2lmwid[from->ci], + ci2lmwid[to->ci], + &n_used) >> SENSCR_SHIFT; + } + else { + tscore = ngram_bg_score(allphs->lm, + ci2lmwid[from->ci], + ci2lmwid[to->ci], + &n_used) >> SENSCR_SHIFT; + } + } + + newscore = h->score + tscore; + if ((newscore > best + allphs->beam) + && (newscore > hmm_in_score(&(to->hmm)))) { + hmm_enter(&(to->hmm), newscore, hist_idx, nf); + } + } + } +} + +ps_search_t * +allphone_search_init(const char *name, + ngram_model_t * lm, + cmd_ln_t * config, + acmod_t * acmod, dict_t * dict, dict2pid_t * d2p) +{ + int i; + bin_mdef_t *mdef; + allphone_search_t *allphs; + + allphs = (allphone_search_t *) ckd_calloc(1, sizeof(*allphs)); + ps_search_init(ps_search_base(allphs), &allphone_funcs, PS_SEARCH_TYPE_ALLPHONE, name, config, acmod, + dict, d2p); + mdef = acmod->mdef; + + allphs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(mdef), + 
acmod->tmat->tp, NULL, mdef->sseq); + if (allphs->hmmctx == NULL) { + ps_search_free(ps_search_base(allphs)); + return NULL; + } + + allphs->ci_only = cmd_ln_boolean_r(config, "-allphone_ci"); + allphs->lw = cmd_ln_float32_r(config, "-lw"); + + phmm_build(allphs); + + if (lm) { + int32 silwid; + + allphs->lm = ngram_model_retain(lm); + + silwid = ngram_wid(allphs->lm, bin_mdef_ciphone_str(mdef, + mdef_silphone + (mdef))); + if (silwid == ngram_unknown_wid(allphs->lm)) { + E_ERROR("Phonetic LM does not have SIL phone in vocabulary\n"); + allphone_search_free((ps_search_t *) allphs); + return NULL; + } + + allphs->ci2lmwid = + (int32 *) ckd_calloc(mdef->n_ciphone, + sizeof(*allphs->ci2lmwid)); + for (i = 0; i < mdef->n_ciphone; i++) { + allphs->ci2lmwid[i] = + ngram_wid(allphs->lm, + (char *) bin_mdef_ciphone_str(mdef, i)); + /* Map filler phones and other missing phones to silence if not found */ + if (allphs->ci2lmwid[i] == ngram_unknown_wid(allphs->lm)) + allphs->ci2lmwid[i] = silwid; + } + } + else { + E_WARN + ("Failed to load language model specified in -allphone, doing unconstrained phone-loop decoding\n"); + allphs->inspen = + (int32) (logmath_log + (acmod->lmath, cmd_ln_float32_r(config, "-pip")) + * allphs->lw) >> SENSCR_SHIFT; + } + + allphs->n_tot_frame = 0; + allphs->frame = -1; + allphs->segments = NULL; + + /* Get search pruning parameters */ + allphs->beam + = + (int32) logmath_log(acmod->lmath, + cmd_ln_float64_r(config, "-beam")) + >> SENSCR_SHIFT; + allphs->pbeam + = + (int32) logmath_log(acmod->lmath, + cmd_ln_float64_r(config, "-pbeam")) + >> SENSCR_SHIFT; + + /* LM related weights/penalties */ + allphs->history = blkarray_list_init(); + + /* Acoustic score scale for posterior probabilities. 
*/ + allphs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale"); + + E_INFO("Allphone(beam: %d, pbeam: %d)\n", allphs->beam, allphs->pbeam); + + ptmr_init(&allphs->perf); + + return (ps_search_t *) allphs; +} + +int +allphone_search_reinit(ps_search_t * search, dict_t * dict, + dict2pid_t * d2p) +{ + allphone_search_t *allphs = (allphone_search_t *) search; + + /* Free old dict2pid, dict */ + ps_search_base_reinit(search, dict, d2p); + + if (!allphs->lm) { + E_WARN + ("-lm argument missing; doing unconstrained phone-loop decoding\n"); + allphs->inspen = + (int32) (logmath_log + (search->acmod->lmath, + cmd_ln_float32_r(search->config, + "-pip")) * + allphs->lw) >> SENSCR_SHIFT; + } + + return 0; +} + +void +allphone_search_free(ps_search_t * search) +{ + allphone_search_t *allphs = (allphone_search_t *) search; + + + double n_speech = (double)allphs->n_tot_frame + / cmd_ln_int32_r(ps_search_config(allphs), "-frate"); + + E_INFO("TOTAL allphone %.2f CPU %.3f xRT\n", + allphs->perf.t_tot_cpu, + allphs->perf.t_tot_cpu / n_speech); + E_INFO("TOTAL allphone %.2f wall %.3f xRT\n", + allphs->perf.t_tot_elapsed, + allphs->perf.t_tot_elapsed / n_speech); + + ps_search_base_free(search); + + allphone_clear_segments(allphs); + hmm_context_free(allphs->hmmctx); + phmm_free(allphs); + if (allphs->lm) + ngram_model_free(allphs->lm); + if (allphs->ci2lmwid) + ckd_free(allphs->ci2lmwid); + if (allphs->history) + blkarray_list_free(allphs->history); + + ckd_free(allphs); +} + +int +allphone_search_start(ps_search_t * search) +{ + allphone_search_t *allphs; + bin_mdef_t *mdef; + s3cipid_t ci; + phmm_t *p; + + allphs = (allphone_search_t *) search; + mdef = search->acmod->mdef; + + /* Reset all HMMs. 
*/ + for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) { + for (p = allphs->ci_phmm[(unsigned) ci]; p; p = p->next) { + hmm_clear(&(p->hmm)); + } + } + + allphs->n_hmm_eval = 0; + allphs->n_sen_eval = 0; + + /* Free history nodes, if any */ + blkarray_list_reset(allphs->history); + + /* Initialize start state of the SILENCE PHMM */ + allphs->frame = 0; + ci = bin_mdef_silphone(mdef); + if (NOT_S3CIPID(ci)) + E_FATAL("Cannot find CI-phone %s\n", S3_SILENCE_CIPHONE); + for (p = allphs->ci_phmm[ci]; p && (p->pid != ci); p = p->next); + if (!p) + E_FATAL("Cannot find HMM for %s\n", S3_SILENCE_CIPHONE); + hmm_enter(&(p->hmm), 0, 0, allphs->frame); + + ptmr_reset(&allphs->perf); + ptmr_start(&allphs->perf); + + return 0; +} + +static void +allphone_search_sen_active(allphone_search_t * allphs) +{ + acmod_t *acmod; + bin_mdef_t *mdef; + phmm_t *p; + int32 ci; + + acmod = ps_search_acmod(allphs); + mdef = acmod->mdef; + + acmod_clear_active(acmod); + for (ci = 0; ci < bin_mdef_n_ciphone(mdef); ci++) + for (p = allphs->ci_phmm[ci]; p; p = p->next) + if (hmm_frame(&(p->hmm)) == allphs->frame) + acmod_activate_hmm(acmod, &(p->hmm)); +} + +int +allphone_search_step(ps_search_t * search, int frame_idx) +{ + int32 bestscr, frame_history_start; + const int16 *senscr; + allphone_search_t *allphs = (allphone_search_t *) search; + acmod_t *acmod = search->acmod; + + if (!acmod->compallsen) + allphone_search_sen_active(allphs); + senscr = acmod_score(acmod, &frame_idx); + allphs->n_sen_eval += acmod->n_senone_active; + bestscr = phmm_eval_all(allphs, senscr); + + frame_history_start = blkarray_list_n_valid(allphs->history); + phmm_exit(allphs, bestscr); + phmm_trans(allphs, bestscr, frame_history_start); + + allphs->frame++; + + return 0; +} + +static int32 +ascore(allphone_search_t * allphs, history_t * h) +{ + int32 score = h->score; + + if (h->hist > 0) { + history_t *pred = blkarray_list_get(allphs->history, h->hist); + score -= pred->score; + } + + return score - h->tscore; +} + 
+static void +allphone_clear_segments(allphone_search_t * allphs) +{ + gnode_t *gn; + for (gn = allphs->segments; gn; gn = gn->next) { + ckd_free(gnode_ptr(gn)); + } + glist_free(allphs->segments); + allphs->segments = NULL; +} + +static void +allphone_backtrace(allphone_search_t * allphs, int32 f, int32 *out_score) +{ + int32 best, hist_idx, best_idx; + int32 frm, last_frm; + history_t *h; + phseg_t *s; + + /* Clear old list */ + allphone_clear_segments(allphs); + + frm = last_frm = f; + /* Find the first history entry for the requested frame */ + hist_idx = blkarray_list_n_valid(allphs->history) - 1; + while (hist_idx > 0) { + h = blkarray_list_get(allphs->history, hist_idx); + if (h->ef <= f) { + frm = last_frm = h->ef; + break; + } + hist_idx--; + } + + if (hist_idx < 0) + return; + + /* Find bestscore */ + best = (int32) 0x80000000; + best_idx = -1; + while (frm == last_frm && hist_idx > 0) { + h = blkarray_list_get(allphs->history, hist_idx); + frm = h->ef; + if (h->score > best && frm == last_frm) { + best = h->score; + best_idx = hist_idx; + } + hist_idx--; + } + + if (best_idx < 0) + return; + + if (out_score) + *out_score = best; + + /* Backtrace */ + while (best_idx > 0) { + h = blkarray_list_get(allphs->history, best_idx); + s = (phseg_t *) ckd_calloc(1, sizeof(phseg_t)); + s->ci = h->phmm->ci; + s->sf = + (h->hist > + 0) ? 
((history_t *) blkarray_list_get(allphs->history, + h->hist))->ef + 1 : 0; + s->ef = h->ef; + s->score = ascore(allphs, h); + s->tscore = h->tscore; + allphs->segments = glist_add_ptr(allphs->segments, s); + + best_idx = h->hist; + } + + return; +} + +int +allphone_search_finish(ps_search_t * search) +{ + allphone_search_t *allphs; + int32 cf, n_hist; + + allphs = (allphone_search_t *) search; + + allphs->n_tot_frame += allphs->frame; + n_hist = blkarray_list_n_valid(allphs->history); + E_INFO + ("%d frames, %d HMMs (%d/fr), %d senones (%d/fr), %d history entries (%d/fr)\n", + allphs->frame, allphs->n_hmm_eval, + (allphs->frame > 0) ? allphs->n_hmm_eval / allphs->frame : 0, + allphs->n_sen_eval, + (allphs->frame > 0) ? allphs->n_sen_eval / allphs->frame : 0, + n_hist, (allphs->frame > 0) ? n_hist / allphs->frame : 0); + + /* Now backtrace. */ + allphone_backtrace(allphs, allphs->frame - 1, NULL); + + /* Print out some statistics. */ + ptmr_stop(&allphs->perf); + /* This is the number of frames processed. 
*/ + cf = ps_search_acmod(allphs)->output_frame; + if (cf > 0) { + double n_speech = (double) (cf + 1) + / cmd_ln_int32_r(ps_search_config(allphs), "-frate"); + E_INFO("allphone %.2f CPU %.3f xRT\n", + allphs->perf.t_cpu, allphs->perf.t_cpu / n_speech); + E_INFO("allphone %.2f wall %.3f xRT\n", + allphs->perf.t_elapsed, allphs->perf.t_elapsed / n_speech); + } + + + return 0; +} + +char const * +allphone_search_hyp(ps_search_t * search, int32 * out_score) +{ + allphone_search_t *allphs; + phseg_t *p; + gnode_t *gn; + const char *phone_str; + bin_mdef_t *mdef; + int len, hyp_idx, phone_idx; + + allphs = (allphone_search_t *) search; + mdef = search->acmod->mdef; + + /* Create hypothesis */ + if (search->hyp_str) + ckd_free(search->hyp_str); + search->hyp_str = NULL; + + allphone_backtrace(allphs, allphs->frame - 1, out_score); + if (allphs->segments == NULL) { + return NULL; + } + + len = glist_count(allphs->segments) * 10; /* maximum length of one phone with spacebar */ + + search->hyp_str = (char *) ckd_calloc(len, sizeof(*search->hyp_str)); + hyp_idx = 0; + for (gn = allphs->segments; gn; gn = gn->next) { + p = gnode_ptr(gn); + phone_str = bin_mdef_ciphone_str(mdef, p->ci); + phone_idx = 0; + while (phone_str[phone_idx] != '\0') + search->hyp_str[hyp_idx++] = phone_str[phone_idx++]; + search->hyp_str[hyp_idx++] = ' '; + } + search->hyp_str[--hyp_idx] = '\0'; + E_INFO("Hyp: %s\n", search->hyp_str); + return search->hyp_str; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/allphone_search.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/allphone_search.h new file mode 100644 index 0000000000000000000000000000000000000000..5097db790c3131e0e561d77acb6216ff124a3276 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/allphone_search.h @@ -0,0 +1,190 @@ +/* -*- c-basic-offset:4; indent-tabs-mode: nil -*- */ +/* 
==================================================================== + * Copyright (c) 2014 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * allphone_search.h -- Search structures for phoneme decoding. + */ + + +#ifndef __ALLPHONE_SEARCH_H__ +#define __ALLPHONE_SEARCH_H__ + + +/* SphinxBase headers. */ +#include +#include +#include +#include + +/* Local headers. */ +#include "pocketsphinx_internal.h" +#include "blkarray_list.h" +#include "hmm.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * Models a single unique pair. 
+ * Can represent several different triphones, but all with the same parent basephone. + * (NOTE: Word-position attribute of triphone is ignored.) + */ +typedef struct phmm_s { + hmm_t hmm; /**< Base HMM structure */ + s3pid_t pid; /**< Phone id (temp. during init.) */ + s3cipid_t ci; /**< Parent basephone for this PHMM */ + bitvec_t *lc; /**< Set (bit-vector) of left context phones seen for this PHMM */ + bitvec_t *rc; /**< Set (bit-vector) of right context phones seen for this PHMM */ + struct phmm_s *next; /**< Next unique PHMM for same parent basephone */ + struct plink_s *succlist; /**< List of links to successor PHMM nodes */ +} phmm_t; + +/** + * List of links from a PHMM node to its successors; one link per successor. + */ +typedef struct plink_s { + phmm_t *phmm; /**< Successor PHMM node */ + struct plink_s *next; /**< Next link for parent PHMM node */ +} plink_t; + +/** + * History (paths) information at any point in allphone Viterbi search. + */ +typedef struct history_s { + phmm_t *phmm; /**< PHMM ending this path */ + int32 score; /**< Path score for this path */ + int32 tscore; /**< Transition score for this path */ + frame_idx_t ef; /**< End frame */ + int32 hist; /**< History-list index of the previous entry; only followed when > 0 */ +} history_t; + +/** + * Phone level segmentation information + */ +typedef struct phseg_s { + s3cipid_t ci; /**< CI-phone id */ + frame_idx_t sf, ef; /**< Start and end frame for this phone occurrence */ + int32 score; /**< Acoustic score for this segment of alignment */ + int32 tscore; /**< Transition ("LM") score for this segment */ +} phseg_t; + +/** + * Segment iterator over list of phseg + */ +typedef struct phseg_iter_s { + ps_seg_t base; /**< Base segment iterator */ + glist_t seg; /**< Node of the phseg list (set to the list head on creation) */ +} phseg_iter_t; + +/** + * Implementation of allphone search structure. + */ +typedef struct allphone_search_s { + ps_search_t base; + + hmm_context_t *hmmctx; /**< HMM context. 
*/ + ngram_model_t *lm; /**< Ngram model set */ + int32 ci_only; /**< Use context-independent phones for decoding */ + phmm_t **ci_phmm; /**< PHMM lists (for each CI phone) */ + int32 *ci2lmwid; /**< Mapping of CI phones to LM word IDs */ + + int32 beam, pbeam; /**< Effective beams after applying beam_factor */ + int32 lw, inspen; /**< Language weights */ + + frame_idx_t frame; /**< Current frame. */ + float32 ascale; /**< Acoustic score scale for posterior probabilities. */ + + int32 n_tot_frame; /**< Total number of frames processed */ + int32 n_hmm_eval; /**< Total HMMs evaluated this utt */ + int32 n_sen_eval; /**< Total senones evaluated this utt */ + + /* Backtrace information */ + blkarray_list_t *history; /**< List of history nodes allocated in each frame */ + /* Hypothesis DAG */ + glist_t segments; + + ptmr_t perf; /**< Performance counter */ + +} allphone_search_t; + +/** + * Create, initialize and return a search module. + */ +ps_search_t *allphone_search_init(const char *name, + ngram_model_t * lm, + cmd_ln_t * config, + acmod_t * acmod, + dict_t * dict, dict2pid_t * d2p); + +/** + * Deallocate search structure. + */ +void allphone_search_free(ps_search_t * search); + +/** + * Update allphone search module. + */ +int allphone_search_reinit(ps_search_t * search, dict_t * dict, + dict2pid_t * d2p); + +/** + * Prepare the allphone search structure for beginning decoding of the next + * utterance. + */ +int allphone_search_start(ps_search_t * search); + +/** + * Step one frame forward through the Viterbi search. + */ +int allphone_search_step(ps_search_t * search, int frame_idx); + +/** + * Windup and clean the allphone search structure after utterance. + */ +int allphone_search_finish(ps_search_t * search); + +/** + * Get hypothesis string from the allphone search. 
+ */ +char const *allphone_search_hyp(ps_search_t * search, int32 * out_score); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __ALLPHONE_SEARCH_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/bin_mdef.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/bin_mdef.c new file mode 100644 index 0000000000000000000000000000000000000000..3ea605889398b06792de762bb3caa2028ede4d3a --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/bin_mdef.c @@ -0,0 +1,891 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2005 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: bin_mdef.c + * + * Description: + * Binary format model definition files, with support for + * heterogeneous topologies and variable-size N-phones + * + * Author: + * David Huggins-Daines + *********************************************************************/ + +/* System headers. */ +#include +#include +#include + +/* SphinxBase headers. */ +#include +#include +#include +#include +#include + +/* Local headers. */ +#include "mdef.h" +#include "bin_mdef.h" + +bin_mdef_t * +bin_mdef_read_text(cmd_ln_t *config, const char *filename) +{ + bin_mdef_t *bmdef; + mdef_t *mdef; + int i, nodes, ci_idx, lc_idx, rc_idx; + int nchars; + + (void)config; + + if ((mdef = mdef_init((char *) filename, TRUE)) == NULL) + return NULL; + + /* Enforce some limits. 
*/ + if (mdef->n_sen > BAD_SENID) { + E_ERROR("Number of senones exceeds limit: %d > %d\n", + mdef->n_sen, BAD_SENID); + mdef_free(mdef); + return NULL; + } + if (mdef->n_sseq > BAD_SSID) { + E_ERROR("Number of senone sequences exceeds limit: %d > %d\n", + mdef->n_sseq, BAD_SSID); + mdef_free(mdef); + return NULL; + } + /* We use uint8 for ciphones */ + if (mdef->n_ciphone > 255) { + E_ERROR("Number of phones exceeds limit: %d > %d\n", + mdef->n_ciphone, 255); + mdef_free(mdef); + return NULL; + } + + bmdef = ckd_calloc(1, sizeof(*bmdef)); + bmdef->refcnt = 1; + + /* Easy stuff. The mdef.c code has done the heavy lifting for us. */ + bmdef->n_ciphone = mdef->n_ciphone; + bmdef->n_phone = mdef->n_phone; + bmdef->n_emit_state = mdef->n_emit_state; + bmdef->n_ci_sen = mdef->n_ci_sen; + bmdef->n_sen = mdef->n_sen; + bmdef->n_tmat = mdef->n_tmat; + bmdef->n_sseq = mdef->n_sseq; + bmdef->sseq = mdef->sseq; + bmdef->cd2cisen = mdef->cd2cisen; + bmdef->sen2cimap = mdef->sen2cimap; + bmdef->n_ctx = 3; /* Triphones only. */ + bmdef->sil = mdef->sil; + mdef->sseq = NULL; /* We are taking over this one. */ + mdef->cd2cisen = NULL; /* And this one. */ + mdef->sen2cimap = NULL; /* And this one. */ + + /* Get the phone names. If they are not sorted + * ASCII-betically then we are in a world of hurt and + * therefore will simply refuse to continue. */ + bmdef->ciname = ckd_calloc(bmdef->n_ciphone, sizeof(*bmdef->ciname)); + nchars = 0; + for (i = 0; i < bmdef->n_ciphone; ++i) + nchars += strlen(mdef->ciphone[i].name) + 1; + bmdef->ciname[0] = ckd_calloc(nchars, 1); + strcpy(bmdef->ciname[0], mdef->ciphone[0].name); + for (i = 1; i < bmdef->n_ciphone; ++i) { + assert(i > 0); /* No reason to imagine it wouldn't be, but... */ + bmdef->ciname[i] = + bmdef->ciname[i - 1] + strlen(bmdef->ciname[i - 1]) + 1; + strcpy(bmdef->ciname[i], mdef->ciphone[i].name); + if (strcmp(bmdef->ciname[i - 1], bmdef->ciname[i]) > 0) { + /* FIXME: there should be a solution to this, actually. 
*/ + E_ERROR("Phone names are not in sorted order, sorry."); + bin_mdef_free(bmdef); + mdef_free(mdef); + return NULL; + } + } + + /* Copy over phone information. */ + bmdef->phone = ckd_calloc(bmdef->n_phone, sizeof(*bmdef->phone)); + for (i = 0; i < mdef->n_phone; ++i) { + bmdef->phone[i].ssid = mdef->phone[i].ssid; + bmdef->phone[i].tmat = mdef->phone[i].tmat; + if (i < bmdef->n_ciphone) { + bmdef->phone[i].info.ci.filler = mdef->ciphone[i].filler; + } + else { + bmdef->phone[i].info.cd.wpos = mdef->phone[i].wpos; + bmdef->phone[i].info.cd.ctx[0] = mdef->phone[i].ci; + bmdef->phone[i].info.cd.ctx[1] = mdef->phone[i].lc; + bmdef->phone[i].info.cd.ctx[2] = mdef->phone[i].rc; + } + } + + /* Walk the wpos_ci_lclist once to find the total number of + * nodes and the starting locations for each level. */ + nodes = lc_idx = ci_idx = rc_idx = 0; + for (i = 0; i < N_WORD_POSN; ++i) { + int j; + for (j = 0; j < mdef->n_ciphone; ++j) { + ph_lc_t *lc; + + for (lc = mdef->wpos_ci_lclist[i][j]; lc; lc = lc->next) { + ph_rc_t *rc; + for (rc = lc->rclist; rc; rc = rc->next) { + ++nodes; /* RC node */ + } + ++nodes; /* LC node */ + ++rc_idx; /* Start of RC nodes (after LC nodes) */ + } + ++nodes; /* CI node */ + ++lc_idx; /* Start of LC nodes (after CI nodes) */ + ++rc_idx; /* Start of RC nodes (after CI and LC nodes) */ + } + ++nodes; /* wpos node */ + ++ci_idx; /* Start of CI nodes (after wpos nodes) */ + ++lc_idx; /* Start of LC nodes (after CI nodes) */ + ++rc_idx; /* STart of RC nodes (after wpos, CI, and LC nodes) */ + } + E_INFO("Allocating %d * %d bytes (%d KiB) for CD tree\n", + nodes, sizeof(*bmdef->cd_tree), + nodes * sizeof(*bmdef->cd_tree) / 1024); + bmdef->n_cd_tree = nodes; + bmdef->cd_tree = ckd_calloc(nodes, sizeof(*bmdef->cd_tree)); + for (i = 0; i < N_WORD_POSN; ++i) { + int j; + + bmdef->cd_tree[i].ctx = i; + bmdef->cd_tree[i].n_down = mdef->n_ciphone; + bmdef->cd_tree[i].c.down = ci_idx; +#if 0 + E_INFO("%d => %c (%d@%d)\n", + i, (WPOS_NAME)[i], + 
bmdef->cd_tree[i].n_down, bmdef->cd_tree[i].c.down); +#endif + + /* Now we can build the rest of the tree. */ + for (j = 0; j < mdef->n_ciphone; ++j) { + ph_lc_t *lc; + + bmdef->cd_tree[ci_idx].ctx = j; + bmdef->cd_tree[ci_idx].c.down = lc_idx; + for (lc = mdef->wpos_ci_lclist[i][j]; lc; lc = lc->next) { + ph_rc_t *rc; + + bmdef->cd_tree[lc_idx].ctx = lc->lc; + bmdef->cd_tree[lc_idx].c.down = rc_idx; + for (rc = lc->rclist; rc; rc = rc->next) { + bmdef->cd_tree[rc_idx].ctx = rc->rc; + bmdef->cd_tree[rc_idx].n_down = 0; + bmdef->cd_tree[rc_idx].c.pid = rc->pid; +#if 0 + E_INFO("%d => %s %s %s %c (%d@%d)\n", + rc_idx, + bmdef->ciname[j], + bmdef->ciname[lc->lc], + bmdef->ciname[rc->rc], + (WPOS_NAME)[i], + bmdef->cd_tree[rc_idx].n_down, + bmdef->cd_tree[rc_idx].c.down); +#endif + + ++bmdef->cd_tree[lc_idx].n_down; + ++rc_idx; + } + /* If there are no triphones here, + * this is considered a leafnode, so + * set the pid to -1. */ + if (bmdef->cd_tree[lc_idx].n_down == 0) + bmdef->cd_tree[lc_idx].c.pid = -1; +#if 0 + E_INFO("%d => %s %s %c (%d@%d)\n", + lc_idx, + bmdef->ciname[j], + bmdef->ciname[lc->lc], + (WPOS_NAME)[i], + bmdef->cd_tree[lc_idx].n_down, + bmdef->cd_tree[lc_idx].c.down); +#endif + + ++bmdef->cd_tree[ci_idx].n_down; + ++lc_idx; + } + + /* As above, so below. 
*/ + if (bmdef->cd_tree[ci_idx].n_down == 0) + bmdef->cd_tree[ci_idx].c.pid = -1; +#if 0 + E_INFO("%d => %d=%s (%d@%d)\n", + ci_idx, j, bmdef->ciname[j], + bmdef->cd_tree[ci_idx].n_down, + bmdef->cd_tree[ci_idx].c.down); +#endif + + ++ci_idx; + } + } + + mdef_free(mdef); + + bmdef->alloc_mode = BIN_MDEF_FROM_TEXT; + return bmdef; +} + +bin_mdef_t * +bin_mdef_retain(bin_mdef_t *m) +{ + ++m->refcnt; + return m; +} + +int +bin_mdef_free(bin_mdef_t * m) +{ + if (m == NULL) + return 0; + if (--m->refcnt > 0) + return m->refcnt; + + switch (m->alloc_mode) { + case BIN_MDEF_FROM_TEXT: + ckd_free(m->ciname[0]); + ckd_free(m->sseq[0]); + ckd_free(m->phone); + ckd_free(m->cd_tree); + break; + case BIN_MDEF_IN_MEMORY: + ckd_free(m->ciname[0]); + break; + case BIN_MDEF_ON_DISK: + break; + } + if (m->filemap) + mmio_file_unmap(m->filemap); + ckd_free(m->cd2cisen); + ckd_free(m->sen2cimap); + ckd_free(m->ciname); + ckd_free(m->sseq); + ckd_free(m); + return 0; +} + +static const char format_desc[] = + "BEGIN FILE FORMAT DESCRIPTION\n" + "int32 n_ciphone; /**< Number of base (CI) phones */\n" + "int32 n_phone; /**< Number of base (CI) phones + (CD) triphones */\n" + "int32 n_emit_state; /**< Number of emitting states per phone (0 if heterogeneous) */\n" + "int32 n_ci_sen; /**< Number of CI senones; these are the first */\n" + "int32 n_sen; /**< Number of senones (CI+CD) */\n" + "int32 n_tmat; /**< Number of transition matrices */\n" + "int32 n_sseq; /**< Number of unique senone sequences */\n" + "int32 n_ctx; /**< Number of phones of context */\n" + "int32 n_cd_tree; /**< Number of nodes in CD tree structure */\n" + "int32 sil; /**< CI phone ID for silence */\n" + "char ciphones[][]; /**< CI phone strings (null-terminated) */\n" + "char padding[]; /**< Padding to a 4-bytes boundary */\n" + "struct { int16 ctx; int16 n_down; int32 pid/down } cd_tree[];\n" + "struct { int32 ssid; int32 tmat; int8 attr[4] } phones[];\n" + "int16 sseq[]; /**< Unique senone sequences */\n" + "int8 
sseq_len[]; /**< Number of states in each sseq (none if homogeneous) */\n" + "END FILE FORMAT DESCRIPTION\n"; + +bin_mdef_t * +bin_mdef_read(cmd_ln_t *config, const char *filename) +{ + bin_mdef_t *m; + FILE *fh; + size_t tree_start; + int32 val, i, do_mmap, swap; + long pos, end; + int32 *sseq_size; + + /* Try to read it as text first. */ + if ((m = bin_mdef_read_text(config, filename)) != NULL) + return m; + + E_INFO("Reading binary model definition: %s\n", filename); + if ((fh = fopen(filename, "rb")) == NULL) + return NULL; + + if (fread(&val, 4, 1, fh) != 1) { + fclose(fh); + E_ERROR_SYSTEM("Failed to read byte-order marker from %s\n", + filename); + return NULL; + } + swap = 0; + if (val == BIN_MDEF_OTHER_ENDIAN) { + swap = 1; + E_INFO("Must byte-swap %s\n", filename); + } + if (fread(&val, 4, 1, fh) != 1) { + fclose(fh); + E_ERROR_SYSTEM("Failed to read version from %s\n", filename); + return NULL; + } + if (swap) + SWAP_INT32(&val); + if (val > BIN_MDEF_FORMAT_VERSION) { + E_ERROR("File format version %d for %s is newer than library\n", + val, filename); + fclose(fh); + return NULL; + } + if (fread(&val, 4, 1, fh) != 1) { + fclose(fh); + E_ERROR_SYSTEM("Failed to read header length from %s\n", filename); + return NULL; + } + if (swap) + SWAP_INT32(&val); + /* Skip format descriptor. */ + fseek(fh, val, SEEK_CUR); + + /* Finally allocate it. */ + m = ckd_calloc(1, sizeof(*m)); + m->refcnt = 1; + + /* Check these, to make gcc/glibc shut up. 
*/ +#define FREAD_SWAP32_CHK(dest) \ + if (fread((dest), 4, 1, fh) != 1) { \ + fclose(fh); \ + ckd_free(m); \ + E_ERROR_SYSTEM("Failed to read %s from %s\n", #dest, filename); \ + return NULL; \ + } \ + if (swap) SWAP_INT32(dest); + + FREAD_SWAP32_CHK(&m->n_ciphone); + FREAD_SWAP32_CHK(&m->n_phone); + FREAD_SWAP32_CHK(&m->n_emit_state); + FREAD_SWAP32_CHK(&m->n_ci_sen); + FREAD_SWAP32_CHK(&m->n_sen); + FREAD_SWAP32_CHK(&m->n_tmat); + FREAD_SWAP32_CHK(&m->n_sseq); + FREAD_SWAP32_CHK(&m->n_ctx); + FREAD_SWAP32_CHK(&m->n_cd_tree); + FREAD_SWAP32_CHK(&m->sil); + + /* CI names are first in the file. */ + m->ciname = ckd_calloc(m->n_ciphone, sizeof(*m->ciname)); + + /* Decide whether to read in the whole file or mmap it. */ + do_mmap = config ? cmd_ln_boolean_r(config, "-mmap") : TRUE; + if (swap) { + E_WARN("-mmap specified, but mdef is other-endian. Will not memory-map.\n"); + do_mmap = FALSE; + } + /* Actually try to mmap it. */ + if (do_mmap) { + m->filemap = mmio_file_read(filename); + if (m->filemap == NULL) + do_mmap = FALSE; + } + pos = ftell(fh); + if (do_mmap) { + /* Get the base pointer from the memory map. */ + m->ciname[0] = (char *)mmio_file_ptr(m->filemap) + pos; + /* Success! */ + m->alloc_mode = BIN_MDEF_ON_DISK; + } + else { + /* Read everything into memory. */ + m->alloc_mode = BIN_MDEF_IN_MEMORY; + fseek(fh, 0, SEEK_END); + end = ftell(fh); + fseek(fh, pos, SEEK_SET); + m->ciname[0] = ckd_malloc(end - pos); + if (fread(m->ciname[0], 1, end - pos, fh) != (size_t)(end - pos)) + E_FATAL("Failed to read %d bytes of data from %s\n", end - pos, filename); + } + + for (i = 1; i < m->n_ciphone; ++i) + m->ciname[i] = m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1; + + /* Skip past the padding. 
*/ + tree_start = + m->ciname[i - 1] + strlen(m->ciname[i - 1]) + 1 - m->ciname[0]; + tree_start = (tree_start + 3) & ~3; + m->cd_tree = (cd_tree_t *) (m->ciname[0] + tree_start); + if (swap) { + for (i = 0; i < m->n_cd_tree; ++i) { + SWAP_INT16(&m->cd_tree[i].ctx); + SWAP_INT16(&m->cd_tree[i].n_down); + SWAP_INT32(&m->cd_tree[i].c.down); + } + } + m->phone = (mdef_entry_t *) (m->cd_tree + m->n_cd_tree); + if (swap) { + for (i = 0; i < m->n_phone; ++i) { + SWAP_INT32(&m->phone[i].ssid); + SWAP_INT32(&m->phone[i].tmat); + } + } + sseq_size = (int32 *) (m->phone + m->n_phone); + if (swap) + SWAP_INT32(sseq_size); + m->sseq = ckd_calloc(m->n_sseq, sizeof(*m->sseq)); + m->sseq[0] = (uint16 *) (sseq_size + 1); + if (swap) { + for (i = 0; i < *sseq_size; ++i) + SWAP_INT16(m->sseq[0] + i); + } + if (m->n_emit_state) { + for (i = 1; i < m->n_sseq; ++i) + m->sseq[i] = m->sseq[0] + i * m->n_emit_state; + } + else { + m->sseq_len = (uint8 *) (m->sseq[0] + *sseq_size); + for (i = 1; i < m->n_sseq; ++i) + m->sseq[i] = m->sseq[i - 1] + m->sseq_len[i - 1]; + } + + /* Now build the CD-to-CI mappings using the senone sequences. + * This is the only really accurate way to do it, though it is + * still inaccurate in the case of heterogeneous topologies or + * cross-state tying. */ + m->cd2cisen = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->cd2cisen)); + m->sen2cimap = (int16 *) ckd_malloc(m->n_sen * sizeof(*m->sen2cimap)); + + /* Default mappings (identity, none) */ + for (i = 0; i < m->n_ci_sen; ++i) + m->cd2cisen[i] = i; + for (; i < m->n_sen; ++i) + m->cd2cisen[i] = -1; + for (i = 0; i < m->n_sen; ++i) + m->sen2cimap[i] = -1; + for (i = 0; i < m->n_phone; ++i) { + int32 j, ssid = m->phone[i].ssid; + + for (j = 0; j < bin_mdef_n_emit_state_phone(m, i); ++j) { + int s = bin_mdef_sseq2sen(m, ssid, j); + int ci = bin_mdef_pid2ci(m, i); + /* Take the first one and warn if we have cross-state tying. 
*/ + if (m->sen2cimap[s] == -1) + m->sen2cimap[s] = ci; + if (m->sen2cimap[s] != ci) + E_WARN + ("Senone %d is shared between multiple base phones\n", + s); + + if (j > bin_mdef_n_emit_state_phone(m, ci)) + E_WARN("CD phone %d has fewer states than CI phone %d\n", + i, ci); + else + m->cd2cisen[s] = + bin_mdef_sseq2sen(m, m->phone[ci].ssid, j); + } + } + + /* Set the silence phone. */ + m->sil = bin_mdef_ciphone_id(m, S3_SILENCE_CIPHONE); + + E_INFO + ("%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n", + m->n_ciphone, m->n_phone - m->n_ciphone, m->n_emit_state, + m->n_ci_sen, m->n_sen, m->n_sseq); + fclose(fh); + return m; +} + +int +bin_mdef_write(bin_mdef_t * m, const char *filename) +{ + FILE *fh; + int32 val, i; + + if ((fh = fopen(filename, "wb")) == NULL) + return -1; + + /* Byteorder marker. */ + val = BIN_MDEF_NATIVE_ENDIAN; + fwrite(&val, 1, 4, fh); + /* Version. */ + val = BIN_MDEF_FORMAT_VERSION; + fwrite(&val, 1, sizeof(val), fh); + + /* Round the format descriptor size up to a 4-byte boundary. */ + val = ((sizeof(format_desc) + 3) & ~3); + fwrite(&val, 1, sizeof(val), fh); + fwrite(format_desc, 1, sizeof(format_desc), fh); + /* Pad it with zeros. */ + i = 0; + fwrite(&i, 1, val - sizeof(format_desc), fh); + + /* Binary header things. */ + fwrite(&m->n_ciphone, 4, 1, fh); + fwrite(&m->n_phone, 4, 1, fh); + fwrite(&m->n_emit_state, 4, 1, fh); + fwrite(&m->n_ci_sen, 4, 1, fh); + fwrite(&m->n_sen, 4, 1, fh); + fwrite(&m->n_tmat, 4, 1, fh); + fwrite(&m->n_sseq, 4, 1, fh); + fwrite(&m->n_ctx, 4, 1, fh); + fwrite(&m->n_cd_tree, 4, 1, fh); + /* Write this as a 32-bit value to preserve alignment for the + * non-mmap case (we want things aligned both from the + * beginning of the file and the beginning of the phone + * strings). */ + val = m->sil; + fwrite(&val, 4, 1, fh); + + /* Phone strings. */ + for (i = 0; i < m->n_ciphone; ++i) + fwrite(m->ciname[i], 1, strlen(m->ciname[i]) + 1, fh); + /* Pad with zeros. 
*/ + val = (ftell(fh) + 3) & ~3; + i = 0; + fwrite(&i, 1, val - ftell(fh), fh); + + /* Write CD-tree */ + fwrite(m->cd_tree, sizeof(*m->cd_tree), m->n_cd_tree, fh); + /* Write phones */ + fwrite(m->phone, sizeof(*m->phone), m->n_phone, fh); + if (m->n_emit_state) { + /* Write size of sseq */ + val = m->n_sseq * m->n_emit_state; + fwrite(&val, 4, 1, fh); + + /* Write sseq */ + fwrite(m->sseq[0], sizeof(**m->sseq), + m->n_sseq * m->n_emit_state, fh); + } + else { + int32 n; + + /* Calculate size of sseq */ + n = 0; + for (i = 0; i < m->n_sseq; ++i) + n += m->sseq_len[i]; + + /* Write size of sseq */ + fwrite(&n, 4, 1, fh); + + /* Write sseq */ + fwrite(m->sseq[0], sizeof(**m->sseq), n, fh); + + /* Write sseq_len */ + fwrite(m->sseq_len, 1, m->n_sseq, fh); + } + fclose(fh); + + return 0; +} + +int +bin_mdef_write_text(bin_mdef_t * m, const char *filename) +{ + FILE *fh; + int p, i, n_total_state; + + if (strcmp(filename, "-") == 0) + fh = stdout; + else { + if ((fh = fopen(filename, "w")) == NULL) + return -1; + } + + fprintf(fh, "0.3\n"); + fprintf(fh, "%d n_base\n", m->n_ciphone); + fprintf(fh, "%d n_tri\n", m->n_phone - m->n_ciphone); + if (m->n_emit_state) + n_total_state = m->n_phone * (m->n_emit_state + 1); + else { + n_total_state = 0; + for (i = 0; i < m->n_phone; ++i) + n_total_state += m->sseq_len[m->phone[i].ssid] + 1; + } + fprintf(fh, "%d n_state_map\n", n_total_state); + fprintf(fh, "%d n_tied_state\n", m->n_sen); + fprintf(fh, "%d n_tied_ci_state\n", m->n_ci_sen); + fprintf(fh, "%d n_tied_tmat\n", m->n_tmat); + fprintf(fh, "#\n# Columns definitions\n"); + fprintf(fh, "#%4s %3s %3s %1s %6s %4s %s\n", + "base", "lft", "rt", "p", "attrib", "tmat", + " ... 
state id's ..."); + + for (p = 0; p < m->n_ciphone; p++) { + int n_state; + + fprintf(fh, "%5s %3s %3s %1s", m->ciname[p], "-", "-", "-"); + + if (bin_mdef_is_fillerphone(m, p)) + fprintf(fh, " %6s", "filler"); + else + fprintf(fh, " %6s", "n/a"); + fprintf(fh, " %4d", m->phone[p].tmat); + + if (m->n_emit_state) + n_state = m->n_emit_state; + else + n_state = m->sseq_len[m->phone[p].ssid]; + for (i = 0; i < n_state; i++) { + fprintf(fh, " %6u", m->sseq[m->phone[p].ssid][i]); + } + fprintf(fh, " N\n"); + } + + + for (; p < m->n_phone; p++) { + int n_state; + + fprintf(fh, "%5s %3s %3s %c", + m->ciname[m->phone[p].info.cd.ctx[0]], + m->ciname[m->phone[p].info.cd.ctx[1]], + m->ciname[m->phone[p].info.cd.ctx[2]], + (WPOS_NAME)[m->phone[p].info.cd.wpos]); + + if (bin_mdef_is_fillerphone(m, p)) + fprintf(fh, " %6s", "filler"); + else + fprintf(fh, " %6s", "n/a"); + fprintf(fh, " %4d", m->phone[p].tmat); + + + if (m->n_emit_state) + n_state = m->n_emit_state; + else + n_state = m->sseq_len[m->phone[p].ssid]; + for (i = 0; i < n_state; i++) { + fprintf(fh, " %6u", m->sseq[m->phone[p].ssid][i]); + } + fprintf(fh, " N\n"); + } + + if (strcmp(filename, "-") != 0) + fclose(fh); + return 0; +} + +int +bin_mdef_ciphone_id(bin_mdef_t * m, const char *ciphone) +{ + int low, mid, high; + + /* Exact binary search on m->ciphone */ + low = 0; + high = m->n_ciphone; + while (low < high) { + int c; + + mid = (low + high) / 2; + c = strcmp(ciphone, m->ciname[mid]); + if (c == 0) + return mid; + else if (c > 0) + low = mid + 1; + else + high = mid; + } + return -1; +} + +int +bin_mdef_ciphone_id_nocase(bin_mdef_t * m, const char *ciphone) +{ + int low, mid, high; + + /* Exact binary search on m->ciphone */ + low = 0; + high = m->n_ciphone; + while (low < high) { + int c; + + mid = (low + high) / 2; + c = strcmp_nocase(ciphone, m->ciname[mid]); + if (c == 0) + return mid; + else if (c > 0) + low = mid + 1; + else + high = mid; + } + return -1; +} + +const char * 
+bin_mdef_ciphone_str(bin_mdef_t * m, int32 ci) +{ + assert(m != NULL); + assert(ci < m->n_ciphone); + return m->ciname[ci]; +} + +int +bin_mdef_phone_id(bin_mdef_t * m, int32 ci, int32 lc, int32 rc, int32 wpos) +{ + cd_tree_t *cd_tree; + int level, max; + int16 ctx[4]; + + assert(m); + + /* In the future, we might back off when context is not available, + * but for now we'll just return the CI phone. */ + if (lc < 0 || rc < 0) + return ci; + + assert((ci >= 0) && (ci < m->n_ciphone)); + assert((lc >= 0) && (lc < m->n_ciphone)); + assert((rc >= 0) && (rc < m->n_ciphone)); + assert((wpos >= 0) && (wpos < N_WORD_POSN)); + + /* Create a context list, mapping fillers to silence. */ + ctx[0] = wpos; + ctx[1] = ci; + ctx[2] = (m->sil >= 0 + && m->phone[lc].info.ci.filler) ? m->sil : lc; + ctx[3] = (m->sil >= 0 + && m->phone[rc].info.ci.filler) ? m->sil : rc; + + /* Walk down the cd_tree. */ + cd_tree = m->cd_tree; + level = 0; /* What level we are on. */ + max = N_WORD_POSN; /* Number of nodes on this level. */ + while (level < 4) { + int i; + +#if 0 + E_INFO("Looking for context %d=%s in %d at %d\n", + ctx[level], m->ciname[ctx[level]], + max, cd_tree - m->cd_tree); +#endif + for (i = 0; i < max; ++i) { +#if 0 + E_INFO("Look at context %d=%s at %d\n", + cd_tree[i].ctx, + m->ciname[cd_tree[i].ctx], cd_tree + i - m->cd_tree); +#endif + if (cd_tree[i].ctx == ctx[level]) + break; + } + if (i == max) + return -1; +#if 0 + E_INFO("Found context %d=%s at %d, n_down=%d, down=%d\n", + ctx[level], m->ciname[ctx[level]], + cd_tree + i - m->cd_tree, + cd_tree[i].n_down, cd_tree[i].c.down); +#endif + /* Leaf node, stop here. */ + if (cd_tree[i].n_down == 0) + return cd_tree[i].c.pid; + + /* Go down one level. */ + max = cd_tree[i].n_down; + cd_tree = m->cd_tree + cd_tree[i].c.down; + ++level; + } + /* We probably shouldn't get here. 
*/ + return -1; +} + +int +bin_mdef_phone_id_nearest(bin_mdef_t * m, int32 b, int32 l, int32 r, int32 pos) +{ + int p, tmppos; + + + + /* In the future, we might back off when context is not available, + * but for now we'll just return the CI phone. */ + if (l < 0 || r < 0) + return b; + + p = bin_mdef_phone_id(m, b, l, r, pos); + if (p >= 0) + return p; + + /* Exact triphone not found; backoff to other word positions */ + for (tmppos = 0; tmppos < N_WORD_POSN; tmppos++) { + if (tmppos != pos) { + p = bin_mdef_phone_id(m, b, l, r, tmppos); + if (p >= 0) + return p; + } + } + + /* Nothing yet; backoff to silence phone if non-silence filler context */ + /* In addition, backoff to silence phone on left/right if in beginning/end position */ + if (m->sil >= 0) { + int newl = l, newr = r; + if (m->phone[(int)l].info.ci.filler + || pos == WORD_POSN_BEGIN || pos == WORD_POSN_SINGLE) + newl = m->sil; + if (m->phone[(int)r].info.ci.filler + || pos == WORD_POSN_END || pos == WORD_POSN_SINGLE) + newr = m->sil; + if ((newl != l) || (newr != r)) { + p = bin_mdef_phone_id(m, b, newl, newr, pos); + if (p >= 0) + return p; + + for (tmppos = 0; tmppos < N_WORD_POSN; tmppos++) { + if (tmppos != pos) { + p = bin_mdef_phone_id(m, b, newl, newr, tmppos); + if (p >= 0) + return p; + } + } + } + } + + /* Nothing yet; backoff to base phone */ + return b; +} + +int +bin_mdef_phone_str(bin_mdef_t * m, int pid, char *buf) +{ + char *wpos_name; + + assert(m); + assert((pid >= 0) && (pid < m->n_phone)); + wpos_name = WPOS_NAME; + + buf[0] = '\0'; + if (pid < m->n_ciphone) + sprintf(buf, "%s", bin_mdef_ciphone_str(m, pid)); + else { + sprintf(buf, "%s %s %s %c", + bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[0]), + bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[1]), + bin_mdef_ciphone_str(m, m->phone[pid].info.cd.ctx[2]), + wpos_name[m->phone[pid].info.cd.wpos]); + } + return 0; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/bin_mdef.h 
b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/bin_mdef.h new file mode 100644 index 0000000000000000000000000000000000000000..97db1454e216bf03a92003775d49bceb03482c7f --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/bin_mdef.h @@ -0,0 +1,247 @@ +/* -*- c-file-style: "linux" -*- */ +/* ==================================================================== + * Copyright (c) 2005 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file bin_mdef.h + * + * Binary format model definition files, with support for + * heterogeneous topologies and variable-size N-phones + * + * @author David Huggins-Daines + */ +#ifndef __BIN_MDEF_H__ +#define __BIN_MDEF_H__ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/* SphinxBase headers. */ +#include +#include +#include + +#include "mdef.h" + +#define BIN_MDEF_FORMAT_VERSION 1 +/* Little-endian machines will write "BMDF" to disk, big-endian ones "FDMB". */ +#define BIN_MDEF_NATIVE_ENDIAN 0x46444d42 /* 'BMDF' in little-endian order */ +#define BIN_MDEF_OTHER_ENDIAN 0x424d4446 /* 'BMDF' in big-endian order */ +#ifdef __GNUC__ +#define __ATTRIBUTE_PACKED __attribute__((packed)) +#else +#define __ATTRIBUTE_PACKED +#endif + +/** + * Phone entry (on-disk, 12 bytes) + */ +typedef struct mdef_entry_s mdef_entry_t; +struct mdef_entry_s { + int32 ssid; /**< Senone sequence ID */ + int32 tmat; /**< Transition matrix ID */ + /* FIXME: is any of this actually necessary? */ + union { + /**< CI phone information - attributes (just "filler" for now) */ + struct { + uint8 filler; + uint8 reserved[3]; + } ci; + /**< CD phone information - context info. 
*/ + struct { + uint8 wpos; + uint8 ctx[3]; /**< quintphones will require hacking */ + } cd; + } info; +} __ATTRIBUTE_PACKED; + +/** + * Invalid senone sequence ID (limited to 16 bits for PocketSphinx). + */ +#define BAD_SSID 0xffff +/** + * Invalid senone ID (limited to 16 bits for PocketSphinx). + */ +#define BAD_SENID 0xffff + +/** + * Node in CD phone tree (on-disk, 8 bytes). + */ +typedef struct cd_tree_s cd_tree_t; +struct cd_tree_s { + int16 ctx; /**< Context (word position or CI phone) */ + int16 n_down; /**< Number of children (0 for leafnode) */ + union { + int32 pid; /**< Phone ID (leafnode) */ + int32 down; /**< Next level of the tree (offset from start of cd_trees) */ + } c; +}; + +/** + * Model definition structure (in-memory). + */ +typedef struct bin_mdef_s bin_mdef_t; +struct bin_mdef_s { + int refcnt; + int32 n_ciphone; /**< Number of base (CI) phones */ + int32 n_phone; /**< Number of base (CI) phones + (CD) triphones */ + int32 n_emit_state; /**< Number of emitting states per phone (0 for heterogeneous) */ + int32 n_ci_sen; /**< Number of CI senones; these are the first */ + int32 n_sen; /**< Number of senones (CI+CD) */ + int32 n_tmat; /**< Number of transition matrices */ + int32 n_sseq; /**< Number of unique senone sequences */ + int32 n_ctx; /**< Number of phones of context */ + int32 n_cd_tree; /**< Number of nodes in cd_tree (below) */ + int32 sil; /**< CI phone ID for silence */ + + mmio_file_t *filemap;/**< File map for this file (if any) */ + char **ciname; /**< CI phone names */ + cd_tree_t *cd_tree; /**< Tree mapping CD phones to phone IDs */ + mdef_entry_t *phone; /**< All phone structures */ + uint16 **sseq; /**< Unique senone sequences (2D array built at load time) */ + uint8 *sseq_len; /**< Number of states in each sseq (NULL for homogeneous) */ + + /* These two are not stored on disk, but are generated at load time. 
*/ + int16 *cd2cisen; /**< Parent CI-senone id for each senone */ + int16 *sen2cimap; /**< Parent CI-phone for each senone (CI or CD) */ + + /** Allocation mode for this object. */ + enum { BIN_MDEF_FROM_TEXT, BIN_MDEF_IN_MEMORY, BIN_MDEF_ON_DISK } alloc_mode; +}; + +#define bin_mdef_is_fillerphone(m,p) (((p) < (m)->n_ciphone) \ + ? (m)->phone[p].info.ci.filler \ + : (m)->phone[(m)->phone[p].info.cd.ctx[0]].info.ci.filler) +#define bin_mdef_is_ciphone(m,p) ((p) < (m)->n_ciphone) +#define bin_mdef_n_ciphone(m) ((m)->n_ciphone) +#define bin_mdef_n_phone(m) ((m)->n_phone) +#define bin_mdef_n_sseq(m) ((m)->n_sseq) +#define bin_mdef_n_emit_state(m) ((m)->n_emit_state) +#define bin_mdef_n_emit_state_phone(m,p) ((m)->n_emit_state ? (m)->n_emit_state \ + : (m)->sseq_len[(m)->phone[p].ssid]) +#define bin_mdef_n_sen(m) ((m)->n_sen) +#define bin_mdef_n_tmat(m) ((m)->n_tmat) +#define bin_mdef_pid2ssid(m,p) ((m)->phone[p].ssid) +#define bin_mdef_pid2tmatid(m,p) ((m)->phone[p].tmat) +#define bin_mdef_silphone(m) ((m)->sil) +#define bin_mdef_sen2cimap(m,s) ((m)->sen2cimap[s]) +#define bin_mdef_sseq2sen(m,ss,pos) ((m)->sseq[ss][pos]) +#define bin_mdef_pid2ci(m,p) (((p) < (m)->n_ciphone) ? (p) \ + : (m)->phone[p].info.cd.ctx[0]) + +/** + * Read a binary mdef from a file. + */ +POCKETSPHINX_EXPORT +bin_mdef_t *bin_mdef_read(cmd_ln_t *config, const char *filename); +/** + * Read a text mdef from a file (creating an in-memory binary mdef). + */ +POCKETSPHINX_EXPORT +bin_mdef_t *bin_mdef_read_text(cmd_ln_t *config, const char *filename); +/** + * Write a binary mdef to a file. + */ +POCKETSPHINX_EXPORT +int bin_mdef_write(bin_mdef_t *m, const char *filename); +/** + * Write a binary mdef to a text file. + */ +POCKETSPHINX_EXPORT +int bin_mdef_write_text(bin_mdef_t *m, const char *filename); +/** + * Retain a pointer to a bin_mdef_t. + */ +POCKETSPHINX_EXPORT +bin_mdef_t *bin_mdef_retain(bin_mdef_t *m); +/** + * Release a pointer to a binary mdef. 
+ */ +POCKETSPHINX_EXPORT +int bin_mdef_free(bin_mdef_t *m); + +/** + * Context-independent phone lookup. + * @return phone id for ciphone. + */ +POCKETSPHINX_EXPORT +int bin_mdef_ciphone_id(bin_mdef_t *m, /**< In: Model structure being queried */ + const char *ciphone); /**< In: ciphone for which id wanted */ + +/** + * Case-insensitive context-independent phone lookup. + * @return phone id for ciphone. + */ +POCKETSPHINX_EXPORT +int bin_mdef_ciphone_id_nocase(bin_mdef_t *m, /**< In: Model structure being queried */ + const char *ciphone); /**< In: ciphone for which id wanted */ + +/* Return value: READ-ONLY ciphone string name for the given ciphone id */ +POCKETSPHINX_EXPORT +const char *bin_mdef_ciphone_str(bin_mdef_t *m, /**< In: Model structure being queried */ + int32 ci); /**< In: ciphone id for which name wanted */ + +/* Return value: phone id for the given constituents if found, else -1 */ +POCKETSPHINX_EXPORT +int bin_mdef_phone_id(bin_mdef_t *m, /**< In: Model structure being queried */ + int32 b, /**< In: base ciphone id */ + int32 l, /**< In: left context ciphone id */ + int32 r, /**< In: right context ciphone id */ + int32 pos); /**< In: Word position */ + +/* Look up a phone id, backing off to other word positions. */ +POCKETSPHINX_EXPORT +int bin_mdef_phone_id_nearest(bin_mdef_t * m, int32 b, + int32 l, int32 r, int32 pos); + +/** + * Create a phone string for the given phone (base or triphone) id in the given buf. + * + * @return 0 if successful, -1 if error. 
+ */ +POCKETSPHINX_EXPORT +int bin_mdef_phone_str(bin_mdef_t *m, /**< In: Model structure being queried */ + int pid, /**< In: phone id being queried */ + char *buf); /**< Out: On return, buf has the string */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __BIN_MDEF_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/blkarray_list.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/blkarray_list.c new file mode 100644 index 0000000000000000000000000000000000000000..4b9eb67d852b17a0ecaebfd940babba7778cf138 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/blkarray_list.c @@ -0,0 +1,172 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * blkarray_list.c -- block array-based list structure. + * + * HISTORY + * + * 18-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Started. + */ + +/* System headers. */ +#include + +/* SphinxBase headers. */ +#include +#include +#include + +/* Local headers. */ +#include "blkarray_list.h" + + +#define BLKARRAY_DEFAULT_MAXBLKS 16380 +#define BLKARRAY_DEFAULT_BLKSIZE 16380 + + +blkarray_list_t * +_blkarray_list_init(int32 maxblks, int32 blksize) +{ + blkarray_list_t *bl; + + if ((maxblks <= 0) || (blksize <= 0)) { + E_ERROR("Cannot allocate %dx%d blkarray\n", maxblks, blksize); + return NULL; + } + + bl = (blkarray_list_t *) ckd_calloc(1, sizeof(blkarray_list_t)); + bl->ptr = (void ***) ckd_calloc(maxblks, sizeof(void **)); + bl->maxblks = maxblks; + bl->blksize = blksize; + bl->n_valid = 0; + bl->cur_row = -1; /* No row is allocated (dummy) */ + bl->cur_row_free = blksize; /* The dummy row is full */ + + return bl; +} + + +blkarray_list_t * +blkarray_list_init(void) +{ + return _blkarray_list_init(BLKARRAY_DEFAULT_MAXBLKS, + BLKARRAY_DEFAULT_BLKSIZE); +} + +void +blkarray_list_free(blkarray_list_t *bl) +{ + blkarray_list_reset(bl); + ckd_free(bl->ptr); + ckd_free(bl); +} + + +int32 +blkarray_list_append(blkarray_list_t * bl, void *data) +{ + int32 id; + + assert(bl); + + if (bl->cur_row_free >= bl->blksize) { + /* 
Previous row is filled; need to allocate a new row */ + bl->cur_row++; + + if (bl->cur_row >= bl->maxblks) { + E_ERROR("Block array (%dx%d) exhausted\n", + bl->maxblks, bl->blksize); + bl->cur_row--; + return -1; + } + + /* Allocate the new row */ + assert(bl->ptr[bl->cur_row] == NULL); + bl->ptr[bl->cur_row] = (void **) ckd_malloc(bl->blksize * + sizeof(void *)); + + bl->cur_row_free = 0; + } + + bl->ptr[bl->cur_row][bl->cur_row_free] = data; + (bl->cur_row_free)++; + + id = (bl->n_valid)++; + assert(id >= 0); + + return id; +} + + +void +blkarray_list_reset(blkarray_list_t * bl) +{ + int32 i, j; + + /* Free all the allocated elements as well as the blocks */ + for (i = 0; i < bl->cur_row; i++) { + for (j = 0; j < bl->blksize; j++) + ckd_free(bl->ptr[i][j]); + + ckd_free(bl->ptr[i]); + bl->ptr[i] = NULL; + } + if (i == bl->cur_row) { /* NEED THIS! (in case cur_row < 0) */ + for (j = 0; j < bl->cur_row_free; j++) + ckd_free(bl->ptr[i][j]); + + ckd_free(bl->ptr[i]); + bl->ptr[i] = NULL; + } + + bl->n_valid = 0; + bl->cur_row = -1; + bl->cur_row_free = bl->blksize; +} + +void * +blkarray_list_get(blkarray_list_t *list, int32 n) +{ + int32 r, c; + + if (n >= blkarray_list_n_valid(list)) + return NULL; + + r = n / blkarray_list_blksize(list); + c = n - (r * blkarray_list_blksize(list)); + + return blkarray_list_ptr(list, r, c); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/blkarray_list.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/blkarray_list.h new file mode 100644 index 0000000000000000000000000000000000000000..37865c0bc0cb39bfd07c248293e97ce2d4a77cca --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/blkarray_list.h @@ -0,0 +1,149 @@ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * blkarray_list.h -- array-based list structure, for memory and access + * efficiency. + * + * HISTORY + * + * $Log: blkarray_list.h,v $ + * Revision 1.1.1.1 2006/05/23 18:45:02 dhuggins + * re-importation + * + * Revision 1.2 2004/12/10 16:48:58 rkm + * Added continuous density acoustic model handling + * + * Revision 1.1 2004/07/16 00:57:12 egouvea + * Added Ravi's implementation of FSG support. 
+ * + * Revision 1.2 2004/05/27 14:22:57 rkm + * FSG cross-word triphones completed (but for single-phone words) + * + * Revision 1.1.1.1 2004/03/01 14:30:31 rkm + * + * + * Revision 1.1 2004/02/26 01:14:48 rkm + * *** empty log message *** + * + * + * 18-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Started. + */ + + +#ifndef __S2_BLKARRAY_LIST_H__ +#define __S2_BLKARRAY_LIST_H__ + + +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/* + * For maintaining a (conceptual) "list" of pointers to arbitrary data. + * The application is responsible for knowing the true data type. + * Use an array instead of a true list for efficiency (both memory and + * speed). But use a blocked (2-D) array to allow dynamic resizing at a + * coarse grain. An entire block is allocated or freed, as appropriate. + */ +typedef struct blkarray_list_s { + void ***ptr; /* ptr[][] is the user-supplied ptr */ + int32 maxblks; /* size of ptr (#rows) */ + int32 blksize; /* size of ptr[] (#cols, ie, size of each row) */ + int32 n_valid; /* # entries actually stored in the list */ + int32 cur_row; /* The current row being that has empty entry */ + int32 cur_row_free; /* First entry valid within the current row */ +} blkarray_list_t; + +/* Access macros */ +#define blkarray_list_ptr(l,r,c) ((l)->ptr[r][c]) +#define blkarray_list_maxblks(l) ((l)->maxblks) +#define blkarray_list_blksize(l) ((l)->blksize) +#define blkarray_list_n_valid(l) ((l)->n_valid) +#define blkarray_list_cur_row(l) ((l)->cur_row) +#define blkarray_list_cur_row_free(l) ((l)->cur_row_free) + + +/* + * Initialize and return a new blkarray_list containing an empty list + * (i.e., 0 length). Sized for the given values of maxblks and blksize. + * NOTE: (maxblks * blksize) should not overflow int32, but this is not + * checked. + * Return the allocated entry if successful, NULL if any error. 
+ */ +blkarray_list_t *_blkarray_list_init (int32 maxblks, int32 blksize); + + +/* + * Like _blkarray_list_init() above, but for some default values of + * maxblks and blksize. + */ +blkarray_list_t *blkarray_list_init ( void ); + +/** + * Completely finalize a blkarray_list. + */ +void blkarray_list_free(blkarray_list_t *bl); + + +/* + * Append the given new entry (data) to the end of the list. + * Return the index of the entry if successful, -1 if any error. + * The returned indices are guaranteed to be successive integers (i.e., + * 0, 1, 2...) for successive append operations, until the list is reset, + * when they resume from 0. + */ +int32 blkarray_list_append (blkarray_list_t *, void *data); + + +/* + * Free all the entries in the list (using ckd_free) and reset the + * list length to 0. + */ +void blkarray_list_reset (blkarray_list_t *); + + +/* Gets n-th element of the array list */ +void * blkarray_list_get(blkarray_list_t *, int32 n); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/dict.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/dict.c new file mode 100644 index 0000000000000000000000000000000000000000..d57aebd3f18a799ffa33761f4abad783aa02e23b --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/dict.c @@ -0,0 +1,506 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* System headers. */ +#include + +/* SphinxBase headers. */ +#include +#include + +/* Local headers. 
*/ +#include "dict.h" + + +#define DELIM " \t\n" /* Set of field separator characters */ +#define DEFAULT_NUM_PHONE (MAX_S3CIPID+1) + +#if WIN32 +#define snprintf sprintf_s +#endif + +extern const char *const cmu6_lts_phone_table[]; + +static s3cipid_t +dict_ciphone_id(dict_t * d, const char *str) +{ + if (d->nocase) + return bin_mdef_ciphone_id_nocase(d->mdef, str); + else + return bin_mdef_ciphone_id(d->mdef, str); +} + + +const char * +dict_ciphone_str(dict_t * d, s3wid_t wid, int32 pos) +{ + assert(d != NULL); + assert((wid >= 0) && (wid < d->n_word)); + assert((pos >= 0) && (pos < d->word[wid].pronlen)); + + return bin_mdef_ciphone_str(d->mdef, d->word[wid].ciphone[pos]); +} + + +s3wid_t +dict_add_word(dict_t * d, char const *word, s3cipid_t const * p, int32 np) +{ + int32 len; + dictword_t *wordp; + s3wid_t newwid; + char *wword; + + if (d->n_word >= d->max_words) { + E_INFO("Reallocating to %d KiB for word entries\n", + (d->max_words + S3DICT_INC_SZ) * sizeof(dictword_t) / 1024); + d->word = + (dictword_t *) ckd_realloc(d->word, + (d->max_words + + S3DICT_INC_SZ) * sizeof(dictword_t)); + d->max_words = d->max_words + S3DICT_INC_SZ; + } + + wordp = d->word + d->n_word; + wordp->word = (char *) ckd_salloc(word); /* Freed in dict_free */ + + /* Determine base/alt wids */ + wword = ckd_salloc(word); + if ((len = dict_word2basestr(wword)) > 0) { + int32 w; + + /* Truncated to a baseword string; find its ID */ + if (hash_table_lookup_int32(d->ht, wword, &w) < 0) { + E_ERROR("Missing base word for: %s\n", word); + ckd_free(wword); + ckd_free(wordp->word); + wordp->word = NULL; + return BAD_S3WID; + } + + /* Link into alt list */ + wordp->basewid = w; + wordp->alt = d->word[w].alt; + d->word[w].alt = d->n_word; + } else { + wordp->alt = BAD_S3WID; + wordp->basewid = d->n_word; + } + ckd_free(wword); + + /* Associate word string with d->n_word in hash table */ + if (hash_table_enter_int32(d->ht, wordp->word, d->n_word) != d->n_word) { + ckd_free(wordp->word); + 
wordp->word = NULL; + return BAD_S3WID; + } + + /* Fill in word entry, and set defaults */ + if (p && (np > 0)) { + wordp->ciphone = (s3cipid_t *) ckd_malloc(np * sizeof(s3cipid_t)); /* Freed in dict_free */ + memcpy(wordp->ciphone, p, np * sizeof(s3cipid_t)); + wordp->pronlen = np; + } + else { + wordp->ciphone = NULL; + wordp->pronlen = 0; + } + + newwid = d->n_word++; + + return newwid; +} + + +static int32 +dict_read(FILE * fp, dict_t * d) +{ + lineiter_t *li; + char **wptr; + s3cipid_t *p; + int32 lineno, nwd; + s3wid_t w; + int32 i, maxwd; + size_t stralloc, phnalloc; + + maxwd = 512; + p = (s3cipid_t *) ckd_calloc(maxwd + 4, sizeof(*p)); + wptr = (char **) ckd_calloc(maxwd, sizeof(char *)); /* Freed below */ + + lineno = 0; + stralloc = phnalloc = 0; + for (li = lineiter_start(fp); li; li = lineiter_next(li)) { + lineno++; + if (0 == strncmp(li->buf, "##", 2) + || 0 == strncmp(li->buf, ";;", 2)) + continue; + + if ((nwd = str2words(li->buf, wptr, maxwd)) < 0) { + /* Increase size of p, wptr. */ + nwd = str2words(li->buf, NULL, 0); + assert(nwd > maxwd); /* why else would it fail? 
*/ + maxwd = nwd; + p = (s3cipid_t *) ckd_realloc(p, (maxwd + 4) * sizeof(*p)); + wptr = (char **) ckd_realloc(wptr, maxwd * sizeof(*wptr)); + } + + if (nwd == 0) /* Empty line */ + continue; + /* wptr[0] is the word-string and wptr[1..nwd-1] the pronunciation sequence */ + if (nwd == 1) { + E_ERROR("Line %d: No pronunciation for word '%s'; ignored\n", + lineno, wptr[0]); + continue; + } + + + /* Convert pronunciation string to CI-phone-ids */ + for (i = 1; i < nwd; i++) { + p[i - 1] = dict_ciphone_id(d, wptr[i]); + if (NOT_S3CIPID(p[i - 1])) { + E_ERROR("Line %d: Phone '%s' is missing in the acoustic model; word '%s' ignored\n", + lineno, wptr[i], wptr[0]); + break; + } + } + + if (i == nwd) { /* All CI-phones successfully converted to IDs */ + w = dict_add_word(d, wptr[0], p, nwd - 1); + if (NOT_S3WID(w)) + E_ERROR + ("Line %d: Failed to add the word '%s' (duplicate?); ignored\n", + lineno, wptr[0]); + else { + stralloc += strlen(d->word[w].word); + phnalloc += d->word[w].pronlen * sizeof(s3cipid_t); + } + } + } + E_INFO("Dictionary size %d, allocated %d KiB for strings, %d KiB for phones\n", + dict_size(d), (int)stralloc / 1024, (int)phnalloc / 1024); + ckd_free(p); + ckd_free(wptr); + + return 0; +} + +int +dict_write(dict_t *dict, char const *filename, char const *format) +{ + FILE *fh; + int i; + + (void)format; /* FIXME */ + if ((fh = fopen(filename, "w")) == NULL) { + E_ERROR_SYSTEM("Failed to open '%s'", filename); + return -1; + } + for (i = 0; i < dict->n_word; ++i) { + char *phones; + int j, phlen; + if (!dict_real_word(dict, i)) + continue; + for (phlen = j = 0; j < dict_pronlen(dict, i); ++j) + phlen += strlen(dict_ciphone_str(dict, i, j)) + 1; + phones = ckd_calloc(1, phlen); + for (j = 0; j < dict_pronlen(dict, i); ++j) { + strcat(phones, dict_ciphone_str(dict, i, j)); + if (j != dict_pronlen(dict, i) - 1) + strcat(phones, " "); + } + fprintf(fh, "%-30s %s\n", dict_wordstr(dict, i), phones); + ckd_free(phones); + } + fclose(fh); + return 0; +} + + 
+dict_t * +dict_init(cmd_ln_t *config, bin_mdef_t * mdef) +{ + FILE *fp, *fp2; + int32 n; + lineiter_t *li; + dict_t *d; + s3cipid_t sil; + char const *dictfile = NULL, *fillerfile = NULL; + + if (config) { + dictfile = cmd_ln_str_r(config, "-dict"); + fillerfile = cmd_ln_str_r(config, "_fdict"); + } + + /* + * First obtain #words in dictionary (for hash table allocation). + * Reason: The PC NT system doesn't like to grow memory gradually. Better to allocate + * all the required memory in one go. + */ + fp = NULL; + n = 0; + if (dictfile) { + if ((fp = fopen(dictfile, "r")) == NULL) { + E_ERROR_SYSTEM("Failed to open dictionary file '%s' for reading", dictfile); + return NULL; + } + for (li = lineiter_start(fp); li; li = lineiter_next(li)) { + if (0 != strncmp(li->buf, "##", 2) + && 0 != strncmp(li->buf, ";;", 2)) + n++; + } + fseek(fp, 0L, SEEK_SET); + } + + fp2 = NULL; + if (fillerfile) { + if ((fp2 = fopen(fillerfile, "r")) == NULL) { + E_ERROR_SYSTEM("Failed to open filler dictionary file '%s' for reading", fillerfile); + fclose(fp); + return NULL; + } + for (li = lineiter_start(fp2); li; li = lineiter_next(li)) { + if (0 != strncmp(li->buf, "##", 2) + && 0 != strncmp(li->buf, ";;", 2)) + n++; + } + fseek(fp2, 0L, SEEK_SET); + } + + /* + * Allocate dict entries. HACK!! Allow some extra entries for words not in file. + * Also check for type size restrictions. + */ + d = (dict_t *) ckd_calloc(1, sizeof(dict_t)); /* freed in dict_free() */ + d->refcnt = 1; + d->max_words = + (n + S3DICT_INC_SZ < MAX_S3WID) ? 
n + S3DICT_INC_SZ : MAX_S3WID; + if (n >= MAX_S3WID) { + E_ERROR("Number of words in dictionaries (%d) exceeds limit (%d)\n", n, + MAX_S3WID); + if (fp) fclose(fp); + if (fp2) fclose(fp2); + ckd_free(d); + return NULL; + } + + E_INFO("Allocating %d * %d bytes (%d KiB) for word entries\n", + d->max_words, sizeof(dictword_t), + d->max_words * sizeof(dictword_t) / 1024); + d->word = (dictword_t *) ckd_calloc(d->max_words, sizeof(dictword_t)); /* freed in dict_free() */ + d->n_word = 0; + if (mdef) + d->mdef = bin_mdef_retain(mdef); + + /* Create new hash table for word strings; case-insensitive word strings */ + if (config && cmd_ln_exists_r(config, "-dictcase")) + d->nocase = cmd_ln_boolean_r(config, "-dictcase"); + d->ht = hash_table_new(d->max_words, d->nocase); + + /* Digest main dictionary file */ + if (fp) { + E_INFO("Reading main dictionary: %s\n", dictfile); + dict_read(fp, d); + fclose(fp); + E_INFO("%d words read\n", d->n_word); + } + + if (dict_wordid(d, S3_START_WORD) != BAD_S3WID) { + E_ERROR("Remove sentence start word '' from the dictionary\n"); + dict_free(d); + return NULL; + } + if (dict_wordid(d, S3_FINISH_WORD) != BAD_S3WID) { + E_ERROR("Remove sentence start word '' from the dictionary\n"); + dict_free(d); + return NULL; + } + if (dict_wordid(d, S3_SILENCE_WORD) != BAD_S3WID) { + E_ERROR("Remove silence word '' from the dictionary\n"); + dict_free(d); + return NULL; + } + + /* Now the filler dictionary file, if it exists */ + d->filler_start = d->n_word; + if (fp2) { + E_INFO("Reading filler dictionary: %s\n", fillerfile); + dict_read(fp2, d); + fclose(fp2); + E_INFO("%d words read\n", d->n_word - d->filler_start); + } + if (mdef) + sil = bin_mdef_silphone(mdef); + else + sil = 0; + if (dict_wordid(d, S3_START_WORD) == BAD_S3WID) { + dict_add_word(d, S3_START_WORD, &sil, 1); + } + if (dict_wordid(d, S3_FINISH_WORD) == BAD_S3WID) { + dict_add_word(d, S3_FINISH_WORD, &sil, 1); + } + if (dict_wordid(d, S3_SILENCE_WORD) == BAD_S3WID) { + 
dict_add_word(d, S3_SILENCE_WORD, &sil, 1); + } + + d->filler_end = d->n_word - 1; + + /* Initialize distinguished word-ids */ + d->startwid = dict_wordid(d, S3_START_WORD); + d->finishwid = dict_wordid(d, S3_FINISH_WORD); + d->silwid = dict_wordid(d, S3_SILENCE_WORD); + + if ((d->filler_start > d->filler_end) + || (!dict_filler_word(d, d->silwid))) { + E_ERROR("Word '%s' must occur (only) in filler dictionary\n", + S3_SILENCE_WORD); + dict_free(d); + return NULL; + } + + /* No check that alternative pronunciations for filler words are in filler range!! */ + + return d; +} + + +s3wid_t +dict_wordid(dict_t *d, const char *word) +{ + int32 w; + + assert(d); + assert(word); + + if (hash_table_lookup_int32(d->ht, word, &w) < 0) + return (BAD_S3WID); + return w; +} + + +int +dict_filler_word(dict_t *d, s3wid_t w) +{ + assert(d); + assert((w >= 0) && (w < d->n_word)); + + w = dict_basewid(d, w); + if ((w == d->startwid) || (w == d->finishwid)) + return 0; + if ((w >= d->filler_start) && (w <= d->filler_end)) + return 1; + return 0; +} + +int +dict_real_word(dict_t *d, s3wid_t w) +{ + assert(d); + assert((w >= 0) && (w < d->n_word)); + + w = dict_basewid(d, w); + if ((w == d->startwid) || (w == d->finishwid)) + return 0; + if ((w >= d->filler_start) && (w <= d->filler_end)) + return 0; + return 1; +} + + +int32 +dict_word2basestr(char *word) +{ + int32 i, len; + + len = strlen(word); + if (word[len - 1] == ')') { + for (i = len - 2; (i > 0) && (word[i] != '('); --i); + + if (i > 0) { + /* The word is of the form (...); strip from left-paren */ + word[i] = '\0'; + return i; + } + } + + return -1; +} + +dict_t * +dict_retain(dict_t *d) +{ + ++d->refcnt; + return d; +} + +int +dict_free(dict_t * d) +{ + int i; + dictword_t *word; + + if (d == NULL) + return 0; + if (--d->refcnt > 0) + return d->refcnt; + + /* First Step, free all memory allocated for each word */ + for (i = 0; i < d->n_word; i++) { + word = (dictword_t *) & (d->word[i]); + if (word->word) + ckd_free((void *) 
word->word); + if (word->ciphone) + ckd_free((void *) word->ciphone); + } + + if (d->word) + ckd_free((void *) d->word); + if (d->ht) + hash_table_free(d->ht); + if (d->mdef) + bin_mdef_free(d->mdef); + ckd_free((void *) d); + + return 0; +} + +void +dict_report(dict_t * d) +{ + E_INFO_NOFN("Initialization of dict_t, report:\n"); + E_INFO_NOFN("Max word: %d\n", d->max_words); + E_INFO_NOFN("No of word: %d\n", d->n_word); + E_INFO_NOFN("\n"); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/dict.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/dict.h new file mode 100644 index 0000000000000000000000000000000000000000..804d465bef5bc5d9198f47d45c43165d695b3621 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/dict.h @@ -0,0 +1,222 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+
+#ifndef _S3_DICT_H_
+#define _S3_DICT_H_
+
+/** \file dict.h
+ * \brief Operations on dictionary.
+ */
+
+/* SphinxBase headers. */
+#include
+
+/* Local headers. */
+#include "s3types.h"
+#include "bin_mdef.h"
+#include "pocketsphinx/export.h"
+
+#define S3DICT_INC_SZ 4096 /**< Number of word slots added whenever the word array is grown */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if 0
+}
+#endif
+
+/**
+   \struct dictword_t
+   \brief a structure for one dictionary word.
+*/
+typedef struct dictword_s {
+    char *word;         /**< Ascii word string */
+    s3cipid_t *ciphone; /**< Pronunciation (NULL when pronlen is 0) */
+    int32 pronlen;      /**< Pronunciation length */
+    s3wid_t alt;        /**< Next alternative pronunciation id, BAD_S3WID if none */
+    s3wid_t basewid;    /**< Base pronunciation id (the word's own id for a base word) */
+} dictword_t;
+
+/**
+   \struct dict_t
+   \brief a structure for a dictionary.
+*/
+
+typedef struct dict_s {
+    int refcnt;                 /**< Reference count; see dict_retain() and dict_free() */
+    bin_mdef_t *mdef;           /**< Model definition used for phone IDs; NULL if none used */
+    dictword_t *word;           /**< Array of entries in dictionary */
+    hash_table_t *ht;           /**< Hash table for mapping word strings to word ids */
+    int32 max_words;            /**< #Entries allocated in dict, including empty slots */
+    int32 n_word;               /**< #Occupied entries in dict; ie, excluding empty slots */
+    int32 filler_start;         /**< First filler word id (read from filler dict) */
+    int32 filler_end;           /**< Last filler word id (read from filler dict), inclusive */
+    s3wid_t startwid;           /**< FOR INTERNAL-USE ONLY */
+    s3wid_t finishwid;          /**< FOR INTERNAL-USE ONLY */
+    s3wid_t silwid;             /**< FOR INTERNAL-USE ONLY */
+    int nocase;                 /**< Set from -dictcase; passed to hash_table_new() and selects the no-case phone lookup */
+} dict_t;
+
+
+/**
+ * Initialize a new dictionary.
+ *
+ * If config and mdef are supplied, then the dictionary will be read
+ * from the files specified by the -dict and -fdict options in config,
+ * with case sensitivity determined by the -dictcase option.
+ * (dict_init() looks the filler dictionary path up under the
+ * configuration key "_fdict".)
+ *
+ * Otherwise an empty case-sensitive dictionary will be created.
+ *
+ * Return ptr to dict_t if successful, NULL otherwise.
+ */
+POCKETSPHINX_EXPORT
+dict_t *dict_init(cmd_ln_t *config, /**< Configuration (-dict, -fdict, -dictcase) or NULL */
+                  bin_mdef_t *mdef  /**< For looking up CI phone IDs (or NULL) */
+    );
+
+/**
+ * Write dictionary to a file.
+ *
+ * Only "real" words (see dict_real_word()) are written.  The format
+ * argument is currently ignored.  Returns 0 on success, -1 if the
+ * file cannot be opened.
+ */
+POCKETSPHINX_EXPORT
+int dict_write(dict_t *dict, char const *filename, char const *format);
+
+/** Return word id for given word string if present.  Otherwise return BAD_S3WID */
+POCKETSPHINX_EXPORT
+s3wid_t dict_wordid(dict_t *d, const char *word);
+
+/**
+ * Return 1 if w is a filler word, 0 if not.  A filler word is one that was read in from the
+ * filler dictionary; however, sentence START and FINISH words are not filler words.
+ */
+POCKETSPHINX_EXPORT
+int dict_filler_word(dict_t *d,  /**< The dictionary structure */
+                     s3wid_t w     /**< The word ID */
+    );
+
+/**
+ * Test if w is a "real" word, i.e. neither a filler word nor START/FINISH.
+ */
+POCKETSPHINX_EXPORT
+int dict_real_word(dict_t *d,  /**< The dictionary structure */
+                   s3wid_t w     /**< The word ID */
+    );
+
+/**
+ * Add a word with the given ciphone pronunciation list to the dictionary.
+ * Return value: Result word id if successful, BAD_S3WID otherwise
+ */
+POCKETSPHINX_EXPORT
+s3wid_t dict_add_word(dict_t *d,          /**< The dictionary structure. */
+                      char const *word,   /**< The word. */
+                      s3cipid_t const *p, /**< The pronunciation. */
+                      int32 np            /**< Number of phones. */
+    );
+
+/**
+ * Return value: CI phone string for the given word, phone position.
+ */
+POCKETSPHINX_EXPORT
+const char *dict_ciphone_str(dict_t *d,	/**< In: Dictionary to look up */
+                             s3wid_t wid,	/**< In: Component word being looked up */
+                             int32 pos   	/**< In: Pronunciation phone position */
+    );
+
+/** Packaged macro access to dictionary members */
+#define dict_size(d)		((d)->n_word)
+#define dict_num_fillers(d)   (dict_filler_end(d) - dict_filler_start(d))
+/**
+ * Number of "real words" in the dictionary.
+ *
+ * This is the number of words that are not fillers, the sentence
+ * start word, or the sentence finish word.
+ *
+ * NOTE(review): filler_end is stored as an inclusive index, so
+ * (filler_end - filler_start) is one less than the number of entries
+ * in the filler range; confirm the "- 2" below against callers.
+ */
+#define dict_num_real_words(d)                                          \
+    (dict_size(d) - (dict_filler_end(d) - dict_filler_start(d)) - 2)
+#define dict_basewid(d,w)	((d)->word[w].basewid)
+#define dict_wordstr(d,w)	((w) < 0 ? NULL : (d)->word[w].word)
+#define dict_basestr(d,w)	((d)->word[dict_basewid(d,w)].word)
+#define dict_nextalt(d,w)	((d)->word[w].alt)
+#define dict_pronlen(d,w)	((d)->word[w].pronlen)
+#define dict_pron(d,w,p)	((d)->word[w].ciphone[p]) /**< The CI phones of the word w at position p */
+#define dict_filler_start(d)	((d)->filler_start)
+#define dict_filler_end(d)	((d)->filler_end)
+#define dict_startwid(d)	((d)->startwid)
+#define dict_finishwid(d)	((d)->finishwid)
+#define dict_silwid(d)		((d)->silwid)
+#define dict_is_single_phone(d,w)	((d)->word[w].pronlen == 1)
+#define dict_first_phone(d,w)	((d)->word[w].ciphone[0])
+#define dict_second_phone(d,w)	((d)->word[w].ciphone[1])
+#define dict_second_last_phone(d,w)	((d)->word[w].ciphone[(d)->word[w].pronlen - 2])
+#define dict_last_phone(d,w)	((d)->word[w].ciphone[(d)->word[w].pronlen - 1])
+
+/* Hard-coded special words
+ *
+ * NOTE(review): all four literals below are empty strings in this copy,
+ * which makes the start, finish and silence words indistinguishable;
+ * presumably angle-bracketed tags were stripped when the file was
+ * extracted -- confirm against the upstream header.
+ */
+#define S3_START_WORD		""
+#define S3_FINISH_WORD		""
+#define S3_SILENCE_WORD		""
+#define S3_UNKNOWN_WORD		""
+
+/**
+ * If the given word contains a trailing "(....)" (i.e., a Sphinx-II style alternative
+ * pronunciation specification), strip that trailing portion from it.  Note that the given
+ * string is modified.
+ * Return value: If string was modified, the character position at which the original string
+ * was truncated; otherwise -1.
+ */
+POCKETSPHINX_EXPORT
+int32 dict_word2basestr(char *word);
+
+/**
+ * Retain a pointer to a dict_t.
+ */
+POCKETSPHINX_EXPORT
+dict_t *dict_retain(dict_t *d);
+
+/**
+ * Release a pointer to a dictionary.
+ *
+ * Returns the remaining reference count; the dictionary is freed
+ * (and 0 returned) when the count drops to zero.
+ */
+POCKETSPHINX_EXPORT
+int dict_free(dict_t *d);
+
+/** Report a dictionary structure */
+POCKETSPHINX_EXPORT
+void dict_report(dict_t *d /**< A dictionary structure */
+    );
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/dict2pid.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/dict2pid.c
new file mode 100644
index 0000000000000000000000000000000000000000..126898f02f70ca84b2d93fbc8b4ac20828452baa
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/dict2pid.c
@@ -0,0 +1,572 @@
+/* -*- c-basic-offset:4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2004 Carnegie Mellon University.  All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#include + +#include "dict2pid.h" +#include "hmm.h" + + +/** + * @file dict2pid.c - dictionary word to senone sequence mappings + */ + +void +compress_table(s3ssid_t * uncomp_tab, s3ssid_t * com_tab, + s3cipid_t * ci_map, int32 n_ci) +{ + int32 found; + int32 r; + int32 tmp_r; + + for (r = 0; r < n_ci; r++) { + com_tab[r] = BAD_S3SSID; + ci_map[r] = BAD_S3CIPID; + } + /** Compress this map */ + for (r = 0; r < n_ci; r++) { + + found = 0; + for (tmp_r = 0; tmp_r < r && com_tab[tmp_r] != BAD_S3SSID; tmp_r++) { /* If it appears before, just filled in cimap; */ + if (uncomp_tab[r] == com_tab[tmp_r]) { + found = 1; + ci_map[r] = tmp_r; + break; + } + } + + if (found == 0) { + com_tab[tmp_r] = uncomp_tab[r]; + ci_map[r] = tmp_r; + } + } +} + + +static void +compress_right_context_tree(dict2pid_t * d2p, + s3ssid_t ***rdiph_rc) +{ + int32 n_ci; + int32 b, l, r; + s3ssid_t *rmap; + s3ssid_t *tmpssid; + s3cipid_t *tmpcimap; + bin_mdef_t *mdef = d2p->mdef; + size_t alloc; + + n_ci = mdef->n_ciphone; + + tmpssid = ckd_calloc(n_ci, sizeof(s3ssid_t)); + tmpcimap = ckd_calloc(n_ci, sizeof(s3cipid_t)); + + d2p->rssid = + (xwdssid_t **) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t *)); + alloc = mdef->n_ciphone * sizeof(xwdssid_t *); + + for (b = 0; b < n_ci; b++) { + d2p->rssid[b] = + (xwdssid_t *) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t)); + alloc 
+= mdef->n_ciphone * sizeof(xwdssid_t); + + for (l = 0; l < n_ci; l++) { + rmap = rdiph_rc[b][l]; + compress_table(rmap, tmpssid, tmpcimap, mdef->n_ciphone); + + for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID; + r++); + + if (tmpssid[0] != BAD_S3SSID) { + d2p->rssid[b][l].ssid = ckd_calloc(r, sizeof(s3ssid_t)); + memcpy(d2p->rssid[b][l].ssid, tmpssid, + r * sizeof(s3ssid_t)); + d2p->rssid[b][l].cimap = + ckd_calloc(mdef->n_ciphone, sizeof(s3cipid_t)); + memcpy(d2p->rssid[b][l].cimap, tmpcimap, + (mdef->n_ciphone) * sizeof(s3cipid_t)); + d2p->rssid[b][l].n_ssid = r; + } + else { + d2p->rssid[b][l].ssid = NULL; + d2p->rssid[b][l].cimap = NULL; + d2p->rssid[b][l].n_ssid = 0; + } + } + } + + E_INFO("Allocated %d bytes (%d KiB) for word-final triphones\n", + (int)alloc, (int)alloc / 1024); + ckd_free(tmpssid); + ckd_free(tmpcimap); +} + +static void +compress_left_right_context_tree(dict2pid_t * d2p) +{ + int32 n_ci; + int32 b, l, r; + s3ssid_t *rmap; + s3ssid_t *tmpssid; + s3cipid_t *tmpcimap; + bin_mdef_t *mdef = d2p->mdef; + size_t alloc; + + n_ci = mdef->n_ciphone; + + tmpssid = ckd_calloc(n_ci, sizeof(s3ssid_t)); + tmpcimap = ckd_calloc(n_ci, sizeof(s3cipid_t)); + + assert(d2p->lrdiph_rc); + + d2p->lrssid = + (xwdssid_t **) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t *)); + alloc = mdef->n_ciphone * sizeof(xwdssid_t *); + + for (b = 0; b < n_ci; b++) { + + d2p->lrssid[b] = + (xwdssid_t *) ckd_calloc(mdef->n_ciphone, sizeof(xwdssid_t)); + alloc += mdef->n_ciphone * sizeof(xwdssid_t); + + for (l = 0; l < n_ci; l++) { + rmap = d2p->lrdiph_rc[b][l]; + + compress_table(rmap, tmpssid, tmpcimap, mdef->n_ciphone); + + for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID; + r++); + + if (tmpssid[0] != BAD_S3SSID) { + d2p->lrssid[b][l].ssid = ckd_calloc(r, sizeof(s3ssid_t)); + memcpy(d2p->lrssid[b][l].ssid, tmpssid, + r * sizeof(s3ssid_t)); + d2p->lrssid[b][l].cimap = + ckd_calloc(mdef->n_ciphone, sizeof(s3cipid_t)); + memcpy(d2p->lrssid[b][l].cimap, 
tmpcimap, + (mdef->n_ciphone) * sizeof(s3cipid_t)); + d2p->lrssid[b][l].n_ssid = r; + } + else { + d2p->lrssid[b][l].ssid = NULL; + d2p->lrssid[b][l].cimap = NULL; + d2p->lrssid[b][l].n_ssid = 0; + } + } + } + + /* Try to compress lrdiph_rc into lrdiph_rc_compressed */ + ckd_free(tmpssid); + ckd_free(tmpcimap); + + E_INFO("Allocated %d bytes (%d KiB) for single-phone word triphones\n", + (int)alloc, (int)alloc / 1024); +} + +/** + ARCHAN, A duplicate of get_rc_npid in ctxt_table.h. I doubt whether it is correct + because the compressed map has not been checked. +*/ +int32 +get_rc_nssid(dict2pid_t * d2p, s3wid_t w) +{ + int32 pronlen; + s3cipid_t b, lc; + dict_t *dict = d2p->dict; + + pronlen = dict->word[w].pronlen; + b = dict->word[w].ciphone[pronlen - 1]; + + if (pronlen == 1) { + /* Is this true ? + No known left context. But all cimaps (for any l) are identical; pick one + */ + /*E_INFO("Single phone word\n"); */ + return (d2p->lrssid[b][0].n_ssid); + } + else { + /* E_INFO("Multiple phone word\n"); */ + lc = dict->word[w].ciphone[pronlen - 2]; + return (d2p->rssid[b][lc].n_ssid); + } + +} + +s3cipid_t * +dict2pid_get_rcmap(dict2pid_t * d2p, s3wid_t w) +{ + int32 pronlen; + s3cipid_t b, lc; + dict_t *dict = d2p->dict; + + pronlen = dict->word[w].pronlen; + b = dict->word[w].ciphone[pronlen - 1]; + + if (pronlen == 1) { + /* Is this true ? + No known left context. 
But all cimaps (for any l) are identical; pick one + */ + /*E_INFO("Single phone word\n"); */ + return (d2p->lrssid[b][0].cimap); + } + else { + /* E_INFO("Multiple phone word\n"); */ + lc = dict->word[w].ciphone[pronlen - 2]; + return (d2p->rssid[b][lc].cimap); + } +} + +static void +free_compress_map(xwdssid_t ** tree, int32 n_ci) +{ + int32 b, l; + for (b = 0; b < n_ci; b++) { + for (l = 0; l < n_ci; l++) { + ckd_free(tree[b][l].ssid); + ckd_free(tree[b][l].cimap); + } + ckd_free(tree[b]); + } + ckd_free(tree); +} + +static void +populate_lrdiph(dict2pid_t *d2p, s3ssid_t ***rdiph_rc, s3cipid_t b) +{ + bin_mdef_t *mdef = d2p->mdef; + s3cipid_t l, r; + + for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { + for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { + s3pid_t p; + p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b, + (s3cipid_t) l, + (s3cipid_t) r, + WORD_POSN_SINGLE); + d2p->lrdiph_rc[b][l][r] + = bin_mdef_pid2ssid(mdef, p); + if (r == bin_mdef_silphone(mdef)) + d2p->ldiph_lc[b][r][l] + = bin_mdef_pid2ssid(mdef, p); + if (rdiph_rc && l == bin_mdef_silphone(mdef)) + rdiph_rc[b][l][r] + = bin_mdef_pid2ssid(mdef, p); + assert(IS_S3SSID(bin_mdef_pid2ssid(mdef, p))); + E_DEBUG("%s(%s,%s) => %d / %d\n", + bin_mdef_ciphone_str(mdef, b), + bin_mdef_ciphone_str(mdef, l), + bin_mdef_ciphone_str(mdef, r), + p, bin_mdef_pid2ssid(mdef, p)); + } + } +} + +int +dict2pid_add_word(dict2pid_t *d2p, + int32 wid) +{ + bin_mdef_t *mdef = d2p->mdef; + dict_t *d = d2p->dict; + + if (dict_pronlen(d, wid) > 1) { + s3cipid_t l; + /* Make sure we have left and right context diphones for this + * word. 
*/ + if (d2p->ldiph_lc[dict_first_phone(d, wid)][dict_second_phone(d, wid)][0] + == BAD_S3SSID) { + E_DEBUG("Filling in left-context diphones for %s(?,%s)\n", + bin_mdef_ciphone_str(mdef, dict_first_phone(d, wid)), + bin_mdef_ciphone_str(mdef, dict_second_phone(d, wid))); + for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { + int p + = bin_mdef_phone_id_nearest(mdef, + dict_first_phone(d, wid), l, + dict_second_phone(d, wid), + WORD_POSN_BEGIN); + d2p->ldiph_lc[dict_first_phone(d, wid)][dict_second_phone(d, wid)][l] + = bin_mdef_pid2ssid(mdef, p); + } + } + if (d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].n_ssid + == 0) { + s3ssid_t *rmap; + s3ssid_t *tmpssid; + s3cipid_t *tmpcimap; + s3cipid_t r; + + E_DEBUG("Filling in right-context diphones for %s(%s,?)\n", + bin_mdef_ciphone_str(mdef, dict_last_phone(d, wid)), + bin_mdef_ciphone_str(mdef, dict_second_last_phone(d, wid))); + rmap = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*rmap)); + for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { + int p + = bin_mdef_phone_id_nearest(mdef, + dict_last_phone(d, wid), + dict_second_last_phone(d, wid), r, + WORD_POSN_END); + rmap[r] = bin_mdef_pid2ssid(mdef, p); + } + tmpssid = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*tmpssid)); + tmpcimap = ckd_calloc(bin_mdef_n_ciphone(mdef), sizeof(*tmpcimap)); + compress_table(rmap, tmpssid, tmpcimap, bin_mdef_n_ciphone(mdef)); + for (r = 0; r < mdef->n_ciphone && tmpssid[r] != BAD_S3SSID; r++) + ; + d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].ssid = tmpssid; + d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].cimap = tmpcimap; + d2p->rssid[dict_last_phone(d, wid)][dict_second_last_phone(d, wid)].n_ssid = r; + ckd_free(rmap); + } + } + else { + /* Make sure we have a left-right context triphone entry for + * this word. 
*/ + E_INFO("Filling in context triphones for %s(?,?)\n", + bin_mdef_ciphone_str(mdef, dict_first_phone(d, wid))); + if (d2p->lrdiph_rc[dict_first_phone(d, wid)][0][0] == BAD_S3SSID) { + populate_lrdiph(d2p, NULL, dict_first_phone(d, wid)); + } + } + + return 0; +} + +s3ssid_t +dict2pid_internal(dict2pid_t *d2p, + int32 wid, + int pos) +{ + int b, l, r, p; + dict_t *dict = d2p->dict; + bin_mdef_t *mdef = d2p->mdef; + + if (pos == 0 || pos == dict_pronlen(dict, wid)) + return BAD_S3SSID; + + b = dict_pron(dict, wid, pos); + l = dict_pron(dict, wid, pos - 1); + r = dict_pron(dict, wid, pos + 1); + p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b, + (s3cipid_t) l, (s3cipid_t) r, + WORD_POSN_INTERNAL); + return bin_mdef_pid2ssid(mdef, p); +} + +dict2pid_t * +dict2pid_build(bin_mdef_t * mdef, dict_t * dict) +{ + dict2pid_t *dict2pid; + s3ssid_t ***rdiph_rc; + bitvec_t *ldiph, *rdiph, *single; + int32 pronlen; + int32 b, l, r, w, p; + + E_INFO("Building PID tables for dictionary\n"); + assert(mdef); + assert(dict); + + dict2pid = (dict2pid_t *) ckd_calloc(1, sizeof(dict2pid_t)); + dict2pid->refcount = 1; + dict2pid->mdef = bin_mdef_retain(mdef); + dict2pid->dict = dict_retain(dict); + E_INFO("Allocating %d^3 * %d bytes (%d KiB) for word-initial triphones\n", + mdef->n_ciphone, sizeof(s3ssid_t), + mdef->n_ciphone * mdef->n_ciphone * mdef->n_ciphone * sizeof(s3ssid_t) / 1024); + dict2pid->ldiph_lc = + (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone, + mdef->n_ciphone, sizeof(s3ssid_t)); + /* Only used internally to generate rssid */ + rdiph_rc = + (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, mdef->n_ciphone, + mdef->n_ciphone, sizeof(s3ssid_t)); + + dict2pid->lrdiph_rc = (s3ssid_t ***) ckd_calloc_3d(mdef->n_ciphone, + mdef->n_ciphone, + mdef->n_ciphone, + sizeof + (s3ssid_t)); + /* Actually could use memset for this, if BAD_S3SSID is guaranteed + * to be 65535... 
*/ + for (b = 0; b < mdef->n_ciphone; ++b) { + for (r = 0; r < mdef->n_ciphone; ++r) { + for (l = 0; l < mdef->n_ciphone; ++l) { + dict2pid->ldiph_lc[b][r][l] = BAD_S3SSID; + dict2pid->lrdiph_rc[b][l][r] = BAD_S3SSID; + rdiph_rc[b][l][r] = BAD_S3SSID; + } + } + } + + /* Track which diphones / ciphones have been seen. */ + ldiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone); + rdiph = bitvec_alloc(mdef->n_ciphone * mdef->n_ciphone); + single = bitvec_alloc(mdef->n_ciphone); + + for (w = 0; w < dict_size(dict2pid->dict); w++) { + pronlen = dict_pronlen(dict, w); + + if (pronlen >= 2) { + b = dict_first_phone(dict, w); + r = dict_second_phone(dict, w); + /* Populate ldiph_lc */ + if (bitvec_is_clear(ldiph, b * mdef->n_ciphone + r)) { + /* Mark this diphone as done */ + bitvec_set(ldiph, b * mdef->n_ciphone + r); + + /* Record all possible ssids for b(?,r) */ + for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { + p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b, + (s3cipid_t) l, (s3cipid_t) r, + WORD_POSN_BEGIN); + dict2pid->ldiph_lc[b][r][l] = bin_mdef_pid2ssid(mdef, p); + } + } + + + /* Populate rdiph_rc */ + l = dict_second_last_phone(dict, w); + b = dict_last_phone(dict, w); + if (bitvec_is_clear(rdiph, b * mdef->n_ciphone + l)) { + /* Mark this diphone as done */ + bitvec_set(rdiph, b * mdef->n_ciphone + l); + + for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { + p = bin_mdef_phone_id_nearest(mdef, (s3cipid_t) b, + (s3cipid_t) l, (s3cipid_t) r, + WORD_POSN_END); + rdiph_rc[b][l][r] = bin_mdef_pid2ssid(mdef, p); + } + } + } + else if (pronlen == 1) { + b = dict_pron(dict, w, 0); + E_DEBUG("Building tables for single phone word %s phone %d = %s\n", + dict_wordstr(dict, w), b, bin_mdef_ciphone_str(mdef, b)); + /* Populate lrdiph_rc (and also ldiph_lc, rdiph_rc if needed) */ + if (bitvec_is_clear(single, b)) { + populate_lrdiph(dict2pid, rdiph_rc, b); + bitvec_set(single, b); + } + } + } + + bitvec_free(ldiph); + bitvec_free(rdiph); + bitvec_free(single); + + /* Try 
to compress rdiph_rc into rdiph_rc_compressed */ + compress_right_context_tree(dict2pid, rdiph_rc); + compress_left_right_context_tree(dict2pid); + + ckd_free_3d(rdiph_rc); + + return dict2pid; +} + +dict2pid_t * +dict2pid_retain(dict2pid_t *d2p) +{ + ++d2p->refcount; + return d2p; +} + +int +dict2pid_free(dict2pid_t * d2p) +{ + if (d2p == NULL) + return 0; + if (--d2p->refcount > 0) + return d2p->refcount; + + if (d2p->ldiph_lc) + ckd_free_3d((void ***) d2p->ldiph_lc); + + if (d2p->lrdiph_rc) + ckd_free_3d((void ***) d2p->lrdiph_rc); + + if (d2p->rssid) + free_compress_map(d2p->rssid, bin_mdef_n_ciphone(d2p->mdef)); + + if (d2p->lrssid) + free_compress_map(d2p->lrssid, bin_mdef_n_ciphone(d2p->mdef)); + + bin_mdef_free(d2p->mdef); + dict_free(d2p->dict); + ckd_free(d2p); + return 0; +} + +void +dict2pid_dump(FILE * fp, dict2pid_t * d2p) +{ + int32 w, p, pronlen; + int32 i, j, b, l, r; + bin_mdef_t *mdef = d2p->mdef; + dict_t *dict = d2p->dict; + + fprintf(fp, "# INTERNAL (wd comssid ssid ssid ... 
ssid comssid)\n"); + for (w = 0; w < dict_size(dict); w++) { + fprintf(fp, "%30s ", dict_wordstr(dict, w)); + + pronlen = dict_pronlen(dict, w); + for (p = 0; p < pronlen; p++) + fprintf(fp, " %5d", dict2pid_internal(d2p, w, p)); + fprintf(fp, "\n"); + } + fprintf(fp, "#\n"); + + fprintf(fp, "# LDIPH_LC (b r l ssid)\n"); + for (b = 0; b < bin_mdef_n_ciphone(mdef); b++) { + for (r = 0; r < bin_mdef_n_ciphone(mdef); r++) { + for (l = 0; l < bin_mdef_n_ciphone(mdef); l++) { + if (IS_S3SSID(d2p->ldiph_lc[b][r][l])) + fprintf(fp, "%6s %6s %6s %5d\n", bin_mdef_ciphone_str(mdef, (s3cipid_t) b), bin_mdef_ciphone_str(mdef, (s3cipid_t) r), bin_mdef_ciphone_str(mdef, (s3cipid_t) l), d2p->ldiph_lc[b][r][l]); /* RAH, ldiph_lc is returning an int32, %d expects an int16 */ + } + } + } + fprintf(fp, "#\n"); + + fprintf(fp, "# SSEQ %d (senid senid ...)\n", mdef->n_sseq); + for (i = 0; i < mdef->n_sseq; i++) { + fprintf(fp, "%5d ", i); + for (j = 0; j < bin_mdef_n_emit_state(mdef); j++) + fprintf(fp, " %5d", mdef->sseq[i][j]); + fprintf(fp, "\n"); + } + fprintf(fp, "#\n"); + fprintf(fp, "# END\n"); + + fflush(fp); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/dict2pid.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/dict2pid.h new file mode 100644 index 0000000000000000000000000000000000000000..c08d3e555cfb543fb1402a3b663f2af414aaccf6 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/dict2pid.h @@ -0,0 +1,187 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2014 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef _S3_DICT2PID_H_ +#define _S3_DICT2PID_H_ + +/* System headers. */ +#include + +/* SphinxBase headers. */ +#include +#include + +/* Local headers. */ +#include +#include "s3types.h" +#include "bin_mdef.h" +#include "dict.h" + +/** \file dict2pid.h + * \brief Building triphones for a dictionary. + * + * This is one of the more complicated parts of a cross-word + * triphone model decoder. The first and last phones of each word + * get their left and right contexts, respectively, from other + * words. 
For single-phone words, both its contexts are from other + * words, simultaneously. As these words are not known beforehand, + * life gets complicated. + */ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * \struct xwdssid_t + * \brief cross word triphone model structure + */ + +typedef struct xwdssid_s { + s3ssid_t *ssid; /**< Senone Sequence ID list for all context ciphones */ + s3cipid_t *cimap; /**< Index into ssid[] above for each ci phone */ + int32 n_ssid; /**< #Unique ssid in above, compressed ssid list */ +} xwdssid_t; + +/** + \struct dict2pid_t + \brief Building composite triphone (as well as word internal triphones) with the dictionary. +*/ + +typedef struct dict2pid_s { + int refcount; + + bin_mdef_t *mdef; /**< Model definition, used to generate + internal ssids on the fly. */ + dict_t *dict; /**< Dictionary this table refers to. */ + + /*Notice the order of the arguments */ + /* FIXME: This is crying out for compression - in Mandarin we have + * 180 context independent phones, which makes this an 11MB + * array. */ + s3ssid_t ***ldiph_lc; /**< For multi-phone words, [base][rc][lc] -> ssid; filled out for + word-initial base x rc combinations in current vocabulary */ + + + xwdssid_t **rssid; /**< Right context state sequence id table + First dimension: base phone, + Second dimension: left context. + */ + + + s3ssid_t ***lrdiph_rc; /**< For single-phone words, [base][lc][rc] -> ssid; filled out for + single-phone base x lc combinations in current vocabulary */ + + xwdssid_t **lrssid; /**< Left-Right context state sequence id table + First dimension: base phone, + Second dimension: left context. 
+ */ +} dict2pid_t; + +/** Access macros; not designed for arbitrary use */ +#define dict2pid_rssid(d,ci,lc) (&(d)->rssid[ci][lc]) +#define dict2pid_ldiph_lc(d,b,r,l) ((d)->ldiph_lc[b][r][l]) +#define dict2pid_lrdiph_rc(d,b,l,r) ((d)->lrdiph_rc[b][l][r]) + +/** + * Build the dict2pid structure for the given model/dictionary + */ +POCKETSPHINX_EXPORT +dict2pid_t *dict2pid_build(bin_mdef_t *mdef, /**< A model definition*/ + dict_t *dict /**< An initialized dictionary */ + ); + +/** + * Retain a pointer to dict2pid + */ +POCKETSPHINX_EXPORT +dict2pid_t *dict2pid_retain(dict2pid_t *d2p); + +/** + * Free the memory dict2pid structure + */ +POCKETSPHINX_EXPORT +int dict2pid_free(dict2pid_t *d2p /**< In: the d2p */ + ); + +/** + * Return the senone sequence ID for the given word position. + */ +s3ssid_t dict2pid_internal(dict2pid_t *d2p, + int32 wid, + int pos); + +/** + * Add a word to the dict2pid structure (after adding it to dict). + */ +POCKETSPHINX_EXPORT +int dict2pid_add_word(dict2pid_t *d2p, + int32 wid); + +/** + * For debugging + */ +void dict2pid_dump(FILE *fp, /**< In: a file pointer */ + dict2pid_t *d2p /**< In: a dict2pid_t structure */ + ); + +/** Report a dict2pid data structure */ +void dict2pid_report(dict2pid_t *d2p /**< In: a dict2pid_t structure */ + ); + +/** + * Get number of rc + */ +int32 get_rc_nssid(dict2pid_t *d2p, /**< In: a dict2pid */ + s3wid_t w /**< In: a wid */ + ); + +/** + * Get RC map + */ +s3cipid_t* dict2pid_get_rcmap(dict2pid_t *d2p, /**< In: a dict2pid */ + s3wid_t w /**< In: a wid */ + ); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fast_ptm.txt b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fast_ptm.txt new file mode 100644 index 0000000000000000000000000000000000000000..e3297ea5791ff0cf3edaba7258acb91e6fc61adf --- /dev/null +++ 
b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fast_ptm.txt @@ -0,0 +1,23 @@ +Ideas for accelerating PTM computation +-------------------------------------- + +First thing to note is that codebook computation now takes up the +majority of the time spent evaluating PTMs. So speeding up Gaussian +evaluation is suddenly important again. + +Using a tighter top-N beam will speed up Gaussian computation by +imposing a higher floor on densities, but this effect isn't worth a +whole lot, in contrast to SC models where mixture computation rather +than density computation is the most expensive part. + +This means that we should probably bring back kd-trees, although the +implementation should be tweaked to be faster loading. + +Also, maybe more importantly, we can do some form of CI-GMM selection +on the codebooks. This won't actually work with the way the models +are set up currently since the CI phones share the same codebook as +the CD ones, and the goal is to prune codebooks rather than phones. + +But wait! It's okay, because we still have the same top-N mechanism +as before. We can use those top-N scores to do early pruning of +entire codebooks. This ought to give us the most bang for the buck. diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_interface.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_interface.c new file mode 100644 index 0000000000000000000000000000000000000000..43d11eb99dc36abe224b96a00c5b0092e7f5e856 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_interface.c @@ -0,0 +1,692 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1996-2004 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +#include +#include +#include +#include +#include + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "sphinxbase/prim_type.h" +#include "sphinxbase/byteorder.h" +#include "sphinxbase/fixpoint.h" +#include "sphinxbase/genrand.h" +#include "sphinxbase/err.h" +#include "sphinxbase/cmd_ln.h" +#include "sphinxbase/ckd_alloc.h" + +#include "fe_internal.h" +#include "fe_warp.h" + +static const arg_t fe_args[] = { + waveform_to_cepstral_command_line_macro(), + { NULL, 0, NULL, NULL } +}; + +int +fe_parse_general_params(cmd_ln_t *config, fe_t * fe) +{ + int j, frate, window_samples; + + fe->config = cmd_ln_retain(config); + fe->sampling_rate = cmd_ln_float32_r(config, "-samprate"); + frate = cmd_ln_int32_r(config, "-frate"); + if (frate > MAX_INT16 || frate > fe->sampling_rate || frate < 1) { + E_ERROR + ("Frame rate %d can not be bigger than sample rate %.02f\n", + frate, fe->sampling_rate); + return -1; + } + + fe->frame_rate = (int16)frate; + if (cmd_ln_boolean_r(config, "-dither")) { + fe->dither = 1; + fe->dither_seed = cmd_ln_int32_r(config, "-seed"); + } +#ifdef WORDS_BIGENDIAN + /* i.e. if input_endian is *not* "big", then fe->swap is true. */ + fe->swap = strcmp("big", cmd_ln_str_r(config, "-input_endian")); +#else + /* and vice versa */ + fe->swap = strcmp("little", cmd_ln_str_r(config, "-input_endian")); +#endif + fe->window_length = cmd_ln_float32_r(config, "-wlen"); + fe->pre_emphasis_alpha = cmd_ln_float32_r(config, "-alpha"); + + fe->num_cepstra = (uint8)cmd_ln_int32_r(config, "-ncep"); + fe->fft_size = (int16)cmd_ln_int32_r(config, "-nfft"); + + window_samples = (int)(fe->window_length * fe->sampling_rate); + E_INFO("Frames are %d samples at intervals of %d\n", + window_samples, (int)(fe->sampling_rate / frate)); + if (window_samples > MAX_INT16) { + /* This is extremely unlikely! 
*/ + E_ERROR("Frame size exceeds maximum FFT size (%d > %d)\n", + window_samples, MAX_INT16); + return -1; + } + + /* Set FFT size automatically from window size. */ + if (fe->fft_size == 0) { + fe->fft_order = 0; + fe->fft_size = (1<fft_order); + while (fe->fft_size < window_samples) { + fe->fft_order++; + fe->fft_size <<= 1; + } + E_INFO("FFT size automatically set to %d\n", fe->fft_size); + } + else { + /* Check FFT size, compute FFT order (log_2(n)) */ + for (j = fe->fft_size, fe->fft_order = 0; j > 1; j >>= 1, fe->fft_order++) { + if (((j % 2) != 0) || (fe->fft_size <= 0)) { + E_ERROR("fft: number of points must be a power of 2 (is %d)\n", + fe->fft_size); + return -1; + } + } + /* Verify that FFT size is greater or equal to window length. */ + if (fe->fft_size < window_samples) { + E_ERROR("FFT: Number of points must be greater or " + "equal to frame size\n"); + return -1; + } + } + + fe->remove_dc = cmd_ln_boolean_r(config, "-remove_dc"); + + if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "dct")) + fe->transform = DCT_II; + else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "legacy")) + fe->transform = LEGACY_DCT; + else if (0 == strcmp(cmd_ln_str_r(config, "-transform"), "htk")) + fe->transform = DCT_HTK; + else { + E_ERROR("Invalid transform type (values are 'dct', 'legacy', 'htk')\n"); + return -1; + } + + if (cmd_ln_boolean_r(config, "-logspec")) + fe->log_spec = RAW_LOG_SPEC; + if (cmd_ln_boolean_r(config, "-smoothspec")) + fe->log_spec = SMOOTH_LOG_SPEC; + + return 0; +} + +static int +fe_parse_melfb_params(cmd_ln_t *config, fe_t *fe, melfb_t * mel) +{ + mel->sampling_rate = fe->sampling_rate; + mel->fft_size = fe->fft_size; + mel->num_cepstra = fe->num_cepstra; + mel->num_filters = cmd_ln_int32_r(config, "-nfilt"); + + if (fe->log_spec) + fe->feature_dimension = mel->num_filters; + else + fe->feature_dimension = fe->num_cepstra; + + mel->upper_filt_freq = cmd_ln_float32_r(config, "-upperf"); + mel->lower_filt_freq = cmd_ln_float32_r(config, 
"-lowerf"); + + mel->doublewide = cmd_ln_boolean_r(config, "-doublebw"); + + mel->warp_type = cmd_ln_str_r(config, "-warp_type"); + mel->warp_params = cmd_ln_str_r(config, "-warp_params"); + mel->lifter_val = cmd_ln_int32_r(config, "-lifter"); + + mel->unit_area = cmd_ln_boolean_r(config, "-unit_area"); + mel->round_filters = cmd_ln_boolean_r(config, "-round_filters"); + + if (fe_warp_set(mel, mel->warp_type) != FE_SUCCESS) { + E_ERROR("Failed to initialize the warping function.\n"); + return -1; + } + fe_warp_set_parameters(mel, mel->warp_params, mel->sampling_rate); + return 0; +} + +void +fe_print_current(fe_t const *fe) +{ + E_INFO("Current FE Parameters:\n"); + E_INFO("\tSampling Rate: %f\n", fe->sampling_rate); + E_INFO("\tFrame Size: %d\n", fe->frame_size); + E_INFO("\tFrame Shift: %d\n", fe->frame_shift); + E_INFO("\tFFT Size: %d\n", fe->fft_size); + E_INFO("\tLower Frequency: %g\n", + fe->mel_fb->lower_filt_freq); + E_INFO("\tUpper Frequency: %g\n", + fe->mel_fb->upper_filt_freq); + E_INFO("\tNumber of filters: %d\n", fe->mel_fb->num_filters); + E_INFO("\tNumber of Overflow Samps: %d\n", fe->num_overflow_samps); + E_INFO("Will %sremove DC offset at frame level\n", + fe->remove_dc ? "" : "not "); + if (fe->dither) { + E_INFO("Will add dither to audio\n"); + E_INFO("Dither seeded with %d\n", fe->dither_seed); + } + else { + E_INFO("Will not add dither to audio\n"); + } + if (fe->mel_fb->lifter_val) { + E_INFO("Will apply sine-curve liftering, period %d\n", + fe->mel_fb->lifter_val); + } + E_INFO("Will %snormalize filters to unit area\n", + fe->mel_fb->unit_area ? "" : "not "); + E_INFO("Will %sround filter frequencies to DFT points\n", + fe->mel_fb->round_filters ? "" : "not "); + E_INFO("Will %suse double bandwidth in mel filter\n", + fe->mel_fb->doublewide ? 
"" : "not "); +} + +fe_t * +fe_init_auto_r(cmd_ln_t *config) +{ + fe_t *fe; + + fe = (fe_t*)ckd_calloc(1, sizeof(*fe)); + fe->refcount = 1; + + /* transfer params to front end */ + if (fe_parse_general_params(config, fe) < 0) { + fe_free(fe); + return NULL; + } + + /* compute remaining fe parameters */ + /* We add 0.5 so approximate the float with the closest + * integer. E.g., 2.3 is truncate to 2, whereas 3.7 becomes 4 + */ + fe->frame_shift = (int32) (fe->sampling_rate / fe->frame_rate + 0.5); + fe->frame_size = (int32) (fe->window_length * fe->sampling_rate + 0.5); + fe->pre_emphasis_prior = 0; + + assert (fe->frame_shift > 1); + if (fe->frame_size < fe->frame_shift) { + E_ERROR + ("Frame size %d (-wlen) must be greater than frame shift %d (-frate)\n", + fe->frame_size, fe->frame_shift); + fe_free(fe); + return NULL; + } + + if (fe->frame_size > (fe->fft_size)) { + E_ERROR + ("Number of FFT points has to be a power of 2 higher than %d, it is %d\n", + fe->frame_size, fe->fft_size); + fe_free(fe); + return NULL; + } + + if (fe->dither) + fe_init_dither(fe->dither_seed); + + /* establish buffers for overflow samps and hamming window */ + fe->overflow_samps = ckd_calloc(fe->frame_size, sizeof(float32)); + fe->hamming_window = ckd_calloc(fe->frame_size/2, sizeof(window_t)); + + /* create hamming window */ + fe_create_hamming(fe->hamming_window, fe->frame_size); + + /* init and fill appropriate filter structure */ + fe->mel_fb = ckd_calloc(1, sizeof(*fe->mel_fb)); + + /* transfer params to mel fb */ + fe_parse_melfb_params(config, fe, fe->mel_fb); + + if (fe->mel_fb->upper_filt_freq > fe->sampling_rate / 2 + 1.0) { + E_ERROR("Upper frequency %.1f is higher than samprate/2 (%.1f)\n", + fe->mel_fb->upper_filt_freq, fe->sampling_rate / 2); + fe_free(fe); + return NULL; + } + + fe_build_melfilters(fe->mel_fb); + fe_compute_melcosine(fe->mel_fb); + if (cmd_ln_boolean_r(config, "-remove_noise")) + fe->noise_stats = fe_init_noisestats(fe->mel_fb->num_filters); + + /* Create 
temporary FFT, spectrum and mel-spectrum buffers. */ + /* FIXME: Gosh there are a lot of these. */ + fe->spch = ckd_calloc(fe->frame_size, sizeof(*fe->spch)); + fe->frame = ckd_calloc(fe->fft_size, sizeof(*fe->frame)); + fe->spec = ckd_calloc(fe->fft_size, sizeof(*fe->spec)); + fe->mfspec = ckd_calloc(fe->mel_fb->num_filters, sizeof(*fe->mfspec)); + + /* create twiddle factors */ + fe->ccc = ckd_calloc(fe->fft_size / 4, sizeof(*fe->ccc)); + fe->sss = ckd_calloc(fe->fft_size / 4, sizeof(*fe->sss)); + fe_create_twiddle(fe); + + if (cmd_ln_boolean_r(config, "-verbose")) { + fe_print_current(fe); + } + + /*** Initialize the overflow buffers ***/ + fe_start_utt(fe); + return fe; +} + +arg_t const * +fe_get_args(void) +{ + return fe_args; +} + +cmd_ln_t * +fe_get_config(fe_t *fe) +{ + return fe->config; +} + +void +fe_init_dither(int32 seed) +{ + E_INFO("You are using %d as the seed.\n", seed); + s3_rand_seed(seed); +} + +int32 +fe_start_utt(fe_t * fe) +{ + fe->num_overflow_samps = 0; + // Does the same thing as above, but whatever... + memset(fe->overflow_samps, 0, + fe->frame_size * sizeof(*fe->overflow_samps)); + fe->pre_emphasis_prior = 0; + return 0; +} + +int +fe_get_output_size(fe_t *fe) +{ + return (int)fe->feature_dimension; +} + +void +fe_get_input_size(fe_t *fe, int *out_frame_shift, + int *out_frame_size) +{ + if (out_frame_shift) + *out_frame_shift = fe->frame_shift; + if (out_frame_size) + *out_frame_size = fe->frame_size; +} + +int32 +fe_process_frame(fe_t * fe, int16 const *spch, int32 nsamps, mfcc_t * fr_cep) +{ + fe_read_frame_int16(fe, spch, nsamps); + return fe_write_frame(fe, fr_cep); +} + +int +fe_process_frames_int16(fe_t *fe, + int16 const **inout_spch, + size_t *inout_nsamps, + mfcc_t **buf_cep, + int32 *inout_nframes) +{ + int32 frame_count; + int outidx, i, n_overflow, orig_n_overflow; + int16 const *orig_spch; + + /* In the special case where there is no output buffer, return the + * maximum number of frames which would be generated. 
*/ + if (buf_cep == NULL) { + if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) + *inout_nframes = 0; + else + *inout_nframes = 1 + + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size) + / fe->frame_shift); + return *inout_nframes; + } + + /* Are there not enough samples to make at least 1 frame? */ + if (*inout_nsamps + fe->num_overflow_samps < (size_t)fe->frame_size) { + if (*inout_nsamps > 0) { + /* Append them to the overflow buffer. */ + memcpy(fe->overflow_samps + fe->num_overflow_samps, + *inout_spch, *inout_nsamps * (sizeof(**inout_spch))); + fe->num_overflow_samps += *inout_nsamps; + /* Update input-output pointers and counters. */ + *inout_spch += *inout_nsamps; + *inout_nsamps = 0; + } + /* We produced no frames of output, sorry! */ + *inout_nframes = 0; + return 0; + } + + /* Can't write a frame? Then do nothing! */ + if (*inout_nframes < 1) { + *inout_nframes = 0; + return 0; + } + + /* Keep track of the original start of the buffer. */ + orig_spch = *inout_spch; + orig_n_overflow = fe->num_overflow_samps; + /* How many frames will we be able to get? */ + frame_count = 1 + + ((*inout_nsamps + fe->num_overflow_samps - fe->frame_size) + / fe->frame_shift); + /* Limit it to the number of output frames available. */ + if (frame_count > *inout_nframes) + frame_count = *inout_nframes; + /* Index of output frame. */ + outidx = 0; + + /* Start processing, taking care of any incoming overflow. */ + if (fe->num_overflow_samps) { + int offset = fe->frame_size - fe->num_overflow_samps; + + /* Append start of spch to overflow samples to make a full frame. */ + memcpy(fe->overflow_samps + fe->num_overflow_samps, + *inout_spch, offset * sizeof(**inout_spch)); + fe_read_frame_int16(fe, fe->overflow_samps, fe->frame_size); + assert(outidx < frame_count); + fe_write_frame(fe, buf_cep[outidx]); + outidx++; + /* Update input-output pointers and counters. 
*/ + *inout_spch += offset; + *inout_nsamps -= offset; + fe->num_overflow_samps -= fe->frame_shift; + } + else { + fe_read_frame_int16(fe, *inout_spch, fe->frame_size); + assert(outidx < frame_count); + fe_write_frame(fe, buf_cep[outidx]); + outidx++; + /* Update input-output pointers and counters. */ + *inout_spch += fe->frame_size; + *inout_nsamps -= fe->frame_size; + } + + /* Process all remaining frames. */ + for (i = 1; i < frame_count; ++i) { + assert(*inout_nsamps >= (size_t)fe->frame_shift); + + fe_shift_frame_int16(fe, *inout_spch, fe->frame_shift); + assert(outidx < frame_count); + fe_write_frame(fe, buf_cep[outidx]); + outidx++; + /* Update input-output pointers and counters. */ + *inout_spch += fe->frame_shift; + *inout_nsamps -= fe->frame_shift; + /* Amount of data behind the original input which is still needed. */ + if (fe->num_overflow_samps > 0) + fe->num_overflow_samps -= fe->frame_shift; + } + + /* How many relevant overflow samples are there left? */ + if (fe->num_overflow_samps <= 0) { + /* Maximum number of overflow samples past *inout_spch to save. */ + n_overflow = *inout_nsamps; + if (n_overflow > fe->frame_shift) + n_overflow = fe->frame_shift; + fe->num_overflow_samps = fe->frame_size - fe->frame_shift; + /* Make sure this isn't an illegal read! */ + if (fe->num_overflow_samps > *inout_spch - orig_spch) + fe->num_overflow_samps = *inout_spch - orig_spch; + fe->num_overflow_samps += n_overflow; + if (fe->num_overflow_samps > 0) { + memcpy(fe->overflow_samps, + *inout_spch - (fe->frame_size - fe->frame_shift), + fe->num_overflow_samps * sizeof(**inout_spch)); + /* Update the input pointer to cover this stuff. */ + *inout_spch += n_overflow; + *inout_nsamps -= n_overflow; + } + } + else { + /* There is still some relevant data left in the overflow buffer. */ + /* Shift existing data to the beginning. 
*/ + memmove(fe->overflow_samps, + fe->overflow_samps + orig_n_overflow - fe->num_overflow_samps, + fe->num_overflow_samps * sizeof(*fe->overflow_samps)); + /* Copy in whatever we had in the original speech buffer. */ + n_overflow = *inout_spch - orig_spch + *inout_nsamps; + if (n_overflow > fe->frame_size - fe->num_overflow_samps) + n_overflow = fe->frame_size - fe->num_overflow_samps; + memcpy(fe->overflow_samps + fe->num_overflow_samps, + orig_spch, n_overflow * sizeof(*orig_spch)); + fe->num_overflow_samps += n_overflow; + /* Advance the input pointers. */ + if (n_overflow > *inout_spch - orig_spch) { + n_overflow -= (*inout_spch - orig_spch); + *inout_spch += n_overflow; + *inout_nsamps -= n_overflow; + } + } + + /* Finally update the frame counter with the number of frames we procesed. */ + *inout_nframes = outidx; /* FIXME: Not sure why I wrote it this way... */ + return 0; +} + +int +fe_process_frames(fe_t *fe, + int16 const **inout_spch, + size_t *inout_nsamps, + mfcc_t **buf_cep, + int32 *inout_nframes) +{ + return fe_process_frames_int16(fe, inout_spch, inout_nsamps, buf_cep, inout_nframes); +} + +int +fe_process_utt(fe_t * fe, int16 const * spch, size_t nsamps, + mfcc_t *** cep_block, int32 * nframes) +{ + mfcc_t **cep; + int rv; + + /* Figure out how many frames we will need. */ + fe_process_frames_int16(fe, NULL, &nsamps, NULL, nframes); + /* Create the output buffer (it has to exist, even if there are no output frames). */ + if (*nframes) + cep = (mfcc_t **)ckd_calloc_2d(*nframes, fe->feature_dimension, sizeof(**cep)); + else + cep = (mfcc_t **)ckd_calloc_2d(1, fe->feature_dimension, sizeof(**cep)); + /* Now just call fe_process_frames() with the allocated buffer. */ + rv = fe_process_frames_int16(fe, &spch, &nsamps, cep, nframes); + *cep_block = cep; + + return rv; +} + +int32 +fe_end_utt(fe_t * fe, mfcc_t * cepvector, int32 * nframes) +{ + /* Process any remaining data. 
*/ + if (fe->num_overflow_samps > 0) { + fe_read_frame_int16(fe, fe->overflow_samps, + fe->num_overflow_samps); + fe_write_frame(fe, cepvector); + *nframes = 1; + } + else { + *nframes = 0; + } + + /* reset overflow buffers... */ + fe->num_overflow_samps = 0; + + return 0; +} + +fe_t * +fe_retain(fe_t *fe) +{ + ++fe->refcount; + return fe; +} + +int +fe_free(fe_t * fe) +{ + if (fe == NULL) + return 0; + if (--fe->refcount > 0) + return fe->refcount; + + /* kill FE instance - free everything... */ + if (fe->mel_fb) { + if (fe->mel_fb->mel_cosine) + fe_free_2d((void *) fe->mel_fb->mel_cosine); + ckd_free(fe->mel_fb->lifter); + ckd_free(fe->mel_fb->spec_start); + ckd_free(fe->mel_fb->filt_start); + ckd_free(fe->mel_fb->filt_width); + ckd_free(fe->mel_fb->filt_coeffs); + ckd_free(fe->mel_fb); + } + ckd_free(fe->spch); + ckd_free(fe->frame); + ckd_free(fe->ccc); + ckd_free(fe->sss); + ckd_free(fe->spec); + ckd_free(fe->mfspec); + ckd_free(fe->overflow_samps); + ckd_free(fe->hamming_window); + if (fe->noise_stats) + fe_free_noisestats(fe->noise_stats); + cmd_ln_free_r(fe->config); + ckd_free(fe); + + return 0; +} + +/** + * Convert a block of mfcc_t to float32 (can be done in-place) + **/ +int32 +fe_mfcc_to_float(fe_t * fe, + mfcc_t ** input, float32 ** output, int32 nframes) +{ + int32 i; + +#ifndef FIXED_POINT + if ((void *) input == (void *) output) + return nframes * fe->feature_dimension; +#endif + for (i = 0; i < nframes * fe->feature_dimension; ++i) + output[0][i] = MFCC2FLOAT(input[0][i]); + + return i; +} + +/** + * Convert a block of float32 to mfcc_t (can be done in-place) + **/ +int32 +fe_float_to_mfcc(fe_t * fe, + float32 ** input, mfcc_t ** output, int32 nframes) +{ + int32 i; + +#ifndef FIXED_POINT + if ((void *) input == (void *) output) + return nframes * fe->feature_dimension; +#endif + for (i = 0; i < nframes * fe->feature_dimension; ++i) + output[0][i] = FLOAT2MFCC(input[0][i]); + + return i; +} + +int32 +fe_logspec_to_mfcc(fe_t * fe, const mfcc_t * 
fr_spec, mfcc_t * fr_cep) +{ +#ifdef FIXED_POINT + fe_spec2cep(fe, fr_spec, fr_cep); +#else /* ! FIXED_POINT */ + powspec_t *powspec; + int32 i; + + powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t)); + for (i = 0; i < fe->mel_fb->num_filters; ++i) + powspec[i] = (powspec_t) fr_spec[i]; + fe_spec2cep(fe, powspec, fr_cep); + ckd_free(powspec); +#endif /* ! FIXED_POINT */ + return 0; +} +/** + * Apply fe_dct2() to one frame of log spectrum (fr_spec), writing the + * result to fr_cep; the htk argument of fe_dct2() is always 0. + * + * With FIXED_POINT the input is used directly; otherwise + * fe->mel_fb->num_filters values are first cast into a temporary + * powspec_t buffer, which is freed before returning. Always returns 0. + */ +int32 +fe_logspec_dct2(fe_t * fe, const mfcc_t * fr_spec, mfcc_t * fr_cep) +{ +#ifdef FIXED_POINT + fe_dct2(fe, fr_spec, fr_cep, 0); +#else /* ! FIXED_POINT */ + powspec_t *powspec; + int32 i; + + powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t)); + for (i = 0; i < fe->mel_fb->num_filters; ++i) + powspec[i] = (powspec_t) fr_spec[i]; + fe_dct2(fe, powspec, fr_cep, 0); + ckd_free(powspec); +#endif /* ! FIXED_POINT */ + return 0; +} +/** + * Apply fe_dct3() to one frame of cepstra (fr_cep), writing the result + * to fr_spec. + * + * With FIXED_POINT the output is written directly; otherwise fe_dct3() + * fills a temporary powspec_t buffer of fe->mel_fb->num_filters values, + * each of which is then cast to mfcc_t and stored in fr_spec. + * Always returns 0. + */ +int32 +fe_mfcc_dct3(fe_t * fe, const mfcc_t * fr_cep, mfcc_t * fr_spec) +{ +#ifdef FIXED_POINT + fe_dct3(fe, fr_cep, fr_spec); +#else /* ! FIXED_POINT */ + powspec_t *powspec; + int32 i; + + powspec = ckd_malloc(fe->mel_fb->num_filters * sizeof(powspec_t)); + fe_dct3(fe, fr_cep, powspec); + for (i = 0; i < fe->mel_fb->num_filters; ++i) + fr_spec[i] = (mfcc_t) powspec[i]; + ckd_free(powspec); +#endif /* ! FIXED_POINT */ + return 0; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_internal.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..cd2a9d3a1f0cae7bbc4b7388718fb57b64a7baad --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_internal.h @@ -0,0 +1,181 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1996-2004 Carnegie Mellon University. All rights + * reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef __FE_INTERNAL_H__ +#define __FE_INTERNAL_H__ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "sphinxbase/fe.h" +#include "sphinxbase/fixpoint.h" + +#include "fe_noise.h" +#include "fe_type.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +/* Values for the 'logspec' field. 
*/ +enum { + RAW_LOG_SPEC = 1, + SMOOTH_LOG_SPEC = 2 +}; + +/* Values for the 'transform' field of struct fe_s. */ +enum { + LEGACY_DCT = 0, + DCT_II = 1, + DCT_HTK = 2 +}; + +typedef struct melfb_s melfb_t; +/** + * Mel filterbank description together with the DCT and liftering tables + * used for MFCC computation. The pointer members mel_cosine, lifter, + * spec_start, filt_start, filt_width and filt_coeffs are released by + * fe_free(). + */ +struct melfb_s { + float32 sampling_rate; + int32 num_cepstra; + int32 num_filters; + int32 fft_size; + float32 lower_filt_freq; + float32 upper_filt_freq; + /* DCT coefficients. */ + mfcc_t **mel_cosine; + /* Filter coefficients. */ + mfcc_t *filt_coeffs; + int16 *spec_start; + int16 *filt_start; + int16 *filt_width; + /* Luxury mobile home. NOTE(review): this says nothing about what doublewide controls, and its use is not visible in this header -- confirm. */ + int32 doublewide; + char const *warp_type; + char const *warp_params; + uint32 warp_id; + /* Precomputed normalization constants for unitary DCT-II/DCT-III */ + mfcc_t sqrt_inv_n, sqrt_inv_2n; + /* Value and coefficients for HTK-style liftering */ + int32 lifter_val; + mfcc_t *lifter; + /* Normalize filters to unit area */ + int32 unit_area; + /* Round filter frequencies to DFT points (hurts accuracy, but is + useful for legacy purposes) */ + int32 round_filters; +}; + +/* sqrt(1/2), also used for unitary DCT-II/DCT-III */ +#define SQRT_HALF FLOAT2MFCC(0.707106781186548) + +/** + * Structure for the front-end computation. + * + * refcount is incremented by fe_retain() and decremented by fe_free(), + * which releases the object once the count is no longer positive. + * feature_dimension is the per-frame value count used when sizing and + * converting feature buffers (see fe_process_utt() and fe_mfcc_to_float()). + */ +struct fe_s { + cmd_ln_t *config; + int refcount; + + float32 sampling_rate; + int16 frame_rate; + int16 frame_shift; + + float32 window_length; + int16 frame_size; + int16 fft_size; + + uint8 fft_order; + uint8 feature_dimension; + uint8 num_cepstra; + uint8 remove_dc; + + uint8 log_spec; + uint8 swap; + uint8 dither; + uint8 transform; + + float32 pre_emphasis_alpha; + int32 dither_seed; + + /* Twiddle factors for FFT. */ + frame_t *ccc, *sss; + /* Mel filter parameters. */ + melfb_t *mel_fb; + /* Half of a Hamming Window. */ + window_t *hamming_window; + + /* Temporary buffers for processing.
*/ + int16 *spch; + frame_t *frame; + powspec_t *spec, *mfspec; +/* Input samples not yet turned into a frame; num_overflow_samps of them are valid and fe_end_utt() flushes them. */ int16 *overflow_samps; + int num_overflow_samps; + int16 pre_emphasis_prior; + /* Noise removal state; fe_remove_noise() does nothing when this is NULL and fe_free() releases it. */ + noise_stats_t *noise_stats; +}; + +void fe_init_dither(int32 seed); + +/* Load a frame of data into the fe. */ +int fe_read_frame_int16(fe_t *fe, int16 const *in, int32 len); + +/* Shift the input buffer back and read more data. */ +int fe_shift_frame_int16(fe_t *fe, int16 const *in, int32 len); + +/* Process a frame of data into features. */ +int fe_write_frame(fe_t *fe, mfcc_t *fea); + +/* Initialization functions. */ +int32 fe_build_melfilters(melfb_t *MEL_FB); +int32 fe_compute_melcosine(melfb_t *MEL_FB); +void fe_create_hamming(window_t *in, int32 in_len); +void fe_create_twiddle(fe_t *fe); +/* Fixed-point log-domain helpers: given x = log(a) and y = log(b), fe_log_add() approximates log(a + b) from a lookup table; fe_log_sub() is the matching subtraction used for spectral subtraction. */ +fixed32 fe_log_add(fixed32 x, fixed32 y); +fixed32 fe_log_sub(fixed32 x, fixed32 y); + +/* Miscellaneous processing functions. */ +void fe_spec2cep(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep); +void fe_dct2(fe_t *fe, const powspec_t *mflogspec, mfcc_t *mfcep, int htk); +void fe_dct3(fe_t *fe, const mfcc_t *mfcep, powspec_t *mflogspec); + +#ifdef __cplusplus +} +#endif + +#endif /* __FE_INTERNAL_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_noise.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_noise.c new file mode 100644 index 0000000000000000000000000000000000000000..b8de9098bb2ef2f35e36924459e1eb18f5e4935a --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_noise.c @@ -0,0 +1,364 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2013 Carnegie Mellon University. All rights + * reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* This noise removal algorithm is inspired by the following papers + * Computationally Efficient Speech Enhancement by Spectral Minima Tracking + * by G. Doblinger + * + * Power-Normalized Cepstral Coefficients (PNCC) for Robust Speech Recognition + * by C. Kim.
+ * + * For the recent research and state of art see papers about IMRCA and + * A Minimum-Mean-Square-Error Noise Reduction Algorithm On Mel-Frequency + * Cepstra For Robust Speech Recognition by Dong Yu and others + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include + +#include "sphinxbase/prim_type.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/err.h" + +#include "fe_noise.h" +#include "fe_internal.h" + +/* Noise suppression constants. SMOOTH_WINDOW is the half-width, in filters, of the averaging window in fe_weight_smooth(); LAMBDA_*, MU_T and MAX_GAIN are loaded into struct noise_stats_s by fe_init_noisestats(). NOTE(review): SLOW_PEAK_* and SPEECH_VOLUME_RANGE do not appear to be referenced by the functions in this file -- confirm they are still needed. */ +#define SMOOTH_WINDOW 4 +#define LAMBDA_POWER 0.7 +#define LAMBDA_A 0.995 +#define LAMBDA_B 0.5 +#define LAMBDA_T 0.85 +#define MU_T 0.2 +#define MAX_GAIN 20 +#define SLOW_PEAK_FORGET_FACTOR 0.9995 +#define SLOW_PEAK_LEARN_FACTOR 0.9 +#define SPEECH_VOLUME_RANGE 8.0 +/** + * State kept between frames for noise removal. The pointer members are + * arrays of num_filters values allocated by fe_init_noisestats(). + */ +struct noise_stats_s { + /* Smoothed power */ + powspec_t *power; + /* Noise estimate */ + powspec_t *noise; + /* Signal floor estimate */ + powspec_t *floor; + /* Peak for temporal masking */ + powspec_t *peak; + /* Per-filter work buffers filled in by fe_remove_noise() */ + powspec_t *signal, *gain; + + /* When set, fe_remove_noise() re-seeds the statistics from the next frame it sees */ + int undefined; + /* Number of items to process */ + int num_filters; + + /* Sum of slow peaks for VAD */ + powspec_t slow_peak_sum; + + /* Precomputed constants: plain values in floating-point builds, FLOAT2FIX(log(value)) when FIXED_POINT is defined; each comp_* member holds 1 - lambda */ + powspec_t lambda_power; + powspec_t comp_lambda_power; + powspec_t lambda_a; + powspec_t comp_lambda_a; + powspec_t lambda_b; + powspec_t comp_lambda_b; + powspec_t lambda_t; + powspec_t mu_t; + powspec_t max_gain; + powspec_t inv_max_gain; +/* Indexed by window length i: 1/i in floating-point builds, log(i) in fixed-point builds; only the fixed-point branch of fe_weight_smooth() reads it */ + powspec_t smooth_scaling[2 * SMOOTH_WINDOW + 3]; +}; +/** + * Update a per-filter envelope of buf, kept in floor_buf. + * + * Each floor_buf[i] becomes a weighted mix of its old value and buf[i]: + * the weights are lambda_a / comp_lambda_a when buf[i] >= floor_buf[i] + * and lambda_b / comp_lambda_b otherwise. With LAMBDA_A = 0.995 and + * LAMBDA_B = 0.5 the envelope rises slowly and falls quickly. In + * FIXED_POINT builds the same mix is formed in the log domain with + * fe_log_add(). + */ +static void +fe_lower_envelope(noise_stats_t *noise_stats, const powspec_t *buf, powspec_t *floor_buf, int32 num_filt) +{ + int i; + + for (i = 0; i < num_filt; i++) { +#ifndef FIXED_POINT + if (buf[i] >= floor_buf[i]) { + floor_buf[i] = + noise_stats->lambda_a * floor_buf[i] + noise_stats->comp_lambda_a * buf[i]; + } + else { + floor_buf[i] = + noise_stats->lambda_b * floor_buf[i] + noise_stats->comp_lambda_b * buf[i]; + } +#else + if (buf[i]
>= floor_buf[i]) { + floor_buf[i] = fe_log_add(noise_stats->lambda_a + floor_buf[i], + noise_stats->comp_lambda_a + buf[i]); + } + else { + floor_buf[i] = fe_log_add(noise_stats->lambda_b + floor_buf[i], + noise_stats->comp_lambda_b + buf[i]); + } +#endif + } +} + +/* temporal masking: the running peak of each filter is first decayed by lambda_t; an input below lambda_t times that decayed peak is replaced by mu_t times the decayed peak; finally the peak is raised to the original input when that is larger. The FIXED_POINT branch does the same with additions in the log domain. NOTE(review): lambda_t is applied to the peak and then again inside the comparison -- confirm this is intended. */ +static void +fe_temp_masking(noise_stats_t *noise_stats, powspec_t * buf, powspec_t * peak, int32 num_filt) +{ + powspec_t cur_in; + int i; + + for (i = 0; i < num_filt; i++) { + cur_in = buf[i]; + +#ifndef FIXED_POINT + peak[i] *= noise_stats->lambda_t; + if (buf[i] < noise_stats->lambda_t * peak[i]) + buf[i] = peak[i] * noise_stats->mu_t; +#else + peak[i] += noise_stats->lambda_t; + if (buf[i] < noise_stats->lambda_t + peak[i]) + buf[i] = peak[i] + noise_stats->mu_t; +#endif + + if (cur_in > peak[i]) + peak[i] = cur_in; + } +} + +/* spectral weight smoothing: each buf[i] is scaled by the mean of coefs[] over the window [i - SMOOTH_WINDOW, i + SMOOTH_WINDOW], clipped to the valid filter indices. In FIXED_POINT builds the mean is formed in the log domain with fe_log_add() and smooth_scaling[]. */ +static void +fe_weight_smooth(noise_stats_t *noise_stats, powspec_t * buf, powspec_t * coefs, int32 num_filt) +{ + int i, j; + int l1, l2; + powspec_t coef; + + for (i = 0; i < num_filt; i++) { + l1 = ((i - SMOOTH_WINDOW) > 0) ? (i - SMOOTH_WINDOW) : 0; + l2 = ((i + SMOOTH_WINDOW) < + (num_filt - 1)) ?
(i + SMOOTH_WINDOW) : (num_filt - 1); +/* [l1, l2] is now the window around i clipped to the valid filter indices */ +#ifndef FIXED_POINT + (void)noise_stats; + coef = 0; + for (j = l1; j <= l2; j++) { + coef += coefs[j]; + } + buf[i] = buf[i] * (coef / (l2 - l1 + 1)); +#else + coef = MIN_FIXLOG; + for (j = l1; j <= l2; j++) { + coef = fe_log_add(coef, coefs[j]); + } + buf[i] = buf[i] + coef - noise_stats->smooth_scaling[l2 - l1 + 1]; +#endif + + } +} +/** + * Allocate a noise_stats_t for num_filters filters. + * + * The structure and its per-filter arrays are obtained from ckd_calloc(), + * undefined is set to TRUE so that the first fe_remove_noise() call seeds + * the statistics, and the smoothing constants are stored either as plain + * values or, with FIXED_POINT, as FLOAT2FIX(log(value)). + * NOTE(review): the floating-point loop fills smooth_scaling[1 .. 2 * + * SMOOTH_WINDOW] while the fixed-point loop fills [1 .. 2 * SMOOTH_WINDOW + * + 2]; the floating-point fe_weight_smooth() does not read the table, so + * the difference looks harmless -- confirm. + * Release the result with fe_free_noisestats(). + */ +noise_stats_t * +fe_init_noisestats(int num_filters) +{ + int i; + noise_stats_t *noise_stats; + + noise_stats = (noise_stats_t *) ckd_calloc(1, sizeof(noise_stats_t)); + + noise_stats->power = + (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); + noise_stats->noise = + (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); + noise_stats->floor = + (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); + noise_stats->peak = + (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); + + noise_stats->undefined = TRUE; + noise_stats->num_filters = num_filters; + +#ifndef FIXED_POINT + noise_stats->lambda_power = LAMBDA_POWER; + noise_stats->comp_lambda_power = 1 - LAMBDA_POWER; + noise_stats->lambda_a = LAMBDA_A; + noise_stats->comp_lambda_a = 1 - LAMBDA_A; + noise_stats->lambda_b = LAMBDA_B; + noise_stats->comp_lambda_b = 1 - LAMBDA_B; + noise_stats->lambda_t = LAMBDA_T; + noise_stats->mu_t = MU_T; + noise_stats->max_gain = MAX_GAIN; + noise_stats->inv_max_gain = 1.0 / MAX_GAIN; + + for (i = 1; i < 2 * SMOOTH_WINDOW + 1; i++) { + noise_stats->smooth_scaling[i] = 1.0 / i; + } +#else + noise_stats->lambda_power = FLOAT2FIX(log(LAMBDA_POWER)); + noise_stats->comp_lambda_power = FLOAT2FIX(log(1 - LAMBDA_POWER)); + noise_stats->lambda_a = FLOAT2FIX(log(LAMBDA_A)); + noise_stats->comp_lambda_a = FLOAT2FIX(log(1 - LAMBDA_A)); + noise_stats->lambda_b = FLOAT2FIX(log(LAMBDA_B)); + noise_stats->comp_lambda_b = FLOAT2FIX(log(1 - LAMBDA_B)); + noise_stats->lambda_t = FLOAT2FIX(log(LAMBDA_T)); + noise_stats->mu_t = FLOAT2FIX(log(MU_T)); + noise_stats->max_gain = FLOAT2FIX(log(MAX_GAIN)); +
noise_stats->inv_max_gain = FLOAT2FIX(log(1.0 / MAX_GAIN)); + + for (i = 1; i < 2 * SMOOTH_WINDOW + 3; i++) { + noise_stats->smooth_scaling[i] = FLOAT2FIX(log(i)); + } +#endif + + noise_stats->signal = (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); + noise_stats->gain = (powspec_t *) ckd_calloc(num_filters, sizeof(powspec_t)); + + return noise_stats; +} +/** + * Mark the statistics as undefined so that the next fe_remove_noise() + * call re-seeds them from its frame. A NULL noise_stats is ignored. + */ +void +fe_reset_noisestats(noise_stats_t * noise_stats) +{ + if (noise_stats) + noise_stats->undefined = TRUE; +} +/** + * Free every per-filter array and then the structure itself. + * noise_stats is dereferenced unconditionally, so it must not be NULL; + * fe_free() checks for NULL before calling this. + */ +void +fe_free_noisestats(noise_stats_t * noise_stats) +{ + ckd_free(noise_stats->signal); + ckd_free(noise_stats->gain); + ckd_free(noise_stats->power); + ckd_free(noise_stats->noise); + ckd_free(noise_stats->floor); + ckd_free(noise_stats->peak); + ckd_free(noise_stats); +} + +/** + * Suppress noise in place in the fe->mfspec spectrum of the current frame. + * + * Returns immediately when fe->noise_stats is NULL. If the statistics are + * marked undefined they are first seeded from this frame. The routine + * then smooths the power, tracks the noise level with fe_lower_envelope(), + * subtracts it, tracks a signal floor, applies temporal masking, derives a + * per-filter gain limited to [inv_max_gain, max_gain], and finally scales + * mfspec by the window-averaged gain in fe_weight_smooth(). + * NOTE(review): slow_peak_sum is reset during seeding but nothing in this + * file appears to read it -- confirm. + * + * For fixed point we are doing the computation in a fixlog domain, + * so we have to add many processing cases. + */ +void +fe_remove_noise(fe_t * fe) +{ + noise_stats_t *noise_stats; + powspec_t *mfspec; + int32 i, num_filts; + + if (fe->noise_stats == NULL) + return; + + noise_stats = fe->noise_stats; + mfspec = fe->mfspec; + num_filts = noise_stats->num_filters; + + if (noise_stats->undefined) { + noise_stats->slow_peak_sum = FIX2FLOAT(0.0); + for (i = 0; i < num_filts; i++) { + noise_stats->power[i] = mfspec[i]; +#ifndef FIXED_POINT + noise_stats->noise[i] = mfspec[i] / noise_stats->max_gain; + noise_stats->floor[i] = mfspec[i] / noise_stats->max_gain; + noise_stats->peak[i] = 0.0; +#else + noise_stats->noise[i] = mfspec[i] - noise_stats->max_gain;; + noise_stats->floor[i] = mfspec[i] - noise_stats->max_gain; + noise_stats->peak[i] = MIN_FIXLOG; +#endif + } + noise_stats->undefined = FALSE; + } + + /* Calculate smoothed power */ + for (i = 0; i < num_filts; i++) { +#ifndef FIXED_POINT + noise_stats->power[i] = + noise_stats->lambda_power * noise_stats->power[i] + noise_stats->comp_lambda_power * mfspec[i]; +#else + noise_stats->power[i] = fe_log_add(noise_stats->lambda_power + noise_stats->power[i], +
noise_stats->comp_lambda_power + mfspec[i]); +#endif + } + + /* Update noise spectrum estimate */ + fe_lower_envelope(noise_stats, noise_stats->power, noise_stats->noise, num_filts); + + /* Drop out noise from signal (floating-point builds clamp the result to at least 1.0) */ + for (i = 0; i < num_filts; i++) { +#ifndef FIXED_POINT + noise_stats->signal[i] = noise_stats->power[i] - noise_stats->noise[i]; + if (noise_stats->signal[i] < 1.0) + noise_stats->signal[i] = 1.0; +#else + noise_stats->signal[i] = fe_log_sub(noise_stats->power[i], noise_stats->noise[i]); +#endif + } + + /* FIXME: Somewhat unclear why we have to do this twice, but this + * seems to estimate some kind of signal floor. */ + fe_lower_envelope(noise_stats, noise_stats->signal, noise_stats->floor, num_filts); +/* Apply temporal masking to the signal using the running per-filter peaks */ + fe_temp_masking(noise_stats, noise_stats->signal, noise_stats->peak, num_filts); +/* Never let the signal drop below the tracked floor */ + for (i = 0; i < num_filts; i++) { + if (noise_stats->signal[i] < noise_stats->floor[i]) + noise_stats->signal[i] = noise_stats->floor[i]; + } +/* Gain is signal over power (a difference in the log domain for FIXED_POINT), limited to the range [inv_max_gain, max_gain] */ +#ifndef FIXED_POINT + for (i = 0; i < num_filts; i++) { + if (noise_stats->signal[i] < noise_stats->max_gain * noise_stats->power[i]) + noise_stats->gain[i] = noise_stats->signal[i] / noise_stats->power[i]; + else + noise_stats->gain[i] = noise_stats->max_gain; + if (noise_stats->gain[i] < noise_stats->inv_max_gain) + noise_stats->gain[i] = noise_stats->inv_max_gain; + } +#else + for (i = 0; i < num_filts; i++) { + noise_stats->gain[i] = noise_stats->signal[i] - noise_stats->power[i]; + if (noise_stats->gain[i] > noise_stats->max_gain) + noise_stats->gain[i] = noise_stats->max_gain; + if (noise_stats->gain[i] < noise_stats->inv_max_gain) + noise_stats->gain[i] = noise_stats->inv_max_gain; + } +#endif + + /* Weight smoothing and time frequency normalization; mfspec is modified in place */ + fe_weight_smooth(noise_stats, mfspec, noise_stats->gain, num_filts); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_noise.h
b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_noise.h new file mode 100644 index 0000000000000000000000000000000000000000..257b5035495e6ac04329d904485d130de9182770 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_noise.h @@ -0,0 +1,60 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2013 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +#ifndef FE_NOISE_H +#define FE_NOISE_H + +#include "sphinxbase/fe.h" +#include "sphinxbase/fixpoint.h" +#include "fe_type.h" + +typedef struct noise_stats_s noise_stats_t; + +/* Creates noisestats object */ +noise_stats_t *fe_init_noisestats(int num_filters); + +/* Resets collected noise statistics */ +void fe_reset_noisestats(noise_stats_t * noise_stats); + +/* Frees allocated data */ +void fe_free_noisestats(noise_stats_t * noise_stats); + +/** + * Process frame, update noise statistics, remove noise components if needed. + */ +void fe_remove_noise(fe_t *fe); + +#endif /* FE_NOISE_H */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_sigproc.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_sigproc.c new file mode 100644 index 0000000000000000000000000000000000000000..4e47de97dee4f02a35fb94d4edb5ef55bbfb969e --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_sigproc.c @@ -0,0 +1,1388 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1996-2004 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ + +#include +#include +#include +#include +#include + +#ifdef HAVE_CONFIG_H +#include +#endif + +#ifdef _MSC_VER +#pragma warning (disable: 4244) +#endif + +/** + * Windows math.h does not contain M_PI + */ +#ifndef M_PI +#define M_PI 3.14159265358979323846 +#endif + +#include "sphinxbase/prim_type.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/byteorder.h" +#include "sphinxbase/fixpoint.h" +#include "sphinxbase/fe.h" +#include "sphinxbase/genrand.h" +#include "sphinxbase/err.h" + +#include "fe_internal.h" +#include "fe_warp.h" + +/* Use extra precision for cosines, Hamming window, pre-emphasis + * coefficient, twiddle factors. */ +#ifdef FIXED_POINT +#define FLOAT2COS(x) FLOAT2FIX_ANY(x,30) +#define COSMUL(x,y) FIXMUL_ANY(x,y,30) +#else +#define FLOAT2COS(x) (x) +#define COSMUL(x,y) ((x)*(y)) +#endif + +#ifdef FIXED_POINT + +/* Internal log-addition table for natural log with radix point at 8 + * bits. Each entry is 256 * log(1 + e^{-n/256}). 
This is used in the + * log-add computation: + * + * e^z = e^x + e^y + * e^z = e^x(1 + e^{y-x}) = e^y(1 + e^{x-y}) + * z = x + log(1 + e^{y-x}) = y + log(1 + e^{x-y}) + * + * So when y > x, z = y + logadd_table[-(x-y)] + * when x > y, z = x + logadd_table[-(y-x)] + */ +static const unsigned char fe_logadd_table[] = { + 177, 177, 176, 176, 175, 175, 174, 174, 173, 173, + 172, 172, 172, 171, 171, 170, 170, 169, 169, 168, + 168, 167, 167, 166, 166, 165, 165, 164, 164, 163, + 163, 162, 162, 161, 161, 161, 160, 160, 159, 159, + 158, 158, 157, 157, 156, 156, 155, 155, 155, 154, + 154, 153, 153, 152, 152, 151, 151, 151, 150, 150, + 149, 149, 148, 148, 147, 147, 147, 146, 146, 145, + 145, 144, 144, 144, 143, 143, 142, 142, 141, 141, + 141, 140, 140, 139, 139, 138, 138, 138, 137, 137, + 136, 136, 136, 135, 135, 134, 134, 134, 133, 133, + 132, 132, 131, 131, 131, 130, 130, 129, 129, 129, + 128, 128, 128, 127, 127, 126, 126, 126, 125, 125, + 124, 124, 124, 123, 123, 123, 122, 122, 121, 121, + 121, 120, 120, 119, 119, 119, 118, 118, 118, 117, + 117, 117, 116, 116, 115, 115, 115, 114, 114, 114, + 113, 113, 113, 112, 112, 112, 111, 111, 110, 110, + 110, 109, 109, 109, 108, 108, 108, 107, 107, 107, + 106, 106, 106, 105, 105, 105, 104, 104, 104, 103, + 103, 103, 102, 102, 102, 101, 101, 101, 100, 100, + 100, 99, 99, 99, 98, 98, 98, 97, 97, 97, + 96, 96, 96, 96, 95, 95, 95, 94, 94, 94, + 93, 93, 93, 92, 92, 92, 92, 91, 91, 91, + 90, 90, 90, 89, 89, 89, 89, 88, 88, 88, + 87, 87, 87, 87, 86, 86, 86, 85, 85, 85, + 85, 84, 84, 84, 83, 83, 83, 83, 82, 82, + 82, 82, 81, 81, 81, 80, 80, 80, 80, 79, + 79, 79, 79, 78, 78, 78, 78, 77, 77, 77, + 77, 76, 76, 76, 75, 75, 75, 75, 74, 74, + 74, 74, 73, 73, 73, 73, 72, 72, 72, 72, + 71, 71, 71, 71, 71, 70, 70, 70, 70, 69, + 69, 69, 69, 68, 68, 68, 68, 67, 67, 67, + 67, 67, 66, 66, 66, 66, 65, 65, 65, 65, + 64, 64, 64, 64, 64, 63, 63, 63, 63, 63, + 62, 62, 62, 62, 61, 61, 61, 61, 61, 60, + 60, 60, 60, 60, 59, 59, 59, 59, 59, 58, + 58, 58, 58, 58, 
57, 57, 57, 57, 57, 56, + 56, 56, 56, 56, 55, 55, 55, 55, 55, 54, + 54, 54, 54, 54, 53, 53, 53, 53, 53, 52, + 52, 52, 52, 52, 52, 51, 51, 51, 51, 51, + 50, 50, 50, 50, 50, 50, 49, 49, 49, 49, + 49, 49, 48, 48, 48, 48, 48, 48, 47, 47, + 47, 47, 47, 47, 46, 46, 46, 46, 46, 46, + 45, 45, 45, 45, 45, 45, 44, 44, 44, 44, + 44, 44, 43, 43, 43, 43, 43, 43, 43, 42, + 42, 42, 42, 42, 42, 41, 41, 41, 41, 41, + 41, 41, 40, 40, 40, 40, 40, 40, 40, 39, + 39, 39, 39, 39, 39, 39, 38, 38, 38, 38, + 38, 38, 38, 37, 37, 37, 37, 37, 37, 37, + 37, 36, 36, 36, 36, 36, 36, 36, 35, 35, + 35, 35, 35, 35, 35, 35, 34, 34, 34, 34, + 34, 34, 34, 34, 33, 33, 33, 33, 33, 33, + 33, 33, 32, 32, 32, 32, 32, 32, 32, 32, + 32, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 30, 30, 30, 30, 30, 30, 30, 30, 30, 29, + 29, 29, 29, 29, 29, 29, 29, 29, 28, 28, + 28, 28, 28, 28, 28, 28, 28, 28, 27, 27, + 27, 27, 27, 27, 27, 27, 27, 27, 26, 26, + 26, 26, 26, 26, 26, 26, 26, 26, 25, 25, + 25, 25, 25, 25, 25, 25, 25, 25, 25, 24, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, + 23, 23, 22, 22, 22, 22, 22, 22, 22, 22, + 22, 22, 22, 22, 21, 21, 21, 21, 21, 21, + 21, 21, 21, 21, 21, 21, 21, 20, 20, 20, + 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, + 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, + 19, 19, 19, 19, 18, 18, 18, 18, 18, 18, + 18, 18, 18, 18, 18, 18, 18, 18, 18, 17, + 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 16, 16, 16, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, + 16, 15, 15, 15, 15, 15, 15, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 15, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, + 14, 14, 14, 14, 14, 14, 14, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, + 13, 13, 13, 13, 13, 13, 13, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, + 12, 12, 12, 12, 12, 12, 12, 12, 12, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, + 11, 11, 11, 10, 10, 10, 10, 10, 10, 10, + 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, + 
10, 10, 10, 10, 10, 10, 10, 10, 10, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 0 +}; + +static const int fe_logadd_table_size = + sizeof(fe_logadd_table) / sizeof(fe_logadd_table[0]); + +fixed32 +fe_log_add(fixed32 x, fixed32 y) +{ + fixed32 d, r; + + if (x > y) { + d = (x - y) >> (DEFAULT_RADIX - 8); + r = x; + } + else { + d = (y - x) >> (DEFAULT_RADIX - 8); + r = y; + } + + if (r <= MIN_FIXLOG) + return MIN_FIXLOG; + else if (d > fe_logadd_table_size - 1) + return r; + else { + r += ((fixed32) fe_logadd_table[d] << (DEFAULT_RADIX - 8)); +/* printf("%d - %d = %d | %f - %f = %f | %f - %f = %f\n", + x, y, r, FIX2FLOAT(x), FIX2FLOAT(y), FIX2FLOAT(r), + exp(FIX2FLOAT(x)), exp(FIX2FLOAT(y)), exp(FIX2FLOAT(r))); +*/ + return r; + } +} + +/* + * log_sub for spectral subtraction, similar to logadd but we had + * to smooth function around zero with fixlog in order to improve + * table interpolation properties + * + * The table is created with the file included into distribution + * + * e^z = e^x - e^y + * e^z = e^x (1 - e^(-(x - y))) + * z = x + log(1 - e^(-(x - y))) + * z = x + fixlog(a) + (log(1 - e^(- a)) - log(a)) + * + * Input radix is 8 output radix is 10 + */ +static const uint16 fe_logsub_table[] = { +1, 3, 5, 7, 9, 11, 13, 15, 17, 19, +21, 23, 25, 27, 29, 31, 33, 35, 37, 39, +41, 43, 45, 47, 49, 51, 53, 55, 56, 58, +60, 62, 64, 66, 68, 70, 72, 74, 76, 78, +80, 82, 84, 86, 88, 90, 92, 94, 95, 97, +99, 101, 103, 105, 107, 109, 111, 113, 115, 117, +119, 121, 122, 124, 126, 128, 130, 132, 134, 136, +138, 140, 142, 143, 145, 147, 149, 151, 153, 155, +157, 159, 161, 162, 164, 166, 168, 170, 
172, 174, +176, 178, 179, 181, 183, 185, 187, 189, 191, 193, +194, 196, 198, 200, 202, 204, 206, 207, 209, 211, +213, 215, 217, 219, 220, 222, 224, 226, 228, 230, +232, 233, 235, 237, 239, 241, 243, 244, 246, 248, +250, 252, 254, 255, 257, 259, 261, 263, 265, 266, +268, 270, 272, 274, 275, 277, 279, 281, 283, 284, +286, 288, 290, 292, 294, 295, 297, 299, 301, 302, +304, 306, 308, 310, 311, 313, 315, 317, 319, 320, +322, 324, 326, 327, 329, 331, 333, 335, 336, 338, +340, 342, 343, 345, 347, 349, 350, 352, 354, 356, +357, 359, 361, 363, 364, 366, 368, 370, 371, 373, +375, 377, 378, 380, 382, 384, 385, 387, 389, 391, +392, 394, 396, 397, 399, 401, 403, 404, 406, 408, +410, 411, 413, 415, 416, 418, 420, 422, 423, 425, +427, 428, 430, 432, 433, 435, 437, 439, 440, 442, +444, 445, 447, 449, 450, 452, 454, 455, 457, 459, +460, 462, 464, 465, 467, 469, 471, 472, 474, 476, +477, 479, 481, 482, 484, 486, 487, 489, 490, 492, +494, 495, 497, 499, 500, 502, 504, 505, 507, 509, +510, 512, 514, 515, 517, 518, 520, 522, 523, 525, +527, 528, 530, 532, 533, 535, 536, 538, 540, 541, +543, 544, 546, 548, 549, 551, 553, 554, 556, 557, +559, 561, 562, 564, 565, 567, 569, 570, 572, 573, +575, 577, 578, 580, 581, 583, 585, 586, 588, 589, +591, 592, 594, 596, 597, 599, 600, 602, 603, 605, +607, 608, 610, 611, 613, 614, 616, 618, 619, 621, +622, 624, 625, 627, 628, 630, 632, 633, 635, 636, +638, 639, 641, 642, 644, 645, 647, 649, 650, 652, +653, 655, 656, 658, 659, 661, 662, 664, 665, 667, +668, 670, 671, 673, 674, 676, 678, 679, 681, 682, +684, 685, 687, 688, 690, 691, 693, 694, 696, 697, +699, 700, 702, 703, 705, 706, 708, 709, 711, 712, +714, 715, 717, 718, 719, 721, 722, 724, 725, 727, +728, 730, 731, 733, 734, 736, 737, 739, 740, 742, +743, 745, 746, 747, 749, 750, 752, 753, 755, 756, +758, 759, 761, 762, 763, 765, 766, 768, 769, 771, +772, 774, 775, 776, 778, 779, 781, 782, 784, 785, +786, 788, 789, 791, 792, 794, 795, 796, 798, 799, +801, 802, 804, 805, 806, 808, 809, 811, 812, 813, 
+815, 816, 818, 819, 820, 822, 823, 825, 826, 827, +829, 830, 832, 833, 834, 836, 837, 839, 840, 841, +843, 844, 846, 847, 848, 850, 851, 852, 854, 855, +857, 858, 859, 861, 862, 863, 865, 866, 868, 869, +870, 872, 873, 874, 876, 877, 878, 880, 881, 883, +884, 885, 887, 888, 889, 891, 892, 893, 895, 896, +897, 899, 900, 901, 903, 904, 905, 907, 908, 909, +911, 912, 913, 915, 916, 917, 919, 920, 921, 923, +924, 925, 927, 928, 929, 931, 932, 933, 935, 936, +937, 939, 940, 941, 942, 944, 945, 946, 948, 949, +950, 952, 953, 954, 956, 957, 958, 959, 961, 962, +963, 965, 966, 967, 968, 970, 971, 972, 974, 975, +976, 977, 979, 980, 981, 983, 984, 985, 986, 988, +989, 990, 991, 993, 994, 995, 997, 998, 999, 1000, +1002, 1003, 1004, 1005, 1007, 1008, 1009, 1010, 1012, 1013, +1014, 1015, 1017, 1018, 1019, 1020, 1022, 1023, 1024, 1025, +1027, 1028, 1029, 1030, 1032, 1033, 1034, 1035, 1037, 1038, +1039, 1040, 1041, 1043, 1044, 1045, 1046, 1048, 1049, 1050, +1051, 1052, 1054, 1055, 1056, 1057, 1059, 1060, 1061, 1062, +1063, 1065, 1066, 1067, 1068, 1069, 1071, 1072, 1073, 1074, +1076, 1077, 1078, 1079, 1080, 1082, 1083, 1084, 1085, 1086, +1087, 1089, 1090, 1091, 1092, 1093, 1095, 1096, 1097, 1098, +1099, 1101, 1102, 1103, 1104, 1105, 1106, 1108, 1109, 1110, +1111, 1112, 1114, 1115, 1116, 1117, 1118, 1119, 1121, 1122, +1123, 1124, 1125, 1126, 1128, 1129, 1130, 1131, 1132, 1133, +1135, 1136, 1137, 1138, 1139, 1140, 1141, 1143, 1144, 1145, +1146, 1147, 1148, 1149, 1151, 1152, 1153, 1154, 1155, 1156, +1157, 1159, 1160, 1161, 1162, 1163, 1164, 1165, 1167, 1168, +1169, 1170, 1171, 1172, 1173, 1174, 1176, 1177, 1178, 1179, +1180, 1181, 1182, 1183, 1185, 1186, 1187, 1188, 1189, 1190, +1191, 1192, 1193, 1195, 1196, 1197, 1198, 1199, 1200, 1201, +1202, 1203, 1205, 1206, 1207, 1208, 1209, 1210, 1211, 1212, +1213, 1214, 1216, 1217, 1218, 1219, 1220, 1221, 1222, 1223, +1224, 1225, 1226, 1228, 1229, 1230, 1231, 1232, 1233, 1234, +1235, 1236, 1237, 1238, 1239, 1240, 1242, 1243, 1244, 1245, 
+1246, 1247, 1248, 1249, 1250, 1251, 1252, 1253, 1254, 1255, +1256, 1258, 1259, 1260, 1261, 1262, 1263, 1264, 1265, 1266, +1267, 1268, 1269, 1270, 1271, 1272, 1273, 1274, 1275, 1277, +1278, 1279, 1280, 1281, 1282, 1283, 1284, 1285, 1286, 1287, +1288, 1289, 1290, 1291, 1292, 1293, 1294, 1295, 1296, 1297, +1298, 1299, 1300, 1301, 1302, 1303, 1305, 1306, 1307, 1308, +1309, 1310, 1311, 1312, 1313, 1314, 1315, 1316, 1317, 1318, +1319, 1320, 1321, 1322, 1323, 1324, 1325, 1326, 1327, 1328, +1329, 1330, 1331, 1332, 1333, 1334, 1335, 1336, 1337, 1338, +1339, 1340, 1341, 1342, 1343, 1344, 1345, 1346, 1347, 1348, +1349, 1350, 1351, 1352, 1353, 1354, 1355, 1356, 1357, 1358, +1359, 1360, 1361, 1362, 1363, 1364, 1365, 1366, 1367, 1368, +1369, 1370, 1371, 1372, 1372, 1373, 1374, 1375, 1376, 1377, +1378, 1379, 1380, 1381, 1382, 1383, 1384, 1385, 1386, 1387, +1388, 1389, 1390, 1391, 1392, 1393, 1394, 1395, 1396, 1397, +1398, 1399, 1399, 1400, 1401, 1402, 1403, 1404, 1405, 1406, +1407, 1408, 1409, 1410, 1411, 1412, 1413, 1414, 1415, 1416, +1417, 1418, 1418, 1419, 1420, 1421, 1422, 1423, 1424, 1425, +1426, 1427, 1428, 1429, 1430, 1431, 1432, 1432, 1433, 1434, +1435, 1436, 1437, 1438, 1439, 1440, 1441, 1442, 1443, 1444, +1444, 1445, 1446, 1447, 1448, 1449, 1450, 1451, 1452, 1453, +1454, 1455, 1455, 1456, 1457, 1458, 1459, 1460, 1461, 1462, +1463, 1464, 1465, 1466, 1466, 1467, 1468, 1469, 1470, 1471, +1472, 1473, 1474, 1475, 1475, 1476, 1477, 1478, 1479, 1480, +1481, 1482, 1483, 1483, 1484, 1485, 1486, 1487, 1488, 1489, +1490, 1491, 1491, 1492, 1493, 1494, 1495, 1496, 1497, 1498, +1499, 1499, 1500, 1501, 1502, 1503, 1504, 1505, 1506, 1506, +1507, 1508, 1509, 1510, 1511, 1512, 1513, 1513, 1514, 1515, +1516, 1517, 1518, 1519, 1520, 1520, 1521, 1522, 1523, 1524, +1525, 1526, 1526, 1527, 1528, 1529, 1530, 1531, 1532, 1532, +1533, 1534, 1535, 1536, 1537, 1538, 1538, 1539, 1540, 1541, +1542, 1543, 1544, 1544, 1545, 1546, 1547, 1548, 1549, 1550, +1550, 1551, 1552, 1553, 1554, 1555, 1555, 
1556, 1557, 1558, +1559, 1560, 1560, 1561, 1562, 1563, 1564, 1565, 1565, 1566, +1567, 1568, 1569, 1570, 1570, 1571, 1572, 1573, 1574, 1575, +1575, 1576, 1577, 1578, 1579, 1580, 1580, 1581, 1582, 1583, +1584, 1584, 1585, 1586, 1587, 1588, 1589, 1589, 1590, 1591, +1592, 1593, 1593, 1594, 1595, 1596, 1597, 1598, 1598, 1599, +1600, 1601, 1602, 1602, 1603, 1604, 1605, 1606, 1606, 1607, +1608, 1609, 1610, 1610, 1611, 1612, 1613, 1614, 1614, 1615, +1616, 1617, 1618, 1618, 1619, 1620, 1621, 1622, 1622, 1623, +1624, 1625, 1626, 1626, 1627, 1628, 1629, 1630, 1630, 1631, +1632, 1633, 1634, 1634, 1635, 1636, 1637, 1637, 1638, 1639, +1640, 1641, 1641, 1642, 1643, 1644, 1645, 1645, 1646, 1647, +1648, 1648, 1649, 1650, 1651, 1652, 1652, 1653, 1654, 1655, +1655, 1656, 1657, 1658, 1658, 1659, 1660, 1661, 1662, 1662, +1663, 1664, 1665, 1665, 1666, 1667, 1668, 1668, 1669, 1670, +1671, 1671, 1672, 1673, 1674, 1675, 1675, 1676, 1677, 1678, +1678, 1679, 1680, 1681, 1681, 1682, 1683, 1684, 1684, 1685, +1686, 1687, 1687, 1688, 1689, 1690, 1690, 1691, 1692, 1693, +1693, 1694, 1695, 1696, 1696, 1697, 1698, 1699, 1699, 1700, +1701, 1702, 1702, 1703, 1704, 1705, 1705, 1706, 1707, 1707, +1708, 1709, 1710, 1710, 1711, 1712, 1713, 1713, 1714, 1715, +1716, 1716, 1717, 1718, 1718, 1719, 1720, 1721, 1721, 1722, +1723, 1724, 1724, 1725, 1726, 1727, 1727, 1728, 1729, 1729, +1730, 1731, 1732, 1732, 1733, 1734, 1734, 1735, 1736, 1737, +1737, 1738, 1739, 1740, 1740, 1741, 1742, 1742, 1743, 1744, +1745, 1745, 1746, 1747, 1747, 1748, 1749, 1749, 1750, 1751, +1752, 1752, 1753, 1754, 1754, 1755, 1756, 1757, 1757, 1758, +1759, 1759, 1760, 1761, 1762, 1762, 1763, 1764, 1764, 1765, +1766, 1766, 1767, 1768, 1769, 1769, 1770, 1771, 1771, 1772, +1773, 1773, 1774, 1775, 1776, 1776, 1777, 1778, 1778, 1779, +1780, 1780, 1781, 1782, 1782, 1783, 1784, 1784, 1785, 1786, +1787, 1787, 1788, 1789, 1789, 1790, 1791, 1791, 1792, 1793, +1793, 1794, 1795, 1795, 1796, 1797, 1798, 1798, 1799, 1800, +1800, 1801, 1802, 1802, 
1803, 1804, 1804, 1805, 1806, 1806, +1807, 1808, 1808, 1809, 1810, 1810, 1811, 1812, 1812, 1813, +1814, 1814, 1815, 1816, 1816, 1817, 1818, 1818, 1819, 1820, +1820, 1821, 1822, 1822, 1823, 1824, 1824, 1825, 1826, 1826, +1827, 1828, 1828, 1829, 1830, 1830, 1831, 1832, 1832, 1833, +1834, 1834, 1835, 1836, 1836, 1837, 1838, 1838, 1839, 1840, +1840, 1841, 1842, 1842, 1843, 1844, 1844, 1845, 1845, 1846, +1847, 1847, 1848, 1849, 1849, 1850, 1851, 1851, 1852, 1853, +1853, 1854, 1855, 1855, 1856, 1857, 1857, 1858, 1858, 1859, +1860, 1860, 1861, 1862, 1862, 1863, 1864, 1864, 1865, 1866, +1866, 1867, 1867, 1868, 1869, 1869, 1870, 1871, 1871, 1872, +1873, 1873, 1874, 1874, 1875, 1876, 1876, 1877, 1878, 1878, +1879, 1879, 1880, 1881, 1881, 1882, 1883, 1883, 1884, 1885, +1885, 1886, 1886, 1887, 1888, 1888, 1889, 1890, 1890, 1891, +1891, 1892, 1893, 1893, 1894, 1895, 1895, 1896, 1896, 1897, +1898, 1898, 1899, 1900, 1900, 1901, 1901, 1902, 1903, 1903, +1904, 1904, 1905, 1906, 1906, 1907, 1908, 1908, 1909, 1909, +1910, 1911, 1911, 1912, 1912, 1913, 1914, 1914, 1915, 1916, +1916, 1917, 1917, 1918, 1919, 1919, 1920, 1920, 1921, 1922, +1922, 1923, 1923, 1924, 1925, 1925, 1926, 1926, 1927, 1928, +1928, 1929, 1929, 1930, 1931, 1931, 1932, 1932, 1933, 1934, +1934, 1935, 1935, 1936, 1937, 1937, 1938, 1938, 1939, 1940, +1940, 1941, 1941, 1942, 1943, 1943, 1944, 1944, 1945, 1946, +1946, 1947, 1947, 1948, 1949, 1949, 1950, 1950, 1951, 1952, +1952, 1953, 1953, 1954, 1955, 1955, 1956, 1956, 1957, 1957, +1958, 1959, 1959, 1960, 1960, 1961, 1962, 1962, 1963, 1963, +1964, 1964, 1965, 1966, 1966, 1967, 1967, 1968, 1969, 1969, +1970, 1970, 1971, 1971, 1972, 1973, 1973, 1974, 1974, 1975, +1976, 1976, 1977, 1977, 1978, 1978, 1979, 1980, 1980, 1981, +1981, 1982, 1982, 1983, 1984, 1984, 1985, 1985, 1986, 1986, +1987, 1988, 1988, 1989, 1989, 1990, 1990, 1991, 1992, 1992, +1993, 1993, 1994, 1994, 1995, 1996, 1996, 1997, 1997, 1998, +1998, 1999, 1999, 2000, 2001, 2001, 2002, 2002, 2003, 2003, +2004, 
2005, 2005, 2006, 2006, 2007, 2007, 2008, 2008, 2009, +2010, 2010, 2011, 2011, 2012, 2012, 2013, 2014, 2014, 2015, +2015, 2016, 2016, 2017, 2017, 2018, 2019, 2019, 2020, 2020, +2021, 2021, 2022, 2022, 2023, 2023, 2024, 2025, 2025, 2026, +2026, 2027, 2027, 2028, 2028, 2029, 2030, 2030, 2031, 2031, +2032, 2032, 2033, 2033, 2034, 2034, 2035, 2036, 2036, 2037, +2037, 2038, 2038, 2039, 2039, 2040, 2040, 2041, 2042, 2042, +2043, 2043, 2044, 2044, 2045, 2045, 2046, 2046, 2047, 2048, +2048, 2049, 2049, 2050, 2050, 2051, 2051, 2052, 2052, 2053, +2053, 2054, 2054, 2055, 2056, 2056, 2057, 2057, 2058, 2058, +2059, 2059, 2060, 2060, 2061, 2061, 2062, 2062, 2063, 2064, +2064, 2065, 2065, 2066, 2066, 2067, 2067, 2068, 2068, 2069, +2069, 2070, 2070, 2071, 2072, 2072, 2073, 2073, 2074, 2074, +2075, 2075, 2076, 2076, 2077, 2077, 2078, 2078, 2079, 2079, +2080, 2080, 2081 +}; + +static const int fe_logsub_table_size = + sizeof(fe_logsub_table) / sizeof(fe_logsub_table[0]); + +fixed32 +fe_log_sub(fixed32 x, fixed32 y) +{ + fixed32 d, r; + + if (x < MIN_FIXLOG || y >= x) + return MIN_FIXLOG; + + d = (x - y) >> (DEFAULT_RADIX - 8); + + if (d > fe_logsub_table_size - 1) + return x; + + r = fe_logsub_table[d] << (DEFAULT_RADIX - 10); +/* + printf("diff=%d\n", + x + FIXLN(x-y) - r - + (x + FLOAT2FIX(logf(-expm1f(FIX2FLOAT(y - x)))))); +*/ + return x + FIXLN(x-y) - r; +} + +static fixed32 +fe_log(float32 x) +{ + if (x <= 0) { + return MIN_FIXLOG; + } + else { + return FLOAT2FIX(log(x)); + } +} +#endif + +static float32 +fe_mel(melfb_t * mel, float32 x) +{ + float32 warped = fe_warp_unwarped_to_warped(mel, x); + + return (float32) (2595.0 * log10(1.0 + warped / 700.0)); +} + +static float32 +fe_melinv(melfb_t * mel, float32 x) +{ + float32 warped = (float32) (700.0 * (pow(10.0, x / 2595.0) - 1.0)); + return fe_warp_warped_to_unwarped(mel, warped); +} + +int32 +fe_build_melfilters(melfb_t * mel_fb) +{ + float32 melmin, melmax, melbw, fftfreq; + int n_coeffs, i, j; + + + /* Filter coefficient 
matrix, in flattened form. */ + mel_fb->spec_start = + ckd_calloc(mel_fb->num_filters, sizeof(*mel_fb->spec_start)); + mel_fb->filt_start = + ckd_calloc(mel_fb->num_filters, sizeof(*mel_fb->filt_start)); + mel_fb->filt_width = + ckd_calloc(mel_fb->num_filters, sizeof(*mel_fb->filt_width)); + + /* First calculate the widths of each filter. */ + /* Minimum and maximum frequencies in mel scale. */ + melmin = fe_mel(mel_fb, mel_fb->lower_filt_freq); + melmax = fe_mel(mel_fb, mel_fb->upper_filt_freq); + + /* Width of filters in mel scale */ + melbw = (melmax - melmin) / (mel_fb->num_filters + 1); + if (mel_fb->doublewide) { + melmin -= melbw; + melmax += melbw; + if ((fe_melinv(mel_fb, melmin) < 0) || + (fe_melinv(mel_fb, melmax) > mel_fb->sampling_rate / 2)) { + E_WARN + ("Out of Range: low filter edge = %f (%f)\n", + fe_melinv(mel_fb, melmin), 0.0); + E_WARN + (" high filter edge = %f (%f)\n", + fe_melinv(mel_fb, melmax), mel_fb->sampling_rate / 2); + return FE_INVALID_PARAM_ERROR; + } + } + + /* DFT point spacing */ + fftfreq = mel_fb->sampling_rate / (float32) mel_fb->fft_size; + + /* Count and place filter coefficients. */ + n_coeffs = 0; + for (i = 0; i < mel_fb->num_filters; ++i) { + float32 freqs[3]; + + /* Left, center, right frequencies in Hertz */ + for (j = 0; j < 3; ++j) { + if (mel_fb->doublewide) + freqs[j] = fe_melinv(mel_fb, (i + j * 2) * melbw + melmin); + else + freqs[j] = fe_melinv(mel_fb, (i + j) * melbw + melmin); + /* Round them to DFT points if requested */ + if (mel_fb->round_filters) + freqs[j] = ((int) (freqs[j] / fftfreq + 0.5)) * fftfreq; + } + + /* spec_start is the start of this filter in the power spectrum. */ + mel_fb->spec_start[i] = -1; + /* There must be a better way... */ + for (j = 0; j < mel_fb->fft_size / 2 + 1; ++j) { + float32 hz = j * fftfreq; + if (hz < freqs[0]) + continue; + else if (hz > freqs[2] || j == mel_fb->fft_size / 2) { + /* filt_width is the width in DFT points of this filter. 
*/ + mel_fb->filt_width[i] = j - mel_fb->spec_start[i]; + /* filt_start is the start of this filter in the filt_coeffs array. */ + mel_fb->filt_start[i] = n_coeffs; + n_coeffs += mel_fb->filt_width[i]; + break; + } + if (mel_fb->spec_start[i] == -1) + mel_fb->spec_start[i] = j; + } + } + + /* Now go back and allocate the coefficient array. */ + mel_fb->filt_coeffs = + ckd_malloc(n_coeffs * sizeof(*mel_fb->filt_coeffs)); + + /* And now generate the coefficients. */ + n_coeffs = 0; + for (i = 0; i < mel_fb->num_filters; ++i) { + float32 freqs[3]; + + /* Left, center, right frequencies in Hertz */ + for (j = 0; j < 3; ++j) { + if (mel_fb->doublewide) + freqs[j] = fe_melinv(mel_fb, (i + j * 2) * melbw + melmin); + else + freqs[j] = fe_melinv(mel_fb, (i + j) * melbw + melmin); + /* Round them to DFT points if requested */ + if (mel_fb->round_filters) + freqs[j] = ((int) (freqs[j] / fftfreq + 0.5)) * fftfreq; + } + + for (j = 0; j < mel_fb->filt_width[i]; ++j) { + float32 hz, loslope, hislope; + + hz = (mel_fb->spec_start[i] + j) * fftfreq; + if (hz < freqs[0] || hz > freqs[2]) { + E_FATAL + ("Failed to create filterbank, frequency range does not match. 
" + "Sample rate %f, FFT size %d, lowerf %f < freq %f > upperf %f.\n", + mel_fb->sampling_rate, mel_fb->fft_size, freqs[0], hz, + freqs[2]); + } + loslope = (hz - freqs[0]) / (freqs[1] - freqs[0]); + hislope = (freqs[2] - hz) / (freqs[2] - freqs[1]); + if (mel_fb->unit_area) { + loslope *= 2 / (freqs[2] - freqs[0]); + hislope *= 2 / (freqs[2] - freqs[0]); + } + if (loslope < hislope) { +#ifdef FIXED_POINT + mel_fb->filt_coeffs[n_coeffs] = fe_log(loslope); +#else + mel_fb->filt_coeffs[n_coeffs] = loslope; +#endif + } + else { +#ifdef FIXED_POINT + mel_fb->filt_coeffs[n_coeffs] = fe_log(hislope); +#else + mel_fb->filt_coeffs[n_coeffs] = hislope; +#endif + } + ++n_coeffs; + } + } + + return FE_SUCCESS; +} + +int32 +fe_compute_melcosine(melfb_t * mel_fb) +{ + + float64 freqstep; + int32 i, j; + + mel_fb->mel_cosine = + (mfcc_t **) ckd_calloc_2d(mel_fb->num_cepstra, + mel_fb->num_filters, sizeof(mfcc_t)); + + freqstep = M_PI / mel_fb->num_filters; + /* NOTE: The first row vector is actually unnecessary but we leave + * it in to avoid confusion. */ + for (i = 0; i < mel_fb->num_cepstra; i++) { + for (j = 0; j < mel_fb->num_filters; j++) { + float64 cosine; + + cosine = cos(freqstep * i * (j + 0.5)); + mel_fb->mel_cosine[i][j] = FLOAT2COS(cosine); + } + } + + /* Also precompute normalization constants for unitary DCT. 
*/ + mel_fb->sqrt_inv_n = FLOAT2COS(sqrt(1.0 / mel_fb->num_filters)); + mel_fb->sqrt_inv_2n = FLOAT2COS(sqrt(2.0 / mel_fb->num_filters)); + + /* And liftering weights */ + if (mel_fb->lifter_val) { + mel_fb->lifter = + calloc(mel_fb->num_cepstra, sizeof(*mel_fb->lifter)); + for (i = 0; i < mel_fb->num_cepstra; ++i) { + mel_fb->lifter[i] = FLOAT2MFCC(1 + mel_fb->lifter_val / 2 + * sin(i * M_PI / + mel_fb->lifter_val)); + } + } + + return (0); +} + +static void +fe_pre_emphasis_int16(int16 const *in, frame_t * out, int32 len, + float32 factor, int16 prior) +{ + int i; + +#if defined(FIXED16) + int16 fxd_alpha = (int16)(factor * 0x8000); + int32 tmp1, tmp2; + + tmp1 = (int32)in[0] << 15; + tmp2 = (int32)prior * fxd_alpha; + out[0] = (int16)((tmp1 - tmp2) >> 15); + for (i = 1; i < len; ++i) { + tmp1 = (int32)in[i] << 15; + tmp2 = (int32)in[i-1] * fxd_alpha; + out[i] = (int16)((tmp1 - tmp2) >> 15); + } +#elif defined(FIXED_POINT) + fixed32 fxd_alpha = FLOAT2FIX(factor); + out[0] = ((fixed32) in[0] << DEFAULT_RADIX) - (prior * fxd_alpha); + for (i = 1; i < len; ++i) + out[i] = ((fixed32) in[i] << DEFAULT_RADIX) + - (fixed32) in[i - 1] * fxd_alpha; +#else + out[0] = (frame_t) in[0] - (frame_t) prior *factor; + for (i = 1; i < len; i++) + out[i] = (frame_t) in[i] - (frame_t) in[i - 1] * factor; +#endif +} + +static void +fe_copy_to_frame_int16(int16 const *in, frame_t * out, int32 len) +{ + int i; + +#if defined(FIXED16) + memcpy(out, in, len * sizeof(*out)); +#elif defined(FIXED_POINT) + for (i = 0; i < len; i++) + out[i] = (int32) in[i] << DEFAULT_RADIX; +#else /* FIXED_POINT */ + for (i = 0; i < len; i++) + out[i] = (frame_t) in[i]; +#endif /* FIXED_POINT */ +} + +void +fe_create_hamming(window_t * in, int32 in_len) +{ + int i; + + /* Symmetric, so we only create the first half of it. 
*/ + for (i = 0; i < in_len / 2; i++) { + float64 hamm; + hamm = (0.54 - 0.46 * cos(2 * M_PI * i / + ((float64) in_len - 1.0))); +#ifdef FIXED16 + in[i] = (int16)(hamm * 0x8000); +#else + in[i] = FLOAT2COS(hamm); +#endif + } +} + +static void +fe_hamming_window(frame_t * in, window_t * window, int32 in_len, + int32 remove_dc) +{ + int i; + + if (remove_dc) { +#ifdef FIXED16 + int32 mean = 0; /* Use int32 to avoid possibility of overflow */ +#else + frame_t mean = 0; +#endif + + for (i = 0; i < in_len; i++) + mean += in[i]; + mean /= in_len; + for (i = 0; i < in_len; i++) + in[i] -= (frame_t) mean; + } + +#ifdef FIXED16 + for (i = 0; i < in_len/2; i++) { + int32 tmp1, tmp2; + + tmp1 = (int32)in[i] * window[i]; + tmp2 = (int32)in[in_len-1-i] * window[i]; + in[i] = (int16)(tmp1 >> 15); + in[in_len-1-i] = (int16)(tmp2 >> 15); + } +#else + for (i = 0; i < in_len/2; i++) { + in[i] = COSMUL(in[i], window[i]); + in[in_len - 1 - i] = COSMUL(in[in_len - 1 - i], window[i]); + } +#endif +} + +static int +fe_spch_to_frame(fe_t * fe, int len) +{ + /* Copy to the frame buffer. */ + if (fe->pre_emphasis_alpha != 0.0) { + fe_pre_emphasis_int16(fe->spch, fe->frame, len, + fe->pre_emphasis_alpha, + fe->pre_emphasis_prior); + if (len >= fe->frame_shift) + fe->pre_emphasis_prior = fe->spch[fe->frame_shift - 1]; + else + fe->pre_emphasis_prior = fe->spch[len - 1]; + } + else + fe_copy_to_frame_int16(fe->spch, fe->frame, len); + + /* Zero pad up to FFT size. */ + memset(fe->frame + len, 0, (fe->fft_size - len) * sizeof(*fe->frame)); + + /* Window. */ + fe_hamming_window(fe->frame, fe->hamming_window, fe->frame_size, + fe->remove_dc); + + return len; +} + +int +fe_read_frame_int16(fe_t * fe, int16 const *in, int32 len) +{ + int i; + + if (len > fe->frame_size) + len = fe->frame_size; + + /* Read it into the raw speech buffer. */ + memcpy(fe->spch, in, len * sizeof(*in)); + /* Swap and dither if necessary. 
*/ + if (fe->swap) + for (i = 0; i < len; ++i) + SWAP_INT16(&fe->spch[i]); + if (fe->dither) + for (i = 0; i < len; ++i) + fe->spch[i] += (int16) ((!(s3_rand_int31() % 4)) ? 1 : 0); + + return fe_spch_to_frame(fe, len); +} + +int +fe_read_frame(fe_t * fe, int16 const *in, int32 len) +{ + return fe_read_frame_int16(fe, in, len); +} + +int +fe_shift_frame_int16(fe_t * fe, int16 const *in, int32 len) +{ + int offset, i; + + if (len > fe->frame_shift) + len = fe->frame_shift; + offset = fe->frame_size - fe->frame_shift; + + /* Shift data into the raw speech buffer. */ + memmove(fe->spch, fe->spch + fe->frame_shift, + offset * sizeof(*fe->spch)); + memcpy(fe->spch + offset, in, len * sizeof(*fe->spch)); + /* Swap and dither if necessary. */ + if (fe->swap) + for (i = 0; i < len; ++i) + SWAP_INT16(&fe->spch[offset + i]); + if (fe->dither) + for (i = 0; i < len; ++i) + fe->spch[offset + i] + += (int16) ((!(s3_rand_int31() % 4)) ? 1 : 0); + + return fe_spch_to_frame(fe, offset + len); +} + +int +fe_shift_frame(fe_t * fe, int16 const *in, int32 len) +{ + return fe_shift_frame_int16(fe, in, len); +} + +/** + * Create arrays of twiddle factors. + */ +void +fe_create_twiddle(fe_t * fe) +{ + int i; + + for (i = 0; i < fe->fft_size / 4; ++i) { + float64 a = 2 * M_PI * i / fe->fft_size; +#ifdef FIXED16 + fe->ccc[i] = (int16)(cos(a) * 0x8000); + fe->sss[i] = (int16)(sin(a) * 0x8000); +#elif defined(FIXED_POINT) + fe->ccc[i] = FLOAT2COS(cos(a)); + fe->sss[i] = FLOAT2COS(sin(a)); +#else + fe->ccc[i] = cos(a); + fe->sss[i] = sin(a); +#endif + } +} + +/* Translated from the FORTRAN (obviously) from "Real-Valued Fast + * Fourier Transform Algorithms" by Henrik V. Sorensen et al., IEEE + * Transactions on Acoustics, Speech, and Signal Processing, vol. 35, + * no.6. The 16-bit version does a version of "block floating + * point" in order to avoid rounding errors. 
+ */ +#if defined(FIXED16) +static int +fe_fft_real(fe_t *fe) +{ + int i, j, k, m, n, lz; + frame_t *x, xt, max; + + x = fe->frame; + m = fe->fft_order; + n = fe->fft_size; + + /* Bit-reverse the input. */ + j = 0; + for (i = 0; i < n - 1; ++i) { + if (i < j) { + xt = x[j]; + x[j] = x[i]; + x[i] = xt; + } + k = n / 2; + while (k <= j) { + j -= k; + k /= 2; + } + j += k; + } + /* Determine how many bits of dynamic range are in the input. */ + max = 0; + for (i = 0; i < n; ++i) + if (abs(x[i]) > max) + max = abs(x[i]); + /* The FFT has a gain of M bits, so we need to attenuate the input + * by M bits minus the number of leading zeroes in the input's + * range in order to avoid overflows. */ + for (lz = 0; lz < m; ++lz) + if (max & (1 << (15-lz))) + break; + + /* Basic butterflies (2-point FFT, real twiddle factors): + * x[i] = x[i] + 1 * x[i+1] + * x[i+1] = x[i] + -1 * x[i+1] + */ + /* The quantization error introduced by attenuating the input at + * any given stage of the FFT has a cascading effect, so we hold + * off on it until it's absolutely necessary. */ + for (i = 0; i < n; i += 2) { + int atten = (lz == 0); + xt = x[i] >> atten; + x[i] = xt + (x[i + 1] >> atten); + x[i + 1] = xt - (x[i + 1] >> atten); + } + + /* The rest of the butterflies, in stages from 1..m */ + for (k = 1; k < m; ++k) { + int n1, n2, n4; + /* Start attenuating once we hit the number of leading zeros. */ + int atten = (k >= lz); + + n4 = k - 1; + n2 = k; + n1 = k + 1; + /* Stride over each (1 << (k+1)) points */ + for (i = 0; i < n; i += (1 << n1)) { + /* Basic butterfly with real twiddle factors: + * x[i] = x[i] + 1 * x[i + (1<> atten; + x[i] = xt + (x[i + (1 << n2)] >> atten); + x[i + (1 << n2)] = xt - (x[i + (1 << n2)] >> atten); + + /* The other ones with real twiddle factors: + * x[i + (1<> atten; + x[i + (1 << n4)] = x[i + (1 << n4)] >> atten; + + /* Butterflies with complex twiddle factors. 
+ * There are (1<ccc[j << (m - n1)]; + ss = fe->sss[j << (m - n1)]; + + /* There are some symmetry properties which allow us + * to get away with only four multiplications here. */ + { + int32 tmp1, tmp2; + tmp1 = (int32)x[i3] * cc + (int32)x[i4] * ss; + tmp2 = (int32)x[i3] * ss - (int32)x[i4] * cc; + t1 = (int16)(tmp1 >> 15) >> atten; + t2 = (int16)(tmp2 >> 15) >> atten; + } + + x[i4] = (x[i2] >> atten) - t2; + x[i3] = (-x[i2] >> atten) - t2; + x[i2] = (x[i1] >> atten) - t1; + x[i1] = (x[i1] >> atten) + t1; + } + } + } + + /* Return the residual scaling factor. */ + return lz; +} +#else /* !FIXED16 */ +static int +fe_fft_real(fe_t *fe) +{ + int i, j, k, m, n; + frame_t *x, xt; + + x = fe->frame; + m = fe->fft_order; + n = fe->fft_size; + + /* Bit-reverse the input. */ + j = 0; + for (i = 0; i < n - 1; ++i) { + if (i < j) { + xt = x[j]; + x[j] = x[i]; + x[i] = xt; + } + k = n / 2; + while (k <= j) { + j -= k; + k /= 2; + } + j += k; + } + + /* Basic butterflies (2-point FFT, real twiddle factors): + * x[i] = x[i] + 1 * x[i+1] + * x[i+1] = x[i] + -1 * x[i+1] + */ + for (i = 0; i < n; i += 2) { + xt = x[i]; + x[i] = (xt + x[i + 1]); + x[i + 1] = (xt - x[i + 1]); + } + + /* The rest of the butterflies, in stages from 1..m */ + for (k = 1; k < m; ++k) { + int n1, n2, n4; + + n4 = k - 1; + n2 = k; + n1 = k + 1; + /* Stride over each (1 << (k+1)) points */ + for (i = 0; i < n; i += (1 << n1)) { + /* Basic butterfly with real twiddle factors: + * x[i] = x[i] + 1 * x[i + (1<ccc[j << (m - n1)]; + ss = fe->sss[j << (m - n1)]; + + /* There are some symmetry properties which allow us + * to get away with only four multiplications here. */ + t1 = COSMUL(x[i3], cc) + COSMUL(x[i4], ss); + t2 = COSMUL(x[i3], ss) - COSMUL(x[i4], cc); + + x[i4] = (x[i2] - t2); + x[i3] = (-x[i2] - t2); + x[i2] = (x[i1] - t1); + x[i1] = (x[i1] + t1); + } + } + } + + /* This isn't used, but return it for completeness. 
*/ + return m; +} +#endif /* !FIXED16 */ + +static void +fe_spec_magnitude(fe_t * fe) +{ + frame_t *fft; + powspec_t *spec; + int32 j, scale, fftsize; + + /* Do FFT and get the scaling factor back (only actually used in + * fixed-point). Note the scaling factor is expressed in bits. */ + scale = fe_fft_real(fe); + + /* Convenience pointers to make things less awkward below. */ + fft = fe->frame; + spec = fe->spec; + fftsize = fe->fft_size; + + /* We need to scale things up the rest of the way to N. */ + scale = fe->fft_order - scale; + + /* The first point (DC coefficient) has no imaginary part */ + { +#ifdef FIXED16 + spec[0] = fixlog(abs(fft[0]) << scale) * 2; +#elif defined(FIXED_POINT) + spec[0] = FIXLN(abs(fft[0]) << scale) * 2; +#else + spec[0] = fft[0] * fft[0]; +#endif + } + + for (j = 1; j <= fftsize / 2; j++) { +#ifdef FIXED16 + int32 rr = fixlog(abs(fft[j]) << scale) * 2; + int32 ii = fixlog(abs(fft[fftsize - j]) << scale) * 2; + spec[j] = fe_log_add(rr, ii); +#elif defined(FIXED_POINT) + int32 rr = FIXLN(abs(fft[j]) << scale) * 2; + int32 ii = FIXLN(abs(fft[fftsize - j]) << scale) * 2; + spec[j] = fe_log_add(rr, ii); +#else + spec[j] = fft[j] * fft[j] + fft[fftsize - j] * fft[fftsize - j]; +#endif + } +} + +static void +fe_mel_spec(fe_t * fe) +{ + int whichfilt; + powspec_t *spec, *mfspec; + + /* Convenience poitners. 
*/ + spec = fe->spec; + mfspec = fe->mfspec; + + for (whichfilt = 0; whichfilt < fe->mel_fb->num_filters; whichfilt++) { + int spec_start, filt_start, i; + + spec_start = fe->mel_fb->spec_start[whichfilt]; + filt_start = fe->mel_fb->filt_start[whichfilt]; + +#ifdef FIXED_POINT + mfspec[whichfilt] = + spec[spec_start] + fe->mel_fb->filt_coeffs[filt_start]; + for (i = 1; i < fe->mel_fb->filt_width[whichfilt]; i++) { + mfspec[whichfilt] = fe_log_add(mfspec[whichfilt], + spec[spec_start + i] + + fe->mel_fb-> + filt_coeffs[filt_start + i]); + } +#else /* !FIXED_POINT */ + mfspec[whichfilt] = 0; + for (i = 0; i < fe->mel_fb->filt_width[whichfilt]; i++) + mfspec[whichfilt] += + spec[spec_start + i] * fe->mel_fb->filt_coeffs[filt_start + + i]; +#endif /* !FIXED_POINT */ + } + +} + +#define LOG_FLOOR 1e-4 + +static void +fe_mel_cep(fe_t * fe, mfcc_t * mfcep) +{ + int32 i; + powspec_t *mfspec; + + /* Convenience pointer. */ + mfspec = fe->mfspec; + + for (i = 0; i < fe->mel_fb->num_filters; ++i) { +#ifndef FIXED_POINT /* It's already in log domain for fixed point */ + mfspec[i] = log(mfspec[i] + LOG_FLOOR); +#endif /* !FIXED_POINT */ + } + + /* If we are doing LOG_SPEC, then do nothing. */ + if (fe->log_spec == RAW_LOG_SPEC) { + for (i = 0; i < fe->feature_dimension; i++) { + mfcep[i] = (mfcc_t) mfspec[i]; + } + } + /* For smoothed spectrum, do DCT-II followed by (its inverse) DCT-III */ + else if (fe->log_spec == SMOOTH_LOG_SPEC) { + /* FIXME: This is probably broken for fixed-point. 
*/ + fe_dct2(fe, mfspec, mfcep, 0); + fe_dct3(fe, mfcep, mfspec); + for (i = 0; i < fe->feature_dimension; i++) { + mfcep[i] = (mfcc_t) mfspec[i]; + } + } + else if (fe->transform == DCT_II) + fe_dct2(fe, mfspec, mfcep, FALSE); + else if (fe->transform == DCT_HTK) + fe_dct2(fe, mfspec, mfcep, TRUE); + else + fe_spec2cep(fe, mfspec, mfcep); + + return; +} + +void +fe_spec2cep(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep) +{ + int32 i, j, beta; + + /* Compute C0 separately (its basis vector is 1) to avoid + * costly multiplications. */ + mfcep[0] = mflogspec[0] / 2; /* beta = 0.5 */ + for (j = 1; j < fe->mel_fb->num_filters; j++) + mfcep[0] += mflogspec[j]; /* beta = 1.0 */ + mfcep[0] /= (frame_t) fe->mel_fb->num_filters; + + for (i = 1; i < fe->num_cepstra; ++i) { + mfcep[i] = 0; + for (j = 0; j < fe->mel_fb->num_filters; j++) { + if (j == 0) + beta = 1; /* 0.5 */ + else + beta = 2; /* 1.0 */ + mfcep[i] += COSMUL(mflogspec[j], + fe->mel_fb->mel_cosine[i][j]) * beta; + } + /* Note that this actually normalizes by num_filters, like the + * original Sphinx front-end, due to the doubled 'beta' factor + * above. */ + mfcep[i] /= (frame_t) fe->mel_fb->num_filters * 2; + } +} + +void +fe_dct2(fe_t * fe, const powspec_t * mflogspec, mfcc_t * mfcep, int htk) +{ + int32 i, j; + + /* Compute C0 separately (its basis vector is 1) to avoid + * costly multiplications. 
*/ + mfcep[0] = mflogspec[0]; + for (j = 1; j < fe->mel_fb->num_filters; j++) + mfcep[0] += mflogspec[j]; + if (htk) + mfcep[0] = COSMUL(mfcep[0], fe->mel_fb->sqrt_inv_2n); + else /* sqrt(1/N) = sqrt(2/N) * 1/sqrt(2) */ + mfcep[0] = COSMUL(mfcep[0], fe->mel_fb->sqrt_inv_n); + + for (i = 1; i < fe->num_cepstra; ++i) { + mfcep[i] = 0; + for (j = 0; j < fe->mel_fb->num_filters; j++) { + mfcep[i] += COSMUL(mflogspec[j], fe->mel_fb->mel_cosine[i][j]); + } + mfcep[i] = COSMUL(mfcep[i], fe->mel_fb->sqrt_inv_2n); + } +} + +void +fe_lifter(fe_t * fe, mfcc_t * mfcep) +{ + int32 i; + + if (fe->mel_fb->lifter_val == 0) + return; + + for (i = 0; i < fe->num_cepstra; ++i) { + mfcep[i] = MFCCMUL(mfcep[i], fe->mel_fb->lifter[i]); + } +} + +void +fe_dct3(fe_t * fe, const mfcc_t * mfcep, powspec_t * mflogspec) +{ + int32 i, j; + + for (i = 0; i < fe->mel_fb->num_filters; ++i) { + mflogspec[i] = COSMUL(mfcep[0], SQRT_HALF); + for (j = 1; j < fe->num_cepstra; j++) { + mflogspec[i] += COSMUL(mfcep[j], fe->mel_fb->mel_cosine[j][i]); + } + mflogspec[i] = COSMUL(mflogspec[i], fe->mel_fb->sqrt_inv_2n); + } +} + +int +fe_write_frame(fe_t * fe, mfcc_t * feat) +{ + fe_spec_magnitude(fe); + fe_mel_spec(fe); + fe_remove_noise(fe); + fe_mel_cep(fe, feat); + fe_lifter(fe, feat); + + return 1; +} + + +void * +fe_create_2d(int32 d1, int32 d2, int32 elem_size) +{ + return (void *) ckd_calloc_2d(d1, d2, elem_size); +} + +void +fe_free_2d(void *arr) +{ + ckd_free_2d((void **) arr); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_type.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_type.h new file mode 100644 index 0000000000000000000000000000000000000000..160ed8ff8602471077e5ed185ed8d5d2dcd8b3f0 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_type.h @@ -0,0 +1,65 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* 
==================================================================== + * Copyright (c) 2013 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +#ifndef FE_TYPE_H +#define FE_TYPE_H + +#ifdef HAVE_CONFIG_H /* NOTE(review): the #include below has no header name in this copy -- presumably <config.h> was stripped; confirm against upstream. */ +#include +#endif + +#include "sphinxbase/fe.h" +#include "sphinxbase/fixpoint.h" +/* frame_t, window_t, powspec_t and complex are chosen at compile time: + * FIXED16 selects int16 frames/windows, FIXED_POINT selects fixed32 ones + * (powspec_t is int32 in both), and with neither macro defined all of them + * are float64. */ +#ifdef FIXED16 +/* Q15 format */ +typedef int16 frame_t; +typedef int16 window_t; +typedef int32 powspec_t; +typedef struct { int16 r, i; } complex; +#elif defined(FIXED_POINT) +typedef fixed32 frame_t; +typedef int32 powspec_t; +typedef fixed32 window_t; +typedef struct { fixed32 r, i; } complex; +#else /* neither FIXED16 nor FIXED_POINT: floating point */ +typedef float64 frame_t; +typedef float64 powspec_t; +typedef float64 window_t; +typedef struct { float64 r, i; } complex; +#endif /* FIXED16 / FIXED_POINT */ + +#endif /* FE_TYPE_H */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp.c new file mode 100644 index 0000000000000000000000000000000000000000..e409bea76ea4a9f4ee706ebe0ab68ae3a5ae78d8 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp.c @@ -0,0 +1,252 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: fe_warp.c + * + * Description: + * Allows a caller to choose a warping function. + *********************************************************************/ + +/* static char rcsid[] = "@(#)$Id: fe_warp.c,v 1.2 2006/02/17 00:31:34 egouvea Exp $";*/ + +#include "fe_warp_inverse_linear.h" +#include "fe_warp_affine.h" +#include "fe_warp_piecewise_linear.h" +#include "fe_warp.h" + +#include "sphinxbase/err.h" + +#include +#include +#include +#include + +/* This is for aliases for each of the entries below. Currently not + used. 
+*/ +/* NOTE: despite the comment above, fe_warp_set() does fall back to this + * alias table when id_name is not found in name2id. Alias i selects warp + * id i, so "linear" selects the affine warp. */ +static char *__name2id[] = { + "inverse", + "linear", + "piecewise", + NULL +}; + +/* Canonical warp names; the index of a name is its warp id. */ +static char *name2id[] = { + "inverse_linear", + "affine", + "piecewise_linear", + NULL +}; + +/* Dispatch table indexed by warp id; entries must stay in the same order + * as name2id because fe_warp_set() stores the name index as the id. */ +static fe_warp_conf_t fe_warp_conf[FE_WARP_ID_MAX + 1] = { + {fe_warp_inverse_linear_set_parameters, + fe_warp_inverse_linear_doc, + fe_warp_inverse_linear_id, + fe_warp_inverse_linear_n_param, + fe_warp_inverse_linear_warped_to_unwarped, + fe_warp_inverse_linear_unwarped_to_warped, + fe_warp_inverse_linear_print}, /* Inverse linear warping */ + {fe_warp_affine_set_parameters, + fe_warp_affine_doc, + fe_warp_affine_id, + fe_warp_affine_n_param, + fe_warp_affine_warped_to_unwarped, + fe_warp_affine_unwarped_to_warped, + fe_warp_affine_print}, /* Affine warping */ + {fe_warp_piecewise_linear_set_parameters, + fe_warp_piecewise_linear_doc, + fe_warp_piecewise_linear_id, + fe_warp_piecewise_linear_n_param, + fe_warp_piecewise_linear_warped_to_unwarped, + fe_warp_piecewise_linear_unwarped_to_warped, + fe_warp_piecewise_linear_print}, /* Piecewise_Linear warping */ +}; + +/* fe_warp_set: look id_name up in name2id, then in the __name2id aliases, + * and store the index of the match in mel->warp_id. Returns FE_SUCCESS on + * a match. Otherwise logs the implemented names, sets mel->warp_id to + * FE_WARP_ID_NONE and returns FE_START_ERROR. */ +int +fe_warp_set(melfb_t *mel, const char *id_name) +{ + uint32 i; + + for (i = 0; name2id[i]; i++) { + if (strcmp(id_name, name2id[i]) == 0) { + mel->warp_id = i; + break; + } + } + + if (name2id[i] == NULL) { + for (i = 0; __name2id[i]; i++) { + if (strcmp(id_name, __name2id[i]) == 0) { + mel->warp_id = i; + break; + } + } + if (__name2id[i] == NULL) { + E_ERROR("Unimplemented warping function %s\n", id_name); + E_ERROR("Implemented functions are:\n"); + for (i = 0; name2id[i]; i++) { + fprintf(stderr, "\t%s\n", name2id[i]); + } + mel->warp_id = FE_WARP_ID_NONE; + + return FE_START_ERROR; + } + } + + return FE_SUCCESS; +} + +/* fe_warp_set_parameters: forward param_str and sampling_rate to the + * set_parameters entry of the selected warp; E_FATAL when mel->warp_id is + * unset (FE_WARP_ID_NONE) or out of range. */ +void +fe_warp_set_parameters(melfb_t *mel, char const *param_str, float sampling_rate) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + fe_warp_conf[mel->warp_id].set_parameters(param_str, sampling_rate); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("fe_warp module must be configured w/ a 
valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } +} + +/* fe_warp_doc: return the doc() string of the selected warp; E_FATAL when + * mel->warp_id is unset or out of range. */ +const char * +fe_warp_doc(melfb_t *mel) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + return fe_warp_conf[mel->warp_id].doc(); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("fe_warp module must be configured w/ a valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } + + return NULL; +} + +/* fe_warp_id: return mel->warp_id when it is a valid table index (asserting + * that it equals the table entry's own id()), or FE_WARP_ID_NONE when it is + * unset; E_FATAL on any other value. */ +uint32 +fe_warp_id(melfb_t *mel) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + assert(mel->warp_id == fe_warp_conf[mel->warp_id].id()); + return mel->warp_id; + } + else if (mel->warp_id != FE_WARP_ID_NONE) { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } + + return FE_WARP_ID_NONE; +} + +/* fe_warp_n_param: return n_param() of the selected warp; E_FATAL when + * mel->warp_id is unset or out of range. */ +uint32 +fe_warp_n_param(melfb_t *mel) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + return fe_warp_conf[mel->warp_id].n_param(); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("fe_warp module must be configured w/ a valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } + + return 0; +} + +/* fe_warp_warped_to_unwarped: apply the selected warp's warped_to_unwarped + * mapping to nonlinear; E_FATAL when mel->warp_id is unset or out of range. */ +float +fe_warp_warped_to_unwarped(melfb_t *mel, float nonlinear) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + return fe_warp_conf[mel->warp_id].warped_to_unwarped(nonlinear); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("fe_warp module must be configured w/ a valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } + + return 0; +} + +/* fe_warp_unwarped_to_warped: apply the selected warp's unwarped_to_warped + * mapping to linear; E_FATAL when mel->warp_id is unset or out of range. */ +float +fe_warp_unwarped_to_warped(melfb_t *mel,float linear) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + return fe_warp_conf[mel->warp_id].unwarped_to_warped(linear); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("fe_warp module must be configured w/ a valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + 
mel->warp_id); + } + + return 0; +} + +/* fe_warp_print: call the selected warp's print(label); E_FATAL when + * mel->warp_id is unset or out of range. */ +void +fe_warp_print(melfb_t *mel, const char *label) +{ + if (mel->warp_id <= FE_WARP_ID_MAX) { + fe_warp_conf[mel->warp_id].print(label); + } + else if (mel->warp_id == FE_WARP_ID_NONE) { + E_FATAL("fe_warp module must be configured w/ a valid ID\n"); + } + else { + E_FATAL + ("fe_warp module misconfigured with invalid fe_warp_id %u\n", + mel->warp_id); + } +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp.h new file mode 100644 index 0000000000000000000000000000000000000000..f2fd14550c33d3f2197963aa0e751edbe67f3601 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp.h @@ -0,0 +1,90 @@ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef FE_WARP_H +#define FE_WARP_H + +#include "fe_internal.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif +/* Warp function ids. fe_warp.c sizes its dispatch table as + * FE_WARP_ID_MAX + 1 and only dispatches ids <= FE_WARP_ID_MAX, so + * FE_WARP_ID_EIDE_GISH (3) has no table entry. FE_WARP_ID_NONE is the + * value fe_warp_set() stores when the requested name is not recognised. */ +#define FE_WARP_ID_INVERSE_LINEAR 0 +#define FE_WARP_ID_AFFINE 1 +#define FE_WARP_ID_PIECEWISE_LINEAR 2 +#define FE_WARP_ID_EIDE_GISH 3 +#define FE_WARP_ID_MAX 2 +#define FE_WARP_ID_NONE 0xffffffff +/* Entry points implemented by one warping function; fe_warp.c keeps one + * such record per warp id and forwards each fe_warp_* call through it. */ +typedef struct { + void (*set_parameters)(char const *param_str, float sampling_rate); + const char * (*doc)(void); + uint32 (*id)(void); + uint32 (*n_param)(void); + float (*warped_to_unwarped)(float nonlinear); + float (*unwarped_to_warped)(float linear); + void (*print)(const char *label); +} fe_warp_conf_t; + +int fe_warp_set(melfb_t *mel, const char *id_name); + +uint32 fe_warp_id(melfb_t *mel); + +const char * fe_warp_doc(melfb_t *mel); + +void fe_warp_set_parameters(melfb_t *mel, char const *param_str, float sampling_rate); + +uint32 fe_warp_n_param(melfb_t *mel); + +float fe_warp_warped_to_unwarped(melfb_t *mel, float nonlinear); + +float fe_warp_unwarped_to_warped(melfb_t *mel, float linear); + +void fe_warp_print(melfb_t *mel, const char *label); + +#define FE_WARP_NO_SIZE 0xffffffff + +#ifdef __cplusplus +} +#endif + + +#endif /* FE_WARP_H */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_affine.c 
b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_affine.c new file mode 100644 index 0000000000000000000000000000000000000000..3986119175c6a74e6982dfe20c7dc01046fae2ef --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_affine.c @@ -0,0 +1,181 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: fe_warp_affine.c + * + * Description: + * Warp the frequency axis according to an affine function, i.e.: + * + * w' = a * w + b + * + *********************************************************************/ + +/* static char rcsid[] = "@(#)$Id: fe_warp_affine.c,v 1.2 2006/02/17 00:31:34 egouvea Exp $"; */ + +/* NOTE(review): the four #include lines below carry no header names in this + * copy (the <...> part appears to have been stripped); restore them from + * upstream before building. */ +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning (disable: 4996) +#endif + +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/err.h" + +#include "fe_warp.h" +#include "fe_warp_affine.h" + +#define N_PARAM 2 +#define YES 1 +#define NO 0 + +/* + * params[0] : a + * params[1] : b + * is_neutral : when YES the two mapping functions return their argument unchanged + * p_str : the parameter string most recently parsed by set_parameters + * nyquist_frequency : sampling_rate / 2 from the latest set_parameters call + */ +static float params[N_PARAM] = { 1.0f, 0.0f }; +static int32 is_neutral = YES; +static char p_str[256] = ""; +static float nyquist_frequency = 0.0f; + + +/* Returns the one-line description of this warp. */ +const char * +fe_warp_affine_doc() +{ + return "affine :== < w' = a * x + b >"; +} + +/* Returns this warp's id, FE_WARP_ID_AFFINE. */ +uint32 +fe_warp_affine_id() +{ + return FE_WARP_ID_AFFINE; +} + +/* Returns the number of parameters this warp takes (N_PARAM, i.e. 2). */ +uint32 +fe_warp_affine_n_param() +{ + return N_PARAM; +} + +/* fe_warp_affine_set_parameters: record sampling_rate / 2 as the Nyquist + * frequency and parse up to N_PARAM space/tab separated numbers (a, then b) + * from param_str. The warp stays neutral (identity) when param_str is NULL, + * does not fit the 256-byte buffers, or yields a == 0. A string equal to the + * one parsed last time is not parsed again. */ +void +fe_warp_affine_set_parameters(char const *param_str, float sampling_rate) +{ + char *tok; + char *seps = " \t"; + char temp_param_str[256]; + int param_index = 0; + + nyquist_frequency = sampling_rate / 2; + if (param_str == NULL) { + is_neutral = YES; + return; + } + /* The new 
parameters are the same as the current ones, so do nothing. */ + if (strcmp(param_str, p_str) == 0) { + return; + } + /* temp_param_str and p_str are fixed 256-byte buffers filled with + * strcpy(); refuse anything that would not fit, terminator included. */ + if (strlen(param_str) >= sizeof(p_str)) { + is_neutral = YES; + E_ERROR + ("Affine warping parameter string is longer than %d characters, warping not applied.\n", + (int) (sizeof(p_str) - 1)); + return; + } + is_neutral = NO; + strcpy(temp_param_str, param_str); + memset(params, 0, N_PARAM * sizeof(float)); + strcpy(p_str, param_str); + /* FIXME: strtok() is not re-entrant... */ + tok = strtok(temp_param_str, seps); + while (tok != NULL) { + params[param_index++] = (float) atof_c(tok); + tok = strtok(NULL, seps); + if (param_index >= N_PARAM) { + break; + } + } + if (tok != NULL) { + E_INFO + ("Affine warping takes up to two arguments, %s ignored.\n", + tok); + } + if (params[0] == 0) { + is_neutral = YES; + E_INFO + ("Affine warping cannot have slope zero, warping not applied.\n"); + } +} + +/* Map a warped frequency back to the unwarped axis, (nonlinear - b) / a, + * warning when the result exceeds the Nyquist frequency. Identity when the + * warp is neutral. */ +float +fe_warp_affine_warped_to_unwarped(float nonlinear) +{ + if (is_neutral) { + return nonlinear; + } + else { + /* linear = (nonlinear - b) / a */ + float temp = nonlinear - params[1]; + temp /= params[0]; + if (temp > nyquist_frequency) { + E_WARN + ("Warp factor %g results in frequency (%.1f) higher than Nyquist (%.1f)\n", + params[0], temp, nyquist_frequency); + } + return temp; + } +} + +/* Map an unwarped frequency to the warped axis, a * linear + b. Identity + * when the warp is neutral. */ +float +fe_warp_affine_unwarped_to_warped(float linear) +{ + if (is_neutral) { + return linear; + } + else { + /* nonlinear = a * linear + b */ + float temp = linear * params[0]; + temp += params[1]; + return temp; + } +} + +/* Print every parameter as "label[index]: value" on one line of stdout. */ +void +fe_warp_affine_print(const char *label) +{ + uint32 i; + + for (i = 0; i < N_PARAM; i++) { + printf("%s[%04u]: %6.3f ", label, i, params[i]); + } + printf("\n"); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_affine.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_affine.h new file mode 100644 index 0000000000000000000000000000000000000000..44027d97a492f4817d4c44011d730fea71aec03e --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_affine.h @@ -0,0 +1,76 @@ +/* 
==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef FE_WARP_AFFINE_H +#define FE_WARP_AFFINE_H + +#include "sphinxbase/fe.h" + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. 
*/ +} +#endif + +const char * +fe_warp_affine_doc(void); + +uint32 +fe_warp_affine_id(void); + +uint32 +fe_warp_affine_n_param(void); + +void +fe_warp_affine_set_parameters(char const *param_str, float sampling_rate); + +float +fe_warp_affine_warped_to_unwarped(float nonlinear); + +float +fe_warp_affine_unwarped_to_warped(float linear); + +void +fe_warp_affine_print(const char *label); + +#ifdef __cplusplus +} +#endif + +#endif /* FE_WARP_AFFINE_H */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_inverse_linear.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_inverse_linear.c new file mode 100644 index 0000000000000000000000000000000000000000..85e42986bc8718c5e805df14d895535b62ebb9f7 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_inverse_linear.c @@ -0,0 +1,178 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: fe_warp_inverse_linear.c + * + * Description: + * Warp the frequency axis according to an inverse_linear function, i.e.: + * + * w' = w / a + * + *********************************************************************/ + +/* static char rcsid[] = "@(#)$Id: fe_warp_inverse_linear.c,v 1.3 2006/02/23 19:40:11 eht Exp $"; */ +/* NOTE(review): the four #include lines below carry no header names in this + * copy (the <...> part appears to have been stripped); restore them from + * upstream before building. */ +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning (disable: 4996) +#endif + +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/err.h" + +#include "fe_warp.h" +#include "fe_warp_inverse_linear.h" + +#define N_PARAM 1 +#define YES 1 +#define NO 0 + +/* + * params[0] : a + * is_neutral : when YES the two mapping functions return their argument unchanged + * p_str : the parameter string most recently parsed by set_parameters + * nyquist_frequency : sampling_rate / 2 from the latest set_parameters call + */ +static float params[N_PARAM] = { 1.0f }; +static int32 is_neutral = YES; +static char p_str[256] = ""; +static float nyquist_frequency = 0.0f; + +/* Returns the one-line description of this warp. */ +const char * +fe_warp_inverse_linear_doc() +{ + return "inverse_linear :== < w' = x / a >"; +} +/* Returns this warp's id, FE_WARP_ID_INVERSE_LINEAR. */ +uint32 +fe_warp_inverse_linear_id() +{ + return FE_WARP_ID_INVERSE_LINEAR; +} +/* Returns the number of parameters this warp takes (N_PARAM, i.e. 1). */ +uint32 +fe_warp_inverse_linear_n_param() +{ + return N_PARAM; +} + 
+/* fe_warp_inverse_linear_set_parameters: record sampling_rate / 2 as the + * Nyquist frequency and parse the first space/tab separated number in + * param_str as the warp factor a. The warp stays neutral (identity) when + * param_str is NULL, does not fit the 256-byte buffers, or yields a == 0. + * A string equal to the one parsed last time is not parsed again. */ +void +fe_warp_inverse_linear_set_parameters(char const *param_str, float sampling_rate) +{ + char *tok; + char *seps = " \t"; + char temp_param_str[256]; + int param_index = 0; + + nyquist_frequency = sampling_rate / 2; + if (param_str == NULL) { + is_neutral = YES; + return; + } + /* The new parameters are the same as the current ones, so do nothing. */ + if (strcmp(param_str, p_str) == 0) { + return; + } + /* temp_param_str and p_str are fixed 256-byte buffers filled with + * strcpy(); refuse anything that would not fit, terminator included. */ + if (strlen(param_str) >= sizeof(p_str)) { + is_neutral = YES; + E_ERROR + ("Inverse linear warping parameter string is longer than %d characters, warping not applied.\n", + (int) (sizeof(p_str) - 1)); + return; + } + is_neutral = NO; + strcpy(temp_param_str, param_str); + memset(params, 0, N_PARAM * sizeof(float)); + strcpy(p_str, param_str); + /* FIXME: strtok() is not re-entrant... */ + tok = strtok(temp_param_str, seps); + while (tok != NULL) { + params[param_index++] = (float) atof_c(tok); + tok = strtok(NULL, seps); + if (param_index >= N_PARAM) { + break; + } + } + if (tok != NULL) { + E_INFO + ("Inverse linear warping takes only one argument, %s ignored.\n", + tok); + } + if (params[0] == 0) { + is_neutral = YES; + E_INFO + ("Inverse linear warping cannot have slope zero, warping not applied.\n"); + } +} + +/* Map a warped frequency back to the unwarped axis, nonlinear * a, warning + * when the result exceeds the Nyquist frequency. Identity when the warp is + * neutral. */ +float +fe_warp_inverse_linear_warped_to_unwarped(float nonlinear) +{ + if (is_neutral) { + return nonlinear; + } + else { + /* linear = nonlinear * a */ + float temp = nonlinear * params[0]; + if (temp > nyquist_frequency) { + E_WARN + ("Warp factor %g results in frequency (%.1f) higher than Nyquist (%.1f)\n", + params[0], temp, nyquist_frequency); + } + return temp; + } +} + +/* Map an unwarped frequency to the warped axis, linear / a. Identity when + * the warp is neutral. */ +float +fe_warp_inverse_linear_unwarped_to_warped(float linear) +{ + if (is_neutral) { + return linear; + } + else { + /* nonlinear = linear / a */ + float temp = linear / params[0]; + return temp; + } +} + +/* Print every parameter as "label[index]: value" on one line of stdout. */ +void +fe_warp_inverse_linear_print(const char *label) +{ + uint32 i; + + for (i = 0; i < N_PARAM; i++) { + printf("%s[%04u]: %6.3f ", label, i, params[i]); + } + printf("\n"); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_inverse_linear.h 
b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_inverse_linear.h new file mode 100644 index 0000000000000000000000000000000000000000..8d4a76725e9c01a9b0e60be7e688fe099f44b655 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_inverse_linear.h @@ -0,0 +1,77 @@ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef FE_WARP_inverse_linear_H +#define FE_WARP_inverse_linear_H + +#include "sphinxbase/fe.h" + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +const char * +fe_warp_inverse_linear_doc(void); + +uint32 +fe_warp_inverse_linear_id(void); + +uint32 +fe_warp_inverse_linear_n_param(void); + +void +fe_warp_inverse_linear_set_parameters(char const *param_str, float sampling_rate); + +float +fe_warp_inverse_linear_warped_to_unwarped(float nonlinear); + +float +fe_warp_inverse_linear_unwarped_to_warped(float linear); + +void +fe_warp_inverse_linear_print(const char *label); + +#ifdef __cplusplus +} +#endif + + +#endif /* FE_WARP_inverse_linear_H */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_piecewise_linear.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_piecewise_linear.c new file mode 100644 index 0000000000000000000000000000000000000000..34570120fc74c185d37c8cbb75f40d610915ec48 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_piecewise_linear.c @@ -0,0 +1,223 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. 
All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: fe_warp_piecewise_linear.c + * + * Description: + * + * Warp the frequency axis according to an piecewise linear + * function. 
The function is linear up to a frequency F, where
+ *     the slope changes so that the Nyquist frequency in the warped
+ *     axis maps to the Nyquist frequency in the unwarped.
+ *
+ *              w' = a * w,        w <  F
+ *              w' = a' * w + b,   w >= F
+ *              w'(0)   = 0
+ *              w'(F)   = a * F
+ *              w'(Nyq) = Nyq
+ *
+ *********************************************************************/
+
+/* static char rcsid[] = "@(#)$Id: fe_warp_piecewise_linear.c,v 1.2 2006/02/17 00:31:34 egouvea Exp $"; */
+
+#include
+#include
+#include
+#include
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4996)
+#endif
+
+#include "sphinxbase/strfuncs.h"
+#include "sphinxbase/err.h"
+
+#include "fe_warp.h"
+#include "fe_warp_piecewise_linear.h"
+
+#define N_PARAM 2
+#define YES 1
+#define NO 0
+
+/*
+ * params[0] : a
+ * params[1] : F (the non-differentiable point)
+ */
+static float params[N_PARAM] = { 1.0f, 6800.0f };
+/* Slope (index 0) and intercept (index 1) of the segment used at and above F. */
+static float final_piece[2];
+/* YES while no warping is applied; the conversion functions then return their input. */
+static int32 is_neutral = YES;
+/* Copy of the last parameter string that was parsed (at most 255 characters). */
+static char p_str[256] = "";
+static float nyquist_frequency = 0.0f;
+
+
+/** Return the one-line description string of this warping function. */
+const char *
+fe_warp_piecewise_linear_doc()
+{
+    return "piecewise_linear :== < w' = a * w, w < F >";
+}
+
+/** Return the identifier constant of this warping function. */
+uint32
+fe_warp_piecewise_linear_id()
+{
+    return FE_WARP_ID_PIECEWISE_LINEAR;
+}
+
+/** Return the number of parameters this warping function accepts. */
+uint32
+fe_warp_piecewise_linear_n_param()
+{
+    return N_PARAM;
+}
+
+/**
+ * Parse the warping parameters and precompute the upper segment.
+ *
+ * @param param_str      Space/tab separated list "a [F]".  NULL switches
+ *                       warping off.  A string equal to the one parsed last
+ *                       time returns early without re-parsing.
+ * @param sampling_rate  Sampling rate; half of it is stored as the
+ *                       Nyquist frequency.
+ *
+ * At most N_PARAM values are read; a further token is reported and
+ * ignored.  A missing or zero F is replaced by 0.85 * sampling_rate.  A
+ * zero slope switches warping off.
+ *
+ * NOTE(review): the early return for an unchanged string happens after
+ * nyquist_frequency was updated but before final_piece is recomputed, and
+ * it also leaves is_neutral untouched after a previous NULL call - confirm
+ * whether callers rely on that.
+ */
+void
+fe_warp_piecewise_linear_set_parameters(char const *param_str,
+                                        float sampling_rate)
+{
+    char *tok;
+    char *seps = " \t";
+    char temp_param_str[256];
+    int param_index = 0;
+
+    nyquist_frequency = sampling_rate / 2;
+    if (param_str == NULL) {
+        is_neutral = YES;
+        return;
+    }
+    /* The new parameters are the same as the current ones, so do nothing. */
+    if (strcmp(param_str, p_str) == 0) {
+        return;
+    }
+    is_neutral = NO;
+    /* Both destinations are fixed 256-byte buffers and param_str comes
+     * from the caller, so copy with an explicit bound and terminate by
+     * hand instead of using an unbounded strcpy(). */
+    if (strlen(param_str) >= sizeof(temp_param_str)) {
+        E_WARN
+            ("Piecewise linear warping parameter string longer than %d characters, truncated.\n",
+             (int) (sizeof(temp_param_str) - 1));
+    }
+    strncpy(temp_param_str, param_str, sizeof(temp_param_str) - 1);
+    temp_param_str[sizeof(temp_param_str) - 1] = '\0';
+    memset(params, 0, N_PARAM * sizeof(float));
+    memset(final_piece, 0, 2 * sizeof(float));
+    strncpy(p_str, param_str, sizeof(p_str) - 1);
+    p_str[sizeof(p_str) - 1] = '\0';
+    /* FIXME: strtok() is not re-entrant... */
+    tok = strtok(temp_param_str, seps);
+    while (tok != NULL) {
+        params[param_index++] = (float) atof_c(tok);
+        tok = strtok(NULL, seps);
+        if (param_index >= N_PARAM) {
+            break;
+        }
+    }
+    if (tok != NULL) {
+        E_INFO
+            ("Piecewise linear warping takes up to two arguments, %s ignored.\n",
+             tok);
+    }
+    if (params[1] < sampling_rate) {
+        /* Precompute these. These are the coefficients of a
+         * straight line that contains the points (F, aF) and (N,
+         * N), where a = params[0], F = params[1], N = Nyquist
+         * frequency.
+         */
+        if (params[1] == 0) {
+            params[1] = sampling_rate * 0.85f;
+        }
+        final_piece[0] =
+            (nyquist_frequency -
+             params[0] * params[1]) / (nyquist_frequency - params[1]);
+        final_piece[1] =
+            nyquist_frequency * params[1] * (params[0] -
+                                             1.0f) / (nyquist_frequency -
+                                                      params[1]);
+    }
+    else {
+        memset(final_piece, 0, 2 * sizeof(float));
+    }
+    if (params[0] == 0) {
+        is_neutral = YES;
+        E_INFO
+            ("Piecewise linear warping cannot have slope zero, warping not applied.\n");
+    }
+}
+
+/**
+ * Map a warped frequency back to the unwarped axis.
+ *
+ * Returns the input unchanged while warping is neutral.  A result above
+ * the Nyquist frequency is reported with a warning and still returned.
+ */
+float
+fe_warp_piecewise_linear_warped_to_unwarped(float nonlinear)
+{
+    if (is_neutral) {
+        return nonlinear;
+    }
+    else {
+        /* linear = (nonlinear - b) / a */
+        float temp;
+        if (nonlinear < params[0] * params[1]) {
+            temp = nonlinear / params[0];
+        }
+        else {
+            temp = nonlinear - final_piece[1];
+            temp /= final_piece[0];
+        }
+        if (temp > nyquist_frequency) {
+            E_WARN
+                ("Warp factor %g results in frequency (%.1f) higher than Nyquist (%.1f)\n",
+                 params[0], temp, nyquist_frequency);
+        }
+        return temp;
+    }
+}
+
+/**
+ * Map an unwarped frequency onto the warped axis.
+ *
+ * Returns the input unchanged while warping is neutral; otherwise applies
+ * slope a below F and the precomputed upper segment from F upwards.
+ */
+float
+fe_warp_piecewise_linear_unwarped_to_warped(float linear)
+{
+    if (is_neutral) {
+        return linear;
+    }
+    else {
+        float temp;
+        /* nonlinear = a * linear - b */
+        if (linear < params[1]) {
+            temp = linear * params[0];
+        }
+        else {
+            temp = final_piece[0] * linear + final_piece[1];
+        }
+        return temp;
+    }
+}
+
+/** Print every parameter to stdout, each prefixed with label and its index. */
+void
+fe_warp_piecewise_linear_print(const char *label)
+{
+    uint32 i;
+
+    for (i = 0; i < N_PARAM; i++) {
+        printf("%s[%04u]: %6.3f ", label, i,
params[i]); + } + printf("\n"); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_piecewise_linear.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_piecewise_linear.h new file mode 100644 index 0000000000000000000000000000000000000000..f15cb251e5cc9e2ff71a5e56b1011d7761bf8f66 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fe_warp_piecewise_linear.h @@ -0,0 +1,77 @@ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef FE_WARP_PIECEWIDE_LINEAR_H +#define FE_WARP_PIECEWIDE_LINEAR_H + +#include "sphinxbase/fe.h" + + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +const char * +fe_warp_piecewise_linear_doc(void); + +uint32 +fe_warp_piecewise_linear_id(void); + +uint32 +fe_warp_piecewise_linear_n_param(void); + +void +fe_warp_piecewise_linear_set_parameters(char const *param_str, float sampling_rate); + +float +fe_warp_piecewise_linear_warped_to_unwarped(float nonlinear); + +float +fe_warp_piecewise_linear_unwarped_to_warped(float linear); + +void +fe_warp_piecewise_linear_print(const char *label); + +#ifdef __cplusplus +} +#endif + + +#endif /* FE_WARP_PIECEWIDE_LINEAR_H */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fixlog.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fixlog.c new file mode 100644 index 0000000000000000000000000000000000000000..f00c71f4d9ef41a99c2562f97caffbb375df5c17 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/fixlog.c @@ -0,0 +1,229 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2005 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + * File: fixlog.c + * + * Description: Fast approximate fixed-point logarithms + * + * Author: David Huggins-Daines + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "sphinxbase/prim_type.h" +#include "sphinxbase/fixpoint.h" + +#include "fe_internal.h" + +/* Table of log2(x/128)*(1<= 4 + y = __builtin_clz(x); + x <<= y; + y = (31 - y); +#else + for (y = 31; y > 0; --y) { + if (x & 0x80000000) + break; + x <<= 1; + } +#endif + y <<= DEFAULT_RADIX; + /* Do a table lookup for the MSB of the mantissa. */ + x = (x >> 24) & 0x7f; + return y + logtable[x]; +} + +int +fixlog(uint32 x) +{ + int32 y; + y = fixlog2(x); + return FIXMUL(y, FIXLN_2); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/make_log_sub_table.py b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/make_log_sub_table.py new file mode 100644 index 0000000000000000000000000000000000000000..cf01c8ac8e7c2258c0b83e79dc2bd38d137f5d97 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/make_log_sub_table.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python + +import math + +radix = 8 +scale = 1< 0: + stop = int(round(math.log(-math.expm1(-byx/scale)) * out_scale)) + if stop == 0: + break + + byx = byx + 1. 
+ +print "static const uint16 logsub_table[] = {" +for i in range(0,len(logtab),10): + if i+10 <= len(logtab): + print ", ".join(str(x) for x in logtab[i:i+10]) + "," + else: + print ", ".join(str(x) for x in logtab[i:]) +print "};" diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/make_log_table.py b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/make_log_table.py new file mode 100644 index 0000000000000000000000000000000000000000..7a3ed599745e73c85d6c19a5d3baa8681792a0b1 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fe/make_log_table.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python + +import math + +radix = 8 +scale = 1< + */ + +/* This implements part of the YIN algorithm: + * + * "YIN, a fundamental frequency estimator for speech and music". + * Alain de Cheveigné and Hideki Kawahara. Journal of the Acoustical + * Society of America, 111 (4), April 2002. + */ + +#include "sphinxbase/prim_type.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/fixpoint.h" + +#include "sphinxbase/yin.h" + +#include +#include + +struct yin_s { + uint16 frame_size; /** Size of analysis frame. */ + uint16 search_threshold; /**< Threshold for finding period, in Q15 */ + uint16 search_range; /**< Range around best local estimate to search, in Q15 */ + uint16 nfr; /**< Number of frames read so far. */ + + unsigned char wsize; /**< Size of smoothing window. */ + unsigned char wstart; /**< First frame in window. */ + unsigned char wcur; /**< Current frame of analysis. */ + unsigned char endut; /**< Hoch Hech! Are we at the utterance end? */ + + fixed32 **diff_window; /**< Window of difference function outputs. */ + uint16 *period_window; /**< Window of best period estimates. */ +}; + +/** + * The core of YIN: cumulative mean normalized difference function. 
+ */ +static void +cmn_diff(int16 const *signal, int32 *out_diff, int ndiff) +{ + uint32 cum, cshift; + int32 t, tscale; + + out_diff[0] = 32768; + cum = 0; + cshift = 0; + + /* Determine how many bits we can scale t up by below. */ + for (tscale = 0; tscale < 32; ++tscale) + if (ndiff & (1<<(31-tscale))) + break; + --tscale; /* Avoid teh overflowz. */ + /* printf("tscale is %d (ndiff - 1) << tscale is %d\n", + tscale, (ndiff-1) << tscale); */ + + /* Somewhat elaborate block floating point implementation. + * The fp implementation of this is really a lot simpler. */ + for (t = 1; t < ndiff; ++t) { + uint32 dd, dshift, norm; + int j; + + dd = 0; + dshift = 0; + for (j = 0; j < ndiff; ++j) { + int diff = signal[j] - signal[t + j]; + /* Guard against overflows. */ + if (dd > (1UL<>= 1; + ++dshift; + } + dd += (diff * diff) >> dshift; + } + /* Make sure the diffs and cum are shifted to the same + * scaling factor (usually dshift will be zero) */ + if (dshift > cshift) { + cum += dd << (dshift-cshift); + } + else { + cum += dd >> (cshift-dshift); + } + + /* Guard against overflows and also ensure that (t< cum. */ + while (cum > (1UL<>= 1; + ++cshift; + } + /* Avoid divide-by-zero! */ + if (cum == 0) cum = 1; + /* Calculate the normalizer in high precision. */ + norm = (t << tscale) / cum; + /* Do a long multiply and shift down to Q15. 
*/ + out_diff[t] = (int32)(((long long)dd * norm) + >> (tscale - 15 + cshift - dshift)); + /* printf("dd %d cshift %d dshift %d scaledt %d cum %d norm %d cmn %d\n", + dd, cshift, dshift, (t<frame_size = frame_size; + pe->search_threshold = (uint16)(search_threshold * 32768); + pe->search_range = (uint16)(search_range * 32768); + pe->wsize = smooth_window * 2 + 1; + pe->diff_window = ckd_calloc_2d(pe->wsize, + pe->frame_size / 2, + sizeof(**pe->diff_window)); + pe->period_window = ckd_calloc(pe->wsize, + sizeof(*pe->period_window)); + return pe; +}
+
+/**
+ * Release the estimator together with both of its windows.
+ * A NULL estimator is accepted and ignored.
+ */
+void
+yin_free(yin_t *pe)
+{
+    /* The members are read below, so a NULL estimator must stop here. */
+    if (pe == NULL)
+        return;
+    ckd_free_2d(pe->diff_window);
+    ckd_free(pe->period_window);
+    ckd_free(pe);
+}
+
+/**
+ * Prepare the estimator for a new utterance.
+ */
+void
+yin_start(yin_t *pe)
+{
+    /* Reset the circular window pointers.  wcur has to be rewound as
+     * well: yin_read() indexes both windows with it, and frames of the
+     * new utterance are written from slot 0 again. */
+    pe->wstart = pe->wcur = pe->endut = 0;
+    pe->nfr = 0;
+}
+
+/**
+ * Mark the end of the utterance so that yin_read() drains the window.
+ */
+void
+yin_end(yin_t *pe)
+{
+    pe->endut = 1;
+}
+
+/**
+ * Search diff_window[start..end) for a period candidate.
+ *
+ * @return the index of the first value below threshold, or, when no
+ *         value is below it, the index of the smallest value.  0 is
+ *         returned for an empty range.
+ */
+int
+thresholded_search(int32 *diff_window, fixed32 threshold, int start, int end)
+{
+    int i, min, argmin;
+
+    min = INT_MAX;
+    argmin = 0;
+    for (i = start; i < end; ++i) {
+        int diff = diff_window[i];
+
+        if (diff < threshold) {
+            min = diff;
+            argmin = i;
+            break;
+        }
+        if (diff < min) {
+            min = diff;
+            argmin = i;
+        }
+    }
+    return argmin;
+}
+
+/**
+ * Feed one analysis frame: store its difference function and its best
+ * period in the next slot of the circular window.
+ */
+void
+yin_write(yin_t *pe, int16 const *frame)
+{
+    int outptr, difflen;
+
+    /* Rotate the window one frame forward. */
+    ++pe->wstart;
+    /* Fill in the frame before wstart. */
+    outptr = pe->wstart - 1;
+    /* Wrap around the window pointer. */
+    if (pe->wstart == pe->wsize)
+        pe->wstart = 0;
+
+    /* Now calculate normalized difference function. */
+    difflen = pe->frame_size / 2;
+    cmn_diff(frame, pe->diff_window[outptr], difflen);
+
+    /* Find the first point under threshold. If not found, then
+     * use the absolute minimum. */
+    pe->period_window[outptr]
+        = thresholded_search(pe->diff_window[outptr],
+                             pe->search_threshold, 0, difflen);
+
+    /* Increment total number of frames. */
+    ++pe->nfr;
+}
+
+/**
+ * Read the next smoothed period estimate.
+ *
+ * @param pe            Estimator.
+ * @param out_period    Receives the period; may be NULL.
+ * @param out_bestdiff  Receives the difference value at that period;
+ *                      may be NULL.
+ * @return 1 when an estimate was produced, 0 when none is available
+ *         (not enough frames yet, or the window has been drained after
+ *         yin_end()).
+ */
+int
+yin_read(yin_t *pe, uint16 *out_period, uint16 *out_bestdiff)
+{
+    int wstart, wlen, half_wsize, i;
+    int best, best_diff, search_width, low_period, high_period;
+
+    half_wsize = (pe->wsize-1)/2;
+    /* Without any smoothing, just return the current value (don't
+     * need to do anything to the current pointer either). */
+    if (half_wsize == 0) {
+        if (pe->endut)
+            return 0;
+        /* Output pointers are optional on every return path. */
+        if (out_period)
+            *out_period = pe->period_window[0];
+        if (out_bestdiff)
+            *out_bestdiff = pe->diff_window[0][pe->period_window[0]];
+        return 1;
+    }
+
+    /* We can't do anything unless we have at least (wsize-1)/2 + 1
+     * frames, unless we're at the end of the utterance. */
+    if (pe->endut == 0 && pe->nfr < half_wsize + 1) {
+        /* Don't increment the current pointer either. */
+        return 0;
+    }
+
+    /* Establish the smoothing window. */
+    /* End of utterance. */
+    if (pe->endut) {
+        /* We are done (no more data) when pe->wcur = pe->wstart. */
+        if (pe->wcur == pe->wstart)
+            return 0;
+        /* I.e. pe->wcur (circular minus) half_wsize. */
+        wstart = (pe->wcur + pe->wsize - half_wsize) % pe->wsize;
+        /* Number of frames from wstart up to pe->wstart. */
+        wlen = pe->wstart - wstart;
+        if (wlen < 0) wlen += pe->wsize;
+        /*printf("ENDUT! ");*/
+    }
+    /* Beginning of utterance. */
+    else if (pe->nfr < pe->wsize) {
+        wstart = 0;
+        wlen = pe->nfr;
+    }
+    /* Normal case, it is what it is. */
+    else {
+        wstart = pe->wstart;
+        wlen = pe->wsize;
+    }
+
+    /* Now (finally) look for the best local estimate. */
+    /* printf("Searching for local estimate in %d frames around %d\n",
+       wlen, pe->nfr + 1 - wlen); */
+    best = pe->period_window[pe->wcur];
+    best_diff = pe->diff_window[pe->wcur][best];
+    for (i = 0; i < wlen; ++i) {
+        int j = wstart + i;
+        int diff;
+
+        j %= pe->wsize;
+        diff = pe->diff_window[j][pe->period_window[j]];
+        /* printf("%.2f,%.2f ", 1.0 - (double)diff/32768,
+           pe->period_window[j] ? 8000.0/pe->period_window[j] : 8000.0); */
+        if (diff < best_diff) {
+            best_diff = diff;
+            best = pe->period_window[j];
+        }
+    }
+    /* printf("best: %.2f, %.2f\n", 1.0 - (double)best_diff/32768,
+       best ? 8000.0/best : 8000.0); */
+    /* If it's the same as the current one then return it. */
+    if (best == pe->period_window[pe->wcur]) {
+        /* Increment the current pointer. */
+        if (++pe->wcur == pe->wsize)
+            pe->wcur = 0;
+        if (out_period)
+            *out_period = best;
+        if (out_bestdiff)
+            *out_bestdiff = best_diff;
+        return 1;
+    }
+    /* Otherwise, redo the search inside a narrower range. */
+    search_width = best * pe->search_range / 32768;
+    /* printf("Search width = %d * %.2f = %d\n",
+       best, (double)pe->search_range/32768, search_width); */
+    if (search_width == 0) search_width = 1;
+    low_period = best - search_width;
+    high_period = best + search_width;
+    if (low_period < 0) low_period = 0;
+    if (high_period > pe->frame_size / 2) high_period = pe->frame_size / 2;
+    /* printf("Searching from %d to %d\n", low_period, high_period); */
+    best = thresholded_search(pe->diff_window[pe->wcur],
+                              pe->search_threshold,
+                              low_period, high_period);
+    best_diff = pe->diff_window[pe->wcur][best];
+
+    if (out_period)
+        *out_period = (best > 32768) ? 32768 : best;
+    if (out_bestdiff)
+        *out_bestdiff = (best_diff > 32768) ? 32768 : best_diff;
+
+    /* Increment the current pointer. */
+    if (++pe->wcur == pe->wsize)
+        pe->wcur = 0;
+    return 1;
+}
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/feat/agc.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/feat/agc.c new file mode 100644 index 0000000000000000000000000000000000000000..271baf49d1275662a4709a9b23629cddb15192ec --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/feat/agc.c @@ -0,0 +1,227 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * agc.c -- Various forms of automatic gain control (AGC) + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1996 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log$ + * Revision 1.5 2005/06/21 19:25:41 arthchan2003 + * 1, Fixed doxygen documentation. 2, Added $ keyword. + * + * Revision 1.3 2005/03/30 01:22:46 archan + * Fixed mistakes in last updates. Add + * + * + * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created. + */ + +#include +#ifdef HAVE_CONFIG_H +#include +#endif + +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/agc.h" + +/* NOTE! 
These must match the enum in agc.h */
+const char *agc_type_str[] = {
+    "none",
+    "max",
+    "emax",
+    "noise"
+};
+static const int n_agc_type_str = sizeof(agc_type_str)/sizeof(agc_type_str[0]);
+
+/**
+ * Translate an AGC type name into its enum value.
+ * An unknown name is reported through E_FATAL.
+ */
+agc_type_t
+agc_type_from_str(const char *str)
+{
+    int i;
+
+    for (i = 0; i < n_agc_type_str; ++i) {
+        if (0 == strcmp(str, agc_type_str[i]))
+            return (agc_type_t)i;
+    }
+    E_FATAL("Unknown AGC type '%s'\n", str);
+    return AGC_NONE;
+}
+
+/**
+ * Allocate a zeroed AGC state with a noise threshold of 2.0.
+ */
+agc_t *agc_init(void)
+{
+    agc_t *agc;
+    agc = ckd_calloc(1, sizeof(*agc));
+    agc->noise_thresh = FLOAT2MFCC(2.0);
+
+    return agc;
+}
+
+/**
+ * Release an AGC state.
+ */
+void agc_free(agc_t *agc)
+{
+    ckd_free(agc);
+}
+
+/**
+ * Normalize c0 for all frames such that max(c0) = 0.
+ */
+void
+agc_max(agc_t *agc, mfcc_t **mfc, int32 n_frame)
+{
+    int32 i;
+
+    if (n_frame <= 0)
+        return;
+    agc->obs_max = mfc[0][0];
+    for (i = 1; i < n_frame; i++) {
+        if (mfc[i][0] > agc->obs_max) {
+            agc->obs_max = mfc[i][0];
+            agc->obs_frame = 1;
+        }
+    }
+
+    /* obs_max is an mfcc_t; convert it before handing it to %f, as
+     * agc_noise() does for its own log line. */
+    E_INFO("AGCMax: obs=max= %.2f\n", MFCC2FLOAT(agc->obs_max));
+    for (i = 0; i < n_frame; i++)
+        mfc[i][0] -= agc->obs_max;
+}
+
+/**
+ * Set the estimated maximum that agc_emax() subtracts from c0.
+ */
+void
+agc_emax_set(agc_t *agc, float32 m)
+{
+    agc->max = FLOAT2MFCC(m);
+    E_INFO("AGCEMax: max= %.2f\n", m);
+}
+
+/**
+ * Return the estimated maximum as a float.
+ */
+float32
+agc_emax_get(agc_t *agc)
+{
+    return MFCC2FLOAT(agc->max);
+}
+
+/**
+ * Subtract the estimated maximum from c0 of every frame while tracking
+ * the largest c0 observed.
+ */
+void
+agc_emax(agc_t *agc, mfcc_t **mfc, int32 n_frame)
+{
+    int i;
+
+    if (n_frame <= 0)
+        return;
+    for (i = 0; i < n_frame; ++i) {
+        if (mfc[i][0] > agc->obs_max) {
+            agc->obs_max = mfc[i][0];
+            agc->obs_frame = 1;
+        }
+        mfc[i][0] -= agc->max;
+    }
+}
+
+/* Update estimated max for next utterance */
+void
+agc_emax_update(agc_t *agc)
+{
+    if (agc->obs_frame) {            /* Update only if some data observed */
+        agc->obs_max_sum += agc->obs_max;
+        agc->obs_utt++;
+
+        /* Re-estimate max over past history; decay the history */
+        agc->max = agc->obs_max_sum / agc->obs_utt;
+        if (agc->obs_utt == 16) {
+            agc->obs_max_sum /= 2;
+            agc->obs_utt = 8;
+        }
+    }
+    /* Both values are mfcc_t; convert them before handing them to %f. */
+    E_INFO("AGCEMax: obs= %.2f, new= %.2f\n",
+           MFCC2FLOAT(agc->obs_max), MFCC2FLOAT(agc->max));
+
+    /* Reset the accumulators for the next utterance. */
+    agc->obs_frame = 0;
+    agc->obs_max = FLOAT2MFCC(-1000.0); /* Less than any real C0 value (hopefully!!) */
+}
+
+/**
+ * Subtract the average c0 of the low-energy frames from c0 of every frame.
+ *
+ * Frames whose c0 lies below the utterance minimum plus the configured
+ * threshold are averaged.  Nothing is changed when nfr is not positive.
+ */
+void
+agc_noise(agc_t *agc,
+          mfcc_t **cep,
+          int32 nfr)
+{
+    mfcc_t min_energy; /* Minimum log-energy */
+    mfcc_t noise_level;        /* Average noise_level */
+    int32 i;           /* frame index */
+    int32 noise_frames;        /* Number of noise frames */
+
+    /* cep[0] is read unconditionally below, so an empty utterance must
+     * stop here (agc_max() and agc_emax() guard the same way). */
+    if (nfr <= 0)
+        return;
+
+    /* Determine minimum log-energy in utterance */
+    min_energy = cep[0][0];
+    for (i = 0; i < nfr; ++i) {
+        if (cep[i][0] < min_energy)
+            min_energy = cep[i][0];
+    }
+
+    /* Average all frames between min_energy and min_energy + agc->noise_thresh */
+    noise_frames = 0;
+    noise_level = 0;
+    min_energy += agc->noise_thresh;
+    for (i = 0; i < nfr; ++i) {
+        if (cep[i][0] < min_energy) {
+            noise_level += cep[i][0];
+            noise_frames++;
+        }
+    }
+
+    if (noise_frames > 0) {
+        noise_level /= noise_frames;
+        E_INFO("AGC NOISE: max= %6.3f\n", MFCC2FLOAT(noise_level));
+        /* Subtract noise_level from all log_energy values */
+        for (i = 0; i < nfr; i++) {
+            cep[i][0] -= noise_level;
+        }
+    }
+}
+
+/**
+ * Set the noise threshold used by agc_noise().
+ */
+void
+agc_set_threshold(agc_t *agc, float32 threshold)
+{
+    agc->noise_thresh = FLOAT2MFCC(threshold);
+}
+
+/**
+ * Return the noise threshold as a float.
+ */
+float32
+agc_get_threshold(agc_t *agc)
+{
+    /* noise_thresh is stored as mfcc_t (see agc_set_threshold), so the
+     * getter converts back with MFCC2FLOAT, not FLOAT2MFCC. */
+    return MFCC2FLOAT(agc->noise_thresh);
+}
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/feat/cmn.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/feat/cmn.c new file mode 100644 index 0000000000000000000000000000000000000000..071044b02cb9cc8e9bdc87700a66097d0cf6a940 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/feat/cmn.c @@ -0,0 +1,188 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * cmn.c -- Various forms of cepstral mean normalization + */ + +#include +#include +#include +#include +#include +#ifdef HAVE_CONFIG_H +#include +#endif + +#ifdef _MSC_VER +#pragma warning (disable: 4244) +#endif + +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/err.h" +#include "sphinxbase/cmn.h" + +/* NOTE! 
These must match the enum in cmn.h */
+const char *cmn_type_str[] = {
+    "none",
+    "batch",
+    "live"
+};
+const char *cmn_alt_type_str[] = {
+    "none",
+    "current",
+    "prior"
+};
+static const int n_cmn_type_str = sizeof(cmn_type_str)/sizeof(cmn_type_str[0]);
+
+/**
+ * Translate a CMN type name (primary or alternative spelling) into its
+ * enum value.  An unknown name is reported through E_FATAL.
+ */
+cmn_type_t
+cmn_type_from_str(const char *str)
+{
+    int i;
+
+    for (i = 0; i < n_cmn_type_str; ++i) {
+        if (0 == strcmp(str, cmn_type_str[i]) || 0 == strcmp(str, cmn_alt_type_str[i]))
+            return (cmn_type_t)i;
+    }
+    E_FATAL("Unknown CMN type '%s'\n", str);
+    return CMN_NONE;
+}
+
+/**
+ * Allocate a CMN state with zeroed mean, variance and sum vectors of
+ * veclen elements each.
+ */
+cmn_t *
+cmn_init(int32 veclen)
+{
+    cmn_t *cmn;
+    cmn = (cmn_t *) ckd_calloc(1, sizeof(cmn_t));
+    cmn->veclen = veclen;
+    cmn->cmn_mean = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
+    cmn->cmn_var = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
+    cmn->sum = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
+    cmn->nframe = 0;
+
+    return cmn;
+}
+
+
+/**
+ * Batch cepstral mean normalization, applied in place.
+ *
+ * @param cmn      CMN state; its mean (and, with varnorm, its variance)
+ *                 vector is overwritten.
+ * @param mfc      n_frame cepstral vectors of cmn->veclen elements.
+ * @param varnorm  Non-zero to also scale every dimension by the inverse
+ *                 standard deviation.
+ * @param n_frame  Number of frames; nothing is done when not positive.
+ *
+ * The mean is taken over frames with non-negative c0 only.  When there is
+ * no such frame the features are left untouched and the mean stays zero.
+ */
+void
+cmn(cmn_t *cmn, mfcc_t ** mfc, int32 varnorm, int32 n_frame)
+{
+    mfcc_t *mfcp;
+    mfcc_t t;
+    int32 i, f;
+    int32 n_pos_frame;
+
+    assert(mfc != NULL);
+
+    if (n_frame <= 0)
+        return;
+
+    /* If cmn->cmn_mean wasn't NULL, we need to zero the contents */
+    memset(cmn->cmn_mean, 0, cmn->veclen * sizeof(mfcc_t));
+
+    /* Find mean cep vector for this utterance */
+    for (f = 0, n_pos_frame = 0; f < n_frame; f++) {
+        mfcp = mfc[f];
+
+        /* Skip zero energy frames */
+        if (mfcp[0] < 0)
+            continue;
+
+        for (i = 0; i < cmn->veclen; i++) {
+            cmn->cmn_mean[i] += mfcp[i];
+        }
+
+        n_pos_frame++;
+    }
+
+    /* Every frame was skipped: there is no mean to estimate, and
+     * dividing by n_pos_frame below would be a division by zero. */
+    if (n_pos_frame == 0)
+        return;
+
+    for (i = 0; i < cmn->veclen; i++)
+        cmn->cmn_mean[i] /= n_pos_frame;
+
+    E_INFO("CMN: ");
+    for (i = 0; i < cmn->veclen; i++)
+        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[i]));
+    E_INFOCONT("\n");
+    if (!varnorm) {
+        /* Subtract mean from each cep vector */
+        for (f = 0; f < n_frame; f++) {
+            mfcp = mfc[f];
+            for (i = 0; i < cmn->veclen; i++)
+                mfcp[i] -= cmn->cmn_mean[i];
+        }
+    }
+    else {
+        /* Scale cep vectors to have unit variance along each dimension, and subtract means */
+        /* If cmn->cmn_var wasn't NULL, we need to zero the contents */
+        memset(cmn->cmn_var, 0, cmn->veclen * sizeof(mfcc_t));
+
+        for (f = 0; f < n_frame; f++) {
+            mfcp = mfc[f];
+
+            for (i = 0; i < cmn->veclen; i++) {
+                t = mfcp[i] - cmn->cmn_mean[i];
+                cmn->cmn_var[i] += MFCCMUL(t, t);
+            }
+        }
+        for (i = 0; i < cmn->veclen; i++)
+            /* Inverse Std. Dev, RAH added type case from sqrt */
+            cmn->cmn_var[i] = FLOAT2MFCC(sqrt((float64)n_frame / MFCC2FLOAT(cmn->cmn_var[i])));
+
+        for (f = 0; f < n_frame; f++) {
+            mfcp = mfc[f];
+            for (i = 0; i < cmn->veclen; i++)
+                mfcp[i] = MFCCMUL((mfcp[i] - cmn->cmn_mean[i]), cmn->cmn_var[i]);
+        }
+    }
+}
+
+/*
+ * RAH, free previously allocated memory
+ */
+void
+cmn_free(cmn_t * cmn)
+{
+    if (cmn != NULL) {
+        if (cmn->cmn_var)
+            ckd_free((void *) cmn->cmn_var);
+
+        if (cmn->cmn_mean)
+            ckd_free((void *) cmn->cmn_mean);
+
+        if (cmn->sum)
+            ckd_free((void *) cmn->sum);
+
+        ckd_free((void *) cmn);
+    }
+}
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/feat/cmn_live.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/feat/cmn_live.c new file mode 100644 index 0000000000000000000000000000000000000000..59bf9759a895a9f5be7809d908cced4ff173d73d --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/feat/cmn_live.c @@ -0,0 +1,163 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * ====================================================================
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4244)
+#endif
+
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/err.h"
+#include "sphinxbase/cmn.h"
+
+/*
+ * Log the current mean vector: the given prefix, every element as
+ * "%5.2f ", then a closing ">".
+ */
+static void
+cmn_live_log_mean(cmn_t *cmn, const char *prefix)
+{
+    int32 k;
+
+    E_INFO("%s", prefix);
+    for (k = 0; k < cmn->veclen; k++)
+        E_INFOCONT("%5.2f ", MFCC2FLOAT(cmn->cmn_mean[k]));
+    E_INFOCONT(">\n");
+}
+
+/*
+ * Replace the running mean by vec and restart the accumulators as if
+ * CMN_WIN frames equal to vec had been seen.
+ */
+void
+cmn_live_set(cmn_t *cmn, mfcc_t const * vec)
+{
+    int32 k;
+
+    cmn_live_log_mean(cmn, "Update from < ");
+
+    for (k = 0; k < cmn->veclen; k++) {
+        cmn->cmn_mean[k] = vec[k];
+        cmn->sum[k] = vec[k] * CMN_WIN;
+    }
+    cmn->nframe = CMN_WIN;
+
+    cmn_live_log_mean(cmn, "Update to < ");
+}
+
+/*
+ * Recompute the mean from the accumulated sums; once nframe has reached
+ * CMN_WIN_HWM, scale the sums back down to a CMN_WIN-frame history.
+ */
+static void
+cmn_live_shiftwin(cmn_t *cmn)
+{
+    mfcc_t sf;
+    int32 k;
+
+    cmn_live_log_mean(cmn, "Update from < ");
+
+    sf = FLOAT2MFCC(1.0) / cmn->nframe;
+    for (k = 0; k < cmn->veclen; k++)
+        cmn->cmn_mean[k] = cmn->sum[k] / cmn->nframe; /* sum[k] * sf */
+
+    /* Make the accumulation decay exponentially */
+    if (cmn->nframe >= CMN_WIN_HWM) {
+        sf = CMN_WIN * sf;
+        for (k = 0; k < cmn->veclen; k++)
+            cmn->sum[k] = MFCCMUL(cmn->sum[k], sf);
+        cmn->nframe = CMN_WIN;
+    }
+
+    cmn_live_log_mean(cmn, "Update to < ");
+}
+
+/*
+ * Recompute the mean from the accumulated sums; does nothing while no
+ * frame has been accumulated.  The sums are scaled back only when nframe
+ * exceeds CMN_WIN_HWM.
+ */
+void
+cmn_live_update(cmn_t *cmn)
+{
+    mfcc_t sf;
+    int32 k;
+
+    if (cmn->nframe <= 0)
+        return;
+
+    cmn_live_log_mean(cmn, "Update from < ");
+
+    /* Update mean buffer */
+    sf = FLOAT2MFCC(1.0) / cmn->nframe;
+    for (k = 0; k < cmn->veclen; k++)
+        cmn->cmn_mean[k] = cmn->sum[k] / cmn->nframe; /* sum[k] * sf; */
+
+    /* Make the accumulation decay exponentially */
+    if (cmn->nframe > CMN_WIN_HWM) {
+        sf = CMN_WIN * sf;
+        for (k = 0; k < cmn->veclen; k++)
+            cmn->sum[k] = MFCCMUL(cmn->sum[k], sf);
+        cmn->nframe = CMN_WIN;
+    }
+
+    cmn_live_log_mean(cmn, "Update to < ");
+}
+
+/*
+ * Live-mode normalization: subtract the current mean from every frame
+ * with non-negative c0 and add that frame to the accumulators.  Frames
+ * with negative c0 are left untouched and not counted.
+ */
+void
+cmn_live(cmn_t *cmn, mfcc_t **incep, int32 varnorm, int32 nfr)
+{
+    int32 fr, dim;
+
+    if (nfr <= 0)
+        return;
+
+    if (varnorm)
+        E_FATAL
+            ("Variance normalization not implemented in live mode decode\n");
+
+    for (fr = 0; fr < nfr; fr++) {
+        mfcc_t *vec = incep[fr];
+
+        /* Skip zero energy frames */
+        if (vec[0] < 0)
+            continue;
+
+        for (dim = 0; dim < cmn->veclen; dim++) {
+            cmn->sum[dim] += vec[dim];
+            vec[dim] -= cmn->cmn_mean[dim];
+        }
+
+        ++cmn->nframe;
+    }
+
+    /* Shift buffer down if we have more than CMN_WIN_HWM frames */
+    if (cmn->nframe > CMN_WIN_HWM)
+        cmn_live_shiftwin(cmn);
+}
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/feat/feat.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/feat/feat.c new file mode 100644 index 0000000000000000000000000000000000000000..1b5214730ea1738cab537ddcc0fd67eb23a6506b --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/feat/feat.c @@ -0,0 +1,1497 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * feat.c -- Feature vector description and cepstra->feature computation. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1996 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log$ + * Revision 1.22 2006/02/23 03:59:40 arthchan2003 + * Merged from branch SPHINX3_5_2_RCI_IRII_BRANCH: a, Free buffers correctly. b, Fixed dox-doc. + * + * Revision 1.21.4.3 2005/10/17 04:45:57 arthchan2003 + * Free stuffs in cmn and feat corectly. + * + * Revision 1.21.4.2 2005/09/26 02:19:57 arthchan2003 + * Add message to show the directory which the feature is searched for. + * + * Revision 1.21.4.1 2005/07/03 22:55:50 arthchan2003 + * More correct deallocation in feat.c. The cmn deallocation is still not correct at this point. 
+ * + * Revision 1.21 2005/06/22 03:29:35 arthchan2003 + * Makefile.am s for all subdirectory of libs3decoder/ + * + * Revision 1.4 2005/04/21 23:50:26 archan + * Some more refactoring on the how reporting of structures inside kbcore_t is done, it is now 50% nice. Also added class-based LM test case into test-decode.sh.in. At this moment, everything in search mode 5 is already done. It is time to test the idea whether the search can really be used. + * + * Revision 1.3 2005/03/30 01:22:46 archan + * Fixed mistakes in last updates. Add + * + * + * 20.Apr.2001 RAH (rhoughton@mediasite.com, ricky.houghton@cs.cmu.edu) + * Adding feat_free() to free allocated memory + * + * 02-Jan-2001 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University + * Modified feat_s2mfc2feat_block() to handle empty buffers at + * the end of an utterance + * + * 30-Dec-2000 Rita Singh (rsingh@cs.cmu.edu) at Carnegie Mellon University + * Added feat_s2mfc2feat_block() to allow feature computation + * from sequences of blocks of cepstral vectors + * + * 12-Jun-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Major changes to accommodate arbitrary feature input types. Added + * feat_read(), moved various cep2feat functions from other files into + * this one. Also, made this module object-oriented with the feat_t type. + * Changed definition of s2mfc_read to let the caller manage MFC buffers. + * + * 03-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added unistd.h include. + * + * 02-Oct-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added check for sf argument to s2mfc_read being within file size. + * + * 18-Sep-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added sf, ef parameters to s2mfc_read(). + * + * 10-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added feat_cepsize(). + * Added different feature-handling (s2_4x, s3_1x39 at this point). 
+ * Moved feature-dependent functions to feature-dependent files. + * + * 09-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Moved constant declarations from feat.h into here. + * + * 04-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created. + */ + + +/* + * This module encapsulates different feature streams used by the Sphinx group. New + * stream types can be added by augmenting feat_init() and providing an accompanying + * compute_feat function. It also provides a "generic" feature vector definition for + * handling "arbitrary" speech input feature types (see the last section in feat_init()). + * In this case the speech input data should already be feature vectors; no computation, + * such as MFC->feature conversion, is available or needed. + */ + +#include +#include +#ifdef HAVE_CONFIG_H +#include +#endif + +#ifdef _MSC_VER +#pragma warning (disable: 4244 4996) +#endif + +#include "sphinxbase/fe.h" +#include "sphinxbase/feat.h" +#include "sphinxbase/bio.h" +#include "sphinxbase/pio.h" +#include "sphinxbase/cmn.h" +#include "sphinxbase/agc.h" +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/prim_type.h" +#include "sphinxbase/glist.h" + +#define FEAT_VERSION "1.0" +#define FEAT_DCEP_WIN 2 + +#ifdef DUMP_FEATURES +static void +cep_dump_dbg(feat_t *fcb, mfcc_t **mfc, int32 nfr, const char *text) +{ + int32 i, j; + + E_INFO("%s\n", text); + for (i = 0; i < nfr; i++) { + for (j = 0; j < fcb->cepsize; j++) { + fprintf(stderr, "%f ", MFCC2FLOAT(mfc[i][j])); + } + fprintf(stderr, "\n"); + } +} +static void +feat_print_dbg(feat_t *fcb, mfcc_t ***feat, int32 nfr, const char *text) +{ + E_INFO("%s\n", text); + feat_print(fcb, feat, nfr, stderr); +} +#else /* !DUMP_FEATURES */ +#define cep_dump_dbg(fcb,mfc,nfr,text) +#define feat_print_dbg(fcb,mfc,nfr,text) +#endif + +int32 ** +parse_subvecs(char const *str) +{ + char const *strp; + int32 n, n2, l; + glist_t dimlist; /* List of 
dimensions in one subvector */ + glist_t veclist; /* List of dimlists (subvectors) */ + int32 **subvec; + gnode_t *gn, *gn2; + + veclist = NULL; + + strp = str; + for (;;) { + dimlist = NULL; + + for (;;) { + if (sscanf(strp, "%d%n", &n, &l) != 1) + E_FATAL("'%s': Couldn't read int32 @pos %d\n", str, + strp - str); + strp += l; + + if (*strp == '-') { + strp++; + + if (sscanf(strp, "%d%n", &n2, &l) != 1) + E_FATAL("'%s': Couldn't read int32 @pos %d\n", str, + strp - str); + strp += l; + } + else + n2 = n; + + if ((n < 0) || (n > n2)) + E_FATAL("'%s': Bad subrange spec ending @pos %d\n", str, + strp - str); + + for (; n <= n2; n++) { + gnode_t *gn; + for (gn = dimlist; gn; gn = gnode_next(gn)) + if (gnode_int32(gn) == n) + break; + if (gn != NULL) + E_FATAL("'%s': Duplicate dimension ending @pos %d\n", + str, strp - str); + + dimlist = glist_add_int32(dimlist, n); + } + + if ((*strp == '\0') || (*strp == '/')) + break; + + if (*strp != ',') + E_FATAL("'%s': Bad delimiter @pos %d\n", str, strp - str); + + strp++; + } + + veclist = glist_add_ptr(veclist, (void *) dimlist); + + if (*strp == '\0') + break; + + assert(*strp == '/'); + strp++; + } + + /* Convert the glists to arrays; remember the glists are in reverse order of the input! 
*/ + n = glist_count(veclist); /* #Subvectors */ + subvec = (int32 **) ckd_calloc(n + 1, sizeof(int32 *)); /* +1 for sentinel */ + subvec[n] = NULL; /* sentinel */ + + for (--n, gn = veclist; (n >= 0) && gn; gn = gnode_next(gn), --n) { + gn2 = (glist_t) gnode_ptr(gn); + + n2 = glist_count(gn2); /* Length of this subvector */ + if (n2 <= 0) + E_FATAL("'%s': 0-length subvector\n", str); + + subvec[n] = (int32 *) ckd_calloc(n2 + 1, sizeof(int32)); /* +1 for sentinel */ + subvec[n][n2] = -1; /* sentinel */ + + for (--n2; (n2 >= 0) && gn2; gn2 = gnode_next(gn2), --n2) + subvec[n][n2] = gnode_int32(gn2); + assert((n2 < 0) && (!gn2)); + } + assert((n < 0) && (!gn)); + + /* Free the glists */ + for (gn = veclist; gn; gn = gnode_next(gn)) { + gn2 = (glist_t) gnode_ptr(gn); + glist_free(gn2); + } + glist_free(veclist); + + return subvec; +} + +void +subvecs_free(int32 **subvecs) +{ + int32 **sv; + + for (sv = subvecs; sv && *sv; ++sv) + ckd_free(*sv); + ckd_free(subvecs); +} + +int +feat_set_subvecs(feat_t *fcb, int32 **subvecs) +{ + int32 **sv; + uint32 n_sv, n_dim, i; + + if (subvecs == NULL) { + subvecs_free(fcb->subvecs); + ckd_free(fcb->sv_buf); + ckd_free(fcb->sv_len); + fcb->n_sv = 0; + fcb->subvecs = NULL; + fcb->sv_len = NULL; + fcb->sv_buf = NULL; + fcb->sv_dim = 0; + return 0; + } + + if (fcb->n_stream != 1) { + E_ERROR("Subvector specifications require single-stream features!"); + return -1; + } + + n_sv = 0; + n_dim = 0; + for (sv = subvecs; sv && *sv; ++sv) { + int32 *d; + + for (d = *sv; d && *d != -1; ++d) { + ++n_dim; + } + ++n_sv; + } + if (n_dim > feat_dimension(fcb)) { + E_ERROR("Total dimensionality of subvector specification %d " + "> feature dimensionality %d\n", n_dim, feat_dimension(fcb)); + return -1; + } + + fcb->n_sv = n_sv; + fcb->subvecs = subvecs; + fcb->sv_len = (uint32 *)ckd_calloc(n_sv, sizeof(*fcb->sv_len)); + fcb->sv_buf = (mfcc_t *)ckd_calloc(n_dim, sizeof(*fcb->sv_buf)); + fcb->sv_dim = n_dim; + for (i = 0; i < n_sv; ++i) { + int32 *d; + 
for (d = subvecs[i]; d && *d != -1; ++d) { + ++fcb->sv_len[i]; + } + } + + return 0; +} + +/** + * Project feature components to subvectors (if any). + */ +static void +feat_subvec_project(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr) +{ + uint32 i; + + if (fcb->subvecs == NULL) + return; + for (i = 0; i < nfr; ++i) { + mfcc_t *out; + int32 j; + + out = fcb->sv_buf; + for (j = 0; j < fcb->n_sv; ++j) { + int32 *d; + for (d = fcb->subvecs[j]; d && *d != -1; ++d) { + *out++ = inout_feat[i][0][*d]; + } + } + memcpy(inout_feat[i][0], fcb->sv_buf, fcb->sv_dim * sizeof(*fcb->sv_buf)); + } +} + +mfcc_t *** +feat_array_alloc(feat_t * fcb, int32 nfr) +{ + int32 i, j, k; + mfcc_t *data, *d, ***feat; + + assert(fcb); + assert(nfr > 0); + assert(feat_dimension(fcb) > 0); + + /* Make sure to use the dimensionality of the features *before* + LDA and subvector projection. */ + k = 0; + for (i = 0; i < fcb->n_stream; ++i) + k += fcb->stream_len[i]; + assert((uint32)k >= feat_dimension(fcb)); + assert(k >= fcb->sv_dim); + + feat = + (mfcc_t ***) ckd_calloc_2d(nfr, feat_dimension1(fcb), sizeof(mfcc_t *)); + data = (mfcc_t *) ckd_calloc(nfr * k, sizeof(mfcc_t)); + + for (i = 0; i < nfr; i++) { + d = data + i * k; + for (j = 0; j < feat_dimension1(fcb); j++) { + feat[i][j] = d; + d += feat_dimension2(fcb, j); + } + } + + return feat; +} + +mfcc_t *** +feat_array_realloc(feat_t *fcb, mfcc_t ***old_feat, int32 ofr, int32 nfr) +{ + int32 i, k, cf; + mfcc_t*** new_feat; + + assert(fcb); + assert(nfr > 0); + assert(ofr > 0); + assert(feat_dimension(fcb) > 0); + + /* Make sure to use the dimensionality of the features *before* + LDA and subvector projection. */ + k = 0; + for (i = 0; i < fcb->n_stream; ++i) + k += fcb->stream_len[i]; + assert((uint32)k >= feat_dimension(fcb)); + assert(k >= fcb->sv_dim); + + new_feat = feat_array_alloc(fcb, nfr); + + cf = (nfr < ofr) ? 
nfr : ofr; + memcpy(new_feat[0][0], old_feat[0][0], cf * k * sizeof(mfcc_t)); + + feat_array_free(old_feat); + + return new_feat; +} + +void +feat_array_free(mfcc_t ***feat) +{ + ckd_free(feat[0][0]); + ckd_free_2d((void **)feat); +} + +static void +feat_s2_4x_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) +{ + mfcc_t *f; + mfcc_t *w, *_w; + mfcc_t *w1, *w_1, *_w1, *_w_1; + mfcc_t d1, d2; + int32 i, j; + + assert(fcb); + assert(feat_cepsize(fcb) == 13); + assert(feat_n_stream(fcb) == 4); + assert(feat_stream_len(fcb, 0) == 12); + assert(feat_stream_len(fcb, 1) == 24); + assert(feat_stream_len(fcb, 2) == 3); + assert(feat_stream_len(fcb, 3) == 12); + assert(feat_window_size(fcb) == 4); + + /* CEP; skip C0 */ + memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t)); + + /* + * DCEP(SHORT): mfc[2] - mfc[-2] + * DCEP(LONG): mfc[4] - mfc[-4] + */ + w = mfc[2] + 1; /* +1 to skip C0 */ + _w = mfc[-2] + 1; + + f = feat[1]; + for (i = 0; i < feat_cepsize(fcb) - 1; i++) /* Short-term */ + f[i] = w[i] - _w[i]; + + w = mfc[4] + 1; /* +1 to skip C0 */ + _w = mfc[-4] + 1; + + for (j = 0; j < feat_cepsize(fcb) - 1; i++, j++) /* Long-term */ + f[i] = w[j] - _w[j]; + + /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */ + w1 = mfc[3] + 1; /* Final +1 to skip C0 */ + _w1 = mfc[-1] + 1; + w_1 = mfc[1] + 1; + _w_1 = mfc[-3] + 1; + + f = feat[3]; + for (i = 0; i < feat_cepsize(fcb) - 1; i++) { + d1 = w1[i] - _w1[i]; + d2 = w_1[i] - _w_1[i]; + + f[i] = d1 - d2; + } + + /* POW: C0, DC0, D2C0; differences computed as above for rest of cep */ + f = feat[2]; + f[0] = mfc[0][0]; + f[1] = mfc[2][0] - mfc[-2][0]; + + d1 = mfc[3][0] - mfc[-1][0]; + d2 = mfc[1][0] - mfc[-3][0]; + f[2] = d1 - d2; +} + + +static void +feat_s3_1x39_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) +{ + mfcc_t *f; + mfcc_t *w, *_w; + mfcc_t *w1, *w_1, *_w1, *_w_1; + mfcc_t d1, d2; + int32 i; + + assert(fcb); + assert(feat_cepsize(fcb) == 13); + assert(feat_n_stream(fcb) == 1); + 
assert(feat_stream_len(fcb, 0) == 39); + assert(feat_window_size(fcb) == 3); + + /* CEP; skip C0 */ + memcpy(feat[0], mfc[0] + 1, (feat_cepsize(fcb) - 1) * sizeof(mfcc_t)); + /* + * DCEP: mfc[2] - mfc[-2]; + */ + f = feat[0] + feat_cepsize(fcb) - 1; + w = mfc[2] + 1; /* +1 to skip C0 */ + _w = mfc[-2] + 1; + + for (i = 0; i < feat_cepsize(fcb) - 1; i++) + f[i] = w[i] - _w[i]; + + /* POW: C0, DC0, D2C0 */ + f += feat_cepsize(fcb) - 1; + + f[0] = mfc[0][0]; + f[1] = mfc[2][0] - mfc[-2][0]; + + d1 = mfc[3][0] - mfc[-1][0]; + d2 = mfc[1][0] - mfc[-3][0]; + f[2] = d1 - d2; + + /* D2CEP: (mfc[3] - mfc[-1]) - (mfc[1] - mfc[-3]) */ + f += 3; + + w1 = mfc[3] + 1; /* Final +1 to skip C0 */ + _w1 = mfc[-1] + 1; + w_1 = mfc[1] + 1; + _w_1 = mfc[-3] + 1; + + for (i = 0; i < feat_cepsize(fcb) - 1; i++) { + d1 = w1[i] - _w1[i]; + d2 = w_1[i] - _w_1[i]; + + f[i] = d1 - d2; + } +} + + +static void +feat_s3_cep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) +{ + assert(fcb); + assert(feat_n_stream(fcb) == 1); + assert(feat_window_size(fcb) == 0); + + /* CEP */ + memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t)); +} + +static void +feat_s3_cep_dcep(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) +{ + mfcc_t *f; + mfcc_t *w, *_w; + int32 i; + + assert(fcb); + assert(feat_n_stream(fcb) == 1); + assert(feat_stream_len(fcb, 0) == (uint32)feat_cepsize(fcb) * 2); + assert(feat_window_size(fcb) == 2); + + /* CEP */ + memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t)); + + /* + * DCEP: mfc[2] - mfc[-2]; + */ + f = feat[0] + feat_cepsize(fcb); + w = mfc[2]; + _w = mfc[-2]; + + for (i = 0; i < feat_cepsize(fcb); i++) + f[i] = w[i] - _w[i]; +} + +static void +feat_1s_c_d_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) +{ + mfcc_t *f; + mfcc_t *w, *_w; + mfcc_t *w1, *w_1, *_w1, *_w_1; + mfcc_t d1, d2; + int32 i; + + assert(fcb); + assert(feat_n_stream(fcb) == 1); + assert(feat_stream_len(fcb, 0) == (uint32)feat_cepsize(fcb) * 3); + assert(feat_window_size(fcb) == 
FEAT_DCEP_WIN + 1); + + /* CEP */ + memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t)); + + /* + * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN; + */ + f = feat[0] + feat_cepsize(fcb); + w = mfc[FEAT_DCEP_WIN]; + _w = mfc[-FEAT_DCEP_WIN]; + + for (i = 0; i < feat_cepsize(fcb); i++) + f[i] = w[i] - _w[i]; + + /* + * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]), + * where w = FEAT_DCEP_WIN + */ + f += feat_cepsize(fcb); + + w1 = mfc[FEAT_DCEP_WIN + 1]; + _w1 = mfc[-FEAT_DCEP_WIN + 1]; + w_1 = mfc[FEAT_DCEP_WIN - 1]; + _w_1 = mfc[-FEAT_DCEP_WIN - 1]; + + for (i = 0; i < feat_cepsize(fcb); i++) { + d1 = w1[i] - _w1[i]; + d2 = w_1[i] - _w_1[i]; + + f[i] = d1 - d2; + } +} + +static void +feat_1s_c_d_ld_dd_cep2feat(feat_t * fcb, mfcc_t ** mfc, mfcc_t ** feat) +{ + mfcc_t *f; + mfcc_t *w, *_w; + mfcc_t *w1, *w_1, *_w1, *_w_1; + mfcc_t d1, d2; + int32 i; + + assert(fcb); + assert(feat_n_stream(fcb) == 1); + assert(feat_stream_len(fcb, 0) == (uint32)feat_cepsize(fcb) * 4); + assert(feat_window_size(fcb) == FEAT_DCEP_WIN * 2); + + /* CEP */ + memcpy(feat[0], mfc[0], feat_cepsize(fcb) * sizeof(mfcc_t)); + + /* + * DCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN; + */ + f = feat[0] + feat_cepsize(fcb); + w = mfc[FEAT_DCEP_WIN]; + _w = mfc[-FEAT_DCEP_WIN]; + + for (i = 0; i < feat_cepsize(fcb); i++) + f[i] = w[i] - _w[i]; + + /* + * LDCEP: mfc[w] - mfc[-w], where w = FEAT_DCEP_WIN * 2; + */ + f += feat_cepsize(fcb); + w = mfc[FEAT_DCEP_WIN * 2]; + _w = mfc[-FEAT_DCEP_WIN * 2]; + + for (i = 0; i < feat_cepsize(fcb); i++) + f[i] = w[i] - _w[i]; + + /* + * D2CEP: (mfc[w+1] - mfc[-w+1]) - (mfc[w-1] - mfc[-w-1]), + * where w = FEAT_DCEP_WIN + */ + f += feat_cepsize(fcb); + + w1 = mfc[FEAT_DCEP_WIN + 1]; + _w1 = mfc[-FEAT_DCEP_WIN + 1]; + w_1 = mfc[FEAT_DCEP_WIN - 1]; + _w_1 = mfc[-FEAT_DCEP_WIN - 1]; + + for (i = 0; i < feat_cepsize(fcb); i++) { + d1 = w1[i] - _w1[i]; + d2 = w_1[i] - _w_1[i]; + + f[i] = d1 - d2; + } +} + +static void +feat_copy(feat_t * fcb, 
mfcc_t ** mfc, mfcc_t ** feat) +{ + int32 win, i, j; + + win = feat_window_size(fcb); + + /* Concatenate input features */ + for (i = -win; i <= win; ++i) { + uint32 spos = 0; + + for (j = 0; j < feat_n_stream(fcb); ++j) { + uint32 stream_len; + + /* Unscale the stream length by the window. */ + stream_len = feat_stream_len(fcb, j) / (2 * win + 1); + memcpy(feat[j] + ((i + win) * stream_len), + mfc[i] + spos, + stream_len * sizeof(mfcc_t)); + spos += stream_len; + } + } +} + +feat_t * +feat_init(char const *type, cmn_type_t cmn, int32 varnorm, + agc_type_t agc, int32 breport, int32 cepsize) +{ + feat_t *fcb; + + if (cepsize == 0) + cepsize = 13; + if (breport) + E_INFO + ("Initializing feature stream to type: '%s', ceplen=%d, CMN='%s', VARNORM='%s', AGC='%s'\n", + type, cepsize, cmn_type_str[cmn], varnorm ? "yes" : "no", agc_type_str[agc]); + + fcb = (feat_t *) ckd_calloc(1, sizeof(feat_t)); + fcb->refcount = 1; + fcb->name = (char *) ckd_salloc(type); + if (strcmp(type, "s2_4x") == 0) { + /* Sphinx-II format 4-stream feature (Hack!! hardwired constants below) */ + if (cepsize != 13) { + E_ERROR("s2_4x features require cepsize == 13\n"); + ckd_free(fcb); + return NULL; + } + fcb->cepsize = 13; + fcb->n_stream = 4; + fcb->stream_len = (uint32 *) ckd_calloc(4, sizeof(uint32)); + fcb->stream_len[0] = 12; + fcb->stream_len[1] = 24; + fcb->stream_len[2] = 3; + fcb->stream_len[3] = 12; + fcb->out_dim = 51; + fcb->window_size = 4; + fcb->compute_feat = feat_s2_4x_cep2feat; + } + else if ((strcmp(type, "s3_1x39") == 0) || (strcmp(type, "1s_12c_12d_3p_12dd") == 0)) { + /* 1-stream cep/dcep/pow/ddcep (Hack!! 
hardwired constants below) */ + if (cepsize != 13) { + E_ERROR("s2_4x features require cepsize == 13\n"); + ckd_free(fcb); + return NULL; + } + fcb->cepsize = 13; + fcb->n_stream = 1; + fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); + fcb->stream_len[0] = 39; + fcb->out_dim = 39; + fcb->window_size = 3; + fcb->compute_feat = feat_s3_1x39_cep2feat; + } + else if (strncmp(type, "1s_c_d_dd", 9) == 0) { + fcb->cepsize = cepsize; + fcb->n_stream = 1; + fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); + fcb->stream_len[0] = cepsize * 3; + fcb->out_dim = cepsize * 3; + fcb->window_size = FEAT_DCEP_WIN + 1; /* ddcep needs the extra 1 */ + fcb->compute_feat = feat_1s_c_d_dd_cep2feat; + } + else if (strncmp(type, "1s_c_d_ld_dd", 12) == 0) { + fcb->cepsize = cepsize; + fcb->n_stream = 1; + fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); + fcb->stream_len[0] = cepsize * 4; + fcb->out_dim = cepsize * 4; + fcb->window_size = FEAT_DCEP_WIN * 2; + fcb->compute_feat = feat_1s_c_d_ld_dd_cep2feat; + } + else if (strncmp(type, "cep_dcep", 8) == 0 || strncmp(type, "1s_c_d", 6) == 0) { + /* 1-stream cep/dcep */ + fcb->cepsize = cepsize; + fcb->n_stream = 1; + fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); + fcb->stream_len[0] = feat_cepsize(fcb) * 2; + fcb->out_dim = fcb->stream_len[0]; + fcb->window_size = 2; + fcb->compute_feat = feat_s3_cep_dcep; + } + else if (strncmp(type, "cep", 3) == 0 || strncmp(type, "1s_c", 4) == 0) { + /* 1-stream cep */ + fcb->cepsize = cepsize; + fcb->n_stream = 1; + fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); + fcb->stream_len[0] = feat_cepsize(fcb); + fcb->out_dim = fcb->stream_len[0]; + fcb->window_size = 0; + fcb->compute_feat = feat_s3_cep; + } + else if (strncmp(type, "1s_3c", 5) == 0 || strncmp(type, "1s_4c", 5) == 0) { + /* 1-stream cep with frames concatenated, so called cepwin features */ + if (strncmp(type, "1s_3c", 5) == 0) + fcb->window_size = 3; + else + fcb->window_size = 4; + + 
fcb->cepsize = cepsize; + fcb->n_stream = 1; + fcb->stream_len = (uint32 *) ckd_calloc(1, sizeof(uint32)); + fcb->stream_len[0] = feat_cepsize(fcb) * (2 * fcb->window_size + 1); + fcb->out_dim = fcb->stream_len[0]; + fcb->compute_feat = feat_copy; + } + else { + int32 i, k, l; + size_t len; + char *strp; + char *mtype = ckd_salloc(type); + char *wd = ckd_salloc(type); + /* + * Generic definition: Format should be %d,%d,%d,...,%d (i.e., + * comma separated list of feature stream widths; #items = + * #streams). An optional window size (frames will be + * concatenated) is also allowed, which can be specified with + * a colon after the list of feature streams. + */ + len = strlen(mtype); + k = 0; + for (i = 1; (size_t)i < len - 1; i++) { + if (mtype[i] == ',') { + mtype[i] = ' '; + k++; + } + else if (mtype[i] == ':') { + mtype[i] = '\0'; + fcb->window_size = atoi(mtype + i + 1); + break; + } + } + k++; /* Presumably there are (#commas+1) streams */ + fcb->n_stream = k; + fcb->stream_len = (uint32 *) ckd_calloc(k, sizeof(uint32)); + + /* Scan individual feature stream lengths */ + strp = mtype; + i = 0; + fcb->out_dim = 0; + fcb->cepsize = 0; + while (sscanf(strp, "%s%n", wd, &l) == 1) { + strp += l; + if ((i >= fcb->n_stream) + || (sscanf(wd, "%u", &(fcb->stream_len[i])) != 1) + || (fcb->stream_len[i] <= 0)) + E_FATAL("Bad feature type argument\n"); + /* Input size before windowing */ + fcb->cepsize += fcb->stream_len[i]; + if (fcb->window_size > 0) + fcb->stream_len[i] *= (fcb->window_size * 2 + 1); + /* Output size after windowing */ + fcb->out_dim += fcb->stream_len[i]; + i++; + } + if (i != fcb->n_stream) + E_FATAL("Bad feature type argument\n"); + if (fcb->cepsize != cepsize) + E_FATAL("Bad feature type argument\n"); + + /* Input is already the feature stream */ + fcb->compute_feat = feat_copy; + ckd_free(mtype); + ckd_free(wd); + } + + if (cmn != CMN_NONE) + fcb->cmn_struct = cmn_init(feat_cepsize(fcb)); + fcb->cmn = cmn; + fcb->varnorm = varnorm; + if (agc != 
AGC_NONE) { + fcb->agc_struct = agc_init(); + /* + * No need to check if agc is set to EMAX; agc_emax_set() changes only emax related things + * Moreover, if agc is not NONE and block mode is used, feat_agc() SILENTLY + * switches to EMAX + */ + /* HACK: hardwired initial estimates based on use of CMN (from Sphinx2) */ + agc_emax_set(fcb->agc_struct, (cmn != CMN_NONE) ? 5.0 : 10.0); + } + fcb->agc = agc; + /* + * Make sure this buffer is large enough to be used in feat_s2mfc2feat_block_utt() + */ + fcb->cepbuf = (mfcc_t **) ckd_calloc_2d((LIVEBUFBLOCKSIZE < feat_window_size(fcb) * 2) ? feat_window_size(fcb) * 2 : LIVEBUFBLOCKSIZE, + feat_cepsize(fcb), + sizeof(mfcc_t)); + /* This one is actually just an array of pointers to "flatten out" + * wraparounds. */ + fcb->tmpcepbuf = (mfcc_t** )ckd_calloc(2 * feat_window_size(fcb) + 1, + sizeof(*fcb->tmpcepbuf)); + + return fcb; +} + + +void +feat_print(feat_t * fcb, mfcc_t *** feat, int32 nfr, FILE * fp) +{ + uint32 i, j, k; + + for (i = 0; i < (uint32)nfr; i++) { + fprintf(fp, "%8d:\n", i); + + for (j = 0; j < (uint32)feat_dimension1(fcb); j++) { + fprintf(fp, "\t%2d:", j); + + for (k = 0; k < feat_dimension2(fcb, j); k++) + fprintf(fp, " %8.4f", MFCC2FLOAT(feat[i][j][k])); + fprintf(fp, "\n"); + } + } + + fflush(fp); +} + +static void +feat_cmn(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt) +{ + cmn_type_t cmn_type = fcb->cmn; + + if (!(beginutt && endutt) + && cmn_type != CMN_NONE) /* Only cmn_prior in block computation mode. 
*/ + fcb->cmn = cmn_type = CMN_LIVE; + + switch (cmn_type) { + case CMN_BATCH: + cmn(fcb->cmn_struct, mfc, fcb->varnorm, nfr); + break; + case CMN_LIVE: + cmn_live(fcb->cmn_struct, mfc, fcb->varnorm, nfr); + if (endutt) + cmn_live_update(fcb->cmn_struct); + break; + default: + ; + } + cep_dump_dbg(fcb, mfc, nfr, "After CMN"); +} + +static void +feat_agc(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt) +{ + agc_type_t agc_type = fcb->agc; + + if (!(beginutt && endutt) + && agc_type != AGC_NONE) /* Only agc_emax in block computation mode. */ + agc_type = AGC_EMAX; + + switch (agc_type) { + case AGC_MAX: + agc_max(fcb->agc_struct, mfc, nfr); + break; + case AGC_EMAX: + agc_emax(fcb->agc_struct, mfc, nfr); + if (endutt) + agc_emax_update(fcb->agc_struct); + break; + case AGC_NOISE: + agc_noise(fcb->agc_struct, mfc, nfr); + break; + default: + ; + } + cep_dump_dbg(fcb, mfc, nfr, "After AGC"); +} + +static void +feat_compute_utt(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 win, mfcc_t ***feat) +{ + int32 i; + + cep_dump_dbg(fcb, mfc, nfr, "Incoming features (after padding)"); + + /* Create feature vectors */ + for (i = win; i < nfr - win; i++) { + fcb->compute_feat(fcb, mfc + i, feat[i - win]); + } + + feat_print_dbg(fcb, feat, nfr - win * 2, "After dynamic feature computation"); + + if (fcb->lda) { + feat_lda_transform(fcb, feat, nfr - win * 2); + feat_print_dbg(fcb, feat, nfr - win * 2, "After LDA"); + } + + if (fcb->subvecs) { + feat_subvec_project(fcb, feat, nfr - win * 2); + feat_print_dbg(fcb, feat, nfr - win * 2, "After subvector projection"); + } +} + + +/** + * Read Sphinx-II format mfc file (s2mfc = Sphinx-II format MFC data). + * If out_mfc is NULL, no actual reading will be done, and the number of + * frames (plus padding) that would be read is returned. + * + * It's important that normalization is done before padding because + * frames outside the data we are interested in shouldn't be taken + * into normalization stats. 
+ * + * @return # frames read (plus padding) if successful, -1 if + * error (e.g., mfc array too small). + */ +static int32 +feat_s2mfc_read_norm_pad(feat_t *fcb, char *file, int32 win, + int32 sf, int32 ef, + mfcc_t ***out_mfc, + int32 maxfr, + int32 cepsize) +{ + FILE *fp; + int32 n_float32; + float32 *float_feat; + struct stat statbuf; + int32 i, n, byterev; + int32 start_pad, end_pad; + mfcc_t **mfc; + + /* Initialize the output pointer to NULL, so that any attempts to + free() it if we fail before allocating it will not segfault! */ + if (out_mfc) + *out_mfc = NULL; + E_INFO("Reading mfc file: '%s'[%d..%d]\n", file, sf, ef); + if (ef >= 0 && ef <= sf) { + E_ERROR("%s: End frame (%d) <= Start frame (%d)\n", file, ef, sf); + return -1; + } + + /* Find filesize; HACK!! To get around intermittent NFS failures, use stat_retry */ + if ((stat_retry(file, &statbuf) < 0) + || ((fp = fopen(file, "rb")) == NULL)) { + E_ERROR_SYSTEM("Failed to open file '%s' for reading", file); + return -1; + } + + /* Read #floats in header */ + if (fread_retry(&n_float32, sizeof(int32), 1, fp) != 1) { + E_ERROR("%s: fread(#floats) failed\n", file); + fclose(fp); + return -1; + } + + /* Check if n_float32 matches file size */ + byterev = 0; + if ((int32) (n_float32 * sizeof(float32) + 4) != (int32) statbuf.st_size) { /* RAH, typecast both sides to remove compile warning */ + n = n_float32; + SWAP_INT32(&n); + + if ((int32) (n * sizeof(float32) + 4) != (int32) (statbuf.st_size)) { /* RAH, typecast both sides to remove compile warning */ + E_ERROR + ("%s: Header size field: %d(%08x); filesize: %d(%08x)\n", + file, n_float32, n_float32, statbuf.st_size, + statbuf.st_size); + fclose(fp); + return -1; + } + + n_float32 = n; + byterev = 1; + } + if (n_float32 <= 0) { + E_ERROR("%s: Header size field (#floats) = %d\n", file, n_float32); + fclose(fp); + return -1; + } + + /* Convert n to #frames of input */ + n = n_float32 / cepsize; + if (n * cepsize != n_float32) { + E_ERROR("Header size 
field: %d; not multiple of %d\n", n_float32, + cepsize); + fclose(fp); + return -1; + } + + /* Check start and end frames */ + if (sf > 0) { + if (sf >= n) { + E_ERROR("%s: Start frame (%d) beyond file size (%d)\n", file, + sf, n); + fclose(fp); + return -1; + } + } + if (ef < 0) + ef = n-1; + else if (ef >= n) { + E_WARN("%s: End frame (%d) beyond file size (%d), will truncate\n", + file, ef, n); + ef = n-1; + } + + /* Add window to start and end frames */ + sf -= win; + ef += win; + if (sf < 0) { + start_pad = -sf; + sf = 0; + } + else + start_pad = 0; + if (ef >= n) { + end_pad = ef - n + 1; + ef = n - 1; + } + else + end_pad = 0; + + /* Limit n if indicated by [sf..ef] */ + if ((ef - sf + 1) < n) + n = (ef - sf + 1); + if (maxfr > 0 && n + start_pad + end_pad > maxfr) { + E_ERROR("%s: Maximum output size(%d frames) < actual #frames(%d)\n", + file, maxfr, n + start_pad + end_pad); + fclose(fp); + return -1; + } + + /* If no output buffer was supplied, then skip the actual data reading. */ + if (out_mfc != NULL) { + /* Position at desired start frame and read actual MFC data */ + mfc = (mfcc_t **)ckd_calloc_2d(n + start_pad + end_pad, cepsize, sizeof(mfcc_t)); + if (sf > 0) + fseek(fp, sf * cepsize * sizeof(float32), SEEK_CUR); + n_float32 = n * cepsize; +#ifdef FIXED_POINT + float_feat = ckd_calloc(n_float32, sizeof(float32)); +#else + float_feat = mfc[start_pad]; +#endif + if (fread_retry(float_feat, sizeof(float32), n_float32, fp) != n_float32) { + E_ERROR("%s: fread(%dx%d) (MFC data) failed\n", file, n, cepsize); + ckd_free_2d(mfc); + fclose(fp); + return -1; + } + if (byterev) { + for (i = 0; i < n_float32; i++) { + SWAP_FLOAT32(&float_feat[i]); + } + } +#ifdef FIXED_POINT + for (i = 0; i < n_float32; ++i) { + mfc[start_pad][i] = FLOAT2MFCC(float_feat[i]); + } + ckd_free(float_feat); +#endif + + /* Normalize */ + feat_cmn(fcb, mfc + start_pad, n, 1, 1); + feat_agc(fcb, mfc + start_pad, n, 1, 1); + + /* Replicate start and end frames if necessary. 
*/ + for (i = 0; i < start_pad; ++i) + memcpy(mfc[i], mfc[start_pad], cepsize * sizeof(mfcc_t)); + for (i = 0; i < end_pad; ++i) + memcpy(mfc[start_pad + n + i], mfc[start_pad + n - 1], + cepsize * sizeof(mfcc_t)); + + *out_mfc = mfc; + } + + fclose(fp); + return n + start_pad + end_pad; +} + + + +int32 +feat_s2mfc2feat(feat_t * fcb, const char *file, const char *dir, const char *cepext, + int32 sf, int32 ef, mfcc_t *** feat, int32 maxfr) +{ + char *path; + char *ps = "/"; + int32 win, nfr; + size_t file_length, cepext_length, path_length = 0; + mfcc_t **mfc; + + if (fcb->cepsize <= 0) { + E_ERROR("Bad cepsize: %d\n", fcb->cepsize); + return -1; + } + + if (cepext == NULL) + cepext = ""; + + /* + * Create mfc filename, combining file, dir and extension if + * necessary + */ + + /* + * First we decide about the path. If dir is defined, then use + * it. Otherwise assume the filename already contains the path. + */ + if (dir == NULL) { + dir = ""; + ps = ""; + /* + * This is not true but some 3rd party apps + * may parse the output explicitly checking for this line + */ + E_INFO("At directory . (current directory)\n"); + } + else { + E_INFO("At directory %s\n", dir); + /* + * Do not forget the path separator! + */ + path_length += strlen(dir) + 1; + } + + /* + * Include cepext, if it's not already part of the filename. + */ + file_length = strlen(file); + cepext_length = strlen(cepext); + if ((file_length > cepext_length) + && (strcmp(file + file_length - cepext_length, cepext) == 0)) { + cepext = ""; + cepext_length = 0; + } + + /* + * Do not forget the '\0' + */ + path_length += file_length + cepext_length + 1; + path = (char*) ckd_calloc(path_length, sizeof(char)); + +#ifdef HAVE_SNPRINTF + /* + * Paranoia is our best friend... 
+ */ + while ((file_length = snprintf(path, path_length, "%s%s%s%s", dir, ps, file, cepext)) > path_length) { + path_length = file_length; + path = (char*) ckd_realloc(path, path_length * sizeof(char)); + } +#else + sprintf(path, "%s%s%s%s", dir, ps, file, cepext); +#endif + + win = feat_window_size(fcb); + /* Pad maxfr with win, so we read enough raw feature data to + * calculate the requisite number of dynamic features. */ + if (maxfr >= 0) + maxfr += win * 2; + + if (feat != NULL) { + /* Read mfc file including window or padding if necessary. */ + nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, &mfc, maxfr, fcb->cepsize); + ckd_free(path); + if (nfr < 0) { + ckd_free_2d((void **) mfc); + return -1; + } + + /* Actually compute the features */ + feat_compute_utt(fcb, mfc, nfr, win, feat); + + ckd_free_2d((void **) mfc); + } + else { + /* Just calculate the number of frames we would need. */ + nfr = feat_s2mfc_read_norm_pad(fcb, path, win, sf, ef, NULL, maxfr, fcb->cepsize); + ckd_free(path); + if (nfr < 0) + return nfr; + } + + + return (nfr - win * 2); +} + +static int32 +feat_s2mfc2feat_block_utt(feat_t * fcb, mfcc_t ** uttcep, + int32 nfr, mfcc_t *** ofeat) +{ + mfcc_t **cepbuf; + int32 i, win, cepsize; + + win = feat_window_size(fcb); + cepsize = feat_cepsize(fcb); + + /* Copy and pad out the utterance (this requires that the + * feature computation functions always access the buffer via + * the frame pointers, which they do) */ + cepbuf = (mfcc_t **)ckd_calloc(nfr + win * 2, sizeof(mfcc_t *)); + memcpy(cepbuf + win, uttcep, nfr * sizeof(mfcc_t *)); + + /* Do normalization before we interpolate on the boundary */ + feat_cmn(fcb, cepbuf + win, nfr, 1, 1); + feat_agc(fcb, cepbuf + win, nfr, 1, 1); + + /* Now interpolate */ + for (i = 0; i < win; ++i) { + cepbuf[i] = fcb->cepbuf[i]; + memcpy(cepbuf[i], uttcep[0], cepsize * sizeof(mfcc_t)); + cepbuf[nfr + win + i] = fcb->cepbuf[win + i]; + memcpy(cepbuf[nfr + win + i], uttcep[nfr - 1], cepsize * 
sizeof(mfcc_t)); + } + /* Compute as usual. */ + feat_compute_utt(fcb, cepbuf, nfr + win * 2, win, ofeat); + ckd_free(cepbuf); + return nfr; +} + +int32 +feat_s2mfc2feat_live(feat_t * fcb, mfcc_t ** uttcep, int32 *inout_ncep, + int32 beginutt, int32 endutt, mfcc_t *** ofeat) +{ + int32 win, cepsize, nbufcep; + int32 i, j, nfeatvec; + int32 zero = 0; + + /* Avoid having to check this everywhere. */ + if (inout_ncep == NULL) inout_ncep = &zero; + + /* Special case for entire utterances. */ + if (beginutt && endutt && *inout_ncep > 0) + return feat_s2mfc2feat_block_utt(fcb, uttcep, *inout_ncep, ofeat); + + win = feat_window_size(fcb); + cepsize = feat_cepsize(fcb); + + /* Empty the input buffer on start of utterance. */ + if (beginutt) + fcb->bufpos = fcb->curpos; + + /* Calculate how much data is in the buffer already. */ + nbufcep = fcb->bufpos - fcb->curpos; + if (nbufcep < 0) + nbufcep = fcb->bufpos + LIVEBUFBLOCKSIZE - fcb->curpos; + /* Add any data that we have to replicate. */ + if (beginutt && *inout_ncep > 0) + nbufcep += win; + if (endutt) + nbufcep += win; + + /* Only consume as much input as will fit in the buffer. */ + if (nbufcep + *inout_ncep > LIVEBUFBLOCKSIZE) { + /* We also can't overwrite the trailing window, hence the + * reason why win is subtracted here. */ + *inout_ncep = LIVEBUFBLOCKSIZE - nbufcep - win; + /* Cancel end of utterance processing. */ + endutt = FALSE; + } + + /* FIXME: Don't modify the input! */ + feat_cmn(fcb, uttcep, *inout_ncep, beginutt, endutt); + feat_agc(fcb, uttcep, *inout_ncep, beginutt, endutt); + + /* Replicate first frame into the first win frames if we're at the + * beginning of the utterance and there was some actual input to + * deal with. (FIXME: Not entirely sure why that condition) */ + if (beginutt && *inout_ncep > 0) { + for (i = 0; i < win; i++) { + memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[0], + cepsize * sizeof(mfcc_t)); + fcb->bufpos %= LIVEBUFBLOCKSIZE; + } + /* Move the current pointer past this data. 
*/ + fcb->curpos = fcb->bufpos; + nbufcep -= win; + } + + /* Copy in frame data to the circular buffer. */ + for (i = 0; i < *inout_ncep; ++i) { + memcpy(fcb->cepbuf[fcb->bufpos++], uttcep[i], + cepsize * sizeof(mfcc_t)); + fcb->bufpos %= LIVEBUFBLOCKSIZE; + ++nbufcep; + } + + /* Replicate last frame into the last win frames if we're at the + * end of the utterance (even if there was no input, so we can + * flush the output). */ + if (endutt) { + int32 tpos; /* Index of last input frame. */ + if (fcb->bufpos == 0) + tpos = LIVEBUFBLOCKSIZE - 1; + else + tpos = fcb->bufpos - 1; + for (i = 0; i < win; ++i) { + memcpy(fcb->cepbuf[fcb->bufpos++], fcb->cepbuf[tpos], + cepsize * sizeof(mfcc_t)); + fcb->bufpos %= LIVEBUFBLOCKSIZE; + } + } + + /* We have to leave the trailing window of frames. */ + nfeatvec = nbufcep - win; + if (nfeatvec <= 0) + return 0; /* Do nothing. */ + + for (i = 0; i < nfeatvec; ++i) { + /* Handle wraparound cases. */ + if (fcb->curpos - win < 0 || fcb->curpos + win >= LIVEBUFBLOCKSIZE) { + /* Use tmpcepbuf for this case. Actually, we just need the pointers. */ + for (j = -win; j <= win; ++j) { + int32 tmppos = + (fcb->curpos + j + LIVEBUFBLOCKSIZE) % LIVEBUFBLOCKSIZE; + fcb->tmpcepbuf[win + j] = fcb->cepbuf[tmppos]; + } + fcb->compute_feat(fcb, fcb->tmpcepbuf + win, ofeat[i]); + } + else { + fcb->compute_feat(fcb, fcb->cepbuf + fcb->curpos, ofeat[i]); + } + /* Move the read pointer forward. 
*/ + ++fcb->curpos; + fcb->curpos %= LIVEBUFBLOCKSIZE; + } + + if (fcb->lda) + feat_lda_transform(fcb, ofeat, nfeatvec); + + if (fcb->subvecs) + feat_subvec_project(fcb, ofeat, nfeatvec); + + return nfeatvec; +} + +void +feat_update_stats(feat_t *fcb) +{ + if (fcb->cmn == CMN_LIVE) { + cmn_live_update(fcb->cmn_struct); + } + if (fcb->agc == AGC_EMAX || fcb->agc == AGC_MAX) { + agc_emax_update(fcb->agc_struct); + } +} + +feat_t * +feat_retain(feat_t *f) +{ + ++f->refcount; + return f; +} + +int +feat_free(feat_t * f) +{ + if (f == NULL) + return 0; + if (--f->refcount > 0) + return f->refcount; + + if (f->cepbuf) + ckd_free_2d((void **) f->cepbuf); + ckd_free(f->tmpcepbuf); + + if (f->name) { + ckd_free((void *) f->name); + } + if (f->lda) + ckd_free_3d((void ***) f->lda); + + ckd_free(f->stream_len); + ckd_free(f->sv_len); + ckd_free(f->sv_buf); + subvecs_free(f->subvecs); + + cmn_free(f->cmn_struct); + agc_free(f->agc_struct); + + ckd_free(f); + return 0; +} + + +void +feat_report(feat_t * f) +{ + int i; + E_INFO_NOFN("Initialization of feat_t, report:\n"); + E_INFO_NOFN("Feature type = %s\n", f->name); + E_INFO_NOFN("Cepstral size = %d\n", f->cepsize); + E_INFO_NOFN("Number of streams = %d\n", f->n_stream); + for (i = 0; i < f->n_stream; i++) { + E_INFO_NOFN("Vector size of stream[%d]: %d\n", i, + f->stream_len[i]); + } + E_INFO_NOFN("Number of subvectors = %d\n", f->n_sv); + for (i = 0; i < f->n_sv; i++) { + int32 *sv; + + E_INFO_NOFN("Components of subvector[%d]:", i); + for (sv = f->subvecs[i]; sv && *sv != -1; ++sv) + E_INFOCONT(" %d", *sv); + E_INFOCONT("\n"); + } + E_INFO_NOFN("Whether CMN is used = %d\n", f->cmn); + E_INFO_NOFN("Whether AGC is used = %d\n", f->agc); + E_INFO_NOFN("Whether variance is normalized = %d\n", f->varnorm); + E_INFO_NOFN("\n"); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/feat/lda.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/feat/lda.c new file 
mode 100644 index 0000000000000000000000000000000000000000..ac1b6cc756701510fb01d4dfac05fdc0c790cb3a --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/feat/lda.c @@ -0,0 +1,158 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +/* + * lda.c -- Read and apply LDA matrices to features. + * + * Author: David Huggins-Daines + */ + +#include +#include +#ifdef HAVE_CONFIG_H +#include +#endif + +#ifdef _MSC_VER +#pragma warning (disable: 4018) +#endif + +#include "sphinxbase/feat.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/bio.h" +#include "sphinxbase/err.h" + +#define MATRIX_FILE_VERSION "0.1" + +int32 +feat_read_lda(feat_t *feat, const char *ldafile, int32 dim) +{ + FILE *fh; + int32 byteswap; + uint32 chksum, i, m, n; + char **argname, **argval; + + assert(feat); + if (feat->n_stream != 1) { + E_ERROR("LDA incompatible with multi-stream features (n_stream = %d)\n", + feat->n_stream); + return -1; + } + + if ((fh = fopen(ldafile, "rb")) == NULL) { + E_ERROR_SYSTEM("Failed to open transform file '%s' for reading", ldafile); + return -1; + } + + if (bio_readhdr(fh, &argname, &argval, &byteswap) < 0) { + E_ERROR("Failed to read header from transform file '%s'\n", ldafile); + fclose(fh); + return -1; + } + + for (i = 0; argname[i]; i++) { + if (strcmp(argname[i], "version") == 0) { + if (strcmp(argval[i], MATRIX_FILE_VERSION) != 0) + E_WARN("%s: Version mismatch: %s, expecting %s\n", + ldafile, argval[i], MATRIX_FILE_VERSION); + } + } + + bio_hdrarg_free(argname, argval); + argname = argval = NULL; + + chksum = 0; + + if (feat->lda) + ckd_free_3d((void ***)feat->lda); + + { + /* Use a temporary variable to avoid strict-aliasing problems. */ + void ***outlda; + + if (bio_fread_3d(&outlda, sizeof(float32), + &feat->n_lda, &m, &n, + fh, byteswap, &chksum) < 0) { + E_ERROR_SYSTEM("%s: bio_fread_3d(lda) failed\n", ldafile); + fclose(fh); + return -1; + } + feat->lda = (void *)outlda; + } + fclose(fh); + +#ifdef FIXED_POINT + /* FIXME: This is a fragile hack that depends on mfcc_t and + * float32 being the same size (which they are, but...) 
*/ + for (i = 0; i < feat->n_lda * m * n; ++i) { + feat->lda[0][0][i] = FLOAT2MFCC(((float *)feat->lda[0][0])[i]); + } +#endif + + /* Note that SphinxTrain stores the eigenvectors as row vectors. */ + if (n != feat->stream_len[0]) + E_FATAL("LDA matrix dimension %d doesn't match feature stream size %d\n", n, feat->stream_len[0]); + + /* Override dim from file if it is 0 or greater than m. */ + if ((uint32)dim > m || dim <= 0) { + dim = m; + } + feat->out_dim = dim; + + return 0; +} + +void +feat_lda_transform(feat_t *fcb, mfcc_t ***inout_feat, uint32 nfr) +{ + mfcc_t *tmp; + uint32 i, j, k; + + tmp = ckd_calloc(fcb->stream_len[0], sizeof(mfcc_t)); + for (i = 0; i < nfr; ++i) { + /* Do the matrix multiplication inline here since fcb->lda + * is transposed (eigenvectors in rows not columns). */ + /* FIXME: In the future we ought to use the BLAS. */ + memset(tmp, 0, sizeof(mfcc_t) * fcb->stream_len[0]); + for (j = 0; j < feat_dimension(fcb); ++j) { + for (k = 0; k < fcb->stream_len[0]; ++k) { + tmp[j] += MFCCMUL(inout_feat[i][0][k], fcb->lda[0][j][k]); + } + } + memcpy(inout_feat[i][0], tmp, fcb->stream_len[0] * sizeof(mfcc_t)); + } + ckd_free(tmp); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_history.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_history.c new file mode 100644 index 0000000000000000000000000000000000000000..f51c76ec4166c041dcaebd51bb06cde37330ae6b --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_history.c @@ -0,0 +1,315 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * fsg_history.c -- FSG Viterbi decode history + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * + * 25-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Started.. + */ + +/* System headers. */ +#include + +/* SphinxBase headers. */ +#include +#include +#include + +/* Local headers. 
*/ +#include "fsg_search_internal.h" +#include "fsg_history.h" + + +#define __FSG_DBG__ 0 + + +fsg_history_t * +fsg_history_init(fsg_model_t * fsg, dict_t *dict) +{ + fsg_history_t *h; + + h = (fsg_history_t *) ckd_calloc(1, sizeof(fsg_history_t)); + h->fsg = fsg; + h->entries = blkarray_list_init(); + + if (fsg && dict) { + h->n_ciphone = bin_mdef_n_ciphone(dict->mdef); + h->frame_entries = + (glist_t **) ckd_calloc_2d(fsg_model_n_state(fsg), + bin_mdef_n_ciphone(dict->mdef), + sizeof(**h->frame_entries)); + } + else { + h->frame_entries = NULL; + } + + return h; +} + +void +fsg_history_free(fsg_history_t *h) +{ + int32 s, lc, ns, np; + gnode_t *gn; + + if (h->fsg) { + ns = fsg_model_n_state(h->fsg); + np = h->n_ciphone; + + for (s = 0; s < ns; s++) { + for (lc = 0; lc < np; lc++) { + for (gn = h->frame_entries[s][lc]; gn; gn = gnode_next(gn)) { + ckd_free(gnode_ptr(gn)); + } + glist_free(h->frame_entries[s][lc]); + } + } + } + ckd_free_2d(h->frame_entries); + blkarray_list_free(h->entries); + ckd_free(h); +} + + +void +fsg_history_set_fsg(fsg_history_t *h, fsg_model_t *fsg, dict_t *dict) +{ + if (blkarray_list_n_valid(h->entries) != 0) { + E_WARN("Switching FSG while history not empty; history cleared\n"); + blkarray_list_reset(h->entries); + } + + if (h->frame_entries) + ckd_free_2d((void **) h->frame_entries); + h->frame_entries = NULL; + h->fsg = fsg; + + if (fsg && dict) { + h->n_ciphone = bin_mdef_n_ciphone(dict->mdef); + h->frame_entries = + (glist_t **) ckd_calloc_2d(fsg_model_n_state(fsg), + bin_mdef_n_ciphone(dict->mdef), + sizeof(glist_t)); + } +} + + +void +fsg_history_entry_add(fsg_history_t * h, + fsg_link_t * link, + int32 frame, int32 score, int32 pred, + int32 lc, fsg_pnode_ctxt_t rc) +{ + fsg_hist_entry_t *entry, *new_entry; + int32 s; + gnode_t *gn, *prev_gn; + + /* Skip the optimization for the initial dummy entries; always enter them */ + if (frame < 0) { + new_entry = + (fsg_hist_entry_t *) ckd_calloc(1, sizeof(fsg_hist_entry_t)); + 
new_entry->fsglink = link; + new_entry->frame = frame; + new_entry->score = score; + new_entry->pred = pred; + new_entry->lc = lc; + new_entry->rc = rc; + + blkarray_list_append(h->entries, (void *) new_entry); + return; + } + + s = fsg_link_to_state(link); + + /* Locate where this entry should be inserted in frame_entries[s][lc] */ + prev_gn = NULL; + for (gn = h->frame_entries[s][lc]; gn; gn = gnode_next(gn)) { + entry = (fsg_hist_entry_t *) gnode_ptr(gn); + + if (score BETTER_THAN entry->score) + break; /* Found where to insert new entry */ + + /* Existing entry score not worse than new score */ + if (FSG_PNODE_CTXT_SUB(&rc, &(entry->rc)) == 0) + return; /* rc set reduced to 0; new entry can be ignored */ + + prev_gn = gn; + } + + /* Create new entry after prev_gn (if prev_gn is NULL, at head) */ + new_entry = + (fsg_hist_entry_t *) ckd_calloc(1, sizeof(fsg_hist_entry_t)); + new_entry->fsglink = link; + new_entry->frame = frame; + new_entry->score = score; + new_entry->pred = pred; + new_entry->lc = lc; + new_entry->rc = rc; /* Note: rc set must be non-empty at this point */ + + if (!prev_gn) { + h->frame_entries[s][lc] = glist_add_ptr(h->frame_entries[s][lc], + (void *) new_entry); + prev_gn = h->frame_entries[s][lc]; + } + else + prev_gn = glist_insert_ptr(prev_gn, (void *) new_entry); + + /* + * Update the rc set of all the remaining entries in the list. At this + * point, gn is the entry, if any, immediately following new entry. + */ + while (gn) { + entry = (fsg_hist_entry_t *) gnode_ptr(gn); + + if (FSG_PNODE_CTXT_SUB(&(entry->rc), &rc) == 0) { + /* rc set of entry reduced to 0; can prune this entry */ + ckd_free((void *) entry); + gn = gnode_free(gn, prev_gn); + } + else { + prev_gn = gn; + gn = gnode_next(gn); + } + } +} + + +/* + * Transfer the surviving history entries for this frame into the permanent + * history table. 
+ */ +void +fsg_history_end_frame(fsg_history_t * h) +{ + int32 s, lc, ns, np; + gnode_t *gn; + fsg_hist_entry_t *entry; + + ns = fsg_model_n_state(h->fsg); + np = h->n_ciphone; + + for (s = 0; s < ns; s++) { + for (lc = 0; lc < np; lc++) { + for (gn = h->frame_entries[s][lc]; gn; gn = gnode_next(gn)) { + entry = (fsg_hist_entry_t *) gnode_ptr(gn); + blkarray_list_append(h->entries, (void *) entry); + } + + glist_free(h->frame_entries[s][lc]); + h->frame_entries[s][lc] = NULL; + } + } +} + + +fsg_hist_entry_t * +fsg_history_entry_get(fsg_history_t * h, int32 id) +{ + return ((fsg_hist_entry_t *) blkarray_list_get(h->entries, id)); +} + + +void +fsg_history_reset(fsg_history_t * h) +{ + blkarray_list_reset(h->entries); +} + + +int32 +fsg_history_n_entries(fsg_history_t * h) +{ + return (blkarray_list_n_valid(h->entries)); +} + +void +fsg_history_utt_start(fsg_history_t * h) +{ + int32 s, lc, ns, np; + + assert(blkarray_list_n_valid(h->entries) == 0); + assert(h->frame_entries); + + ns = fsg_model_n_state(h->fsg); + np = h->n_ciphone; + + for (s = 0; s < ns; s++) { + for (lc = 0; lc < np; lc++) { + assert(h->frame_entries[s][lc] == NULL); + } + } +} + +void +fsg_history_utt_end(fsg_history_t * h) +{ + (void)h; +} + +void +fsg_history_print(fsg_history_t *h, dict_t *dict) +{ + int bpidx, bp; + + (void)dict; + for (bpidx = 0; bpidx < blkarray_list_n_valid(h->entries); bpidx++) { + bp = bpidx; + printf("History entry: "); + while (bp > 0) { + fsg_hist_entry_t *hist_entry = fsg_history_entry_get(h, bp); + fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry); + bp = fsg_hist_entry_pred(hist_entry); + if (fl) { + int32 wid = fsg_link_wid(fl); + const char *baseword = fsg_model_word_str(h->fsg, wid); + + printf("%s(%d->%d:%d) ", baseword, + fsg_link_from_state(hist_entry->fsglink), + fsg_link_to_state(hist_entry->fsglink), + hist_entry->frame); + } + } + printf("\n"); + } +} diff --git 
a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_history.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_history.h new file mode 100644 index 0000000000000000000000000000000000000000..f30a758abc97e3aa210cc971dbaf268843fcf6f6 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_history.h @@ -0,0 +1,226 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +/* + * fsg_history.h -- FSG Viterbi decode history + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * + * $Log: fsg_history.h,v $ + * Revision 1.1.1.1 2006/05/23 18:45:02 dhuggins + * re-importation + * + * Revision 1.1 2004/07/16 00:57:12 egouvea + * Added Ravi's implementation of FSG support. + * + * Revision 1.7 2004/07/07 22:30:35 rkm + * *** empty log message *** + * + * Revision 1.6 2004/07/07 13:56:33 rkm + * Added reporting of (acoustic score - best senone score)/frame + * + * Revision 1.5 2004/06/25 14:49:08 rkm + * Optimized size of history table and speed of word transitions by maintaining only best scoring word exits at each state + * + * Revision 1.4 2004/06/23 20:32:16 rkm + * *** empty log message *** + * + * Revision 1.3 2004/05/27 15:16:08 rkm + * *** empty log message *** + * + * + * 25-Feb-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Started, based on S3.3 version. + */ + + +#ifndef __S2_FSG_HISTORY_H__ +#define __S2_FSG_HISTORY_H__ + + +/* SphinxBase headers. */ +#include +#include + +/* Local headers. */ +#include "blkarray_list.h" +#include "fsg_lextree.h" +#include "dict.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/* + * The Viterbi history structure. This is a tree, with the root at the + * FSG start state, at frame 0, with a null predecessor. 
+ */ + +/* + * A single Viterbi history entry + */ +typedef struct fsg_hist_entry_s { + fsg_link_t *fsglink; /* Link taken result in this entry */ + int32 score; /* Total path score at the end of this + transition */ + int32 pred; /* Predecessor entry; -1 if none */ + frame_idx_t frame; /* Ending frame for this entry */ + int16 lc; /* Left context provided by this entry to + succeeding words */ + fsg_pnode_ctxt_t rc; /* Possible right contexts to which this entry + applies */ +} fsg_hist_entry_t; + +/* Access macros */ +#define fsg_hist_entry_fsglink(v) ((v)->fsglink) +#define fsg_hist_entry_frame(v) ((v)->frame) +#define fsg_hist_entry_score(v) ((v)->score) +#define fsg_hist_entry_pred(v) ((v)->pred) +#define fsg_hist_entry_lc(v) ((v)->lc) +#define fsg_hist_entry_rc(v) ((v)->rc) + + +/* + * The entire tree of history entries (fsg_history_t.entries). + * Optimization: In a given frame, there may be several history entries, with + * the same left and right phonetic context, terminating in a particular state. + * Only the best scoring one of these needs to be saved, since everything else + * will be pruned according to the Viterbi algorithm. frame_entries is used + * temporarily in each frame to determine these best scoring entries in that + * frame. Only the ones not pruned are transferred to entries at the end of + * the frame. However, null transitions are a problem since they create + * entries that depend on entries created in the CURRENT frame. Hence, this + * pruning is done in two stages: first for the non-null transitions, and then + * for the null transitions alone. (This solution is sub-optimal, and can be + * improved with a little more work. SMOP.) + * Why is frame_entries a list? Each entry has a unique terminating state, + * and has a unique lc CIphone. But it has a SET of rc CIphones. + * frame_entries[s][lc] is an ordered list of entries created in the current + * frame, terminating in state s, and with left context lc. 
The list is in + * descending order of path score. When a new entry with (s,lc) arrives, + * its position in the list is determined. Then its rc set is modified by + * subtracting the union of the rc's of all its predecessors (i.e., better + * scoring entries). If the resulting rc set is empty, the entry is discarded. + * Otherwise, it is inserted, and the rc sets of all downstream entries in the + * list are updated by subtracting the new entry's rc. If any of them becomes + * empty, it is also discarded. + * As mentioned earlier, this procedure is applied in two stages, for the + * non-null transitions, and the null transitions, separately. + */ +typedef struct fsg_history_s { + fsg_model_t *fsg; /* The FSG for which this object applies */ + blkarray_list_t *entries; /* A list of history table entries; the root + entry is the first element of the list */ + glist_t **frame_entries; + int n_ciphone; +} fsg_history_t; + + +/* + * One-time initialization: Allocate and return an initially empty history + * module. + */ +fsg_history_t *fsg_history_init(fsg_model_t *fsg, dict_t *dict); + +void fsg_history_utt_start(fsg_history_t *h); + +void fsg_history_utt_end(fsg_history_t *h); + + +/* + * Create a history entry recording the completion of the given FSG + * transition, at the end of the given frame, with the given score, and + * the given predecessor history entry. + * The entry is initially temporary, and may be superseded by another + * with a higher score. The surviving entries must be transferred to + * the main history table, via fsg_history_end_frame(). + */ +void fsg_history_entry_add (fsg_history_t *h, + fsg_link_t *l, /* FSG transition */ + int32 frame, + int32 score, + int32 pred, + int32 lc, + fsg_pnode_ctxt_t rc); + +/* + * Transfer the surviving history entries for this frame into the permanent + * history table. This function can be called several times during a frame. 
+ * Each time, the entries surviving so far are transferred, and the temporary + * lists cleared. This feature is used to handle the entries due to non-null + * transitions and null transitions separately. + */ +void fsg_history_end_frame (fsg_history_t *h); + + +/* Clear the hitory table */ +void fsg_history_reset (fsg_history_t *h); + + +/* Return the number of valid entries in the given history table */ +int32 fsg_history_n_entries (fsg_history_t *h); + +/* + * Return a ptr to the history entry for the given ID; NULL if there is no + * such entry. + */ +fsg_hist_entry_t *fsg_history_entry_get(fsg_history_t *h, int32 id); + + +/* + * Switch the FSG associated with the given history module. Should be done + * when the history list is empty. If not empty, the list is cleared. + */ +void fsg_history_set_fsg (fsg_history_t *h, fsg_model_t *fsg, dict_t *dict); + +/* Free the given Viterbi search history object */ +void fsg_history_free (fsg_history_t *h); + +/* Print the entire history */ +void fsg_history_print(fsg_history_t *h, dict_t *dict); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_lextree.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_lextree.c new file mode 100644 index 0000000000000000000000000000000000000000..e68f15fe1221a27c202d933df7b3c04892f9e5d4 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_lextree.c @@ -0,0 +1,835 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2010 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file fsg_lextree.c + * @brief The collection of all the lextrees for the entire FSM. + * @author M K Ravishankar + * @author Bhiksha Raj + */ + +/* System headers. */ +#include +#include +#include + +/* SphinxBase headers. */ +#include +#include + +/* Local headers. */ +#include "fsg_lextree.h" + +#define __FSG_DBG__ 0 + +/* A linklist structure that is actually used to build local lextrees at grammar nodes */ +typedef struct fsg_glist_linklist_t { + int32 ci, rc; + glist_t glist; + struct fsg_glist_linklist_t *next; +} fsg_glist_linklist_t; + +/** + * Build the phone lextree for all transitions out of state from_state. + * Return the root node of this tree. 
+ * Also, return a linear linked list of all allocated fsg_pnode_t nodes in + * *alloc_head (for memory management purposes). + */ +static fsg_pnode_t *fsg_psubtree_init(fsg_lextree_t *tree, + fsg_model_t *fsg, + int32 from_state, + fsg_pnode_t **alloc_head); + +/** + * Free the given lextree. alloc_head: head of linear list of allocated + * nodes updated by fsg_psubtree_init(). + */ +static void fsg_psubtree_free(fsg_pnode_t *alloc_head); + +/** + * Dump the nodes of the given lextree to the given file. root: root node + * of the lextree, as returned by fsg_psubtree_init(). + */ +static void fsg_psubtree_dump(fsg_lextree_t *tree, fsg_pnode_t *root, FILE *fp); + +/** + * Compute the left and right context CIphone sets for each state. + */ +static void +fsg_lextree_lc_rc(fsg_lextree_t *lextree) +{ + int32 s, i, j; + int32 n_ci; + fsg_model_t *fsg; + int32 silcipid; + int32 len; + + silcipid = bin_mdef_silphone(lextree->mdef); + assert(silcipid >= 0); + n_ci = bin_mdef_n_ciphone(lextree->mdef); + + fsg = lextree->fsg; + /* + * lextree->lc[s] = set of left context CIphones for state s. Similarly, rc[s] + * for right context CIphones. + */ + lextree->lc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->lc)); + lextree->rc = ckd_calloc_2d(fsg->n_state, n_ci + 1, sizeof(**lextree->rc)); + E_INFO("Allocated %d bytes (%d KiB) for left and right context phones\n", + fsg->n_state * (n_ci + 1) * 2, + fsg->n_state * (n_ci + 1) * 2 / 1024); + + + for (s = 0; s < fsg->n_state; s++) { + fsg_arciter_t *itor; + for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) { + fsg_link_t *l = fsg_arciter_get(itor); + int32 dictwid; /**< Dictionary (not FSG) word ID!! */ + + if (fsg_link_wid(l) >= 0) { + dictwid = dict_wordid(lextree->dict, + fsg_model_word_str(lextree->fsg, l->wid)); + + /* + * Add the first CIphone of l->wid to the rclist of state s, and + * the last CIphone to lclist of state d. + * (Filler phones are a pain to deal with.
There is no direct + * marking of a filler phone; but only filler words are supposed to + * use such phones, so we use that fact. HACK!! FRAGILE!!) + * + * UPD: tests crash here if .fsg model used with wrong hmm and + * dictionary + */ + if (fsg_model_is_filler(fsg, fsg_link_wid(l))) { + /* Filler phone; use silence phone as context */ + lextree->rc[fsg_link_from_state(l)][silcipid] = 1; + lextree->lc[fsg_link_to_state(l)][silcipid] = 1; + } + else { + len = dict_pronlen(lextree->dict, dictwid); + lextree->rc[fsg_link_from_state(l)][dict_pron(lextree->dict, dictwid, 0)] = 1; + lextree->lc[fsg_link_to_state(l)][dict_pron(lextree->dict, dictwid, len - 1)] = 1; + } + } + } + } + + for (s = 0; s < fsg->n_state; s++) { + /* + * Add SIL phone to the lclist and rclist of each state. Strictly + * speaking, only needed at start and final states, respectively, but + * all states considered since the user may change the start and final + * states. In any case, most applications would have a silence self + * loop at each state, hence these would be needed anyway. + */ + lextree->lc[s][silcipid] = 1; + lextree->rc[s][silcipid] = 1; + } + + /* + * Propagate lc and rc lists past null transitions. (Since FSG contains + * null transitions closure, no need to worry about a chain of successive + * null transitions. Right??) + * + * This can't be joined with the previous loop because we first calculate + * contexts and only then we can propagate them. + */ + for (s = 0; s < fsg->n_state; s++) { + fsg_arciter_t *itor; + for (itor = fsg_model_arcs(fsg, s); itor; itor = fsg_arciter_next(itor)) { + fsg_link_t *l = fsg_arciter_get(itor); + if (fsg_link_wid(l) < 0) { + /* + * lclist(d) |= lclist(s), because all the words ending up at s, can + * now also end at d, becoming the left context for words leaving d.
+ */ + for (i = 0; i < n_ci; i++) + lextree->lc[fsg_link_to_state(l)][i] |= lextree->lc[fsg_link_from_state(l)][i]; + /* + * Similarly, rclist(s) |= rclist(d), because all the words leaving d + * can equivalently leave s, becoming the right context for words + * ending up at s. + */ + for (i = 0; i < n_ci; i++) + lextree->rc[fsg_link_from_state(l)][i] |= lextree->rc[fsg_link_to_state(l)][i]; + } + } + } + + /* Convert the bit-vector representation into a list */ + for (s = 0; s < fsg->n_state; s++) { + j = 0; + for (i = 0; i < n_ci; i++) { + if (lextree->lc[s][i]) { + lextree->lc[s][j] = i; + j++; + } + } + lextree->lc[s][j] = -1; /* Terminate the list */ + + j = 0; + for (i = 0; i < n_ci; i++) { + if (lextree->rc[s][i]) { + lextree->rc[s][j] = i; + j++; + } + } + lextree->rc[s][j] = -1; /* Terminate the list */ + } +} + +/* + * For now, allocate the entire lextree statically. + */ +fsg_lextree_t * +fsg_lextree_init(fsg_model_t * fsg, dict_t *dict, dict2pid_t *d2p, + bin_mdef_t *mdef, hmm_context_t *ctx, + int32 wip, int32 pip) +{ + int32 s, n_leaves; + fsg_lextree_t *lextree; + fsg_pnode_t *pn; + + lextree = ckd_calloc(1, sizeof(fsg_lextree_t)); + lextree->fsg = fsg; + lextree->root = ckd_calloc(fsg_model_n_state(fsg), + sizeof(fsg_pnode_t *)); + lextree->alloc_head = ckd_calloc(fsg_model_n_state(fsg), + sizeof(fsg_pnode_t *)); + lextree->ctx = ctx; + lextree->dict = dict; + lextree->d2p = d2p; + lextree->mdef = mdef; + lextree->wip = wip; + lextree->pip = pip; + + /* Compute lc and rc for fsg. */ + fsg_lextree_lc_rc(lextree); + + /* Create lextree for each state, i.e. an HMM network that + * represents words for all arcs exiting that state. Note that + * for a dense grammar such as an N-gram model, this will + * rapidly exhaust all available memory. 
*/ + lextree->n_pnode = 0; + n_leaves = 0; + for (s = 0; s < fsg_model_n_state(fsg); s++) { + lextree->root[s] = + fsg_psubtree_init(lextree, fsg, s, &(lextree->alloc_head[s])); + + for (pn = lextree->alloc_head[s]; pn; pn = pn->alloc_next) { + lextree->n_pnode++; + if (pn->leaf) + ++n_leaves; + } + } + E_INFO("%d HMM nodes in lextree (%d leaves)\n", + lextree->n_pnode, n_leaves); + E_INFO("Allocated %d bytes (%d KiB) for all lextree nodes\n", + lextree->n_pnode * sizeof(fsg_pnode_t), + lextree->n_pnode * sizeof(fsg_pnode_t) / 1024); + E_INFO("Allocated %d bytes (%d KiB) for lextree leafnodes\n", + n_leaves * sizeof(fsg_pnode_t), + n_leaves * sizeof(fsg_pnode_t) / 1024); + +#if __FSG_DBG__ + fsg_lextree_dump(lextree, stdout); +#endif + + return lextree; +} + + +void +fsg_lextree_dump(fsg_lextree_t * lextree, FILE * fp) +{ + int32 s; + + for (s = 0; s < fsg_model_n_state(lextree->fsg); s++) { + fprintf(fp, "State %5d root %p\n", s, lextree->root[s]); + fsg_psubtree_dump(lextree, lextree->root[s], fp); + } + fflush(fp); +} + + +void +fsg_lextree_free(fsg_lextree_t * lextree) +{ + int32 s; + + if (lextree == NULL) + return; + + if (lextree->fsg) + for (s = 0; s < fsg_model_n_state(lextree->fsg); s++) + fsg_psubtree_free(lextree->alloc_head[s]); + + ckd_free_2d(lextree->lc); + ckd_free_2d(lextree->rc); + ckd_free(lextree->root); + ckd_free(lextree->alloc_head); + ckd_free(lextree); +} + +/****************************** + * psubtree stuff starts here * + ******************************/ + +void fsg_glist_linklist_free(fsg_glist_linklist_t *glist) +{ + if (glist) { + fsg_glist_linklist_t *nxtglist; + if (glist->glist) + glist_free(glist->glist); + nxtglist = glist->next; + while (nxtglist) { + ckd_free(glist); + glist = nxtglist; + if (glist->glist) + glist_free(glist->glist); + nxtglist = glist->next; + } + ckd_free(glist); + } + return; +} + +void +fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t * ctxt) +{ + int32 i; + + for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++) + ctxt->bv[i] = 
0xffffffff; +} + +uint32 fsg_pnode_ctxt_sub_generic(fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub) +{ + int32 i; + uint32 res = 0; + + for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++) + res |= (src->bv[i] = ~(sub->bv[i]) & src->bv[i]); + return res; +} + + +/* + * fsg_pnode_ctxt_sub(fsg_pnode_ctxt_t * src, fsg_pnode_ctxt_t * sub) + * This has been moved into a macro in fsg_psubtree.h + * because it is called so frequently! + */ + + +/* + * Add the word emitted by the given transition (fsglink) to the given lextree + * (rooted at root), and return the new lextree root. (There may actually be + * several root nodes, maintained in a linked list via fsg_pnode_t.sibling. + * "root" is the head of this list.) + * lclist, rclist: sets of left and right context phones for this link. + * alloc_head: head of a linear list of all allocated pnodes for the parent + * FSG state, kept elsewhere and updated by this routine. + */ +static fsg_pnode_t * +psubtree_add_trans(fsg_lextree_t *lextree, + fsg_pnode_t * root, + fsg_glist_linklist_t **curglist, + fsg_link_t * fsglink, + int16 *lclist, int16 *rclist, + fsg_pnode_t ** alloc_head) +{ + int32 silcipid; /* Silence CI phone ID */ + int32 pronlen; /* Pronunciation length */ + int32 wid; /* FSG (not dictionary!!) word ID */ + int32 dictwid; /* Dictionary (not FSG!!) 
word ID */ + int32 ssid; /* Senone Sequence ID */ + int32 tmatid; + gnode_t *gn; + fsg_pnode_t *pnode, *pred, *head; + int32 n_ci, p, lc, rc; + glist_t lc_pnodelist; /* Temp pnodes list for different left contexts */ + glist_t rc_pnodelist; /* Temp pnodes list for different right contexts */ + int32 i, j; + int n_lc_alloc = 0, n_int_alloc = 0, n_rc_alloc = 0; + + silcipid = bin_mdef_silphone(lextree->mdef); + n_ci = bin_mdef_n_ciphone(lextree->mdef); + + wid = fsg_link_wid(fsglink); + assert(wid >= 0); /* Cannot be a null transition */ + dictwid = dict_wordid(lextree->dict, + fsg_model_word_str(lextree->fsg, wid)); + pronlen = dict_pronlen(lextree->dict, dictwid); + assert(pronlen >= 1); + + assert(lclist[0] >= 0); /* At least one phonetic context provided */ + assert(rclist[0] >= 0); + + head = *alloc_head; + pred = NULL; + + if (pronlen == 1) { /* Single-phone word */ + int ci = dict_first_phone(lextree->dict, dictwid); + /* Only non-filler words are mpx */ + if (!dict_filler_word(lextree->dict, dictwid)) { + /* + * Left diphone ID for single-phone words already assumes SIL is right + * context; only left contexts need to be handled. 
+ */ + lc_pnodelist = NULL; + + for (i = 0; lclist[i] >= 0; i++) { + lc = lclist[i]; + ssid = dict2pid_lrdiph_rc(lextree->d2p, ci, lc, silcipid); + tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid)); + /* Check if this ssid already allocated for some other context */ + for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { + pnode = (fsg_pnode_t *) gnode_ptr(gn); + + if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) { + /* already allocated; share it for this context phone */ + fsg_pnode_add_ctxt(pnode, lc); + break; + } + } + + if (!gn) { /* ssid not already allocated */ + pnode = + (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); + pnode->ctx = lextree->ctx; + pnode->next.fsglink = fsglink; + pnode->logs2prob = + (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT) + + lextree->wip + lextree->pip; + pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); + pnode->ppos = 0; + pnode->leaf = TRUE; + pnode->sibling = root; /* All root nodes linked together */ + fsg_pnode_add_ctxt(pnode, lc); /* Initially zeroed by calloc above */ + pnode->alloc_next = head; + head = pnode; + root = pnode; + ++n_lc_alloc; + + hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid); + + lc_pnodelist = + glist_add_ptr(lc_pnodelist, (void *) pnode); + } + } + + glist_free(lc_pnodelist); + } + else { /* Filler word; no context modelled */ + ssid = bin_mdef_pid2ssid(lextree->mdef, ci); /* probably the same... 
*/ + tmatid = bin_mdef_pid2tmatid(lextree->mdef, ci); + + pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); + pnode->ctx = lextree->ctx; + pnode->next.fsglink = fsglink; + pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT) + + lextree->wip + lextree->pip; + pnode->ci_ext = silcipid; /* Presents SIL as context to neighbors */ + pnode->ppos = 0; + pnode->leaf = TRUE; + pnode->sibling = root; + fsg_pnode_add_all_ctxt(&(pnode->ctxt)); + pnode->alloc_next = head; + head = pnode; + root = pnode; + ++n_int_alloc; + + hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid); + } + } + else { /* Multi-phone word */ + fsg_pnode_t **ssid_pnode_map; /* Temp array of ssid->pnode mapping */ + ssid_pnode_map = + (fsg_pnode_t **) ckd_calloc(n_ci, sizeof(fsg_pnode_t *)); + lc_pnodelist = NULL; + rc_pnodelist = NULL; + + for (p = 0; p < pronlen; p++) { + int ci = dict_pron(lextree->dict, dictwid, p); + if (p == 0) { /* Root phone, handle required left contexts */ + /* Find if we already have an lc_pnodelist for the first phone of this word */ + fsg_glist_linklist_t *glist; + + rc = dict_pron(lextree->dict, dictwid, 1); + for (glist = *curglist; + glist && glist->glist; + glist = glist->next) { + if (glist->ci == ci && glist->rc == rc) + break; + } + if (glist && glist->glist) { + assert(glist->ci == ci && glist->rc == rc); + /* We've found a valid glist. Hook to it and move to next phoneme */ + E_DEBUG("Found match for (%d,%d)\n", ci, rc); + lc_pnodelist = glist->glist; + /* Set the predecessor node for the future tree first */ + pred = (fsg_pnode_t *) gnode_ptr(lc_pnodelist); + continue; + } + else { + /* Two cases that can bring us here + * a. glist == NULL, i.e. end of current list. Create new entry. + * b. glist->glist == NULL, i.e. first entry into list. 
+ */ + if (glist == NULL) { /* Case a; reduce it to case b by allocing glist */ + glist = (fsg_glist_linklist_t*) ckd_calloc(1, sizeof(fsg_glist_linklist_t)); + glist->next = *curglist; + *curglist = glist; + } + glist->ci = ci; + glist->rc = rc; + lc_pnodelist = glist->glist = NULL; /* Gets created below */ + } + + for (i = 0; lclist[i] >= 0; i++) { + lc = lclist[i]; + ssid = dict2pid_ldiph_lc(lextree->d2p, ci, rc, lc); + tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_first_phone(lextree->dict, dictwid)); + /* Compression is not done by d2p, so we do it + * here. This might be slow, but it might not + * be... we'll see. */ + pnode = ssid_pnode_map[0]; + for (j = 0; j < n_ci && ssid_pnode_map[j] != NULL; ++j) { + pnode = ssid_pnode_map[j]; + if (hmm_nonmpx_ssid(&pnode->hmm) == ssid) + break; + } + assert(j < n_ci); + if (!pnode) { /* Allocate pnode for this new ssid */ + pnode = + (fsg_pnode_t *) ckd_calloc(1, + sizeof + (fsg_pnode_t)); + pnode->ctx = lextree->ctx; + /* This bit is tricky! 
For now we'll put the prob in the final link only */ + /* pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT) + + lextree->wip + lextree->pip; */ + pnode->logs2prob = lextree->wip + lextree->pip; + pnode->ci_ext = dict_first_phone(lextree->dict, dictwid); + pnode->ppos = 0; + pnode->leaf = FALSE; + pnode->sibling = root; /* All root nodes linked together */ + pnode->alloc_next = head; + head = pnode; + root = pnode; + ++n_lc_alloc; + + hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid); + + lc_pnodelist = + glist_add_ptr(lc_pnodelist, (void *) pnode); + ssid_pnode_map[j] = pnode; + } + fsg_pnode_add_ctxt(pnode, lc); + } + /* Put the lc_pnodelist back into glist */ + glist->glist = lc_pnodelist; + + /* The predecessor node for the future tree is the root */ + pred = root; + } + else if (p != pronlen - 1) { /* Word internal phone */ + fsg_pnode_t *pnodeyoungest; + + ssid = dict2pid_internal(lextree->d2p, dictwid, p); + tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_pron (lextree->dict, dictwid, p)); + /* First check if we already have this ssid in our tree */ + pnode = pred->next.succ; + pnodeyoungest = pnode; /* The youngest sibling */ + while (pnode && (hmm_nonmpx_ssid(&pnode->hmm) != ssid || pnode->leaf)) { + pnode = pnode->sibling; + } + if (pnode && (hmm_nonmpx_ssid(&pnode->hmm) == ssid && !pnode->leaf)) { + /* Found the ssid; go to next phoneme */ + E_DEBUG("Found match for %d\n", ci); + pred = pnode; + continue; + } + + /* pnode not found, allocate it */ + pnode = (fsg_pnode_t *) ckd_calloc(1, sizeof(fsg_pnode_t)); + pnode->ctx = lextree->ctx; + pnode->logs2prob = lextree->pip; + pnode->ci_ext = dict_pron(lextree->dict, dictwid, p); + pnode->ppos = p; + pnode->leaf = FALSE; + pnode->sibling = pnodeyoungest; /* May be NULL */ + if (p == 1) { /* Predecessor = set of root nodes for left ctxts */ + for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { + pred = (fsg_pnode_t *) gnode_ptr(gn); + pred->next.succ = pnode; + } + } + else { /* 
Predecessor = word internal node */ + pred->next.succ = pnode; + } + pnode->alloc_next = head; + head = pnode; + ++n_int_alloc; + + hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid); + + pred = pnode; + } + else { /* Leaf phone, handle required right contexts */ + /* Note, leaf phones are not part of the tree */ + xwdssid_t *rssid; + memset((void *) ssid_pnode_map, 0, + n_ci * sizeof(fsg_pnode_t *)); + lc = dict_pron(lextree->dict, dictwid, p-1); + rssid = dict2pid_rssid(lextree->d2p, ci, lc); + tmatid = bin_mdef_pid2tmatid(lextree->mdef, dict_pron (lextree->dict, dictwid, p)); + + for (i = 0; rclist[i] >= 0; i++) { + rc = rclist[i]; + + j = rssid->cimap[rc]; + ssid = rssid->ssid[j]; + pnode = ssid_pnode_map[j]; + + if (!pnode) { /* Allocate pnode for this new ssid */ + pnode = + (fsg_pnode_t *) ckd_calloc(1, + sizeof + (fsg_pnode_t)); + pnode->ctx = lextree->ctx; + /* We are plugging the word prob here. Ugly */ + /* pnode->logs2prob = lextree->pip; */ + pnode->logs2prob = (fsg_link_logs2prob(fsglink) >> SENSCR_SHIFT) + + lextree->pip; + pnode->ci_ext = dict_pron(lextree->dict, dictwid, p); + pnode->ppos = p; + pnode->leaf = TRUE; + pnode->sibling = rc_pnodelist ? 
+ (fsg_pnode_t *) gnode_ptr(rc_pnodelist) : NULL; + pnode->next.fsglink = fsglink; + pnode->alloc_next = head; + head = pnode; + ++n_rc_alloc; + + hmm_init(lextree->ctx, &pnode->hmm, FALSE, ssid, tmatid); + + rc_pnodelist = + glist_add_ptr(rc_pnodelist, (void *) pnode); + ssid_pnode_map[j] = pnode; + } + else { + assert(hmm_nonmpx_ssid(&pnode->hmm) == ssid); + } + fsg_pnode_add_ctxt(pnode, rc); + } + + if (p == 1) { /* Predecessor = set of root nodes for left ctxts */ + for (gn = lc_pnodelist; gn; gn = gnode_next(gn)) { + pred = (fsg_pnode_t *) gnode_ptr(gn); + if (!pred->next.succ) + pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); + else { + /* Link to the end of the sibling chain */ + fsg_pnode_t *succ = pred->next.succ; + while (succ->sibling) succ = succ->sibling; + succ->sibling = (fsg_pnode_t*) gnode_ptr(rc_pnodelist); + /* Since all entries of lc_pnodelist point + to the same array, sufficient to update it once */ + break; + } + } + } + else { /* Predecessor = word internal node */ + if (!pred->next.succ) + pred->next.succ = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); + else { + /* Link to the end of the sibling chain */ + fsg_pnode_t *succ = pred->next.succ; + while (succ->sibling) succ = succ->sibling; + succ->sibling = (fsg_pnode_t *) gnode_ptr(rc_pnodelist); + } + } + } + } + + ckd_free((void *) ssid_pnode_map); + /* glist_free(lc_pnodelist); Nope; this gets freed outside */ + glist_free(rc_pnodelist); + } + + E_DEBUG("Allocated %d HMMs (%d lc, %d rc, %d internal)\n", + n_lc_alloc + n_rc_alloc + n_int_alloc, + n_lc_alloc, n_rc_alloc, n_int_alloc); + *alloc_head = head; + + return root; +} + + +static fsg_pnode_t * +fsg_psubtree_init(fsg_lextree_t *lextree, + fsg_model_t * fsg, int32 from_state, + fsg_pnode_t ** alloc_head) +{ + fsg_arciter_t *itor; + fsg_link_t *fsglink; + fsg_pnode_t *root; + int32 n_ci, n_arc; + fsg_glist_linklist_t *glist = NULL; + + root = NULL; + assert(*alloc_head == NULL); + + n_ci = bin_mdef_n_ciphone(lextree->mdef); + if 
(n_ci > (FSG_PNODE_CTXT_BVSZ * 32)) { + E_FATAL + ("#phones > %d; increase FSG_PNODE_CTXT_BVSZ and recompile\n", + FSG_PNODE_CTXT_BVSZ * 32); + } + + n_arc = 0; + for (itor = fsg_model_arcs(fsg, from_state); itor; + itor = fsg_arciter_next(itor)) { + int32 dst; + fsglink = fsg_arciter_get(itor); + dst = fsglink->to_state; + + if (fsg_link_wid(fsglink) < 0) + continue; + + E_DEBUG("Building lextree for arc from %d to %d: %s\n", + from_state, dst, fsg_model_word_str(fsg, fsg_link_wid(fsglink))); + root = psubtree_add_trans(lextree, root, &glist, fsglink, + lextree->lc[from_state], + lextree->rc[dst], + alloc_head); + ++n_arc; + } + E_DEBUG("State %d has %d outgoing arcs\n", from_state, n_arc); + + fsg_glist_linklist_free(glist); + + return root; +} + + +static void +fsg_psubtree_free(fsg_pnode_t * head) +{ + fsg_pnode_t *next; + + while (head) { + next = head->alloc_next; + hmm_deinit(&head->hmm); + ckd_free(head); + head = next; + } +} + +void fsg_psubtree_dump_node(fsg_lextree_t *tree, fsg_pnode_t *node, FILE *fp) +{ + int32 i; + fsg_link_t *tl; + + /* Indentation */ + for (i = 0; i <= node->ppos; i++) + fprintf(fp, " "); + + fprintf(fp, "%p.@", node); /* Pointer used as node + * ID */ + fprintf(fp, " %5d.SS", hmm_nonmpx_ssid(&node->hmm)); + fprintf(fp, " %10d.LP", node->logs2prob); + fprintf(fp, " %p.SIB", node->sibling); + fprintf(fp, " %s.%d", bin_mdef_ciphone_str(tree->mdef, node->ci_ext), node->ppos); + if ((node->ppos == 0) || node->leaf) { + fprintf(fp, " ["); + for (i = 0; i < FSG_PNODE_CTXT_BVSZ; i++) + fprintf(fp, "%08x", node->ctxt.bv[i]); + fprintf(fp, "]"); + } + if (node->leaf) { + tl = node->next.fsglink; + fprintf(fp, " {%s[%d->%d](%d)}", + fsg_model_word_str(tree->fsg, tl->wid), + tl->from_state, tl->to_state, tl->logs2prob); + } else { + fprintf(fp, " %p.NXT", node->next.succ); + } + fprintf(fp, "\n"); + + return; +} + +void +fsg_psubtree_dump(fsg_lextree_t *tree, fsg_pnode_t *root, FILE * fp) +{ + fsg_pnode_t *succ; + + if (root == NULL) return; 
+ if (root->ppos == 0) { + while(root->sibling && root->sibling->next.succ == root->next.succ) { + fsg_psubtree_dump_node(tree, root, fp); + root = root->sibling; + } + fflush(fp); + } + + fsg_psubtree_dump_node(tree, root, fp); + + if (root->leaf) { + if (root->ppos == 0 && root->sibling) { /* For single-phone words */ + fsg_psubtree_dump(tree, root->sibling,fp); + } + return; + } + + succ = root->next.succ; + while(succ) { + fsg_psubtree_dump(tree, succ,fp); + succ = succ->sibling; + } + + if (root->ppos == 0) { + fsg_psubtree_dump(tree, root->sibling,fp); + fflush(fp); + } + + return; +} + +void +fsg_psubtree_pnode_deactivate(fsg_pnode_t * pnode) +{ + hmm_clear(&pnode->hmm); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_lextree.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_lextree.h new file mode 100644 index 0000000000000000000000000000000000000000..de2b461eca3efd841a037b6c452664f98c1a332f --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_lextree.h @@ -0,0 +1,266 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2013 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * fsg_lextree.h -- The collection of all the lextrees for the entire FSM. + * + */ + +#ifndef __S2_FSG_LEXTREE_H__ +#define __S2_FSG_LEXTREE_H__ + +/* SphinxBase headers. */ +#include +#include + +/* Local headers. */ +#include "hmm.h" +#include "dict.h" +#include "dict2pid.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/* + * Compile-time constant determining the size of the + * bitvector fsg_pnode_t.fsg_pnode_ctxt_t.bv. (See below.) + * But it makes memory allocation simpler and more efficient. + * Make it smaller (2) to save memory if your phoneset has less than + * 64 phones. + */ +#define FSG_PNODE_CTXT_BVSZ 4 + +typedef struct fsg_pnode_ctxt_s { + uint32 bv[FSG_PNODE_CTXT_BVSZ]; +} fsg_pnode_ctxt_t; + + +/* + * All transitions (words) out of any given FSG state are represented by a + * phonetic prefix lextree (except for epsilon or null transitions; they + * are not part of the lextree). Lextree leaf nodes represent individual + * FSG transitions, so no sharing is allowed at the leaf nodes.
The FSG + * transition probs are distributed along the lextree: the prob at a node + * is the max of the probs of all leaf nodes (and, hence, FSG transitions) + * reachable from that node. + * + * To conserve memory, the underlying HMMs with state-level information are + * allocated only as needed. Root and leaf nodes must also account for all + * the possible phonetic contexts, with an independent HMM for each distinct + * context. + */ +typedef struct fsg_pnode_s { + /* + * If this is not a leaf node, the first successor (child) node. Otherwise + * the parent FSG transition for which this is the leaf node (for figuring + * the FSG destination state, and word emitted by the transition). A node + * may have several children. The succ ptr gives just the first; the rest + * are linked via the sibling ptr below. + */ + union { + struct fsg_pnode_s *succ; + fsg_link_t *fsglink; + } next; + + /* + * For simplicity of memory management (i.e., freeing the pnodes), all + * pnodes allocated for all transitions out of a state are maintained in a + * linear linked list through the alloc_next pointer. + */ + struct fsg_pnode_s *alloc_next; + + /* + * The next node that is also a child of the parent of this node; NULL if + * none. + */ + struct fsg_pnode_s *sibling; + + /* + * The transition (log) probability to be incurred upon transitioning to + * this node. (Transition probabilities are really associated with the + * transitions. But a lextree node has exactly one incoming transition. + * Hence, the prob can be associated with the node.) + * This is a logs2(prob) value, and includes the language weight. + */ + int32 logs2prob; + + /* + * The root and leaf positions associated with any transition have to deal + * with multiple phonetic contexts. However, different contexts may result + * in the same SSID (senone-seq ID), and can share a single pnode with that + * SSID. But the pnode should track the set of context CI phones that share + * it. 
Hence the fsg_pnode_ctxt_t bit-vector set-representation. (For + * simplicity of implementation, its size is a compile-time constant for + * now.) Single phone words would need a 2-D array of context, but that's + * too expensive. For now, they simply use SIL as right context, so only + * the left context is properly modelled. + * (For word-internal phones, this field is unused, of course.) + */ + fsg_pnode_ctxt_t ctxt; + + uint16 ci_ext; /* This node's CIphone as viewed externally (context) */ + uint8 ppos; /* Phoneme position in pronunciation */ + uint8 leaf; /* Whether this is a leaf node */ + + /* HMM-state-level stuff here */ + hmm_context_t *ctx; + hmm_t hmm; +} fsg_pnode_t; + +/* Access macros */ +#define fsg_pnode_leaf(p) ((p)->leaf) +#define fsg_pnode_logs2prob(p) ((p)->logs2prob) +#define fsg_pnode_succ(p) ((p)->next.succ) +#define fsg_pnode_fsglink(p) ((p)->next.fsglink) +#define fsg_pnode_sibling(p) ((p)->sibling) +#define fsg_pnode_hmmptr(p) (&((p)->hmm)) +#define fsg_pnode_ci_ext(p) ((p)->ci_ext) +#define fsg_pnode_ppos(p) ((p)->ppos) +#define fsg_pnode_leaf(p) ((p)->leaf) +#define fsg_pnode_ctxt(p) ((p)->ctxt) + +#define fsg_pnode_add_ctxt(p,c) ((p)->ctxt.bv[(c)>>5] |= (1 << ((c)&0x001f))) + +/* + * The following is macroized because its called very frequently + * ::: uint32 fsg_pnode_ctxt_sub (fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub); + */ +/* + * Subtract bitvector sub from bitvector src (src updated with the result). + * Return 0 if result is all 0, non-zero otherwise. 
 */

/*
 * One unrolled variant per supported bit-vector size; any other size falls
 * back to the out-of-line fsg_pnode_ctxt_sub_generic().  Each variant clears
 * in src every bit that is set in sub and ORs the resulting words together,
 * so the expression is non-zero iff some bit of src survives.
 */
#if (FSG_PNODE_CTXT_BVSZ == 1)
    #define FSG_PNODE_CTXT_SUB(src,sub) \
    ((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0]))
#elif (FSG_PNODE_CTXT_BVSZ == 2)
    #define FSG_PNODE_CTXT_SUB(src,sub) \
    (((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0])) | \
     ((src)->bv[1] = (~((sub)->bv[1]) & (src)->bv[1])))
#elif (FSG_PNODE_CTXT_BVSZ == 4)
    #define FSG_PNODE_CTXT_SUB(src,sub) \
    (((src)->bv[0] = (~((sub)->bv[0]) & (src)->bv[0]))  | \
     ((src)->bv[1] = (~((sub)->bv[1]) & (src)->bv[1]))  | \
     ((src)->bv[2] = (~((sub)->bv[2]) & (src)->bv[2]))  | \
     ((src)->bv[3] = (~((sub)->bv[3]) & (src)->bv[3])))
#else
    #define FSG_PNODE_CTXT_SUB(src,sub) fsg_pnode_ctxt_sub_generic((src),(sub))
#endif

/**
 * Collection of lextrees for an FSG.
 */
typedef struct fsg_lextree_s {
    fsg_model_t *fsg;   /**< The fsg for which this lextree is built. */
    hmm_context_t *ctx; /**< HMM context structure. */
    dict_t *dict;     /**< Pronunciation dictionary for this FSG. */
    dict2pid_t *d2p;     /**< Context-dependent phone mappings for this FSG. */
    bin_mdef_t *mdef;   /**< Model definition (triphone mappings). */

    /*
     * Left and right CIphone sets for each state.
     * Left context CIphones for a state S: If word W transitions into S, W's
     * final CIphone is in S's {lc}.  Words transitioning out of S must consider
     * these left context CIphones.
     * Similarly, right contexts for state S: If word W transitions out of S,
     * W's first CIphone is in S's {rc}.  Words transitioning into S must consider
     * these right contexts.
     *
     * NOTE: Words may transition into and out of S INDIRECTLY, with intermediate
     *   null transitions.
     * NOTE: Single-phone words are difficult; only SILENCE right context is
     *   modelled for them.
     * NOTE: Non-silence filler phones aren't included in these sets.  Filler
     *   words don't use context, and present the SILENCE phone as context to
     *   adjacent words.
     */
    int16 **lc;         /**< Left context triphone mappings for FSG. */
    int16 **rc;         /**< Right context triphone mappings for FSG. */

    fsg_pnode_t **root; /* root[s] = lextree representing all transitions
                           out of state s.  Note that the "tree" for each
                           state is actually a collection of trees, linked
                           via fsg_pnode_t.sibling (root[s]->sibling) */
    fsg_pnode_t **alloc_head;   /* alloc_head[s] = head of linear list of all
                                   pnodes allocated for state s */
    int32 n_pnode;      /* #HMM nodes in search structure */
    int32 wip;          /* Copy of the wip argument given to fsg_lextree_init()
                           (presumably the word insertion penalty -- confirm) */
    int32 pip;          /* Copy of the pip argument given to fsg_lextree_init()
                           (presumably the phone insertion penalty -- confirm) */
} fsg_lextree_t;

/* Access macros */
/* Head of the sibling-linked list of root pnodes for FSG state s. */
#define fsg_lextree_root(lt,s)  ((lt)->root[s])
/* Total number of pnodes (HMM nodes) in the search structure. */
#define fsg_lextree_n_pnode(lt) ((lt)->n_pnode)

/**
 * Create, initialize, and return a new phonetic lextree for the given FSG.
 *
 * @param fsg  Grammar for which the lextree is built.
 * @param dict Pronunciation dictionary.
 * @param d2p  Context-dependent phone mappings.
 * @param mdef Model definition (triphone mappings).
 * @param ctx  HMM context structure used by the pnodes' HMMs.
 * @param wip  Stored in the lextree's wip field.
 * @param pip  Stored in the lextree's pip field.
 */
fsg_lextree_t *fsg_lextree_init(fsg_model_t *fsg, dict_t *dict,
                                dict2pid_t *d2p,
                                bin_mdef_t *mdef, hmm_context_t *ctx,
                                int32 wip, int32 pip);

/**
 * Free lextrees for an FSG.
 */
void fsg_lextree_free(fsg_lextree_t *fsg);

/**
 * Print an FSG lextree to a file for debugging.
 */
void fsg_lextree_dump(fsg_lextree_t *fsg, FILE *fh);

/**
 * Mark the given pnode as inactive (for search).
 */
void fsg_psubtree_pnode_deactivate(fsg_pnode_t *pnode);

/**
 *  Set all flags on in the given context bitvector.
+ */ +void fsg_pnode_add_all_ctxt(fsg_pnode_ctxt_t *ctxt); + +/** + * Generic variant for arbitrary size + */ +uint32 fsg_pnode_ctxt_sub_generic(fsg_pnode_ctxt_t *src, fsg_pnode_ctxt_t *sub); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_search.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_search.c new file mode 100644 index 0000000000000000000000000000000000000000..a2e98e906ffb5ffd41453b483d8ac7930939e75f --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_search.c @@ -0,0 +1,1574 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
 * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ====================================================================
 *
 */

/*
 * fsg_search.c -- Search structures for FSM decoding.
 *
 * **********************************************
 * CMU ARPA Speech Project
 *
 * Copyright (c) 2004 Carnegie Mellon University.
 * ALL RIGHTS RESERVED.
 * **********************************************
 *
 * HISTORY
 *
 * 18-Feb-2004  M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon
 *              Started.
 */

/* System headers. */
#include <stdio.h>
#include <string.h>
#include <assert.h>

/* SphinxBase headers. */
#include <sphinxbase/err.h>
#include <sphinxbase/ckd_alloc.h>
#include <sphinxbase/strfuncs.h>
#include <sphinxbase/cmd_ln.h>

/* Local headers.
*/ +#include "pocketsphinx_internal.h" +#include "ps_lattice_internal.h" +#include "fsg_search_internal.h" +#include "fsg_history.h" +#include "fsg_lextree.h" + +/* Turn this on for detailed debugging dump */ +#define __FSG_DBG__ 0 +#define __FSG_DBG_CHAN__ 0 +#define __FSG_ALLOW_BESTPATH__ 1 + +static ps_seg_t *fsg_search_seg_iter(ps_search_t *search); +static ps_lattice_t *fsg_search_lattice(ps_search_t *search); +static int fsg_search_prob(ps_search_t *search); + +static ps_searchfuncs_t fsg_funcs = { + /* start: */ fsg_search_start, + /* step: */ fsg_search_step, + /* finish: */ fsg_search_finish, + /* reinit: */ fsg_search_reinit, + /* free: */ fsg_search_free, + /* lattice: */ fsg_search_lattice, + /* hyp: */ fsg_search_hyp, + /* prob: */ fsg_search_prob, + /* seg_iter: */ fsg_search_seg_iter, +}; + +static int +fsg_search_add_silences(fsg_search_t *fsgs, fsg_model_t *fsg) +{ + dict_t *dict; + int32 wid; + int n_sil; + + dict = ps_search_dict(fsgs); + /* + * NOTE: Unlike N-Gram search, we do not use explicit start and + * end symbols. This is because the start and end nodes are + * defined in the grammar. We do add silence/filler self-loops to + * all states in order to allow for silence between words and at + * the beginning and end of utterances. + * + * This has some implications for word graph generation, namely, + * that there can (and usually will) be multiple start and end + * states in the word graph. We therefore do add explicit start + * and end nodes to the graph. + */ + /* Add silence self-loops to all states. */ + fsg_model_add_silence(fsg, "", -1, + cmd_ln_float32_r(ps_search_config(fsgs), "-silprob")); + n_sil = 0; + /* Add self-loops for all other fillers. 
*/ + for (wid = dict_filler_start(dict); wid < dict_filler_end(dict); ++wid) { + char const *word = dict_wordstr(dict, wid); + if (wid == dict_startwid(dict) || wid == dict_finishwid(dict)) + continue; + fsg_model_add_silence(fsg, word, -1, + cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob")); + ++n_sil; + } + + return n_sil; +} + +/* Scans the dictionary and check if all words are present. */ +static int +fsg_search_check_dict(fsg_search_t *fsgs, fsg_model_t *fsg) +{ + dict_t *dict; + int i; + + dict = ps_search_dict(fsgs); + for (i = 0; i < fsg_model_n_word(fsg); ++i) { + char const *word; + int32 wid; + + word = fsg_model_word_str(fsg, i); + wid = dict_wordid(dict, word); + if (wid == BAD_S3WID) { + E_ERROR("The word '%s' is missing in the dictionary\n", word); + return FALSE; + } + } + + return TRUE; +} + +static int +fsg_search_add_altpron(fsg_search_t *fsgs, fsg_model_t *fsg) +{ + dict_t *dict; + int n_alt, n_word; + int i; + + dict = ps_search_dict(fsgs); + /* Scan FSG's vocabulary for words that have alternate pronunciations. */ + n_alt = 0; + n_word = fsg_model_n_word(fsg); + for (i = 0; i < n_word; ++i) { + char const *word; + int32 wid; + + word = fsg_model_word_str(fsg, i); + wid = dict_wordid(dict, word); + if (wid != BAD_S3WID) { + while ((wid = dict_nextalt(dict, wid)) != BAD_S3WID) { + n_alt += fsg_model_add_alt(fsg, word, dict_wordstr(dict, wid)); + } + } + } + + E_INFO("Added %d alternate word transitions\n", n_alt); + return n_alt; +} + +ps_search_t * +fsg_search_init(const char *name, + fsg_model_t *fsg, + cmd_ln_t *config, + acmod_t *acmod, + dict_t *dict, + dict2pid_t *d2p) +{ + fsg_search_t *fsgs = ckd_calloc(1, sizeof(*fsgs)); + ps_search_init(ps_search_base(fsgs), &fsg_funcs, PS_SEARCH_TYPE_FSG, name, config, acmod, dict, d2p); + + fsgs->fsg = fsg_model_retain(fsg); + /* Initialize HMM context. 
*/ + fsgs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef), + acmod->tmat->tp, NULL, acmod->mdef->sseq); + if (fsgs->hmmctx == NULL) { + ps_search_free(ps_search_base(fsgs)); + return NULL; + } + + /* Initialize the search history object */ + fsgs->history = fsg_history_init(NULL, dict); + fsgs->frame = -1; + + /* Get search pruning parameters */ + fsgs->beam_factor = 1.0f; + fsgs->beam = fsgs->beam_orig + = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam")) + >> SENSCR_SHIFT; + fsgs->pbeam = fsgs->pbeam_orig + = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam")) + >> SENSCR_SHIFT; + fsgs->wbeam = fsgs->wbeam_orig + = (int32) logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam")) + >> SENSCR_SHIFT; + + /* LM related weights/penalties */ + fsgs->lw = cmd_ln_float32_r(config, "-lw"); + fsgs->pip = (int32) (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip")) + * fsgs->lw) + >> SENSCR_SHIFT; + fsgs->wip = (int32) (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip")) + * fsgs->lw) + >> SENSCR_SHIFT; + + /* Acoustic score scale for posterior probabilities. */ + fsgs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale"); + + E_INFO("FSG(beam: %d, pbeam: %d, wbeam: %d; wip: %d, pip: %d)\n", + fsgs->beam_orig, fsgs->pbeam_orig, fsgs->wbeam_orig, + fsgs->wip, fsgs->pip); + + if (!fsg_search_check_dict(fsgs, fsg)) { + fsg_search_free(ps_search_base(fsgs)); + return NULL; + } + + if (cmd_ln_boolean_r(config, "-fsgusefiller") && + !fsg_model_has_sil(fsg)) + fsg_search_add_silences(fsgs, fsg); + + if (cmd_ln_boolean_r(config, "-fsgusealtpron") && + !fsg_model_has_alt(fsg)) + fsg_search_add_altpron(fsgs, fsg); + +#if __FSG_ALLOW_BESTPATH__ + /* If bestpath is enabled, hypotheses are generated from a ps_lattice_t. + * This is not allowed by default because it tends to be very slow. 
*/ + if (cmd_ln_boolean_r(config, "-bestpath")) + fsgs->bestpath = TRUE; +#endif + + if (fsg_search_reinit(ps_search_base(fsgs), + ps_search_dict(fsgs), + ps_search_dict2pid(fsgs)) < 0) + { + ps_search_free(ps_search_base(fsgs)); + return NULL; + + } + ptmr_init(&fsgs->perf); + + return ps_search_base(fsgs); +} + +void +fsg_search_free(ps_search_t *search) +{ + fsg_search_t *fsgs = (fsg_search_t *)search; + + double n_speech = (double)fsgs->n_tot_frame + / cmd_ln_int32_r(ps_search_config(fsgs), "-frate"); + + E_INFO("TOTAL fsg %.2f CPU %.3f xRT\n", + fsgs->perf.t_tot_cpu, + fsgs->perf.t_tot_cpu / n_speech); + E_INFO("TOTAL fsg %.2f wall %.3f xRT\n", + fsgs->perf.t_tot_elapsed, + fsgs->perf.t_tot_elapsed / n_speech); + + ps_search_base_free(search); + fsg_lextree_free(fsgs->lextree); + if (fsgs->history) { + fsg_history_reset(fsgs->history); + fsg_history_set_fsg(fsgs->history, NULL, NULL); + fsg_history_free(fsgs->history); + } + hmm_context_free(fsgs->hmmctx); + fsg_model_free(fsgs->fsg); + ckd_free(fsgs); +} + +int +fsg_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p) +{ + fsg_search_t *fsgs = (fsg_search_t *)search; + + /* Free the old lextree */ + if (fsgs->lextree) + fsg_lextree_free(fsgs->lextree); + + /* Free old dict2pid, dict */ + ps_search_base_reinit(search, dict, d2p); + + /* Update the number of words (not used by this module though). 
*/ + search->n_words = dict_size(dict); + + /* Allocate new lextree for the given FSG */ + fsgs->lextree = fsg_lextree_init(fsgs->fsg, dict, d2p, + ps_search_acmod(fsgs)->mdef, + fsgs->hmmctx, fsgs->wip, fsgs->pip); + + /* Inform the history module of the new fsg */ + fsg_history_set_fsg(fsgs->history, fsgs->fsg, dict); + + return 0; +} + + +static void +fsg_search_sen_active(fsg_search_t *fsgs) +{ + gnode_t *gn; + fsg_pnode_t *pnode; + hmm_t *hmm; + + acmod_clear_active(ps_search_acmod(fsgs)); + + for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) { + pnode = (fsg_pnode_t *) gnode_ptr(gn); + hmm = fsg_pnode_hmmptr(pnode); + assert(hmm_frame(hmm) == fsgs->frame); + acmod_activate_hmm(ps_search_acmod(fsgs), hmm); + } +} + + +/* + * Evaluate all the active HMMs. + * (Executed once per frame.) + */ +static void +fsg_search_hmm_eval(fsg_search_t *fsgs) +{ + gnode_t *gn; + fsg_pnode_t *pnode; + hmm_t *hmm; + int32 bestscore; + int32 n, maxhmmpf; + + bestscore = WORST_SCORE; + + if (!fsgs->pnode_active) { + E_ERROR("Frame %d: No active HMM!!\n", fsgs->frame); + return; + } + + for (n = 0, gn = fsgs->pnode_active; gn; gn = gnode_next(gn), n++) { + int32 score; + + pnode = (fsg_pnode_t *) gnode_ptr(gn); + hmm = fsg_pnode_hmmptr(pnode); + assert(hmm_frame(hmm) == fsgs->frame); + +#if __FSG_DBG__ + E_INFO("pnode(%08x) active @frm %5d\n", (int32) pnode, + fsgs->frame); + hmm_dump(hmm, stdout); +#endif + score = hmm_vit_eval(hmm); +#if __FSG_DBG_CHAN__ + E_INFO("pnode(%08x) after eval @frm %5d\n", + (int32) pnode, fsgs->frame); + hmm_dump(hmm, stdout); +#endif + + if (score BETTER_THAN bestscore) + bestscore = score; + } + +#if __FSG_DBG__ + E_INFO("[%5d] %6d HMM; bestscr: %11d\n", fsgs->frame, n, bestscore); +#endif + fsgs->n_hmm_eval += n; + + /* Adjust beams if #active HMMs larger than absolute threshold */ + maxhmmpf = cmd_ln_int32_r(ps_search_config(fsgs), "-maxhmmpf"); + if (maxhmmpf != -1 && n > maxhmmpf) { + /* + * Too many HMMs active; reduce the beam factor 
applied to the default + * beams, but not if the factor is already at a floor (0.1). + */ + if (fsgs->beam_factor > 0.1) { /* Hack!! Hardwired constant 0.1 */ + fsgs->beam_factor *= 0.9f; /* Hack!! Hardwired constant 0.9 */ + fsgs->beam = + (int32) (fsgs->beam_orig * fsgs->beam_factor); + fsgs->pbeam = + (int32) (fsgs->pbeam_orig * fsgs->beam_factor); + fsgs->wbeam = + (int32) (fsgs->wbeam_orig * fsgs->beam_factor); + } + } + else { + fsgs->beam_factor = 1.0f; + fsgs->beam = fsgs->beam_orig; + fsgs->pbeam = fsgs->pbeam_orig; + fsgs->wbeam = fsgs->wbeam_orig; + } + + if (n > fsg_lextree_n_pnode(fsgs->lextree)) + E_FATAL("PANIC! Frame %d: #HMM evaluated(%d) > #PNodes(%d)\n", + fsgs->frame, n, fsg_lextree_n_pnode(fsgs->lextree)); + + fsgs->bestscore = bestscore; +} + + +static void +fsg_search_pnode_trans(fsg_search_t *fsgs, fsg_pnode_t * pnode) +{ + fsg_pnode_t *child; + hmm_t *hmm; + int32 newscore, thresh, nf; + + assert(pnode); + assert(!fsg_pnode_leaf(pnode)); + + nf = fsgs->frame + 1; + thresh = fsgs->bestscore + fsgs->beam; + + hmm = fsg_pnode_hmmptr(pnode); + + for (child = fsg_pnode_succ(pnode); + child; child = fsg_pnode_sibling(child)) { + newscore = hmm_out_score(hmm) + child->logs2prob; + + if ((newscore BETTER_THAN thresh) + && (newscore BETTER_THAN hmm_in_score(&child->hmm))) { + /* Incoming score > pruning threshold and > target's existing score */ + if (hmm_frame(&child->hmm) < nf) { + /* Child node not yet activated; do so */ + fsgs->pnode_active_next = + glist_add_ptr(fsgs->pnode_active_next, + (void *) child); + } + + hmm_enter(&child->hmm, newscore, hmm_out_history(hmm), nf); + } + } +} + + +static void +fsg_search_pnode_exit(fsg_search_t *fsgs, fsg_pnode_t * pnode) +{ + hmm_t *hmm; + fsg_link_t *fl; + int32 wid; + fsg_pnode_ctxt_t ctxt; + + assert(pnode); + assert(fsg_pnode_leaf(pnode)); + + hmm = fsg_pnode_hmmptr(pnode); + fl = fsg_pnode_fsglink(pnode); + assert(fl); + + wid = fsg_link_wid(fl); + assert(wid >= 0); + +#if __FSG_DBG__ + 
E_INFO("[%5d] Exit(%08x) %10d(score) %5d(pred)\n", + fsgs->frame, (int32) pnode, + hmm_out_score(hmm), hmm_out_history(hmm)); +#endif + + /* + * Check if this is filler or single phone word; these do not model right + * context (i.e., the exit score applies to all right contexts). + */ + if (fsg_model_is_filler(fsgs->fsg, wid) + /* FIXME: This might be slow due to repeated calls to dict_to_id(). */ + || (dict_is_single_phone(ps_search_dict(fsgs), + dict_wordid(ps_search_dict(fsgs), + fsg_model_word_str(fsgs->fsg, wid))))) { + /* Create a dummy context structure that applies to all right contexts */ + fsg_pnode_add_all_ctxt(&ctxt); + + /* Create history table entry for this word exit */ + fsg_history_entry_add(fsgs->history, + fl, + fsgs->frame, + hmm_out_score(hmm), + hmm_out_history(hmm), + pnode->ci_ext, ctxt); + + } + else { + /* Create history table entry for this word exit */ + fsg_history_entry_add(fsgs->history, + fl, + fsgs->frame, + hmm_out_score(hmm), + hmm_out_history(hmm), + pnode->ci_ext, pnode->ctxt); + } +} + + +/* + * (Beam) prune the just evaluated HMMs, determine which ones remain + * active, which ones transition to successors, which ones exit and + * terminate in their respective destination FSM states. + * (Executed once per frame.) 
+ */ +static void +fsg_search_hmm_prune_prop(fsg_search_t *fsgs) +{ + gnode_t *gn; + fsg_pnode_t *pnode; + hmm_t *hmm; + int32 thresh, word_thresh, phone_thresh; + + assert(fsgs->pnode_active_next == NULL); + + thresh = fsgs->bestscore + fsgs->beam; + phone_thresh = fsgs->bestscore + fsgs->pbeam; + word_thresh = fsgs->bestscore + fsgs->wbeam; + + for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) { + pnode = (fsg_pnode_t *) gnode_ptr(gn); + hmm = fsg_pnode_hmmptr(pnode); + + if (hmm_bestscore(hmm) >= thresh) { + /* Keep this HMM active in the next frame */ + if (hmm_frame(hmm) == fsgs->frame) { + hmm_frame(hmm) = fsgs->frame + 1; + fsgs->pnode_active_next = + glist_add_ptr(fsgs->pnode_active_next, + (void *) pnode); + } + else { + assert(hmm_frame(hmm) == fsgs->frame + 1); + } + + if (!fsg_pnode_leaf(pnode)) { + if (hmm_out_score(hmm) >= phone_thresh) { + /* Transition out of this phone into its children */ + fsg_search_pnode_trans(fsgs, pnode); + } + } + else { + if (hmm_out_score(hmm) >= word_thresh) { + /* Transition out of leaf node into destination FSG state */ + fsg_search_pnode_exit(fsgs, pnode); + } + } + } + } +} + + +/* + * Propagate newly created history entries through null transitions. + */ +static void +fsg_search_null_prop(fsg_search_t *fsgs) +{ + int32 bpidx, n_entries, thresh, newscore; + fsg_hist_entry_t *hist_entry; + fsg_link_t *l; + int32 s; + fsg_model_t *fsg; + + fsg = fsgs->fsg; + thresh = fsgs->bestscore + fsgs->wbeam; /* Which beam really?? */ + + n_entries = fsg_history_n_entries(fsgs->history); + + for (bpidx = fsgs->bpidx_start; bpidx < n_entries; bpidx++) { + fsg_arciter_t *itor; + hist_entry = fsg_history_entry_get(fsgs->history, bpidx); + + l = fsg_hist_entry_fsglink(hist_entry); + + /* Destination FSG state for history entry */ + s = l ? fsg_link_to_state(l) : fsg_model_start_state(fsg); + + /* + * Check null transitions from d to all other states. 
(Only need to + * propagate one step, since FSG contains transitive closure of null + * transitions.) + */ + /* Add all links from from_state to dst */ + for (itor = fsg_model_arcs(fsg, s); itor; + itor = fsg_arciter_next(itor)) { + fsg_link_t *l = fsg_arciter_get(itor); + + /* FIXME: Need to deal with tag transitions somehow. */ + if (fsg_link_wid(l) != -1) + continue; + newscore = + fsg_hist_entry_score(hist_entry) + + (fsg_link_logs2prob(l) >> SENSCR_SHIFT); + + if (newscore >= thresh) { + fsg_history_entry_add(fsgs->history, l, + fsg_hist_entry_frame(hist_entry), + newscore, + bpidx, + fsg_hist_entry_lc(hist_entry), + fsg_hist_entry_rc(hist_entry)); + } + } + } +} + + +/* + * Perform cross-word transitions; propagate each history entry created in this + * frame to lextree roots attached to the target FSG state for that entry. + */ +static void +fsg_search_word_trans(fsg_search_t *fsgs) +{ + int32 bpidx, n_entries; + fsg_hist_entry_t *hist_entry; + fsg_link_t *l; + int32 score, newscore, thresh, nf, d; + fsg_pnode_t *root; + int32 lc, rc; + + n_entries = fsg_history_n_entries(fsgs->history); + + thresh = fsgs->bestscore + fsgs->beam; + nf = fsgs->frame + 1; + + for (bpidx = fsgs->bpidx_start; bpidx < n_entries; bpidx++) { + hist_entry = fsg_history_entry_get(fsgs->history, bpidx); + assert(hist_entry); + score = fsg_hist_entry_score(hist_entry); + assert(fsgs->frame == fsg_hist_entry_frame(hist_entry)); + + l = fsg_hist_entry_fsglink(hist_entry); + + /* Destination state for hist_entry */ + d = l ? 
fsg_link_to_state(l) : fsg_model_start_state(fsgs-> + fsg); + + lc = fsg_hist_entry_lc(hist_entry); + + /* Transition to all root nodes attached to state d */ + for (root = fsg_lextree_root(fsgs->lextree, d); + root; root = root->sibling) { + rc = root->ci_ext; + + if ((root->ctxt.bv[lc >> 5] & (1 << (lc & 0x001f))) && + (hist_entry->rc.bv[rc >> 5] & (1 << (rc & 0x001f)))) { + /* + * Last CIphone of history entry is in left-context list supported by + * target root node, and + * first CIphone of target root node is in right context list supported + * by history entry; + * So the transition can go ahead (if new score is good enough). + */ + newscore = score + root->logs2prob; + + if ((newscore BETTER_THAN thresh) + && (newscore BETTER_THAN hmm_in_score(&root->hmm))) { + if (hmm_frame(&root->hmm) < nf) { + /* Newly activated node; add to active list */ + fsgs->pnode_active_next = + glist_add_ptr(fsgs->pnode_active_next, + (void *) root); +#if __FSG_DBG__ + E_INFO + ("[%5d] WordTrans bpidx[%d] -> pnode[%08x] (activated)\n", + fsgs->frame, bpidx, (int32) root); +#endif + } + else { +#if __FSG_DBG__ + E_INFO + ("[%5d] WordTrans bpidx[%d] -> pnode[%08x]\n", + fsgs->frame, bpidx, (int32) root); +#endif + } + + hmm_enter(&root->hmm, newscore, bpidx, nf); + } + } + } + } +} + + +int +fsg_search_step(ps_search_t *search, int frame_idx) +{ + fsg_search_t *fsgs = (fsg_search_t *)search; + int16 const *senscr; + acmod_t *acmod = search->acmod; + gnode_t *gn; + fsg_pnode_t *pnode; + hmm_t *hmm; + + /* Activate our HMMs for the current frame if need be. */ + if (!acmod->compallsen) + fsg_search_sen_active(fsgs); + /* Compute GMM scores for the current frame. */ + senscr = acmod_score(acmod, &frame_idx); + fsgs->n_sen_eval += acmod->n_senone_active; + hmm_context_set_senscore(fsgs->hmmctx, senscr); + + /* Mark backpointer table for current frame. 
*/ + fsgs->bpidx_start = fsg_history_n_entries(fsgs->history); + + /* Evaluate all active pnodes (HMMs) */ + fsg_search_hmm_eval(fsgs); + + /* + * Prune and propagate the HMMs evaluated; create history entries for + * word exits. The words exits are tentative, and may be pruned; make + * the survivors permanent via fsg_history_end_frame(). + */ + fsg_search_hmm_prune_prop(fsgs); + fsg_history_end_frame(fsgs->history); + + /* + * Propagate new history entries through any null transitions, creating + * new history entries, and then make the survivors permanent. + */ + fsg_search_null_prop(fsgs); + fsg_history_end_frame(fsgs->history); + + /* + * Perform cross-word transitions; propagate each history entry across its + * terminating state to the root nodes of the lextree attached to the state. + */ + fsg_search_word_trans(fsgs); + + /* + * We've now come full circle, HMM and FSG states have been updated for + * the next frame. + * Update the active lists, deactivate any currently active HMMs that + * did not survive into the next frame + */ + for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) { + pnode = (fsg_pnode_t *) gnode_ptr(gn); + hmm = fsg_pnode_hmmptr(pnode); + + if (hmm_frame(hmm) == fsgs->frame) { + /* This HMM NOT activated for the next frame; reset it */ + fsg_psubtree_pnode_deactivate(pnode); + } + else { + assert(hmm_frame(hmm) == (fsgs->frame + 1)); + } + } + + /* Free the currently active list */ + glist_free(fsgs->pnode_active); + + /* Make the next-frame active list the current one */ + fsgs->pnode_active = fsgs->pnode_active_next; + fsgs->pnode_active_next = NULL; + + /* End of this frame; ready for the next */ + ++fsgs->frame; + + return 1; +} + + +/* + * Set all HMMs to inactive, clear active lists, initialize FSM start + * state to be the only active node. + * (Executed at the start of each utterance.) 
+ */ +int +fsg_search_start(ps_search_t *search) +{ + fsg_search_t *fsgs = (fsg_search_t *)search; + int32 silcipid; + fsg_pnode_ctxt_t ctxt; + + /* Reset dynamic adjustment factor for beams */ + fsgs->beam_factor = 1.0f; + fsgs->beam = fsgs->beam_orig; + fsgs->pbeam = fsgs->pbeam_orig; + fsgs->wbeam = fsgs->wbeam_orig; + + silcipid = bin_mdef_ciphone_id(ps_search_acmod(fsgs)->mdef, "SIL"); + + /* Initialize EVERYTHING to be inactive */ + assert(fsgs->pnode_active == NULL); + assert(fsgs->pnode_active_next == NULL); + + fsg_history_reset(fsgs->history); + fsg_history_utt_start(fsgs->history); + fsgs->final = FALSE; + + /* Dummy context structure that allows all right contexts to use this entry */ + fsg_pnode_add_all_ctxt(&ctxt); + + /* Create dummy history entry leading to start state */ + fsgs->frame = -1; + fsgs->bestscore = 0; + fsg_history_entry_add(fsgs->history, + NULL, -1, 0, -1, silcipid, ctxt); + fsgs->bpidx_start = 0; + + /* Propagate dummy history entry through NULL transitions from start state */ + fsg_search_null_prop(fsgs); + + /* Perform word transitions from this dummy history entry */ + fsg_search_word_trans(fsgs); + + /* Make the next-frame active list the current one */ + fsgs->pnode_active = fsgs->pnode_active_next; + fsgs->pnode_active_next = NULL; + + ++fsgs->frame; + + fsgs->n_hmm_eval = 0; + fsgs->n_sen_eval = 0; + + ptmr_reset(&fsgs->perf); + ptmr_start(&fsgs->perf); + + return 0; +} + +/* + * Cleanup at the end of each utterance. 
+ */ +int +fsg_search_finish(ps_search_t *search) +{ + fsg_search_t *fsgs = (fsg_search_t *)search; + gnode_t *gn; + fsg_pnode_t *pnode; + int32 n_hist, cf; + + /* Deactivate all nodes in the current and next-frame active lists */ + for (gn = fsgs->pnode_active; gn; gn = gnode_next(gn)) { + pnode = (fsg_pnode_t *) gnode_ptr(gn); + fsg_psubtree_pnode_deactivate(pnode); + } + for (gn = fsgs->pnode_active_next; gn; gn = gnode_next(gn)) { + pnode = (fsg_pnode_t *) gnode_ptr(gn); + fsg_psubtree_pnode_deactivate(pnode); + } + + glist_free(fsgs->pnode_active); + fsgs->pnode_active = NULL; + glist_free(fsgs->pnode_active_next); + fsgs->pnode_active_next = NULL; + + fsgs->final = TRUE; + + n_hist = fsg_history_n_entries(fsgs->history); + fsgs->n_tot_frame += fsgs->frame; + E_INFO + ("%d frames, %d HMMs (%d/fr), %d senones (%d/fr), %d history entries (%d/fr)\n\n", + fsgs->frame, fsgs->n_hmm_eval, + (fsgs->frame > 0) ? fsgs->n_hmm_eval / fsgs->frame : 0, + fsgs->n_sen_eval, + (fsgs->frame > 0) ? fsgs->n_sen_eval / fsgs->frame : 0, + n_hist, (fsgs->frame > 0) ? n_hist / fsgs->frame : 0); + + /* Print out some statistics. */ + ptmr_stop(&fsgs->perf); + /* This is the number of frames processed. */ + cf = ps_search_acmod(fsgs)->output_frame; + if (cf > 0) { + double n_speech = (double) (cf + 1) + / cmd_ln_int32_r(ps_search_config(fsgs), "-frate"); + E_INFO("fsg %.2f CPU %.3f xRT\n", + fsgs->perf.t_cpu, fsgs->perf.t_cpu / n_speech); + E_INFO("fsg %.2f wall %.3f xRT\n", + fsgs->perf.t_elapsed, fsgs->perf.t_elapsed / n_speech); + } + + + return 0; +} + +static int +fsg_search_find_exit(fsg_search_t *fsgs, int frame_idx, int final, int32 *out_score) +{ + fsg_hist_entry_t *hist_entry = NULL; + fsg_model_t *fsg; + int bpidx, frm, last_frm, besthist; + int32 bestscore; + + if (frame_idx == -1) + frame_idx = fsgs->frame - 1; + last_frm = frm = frame_idx; + + /* Scan backwards to find a word exit in frame_idx. 
*/ + bpidx = fsg_history_n_entries(fsgs->history) - 1; + while (bpidx > 0) { + hist_entry = fsg_history_entry_get(fsgs->history, bpidx); + if (fsg_hist_entry_frame(hist_entry) <= frame_idx) { + frm = last_frm = fsg_hist_entry_frame(hist_entry); + break; + } + bpidx--; + } + + /* No hypothesis (yet). */ + if (bpidx <= 0) + return bpidx; + + /* Now find best word exit in this frame. */ + bestscore = INT_MIN; + besthist = -1; + fsg = fsgs->fsg; + while (frm == last_frm) { + fsg_link_t *fl; + int32 score; + + fl = fsg_hist_entry_fsglink(hist_entry); + score = fsg_hist_entry_score(hist_entry); + + if (fl == NULL) + break; + + /* Prefer final hypothesis */ + if (score == bestscore && fsg_link_to_state(fl) == fsg_model_final_state(fsg)) { + besthist = bpidx; + } else if (score BETTER_THAN bestscore) { + /* Only enforce the final state constraint if this is a final hypothesis. */ + if ((!final) + || fsg_link_to_state(fl) == fsg_model_final_state(fsg)) { + bestscore = score; + besthist = bpidx; + } + } + + --bpidx; + if (bpidx < 0) + break; + hist_entry = fsg_history_entry_get(fsgs->history, bpidx); + frm = fsg_hist_entry_frame(hist_entry); + } + + /* Final state not reached. */ + if (besthist == -1) { + E_ERROR("Final result does not match the grammar in frame %d\n", frame_idx); + return -1; + } + + /* This here's the one we want. */ + if (out_score) + *out_score = bestscore; + + return besthist; +} + +/* FIXME: Mostly duplicated with ngram_search_bestpath(). */ +static ps_latlink_t * +fsg_search_bestpath(ps_search_t *search, int32 *out_score, int backward) +{ + fsg_search_t *fsgs = (fsg_search_t *)search; + + (void)backward; + if (search->last_link == NULL) { + search->last_link = ps_lattice_bestpath(search->dag, NULL, + 1.0, fsgs->ascale); + if (search->last_link == NULL) + return NULL; + /* Also calculate betas so we can fill in the posterior + * probability field in the segmentation. 
*/ + if (search->post == 0) + search->post = ps_lattice_posterior(search->dag, NULL, fsgs->ascale); + } + if (out_score) + *out_score = search->last_link->path_scr + search->dag->final_node_ascr; + return search->last_link; +} + +char const * +fsg_search_hyp(ps_search_t *search, int32 *out_score) +{ + fsg_search_t *fsgs = (fsg_search_t *)search; + dict_t *dict = ps_search_dict(search); + char *c; + size_t len; + int bp, bpidx; + + /* Get last backpointer table index. */ + bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, out_score); + /* No hypothesis (yet). */ + if (bpidx <= 0) { + return NULL; + } + + /* If bestpath is enabled and the utterance is complete, then run it. + * Note that setting bestpath in fsg_search_init is disabled by default. */ + if (fsgs->bestpath && fsgs->final) { + ps_lattice_t *dag; + ps_latlink_t *link; + + if ((dag = fsg_search_lattice(search)) == NULL) { + E_WARN("Failed to obtain the lattice while bestpath enabled\n"); + return NULL; + } + if ((link = fsg_search_bestpath(search, out_score, FALSE)) == NULL) { + E_WARN("Failed to find the bestpath in a lattice\n"); + return NULL; + } + return ps_lattice_hyp(dag, link); + } + + bp = bpidx; + len = 0; + while (bp > 0) { + fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp); + fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry); + char const *baseword; + int32 wid; + + bp = fsg_hist_entry_pred(hist_entry); + wid = fsg_link_wid(fl); + if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid)) + continue; + baseword = dict_basestr(dict, + dict_wordid(dict, + fsg_model_word_str(fsgs->fsg, wid))); + len += strlen(baseword) + 1; + } + + ckd_free(search->hyp_str); + if (len == 0) { + search->hyp_str = NULL; + return search->hyp_str; + } + search->hyp_str = ckd_calloc(1, len); + + bp = bpidx; + c = search->hyp_str + len - 1; + while (bp > 0) { + fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp); + fsg_link_t *fl = fsg_hist_entry_fsglink(hist_entry); + 
char const *baseword; + int32 wid; + + bp = fsg_hist_entry_pred(hist_entry); + wid = fsg_link_wid(fl); + if (wid < 0 || fsg_model_is_filler(fsgs->fsg, wid)) + continue; + baseword = dict_basestr(dict, + dict_wordid(dict, + fsg_model_word_str(fsgs->fsg, wid))); + len = strlen(baseword); + c -= len; + memcpy(c, baseword, len); + if (c > search->hyp_str) { + --c; + *c = ' '; + } + } + + return search->hyp_str; +} + +static void +fsg_seg_bp2itor(ps_seg_t *seg, fsg_hist_entry_t *hist_entry) +{ + fsg_search_t *fsgs = (fsg_search_t *)seg->search; + fsg_hist_entry_t *ph = NULL; + int32 bp; + + if ((bp = fsg_hist_entry_pred(hist_entry)) >= 0) + ph = fsg_history_entry_get(fsgs->history, bp); + seg->word = fsg_model_word_str(fsgs->fsg, hist_entry->fsglink->wid); + seg->ef = fsg_hist_entry_frame(hist_entry); + seg->sf = ph ? fsg_hist_entry_frame(ph) + 1 : 0; + /* This is kind of silly but it happens for null transitions. */ + if (seg->sf > seg->ef) seg->sf = seg->ef; + seg->prob = 0; /* Bogus value... */ + /* "Language model" score = transition probability. */ + seg->lback = 1; + seg->lscr = fsg_link_logs2prob(hist_entry->fsglink) >> SENSCR_SHIFT; + if (ph) { + /* FIXME: Not sure exactly how cross-word triphones are handled. 
*/ + seg->ascr = hist_entry->score - ph->score - seg->lscr; + } + else + seg->ascr = hist_entry->score - seg->lscr; +} + + +static void +fsg_seg_free(ps_seg_t *seg) +{ + fsg_seg_t *itor = (fsg_seg_t *)seg; + ckd_free(itor->hist); + ckd_free(itor); +} + +static ps_seg_t * +fsg_seg_next(ps_seg_t *seg) +{ + fsg_seg_t *itor = (fsg_seg_t *)seg; + + if (++itor->cur == itor->n_hist) { + fsg_seg_free(seg); + return NULL; + } + + fsg_seg_bp2itor(seg, itor->hist[itor->cur]); + return seg; +} + +static ps_segfuncs_t fsg_segfuncs = { + /* seg_next */ fsg_seg_next, + /* seg_free */ fsg_seg_free +}; + +static ps_seg_t * +fsg_search_seg_iter(ps_search_t *search) +{ + fsg_search_t *fsgs = (fsg_search_t *)search; + fsg_seg_t *itor; + int32 out_score; + int bp, bpidx, cur; + + bpidx = fsg_search_find_exit(fsgs, fsgs->frame, fsgs->final, &out_score); + /* No hypothesis (yet). */ + if (bpidx <= 0) + return NULL; + + /* If bestpath is enabled and the utterance is complete, then run it. + * Note that setting bestpath in fsg_search_init is disabled by default. */ + if (fsgs->bestpath && fsgs->final) { + ps_lattice_t *dag; + ps_latlink_t *link; + + if ((dag = fsg_search_lattice(search)) == NULL) + return NULL; + if ((link = fsg_search_bestpath(search, &out_score, TRUE)) == NULL) + return NULL; + return ps_lattice_seg_iter(dag, link, 1.0); + } + + /* Calling this an "iterator" is a bit of a misnomer since we have + * to get the entire backtrace in order to produce it. On the + * other hand, all we actually need is the bptbl IDs, and we can + * allocate a fixed-size array of them. 
*/ + itor = ckd_calloc(1, sizeof(*itor)); + itor->base.vt = &fsg_segfuncs; + itor->base.search = search; + itor->base.lwf = 1.0; + itor->n_hist = 0; + bp = bpidx; + while (bp > 0) { + fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp); + bp = fsg_hist_entry_pred(hist_entry); + ++itor->n_hist; + } + if (itor->n_hist == 0) { + ckd_free(itor); + return NULL; + } + itor->hist = ckd_calloc(itor->n_hist, sizeof(*itor->hist)); + cur = itor->n_hist - 1; + bp = bpidx; + while (bp > 0) { + fsg_hist_entry_t *hist_entry = fsg_history_entry_get(fsgs->history, bp); + itor->hist[cur] = hist_entry; + bp = fsg_hist_entry_pred(hist_entry); + --cur; + } + + /* Fill in relevant fields for first element. */ + fsg_seg_bp2itor((ps_seg_t *)itor, itor->hist[0]); + + return (ps_seg_t *)itor; +} + +static int +fsg_search_prob(ps_search_t *search) +{ + fsg_search_t *fsgs = (fsg_search_t *)search; + + /* If bestpath is enabled and the utterance is complete, then run it. + * Note that setting bestpath in fsg_search_init is disabled by default. */ + if (fsgs->bestpath && fsgs->final) { + ps_lattice_t *dag; + ps_latlink_t *link; + + if ((dag = fsg_search_lattice(search)) == NULL) + return 0; + if ((link = fsg_search_bestpath(search, NULL, TRUE)) == NULL) + return 0; + return search->post; + } + else { + /* FIXME: Give some kind of good estimate here, eventually. */ + return 0; + } +} + +static ps_latnode_t * +find_node(ps_lattice_t *dag, fsg_model_t *fsg, int sf, int32 wid, int32 node_id) +{ + ps_latnode_t *node; + + (void)fsg; + for (node = dag->nodes; node; node = node->next) + if ((node->sf == sf) && (node->wid == wid) && (node->node_id == node_id)) + break; + return node; +} + +static ps_latnode_t * +new_node(ps_lattice_t *dag, fsg_model_t *fsg, int sf, int ef, int32 wid, int32 node_id, int32 ascr) +{ + ps_latnode_t *node; + + node = find_node(dag, fsg, sf, wid, node_id); + + if (node) { + /* Update end frames. 
*/ + if (node->lef == -1 || node->lef < ef) + node->lef = ef; + if (node->fef == -1 || node->fef > ef) + node->fef = ef; + /* Update best link score. */ + if (ascr BETTER_THAN node->info.best_exit) + node->info.best_exit = ascr; + } + else { + /* New node; link to head of list */ + node = listelem_malloc(dag->latnode_alloc); + node->wid = wid; + node->sf = sf; + node->fef = node->lef = ef; + node->reachable = FALSE; + node->entries = NULL; + node->exits = NULL; + node->info.best_exit = ascr; + node->node_id = node_id; + + node->next = dag->nodes; + dag->nodes = node; + ++dag->n_nodes; + } + + return node; +} + +static ps_latnode_t * +find_start_node(fsg_search_t *fsgs, ps_lattice_t *dag) +{ + ps_latnode_t *node; + glist_t start = NULL; + int nstart = 0; + + /* Look for all nodes starting in frame zero with some exits. */ + for (node = dag->nodes; node; node = node->next) { + if (node->sf == 0 && node->exits) { + E_INFO("Start node %s.%d:%d:%d\n", + fsg_model_word_str(fsgs->fsg, node->wid), + node->sf, node->fef, node->lef); + start = glist_add_ptr(start, node); + ++nstart; + } + } + + /* If there was more than one start node candidate, then we need + * to create an artificial start node with epsilon transitions to + * all of them. */ + if (nstart == 1) { + node = gnode_ptr(start); + } + else { + gnode_t *st; + int wid; + + wid = fsg_model_word_add(fsgs->fsg, ""); + if (fsgs->fsg->silwords) + bitvec_set(fsgs->fsg->silwords, wid); + node = new_node(dag, fsgs->fsg, 0, 0, wid, -1, 0); + for (st = start; st; st = gnode_next(st)) + ps_lattice_link(dag, node, gnode_ptr(st), 0, 0); + } + glist_free(start); + return node; +} + +static ps_latnode_t * +find_end_node(fsg_search_t *fsgs, ps_lattice_t *dag) +{ + ps_latnode_t *node; + glist_t end = NULL; + int nend = 0; + + /* Look for all nodes ending in last frame with some entries. 
*/ + for (node = dag->nodes; node; node = node->next) { + if (node->lef == dag->n_frames - 1 && node->entries) { + E_INFO("End node %s.%d:%d:%d (%d)\n", + fsg_model_word_str(fsgs->fsg, node->wid), + node->sf, node->fef, node->lef, node->info.best_exit); + end = glist_add_ptr(end, node); + ++nend; + } + } + + if (nend == 1) { + node = gnode_ptr(end); + } + else if (nend == 0) { + ps_latnode_t *last = NULL; + int ef = 0; + + /* If there were no end node candidates, then just use the + * node with the last exit frame. */ + for (node = dag->nodes; node; node = node->next) { + if (node->lef > ef && node->entries) { + last = node; + ef = node->lef; + } + } + node = last; + if (node) + E_INFO("End node %s.%d:%d:%d (%d)\n", + fsg_model_word_str(fsgs->fsg, node->wid), + node->sf, node->fef, node->lef, node->info.best_exit); + } + else { + /* If there was more than one end node candidate, then we need + * to create an artificial end node with epsilon transitions + * out of all of them. */ + gnode_t *st; + int wid; + wid = fsg_model_word_add(fsgs->fsg, ""); + if (fsgs->fsg->silwords) + bitvec_set(fsgs->fsg->silwords, wid); + node = new_node(dag, fsgs->fsg, fsgs->frame, fsgs->frame, wid, -1, 0); + /* Use the "best" (in reality it will be the only) exit link + * score from this final node as the link score. */ + for (st = end; st; st = gnode_next(st)) { + ps_latnode_t *src = gnode_ptr(st); + ps_lattice_link(dag, src, node, src->info.best_exit, fsgs->frame); + } + } + glist_free(end); + return node; +} + +static void +mark_reachable(ps_lattice_t *dag, ps_latnode_t *end) +{ + glist_t q; + + (void)dag; + /* It doesn't matter which order we do this in. */ + end->reachable = TRUE; + q = glist_add_ptr(NULL, end); + while (q) { + ps_latnode_t *node = gnode_ptr(q); + latlink_list_t *x; + + /* Pop the front of the list. */ + q = gnode_free(q, NULL); + /* Expand all its predecessors that haven't been seen yet. 
*/ + for (x = node->entries; x; x = x->next) { + ps_latnode_t *next = x->link->from; + if (!next->reachable) { + next->reachable = TRUE; + q = glist_add_ptr(q, next); + } + } + } +} + +/** + * Generate a lattice from FSG search results. + * + * One might think that this is simply a matter of adding acoustic + * scores to the FSG's edges. However, one would be wrong. The + * crucial difference here is that the word lattice is acyclic, and it + * also contains timing information. + */ +static ps_lattice_t * +fsg_search_lattice(ps_search_t *search) +{ + fsg_search_t *fsgs; + fsg_model_t *fsg; + ps_latnode_t *node; + ps_lattice_t *dag; + int32 i, n; + + fsgs = (fsg_search_t *)search; + + /* Check to see if a lattice has previously been created over the + * same number of frames, and reuse it if so. */ + if (search->dag && search->dag->n_frames == fsgs->frame) + return search->dag; + + /* Nope, create a new one. */ + ps_lattice_free(search->dag); + search->dag = NULL; + dag = ps_lattice_init_search(search, fsgs->frame); + fsg = fsgs->fsg; + + /* + * Each history table entry represents a link in the word graph. + * The set of nodes is determined by the number of unique + * (word,start-frame) pairs in the history table. So we will + * first find all those nodes. + */ + n = fsg_history_n_entries(fsgs->history); + for (i = 0; i < n; ++i) { + fsg_hist_entry_t *fh = fsg_history_entry_get(fsgs->history, i); + int32 ascr; + int sf; + + /* Skip null transitions. */ + if (fh->fsglink == NULL || fh->fsglink->wid == -1) + continue; + + /* Find the start node of this link. */ + if (fh->pred) { + fsg_hist_entry_t *pfh = fsg_history_entry_get(fsgs->history, fh->pred); + /* FIXME: We include the transition score in the lattice + * link score. This is because of the practical + * difficulty of obtaining it separately in bestpath or + * forward-backward search, and because it is essentially + * a unigram probability, so there is no need to treat it + * separately from the acoustic score. 
However, it's not + * clear that this will actually yield correct results.*/ + ascr = fh->score - pfh->score; + sf = pfh->frame + 1; + } + else { + ascr = fh->score; + sf = 0; + } + + /* + * Note that although scores are tied to links rather than + * nodes, it's possible that there are no links out of the + * destination node, and thus we need to preserve its score in + * case it turns out to be utterance-final. + */ + new_node(dag, fsg, sf, fh->frame, fh->fsglink->wid, fsg_link_to_state(fh->fsglink), ascr); + } + + /* + * Now, we will create links only to nodes that actually exist. + */ + n = fsg_history_n_entries(fsgs->history); + for (i = 0; i < n; ++i) { + fsg_hist_entry_t *fh = fsg_history_entry_get(fsgs->history, i); + fsg_arciter_t *itor; + ps_latnode_t *src, *dest; + int32 ascr; + int sf; + + /* Skip null transitions. */ + if (fh->fsglink == NULL || fh->fsglink->wid == -1) + continue; + + /* Find the start node of this link and calculate its link score. */ + if (fh->pred) { + fsg_hist_entry_t *pfh = fsg_history_entry_get(fsgs->history, fh->pred); + sf = pfh->frame + 1; + ascr = fh->score - pfh->score; + } + else { + ascr = fh->score; + sf = 0; + } + src = find_node(dag, fsg, sf, fh->fsglink->wid, fsg_link_to_state(fh->fsglink)); + sf = fh->frame + 1; + + for (itor = fsg_model_arcs(fsg, fsg_link_to_state(fh->fsglink)); + itor; itor = fsg_arciter_next(itor)) { + fsg_link_t *link = fsg_arciter_get(itor); + + /* FIXME: Need to figure out what to do about tag transitions. */ + if (link->wid >= 0) { + /* + * For each non-epsilon link following this one, look for a + * matching node in the lattice and link to it. + */ + if ((dest = find_node(dag, fsg, sf, link->wid, fsg_link_to_state(link))) != NULL) + ps_lattice_link(dag, src, dest, ascr, fh->frame); + } + else { + /* + * Transitive closure on nulls has already been done, so we + * just need to look one link forward from them. + */ + fsg_arciter_t *itor2; + + /* Add all non-null links out of j. 
*/ + for (itor2 = fsg_model_arcs(fsg, fsg_link_to_state(link)); + itor2; itor2 = fsg_arciter_next(itor2)) { + fsg_link_t *link = fsg_arciter_get(itor2); + + if (link->wid == -1) + continue; + + if ((dest = find_node(dag, fsg, sf, link->wid, fsg_link_to_state(link))) != NULL) { + ps_lattice_link(dag, src, dest, ascr, fh->frame); + } + } + } + } + } + + + /* Figure out which nodes are the start and end nodes. */ + if ((dag->start = find_start_node(fsgs, dag)) == NULL) { + E_WARN("Failed to find the start node\n"); + goto error_out; + } + if ((dag->end = find_end_node(fsgs, dag)) == NULL) { + E_WARN("Failed to find the end node\n"); + goto error_out; + } + + + E_INFO("lattice start node %s.%d end node %s.%d\n", + fsg_model_word_str(fsg, dag->start->wid), dag->start->sf, + fsg_model_word_str(fsg, dag->end->wid), dag->end->sf); + /* FIXME: Need to calculate final_node_ascr here. */ + + /* + * Convert word IDs from FSG to dictionary. + */ + for (node = dag->nodes; node; node = node->next) { + node->wid = dict_wordid(dag->search->dict, + fsg_model_word_str(fsg, node->wid)); + node->basewid = dict_basewid(dag->search->dict, node->wid); + } + + /* + * Now we are done, because the links in the graph are uniquely + * defined by the history table. However we should remove any + * nodes which are not reachable from the end node of the FSG. + * Everything is reachable from the start node by definition. 
+ */ + mark_reachable(dag, dag->end); + + ps_lattice_delete_unreachable(dag); + { + int32 silpen, fillpen; + + silpen = (int32)(logmath_log(fsg->lmath, + cmd_ln_float32_r(ps_search_config(fsgs), "-silprob")) + * fsg->lw) + >> SENSCR_SHIFT; + fillpen = (int32)(logmath_log(fsg->lmath, + cmd_ln_float32_r(ps_search_config(fsgs), "-fillprob")) + * fsg->lw) + >> SENSCR_SHIFT; + + ps_lattice_penalize_fillers(dag, silpen, fillpen); + } + search->dag = dag; + + return dag; + + +error_out: + ps_lattice_free(dag); + return NULL; + +} + diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_search_internal.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_search_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..2b1fcd1ff02e23fef3d00801f271a68bdf3ed610 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/fsg_search_internal.h @@ -0,0 +1,164 @@ +/* -*- c-basic-offset:4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * fsg_search_internal.h -- Search structures for FSG decoding. + */ + + +#ifndef __S2_FSG_SEARCH_H__ +#define __S2_FSG_SEARCH_H__ + + +/* SphinxBase headers. */ +#include +#include +#include + +/* Local headers. */ +#include "pocketsphinx_internal.h" +#include "hmm.h" +#include "fsg_history.h" +#include "fsg_lextree.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * Segmentation "iterator" for FSG history. + */ +typedef struct fsg_seg_s { + ps_seg_t base; /**< Base structure. */ + fsg_hist_entry_t **hist; /**< Sequence of history entries. */ + int16 n_hist; /**< Number of history entries. */ + int16 cur; /**< Current position in hist. */ +} fsg_seg_t; + +/** + * Implementation of FSG search (and "FSG set") structure. + */ +typedef struct fsg_search_s { + ps_search_t base; + + hmm_context_t *hmmctx; /**< HMM context. 
*/ + + fsg_model_t *fsg; /**< FSG model */ + struct fsg_lextree_s *lextree;/**< Lextree structure for the currently + active FSG */ + struct fsg_history_s *history;/**< For storing the Viterbi search history */ + + glist_t pnode_active; /**< Those active in this frame */ + glist_t pnode_active_next; /**< Those activated for the next frame */ + + int32 beam_orig; /**< Global pruning threshold */ + int32 pbeam_orig; /**< Pruning threshold for phone transition */ + int32 wbeam_orig; /**< Pruning threshold for word exit */ + float32 beam_factor; /**< Dynamic/adaptive factor (<=1) applied to above + beams to determine actual effective beams. + For implementing absolute pruning. */ + int32 beam, pbeam, wbeam; /**< Effective beams after applying beam_factor */ + int32 lw, pip, wip; /**< Language weights */ + + frame_idx_t frame; /**< Current frame. */ + uint8 final; /**< Decoding is finished for this utterance. */ + uint8 bestpath; /**< Whether to run bestpath search + and confidence annotation at end. */ + float32 ascale; /**< Acoustic score scale for posterior probabilities. */ + + int32 bestscore; /**< For beam pruning */ + int32 bpidx_start; /**< First history entry index this frame */ + + int32 ascr, lscr; /**< Total acoustic and lm score for utt */ + + int32 n_hmm_eval; /**< Total HMMs evaluated this utt */ + int32 n_sen_eval; /**< Total senones evaluated this utt */ + + ptmr_t perf; /**< Performance counter */ + int32 n_tot_frame; + +} fsg_search_t; + +/* Access macros */ +#define fsg_search_frame(s) ((s)->frame) + +/** + * Create, initialize and return a search module. + */ +ps_search_t *fsg_search_init(const char *name, + fsg_model_t *fsg, + cmd_ln_t *config, + acmod_t *acmod, + dict_t *dict, + dict2pid_t *d2p); + +/** + * Deallocate search structure. + */ +void fsg_search_free(ps_search_t *search); + +/** + * Update FSG search module for new or updated FSGs. 
+ */ +int fsg_search_reinit(ps_search_t *fsgs, dict_t *dict, dict2pid_t *d2p); + +/** + * Prepare the FSG search structure for beginning decoding of the next + * utterance. + */ +int fsg_search_start(ps_search_t *search); + +/** + * Step one frame forward through the Viterbi search. + */ +int fsg_search_step(ps_search_t *search, int frame_idx); + +/** + * Windup and clean the FSG search structure after utterance. + */ +int fsg_search_finish(ps_search_t *search); + +/** + * Get hypothesis string from the FSG search. + */ +char const *fsg_search_hyp(ps_search_t *search, int32 *out_score); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/hmm.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/hmm.c new file mode 100644 index 0000000000000000000000000000000000000000..6ef5a9f9780e8fd1ec8f02a005587eea1f7082c8 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/hmm.c @@ -0,0 +1,826 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file hmm.c Implementation of HMM base structure. + */ + +/* System headers. */ +#include +#include +#include +#include + +/* SphinxBase headers. */ +#include +#include + +/* Local headers. 
*/ +#include "hmm.h" + +hmm_context_t * +hmm_context_init(int32 n_emit_state, + uint8 ** const *tp, + int16 const *senscore, + uint16 * const *sseq) +{ + hmm_context_t *ctx; + + assert(n_emit_state > 0); + if (n_emit_state > HMM_MAX_NSTATE) { + E_ERROR("Number of emitting states must be <= %d\n", HMM_MAX_NSTATE); + return NULL; + } + + ctx = ckd_calloc(1, sizeof(*ctx)); + ctx->n_emit_state = n_emit_state; + ctx->tp = tp; + ctx->senscore = senscore; + ctx->sseq = sseq; + ctx->st_sen_scr = ckd_calloc(n_emit_state, sizeof(*ctx->st_sen_scr)); + + return ctx; +} + +void +hmm_context_free(hmm_context_t *ctx) +{ + if (ctx == NULL) + return; + ckd_free(ctx->st_sen_scr); + ckd_free(ctx); +} + +void +hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid) +{ + hmm->ctx = ctx; + hmm->mpx = mpx; + hmm->n_emit_state = ctx->n_emit_state; + if (mpx) { + int i; + hmm->ssid = BAD_SSID; + hmm->senid[0] = ssid; + for (i = 1; i < hmm_n_emit_state(hmm); ++i) { + hmm->senid[i] = BAD_SSID; + } + } + else { + hmm->ssid = ssid; + memcpy(hmm->senid, ctx->sseq[ssid], hmm->n_emit_state * sizeof(*hmm->senid)); + } + hmm->tmatid = tmatid; + hmm_clear(hmm); +} + +void +hmm_deinit(hmm_t *hmm) +{ + (void)hmm; +} + +void +hmm_dump(hmm_t * hmm, + FILE * fp) +{ + int32 i; + + if (hmm_is_mpx(hmm)) { + fprintf(fp, "MPX "); + for (i = 0; i < hmm_n_emit_state(hmm); i++) + fprintf(fp, " %11d", hmm_senid(hmm, i)); + fprintf(fp, " ( "); + for (i = 0; i < hmm_n_emit_state(hmm); i++) + fprintf(fp, "%d ", hmm_ssid(hmm, i)); + fprintf(fp, ")\n"); + } + else { + fprintf(fp, "SSID "); + for (i = 0; i < hmm_n_emit_state(hmm); i++) + fprintf(fp, " %11d", hmm_senid(hmm, i)); + fprintf(fp, " (%d)\n", hmm_ssid(hmm, 0)); + } + + if (hmm->ctx->senscore) { + fprintf(fp, "SENSCR"); + for (i = 0; i < hmm_n_emit_state(hmm); i++) + fprintf(fp, " %11d", hmm_senscr(hmm, i)); + fprintf(fp, "\n"); + } + + fprintf(fp, "SCORES %11d", hmm_in_score(hmm)); + for (i = 1; i < hmm_n_emit_state(hmm); i++) + fprintf(fp, " 
%11d", hmm_score(hmm, i)); + fprintf(fp, " %11d", hmm_out_score(hmm)); + fprintf(fp, "\n"); + + fprintf(fp, "HISTID %11d", hmm_in_history(hmm)); + for (i = 1; i < hmm_n_emit_state(hmm); i++) + fprintf(fp, " %11d", hmm_history(hmm, i)); + fprintf(fp, " %11d", hmm_out_history(hmm)); + fprintf(fp, "\n"); + + if (hmm_in_score(hmm) > 0) + fprintf(fp, + "ALERT!! The input score %d is large than 0. Probably wrap around.\n", + hmm_in_score(hmm)); + if (hmm_out_score(hmm) > 0) + fprintf(fp, + "ALERT!! The output score %d is large than 0. Probably wrap around\n.", + hmm_out_score(hmm)); + + fflush(fp); +} + + +void +hmm_clear_scores(hmm_t * h) +{ + int32 i; + + hmm_in_score(h) = WORST_SCORE; + for (i = 1; i < hmm_n_emit_state(h); i++) + hmm_score(h, i) = WORST_SCORE; + hmm_out_score(h) = WORST_SCORE; + + h->bestscore = WORST_SCORE; +} + +void +hmm_clear(hmm_t * h) +{ + int32 i; + + hmm_in_score(h) = WORST_SCORE; + hmm_in_history(h) = -1; + for (i = 1; i < hmm_n_emit_state(h); i++) { + hmm_score(h, i) = WORST_SCORE; + hmm_history(h, i) = -1; + } + hmm_out_score(h) = WORST_SCORE; + hmm_out_history(h) = -1; + + h->bestscore = WORST_SCORE; + h->frame = -1; +} + +void +hmm_enter(hmm_t *h, int32 score, int32 histid, int frame) +{ + hmm_in_score(h) = score; + hmm_in_history(h) = histid; + hmm_frame(h) = frame; +} + +void +hmm_normalize(hmm_t *h, int32 bestscr) +{ + int32 i; + + for (i = 0; i < hmm_n_emit_state(h); i++) { + if (hmm_score(h, i) BETTER_THAN WORST_SCORE) + hmm_score(h, i) -= bestscr; + } + if (hmm_out_score(h) BETTER_THAN WORST_SCORE) + hmm_out_score(h) -= bestscr; +} + +#define hmm_tprob_5st(i, j) (-tp[(i)*6+(j)]) +#define nonmpx_senscr(i) (-senscore[sseq[i]]) + +static int32 +hmm_vit_eval_5st_lr(hmm_t * hmm) +{ + int16 const *senscore = hmm->ctx->senscore; + uint8 const *tp = hmm->ctx->tp[hmm->tmatid][0]; + uint16 const *sseq = hmm->senid; + int32 s5, s4, s3, s2, s1, s0, t2, t1, t0, bestScore; + + /* It was the best of scores, it was the worst of scores. 
*/ + bestScore = WORST_SCORE; + + /* Cache problem here! */ + s4 = hmm_score(hmm, 4) + nonmpx_senscr(4); + s3 = hmm_score(hmm, 3) + nonmpx_senscr(3); + /* Transitions into non-emitting state 5 */ + if (s3 BETTER_THAN WORST_SCORE) { + t1 = s4 + hmm_tprob_5st(4, 5); + t2 = s3 + hmm_tprob_5st(3, 5); + if (t1 BETTER_THAN t2) { + s5 = t1; + hmm_out_history(hmm) = hmm_history(hmm, 4); + } else { + s5 = t2; + hmm_out_history(hmm) = hmm_history(hmm, 3); + } + if (s5 WORSE_THAN WORST_SCORE) s5 = WORST_SCORE; + hmm_out_score(hmm) = s5; + bestScore = s5; + } + + s2 = hmm_score(hmm, 2) + nonmpx_senscr(2); + /* All transitions into state 4 */ + if (s2 BETTER_THAN WORST_SCORE) { + t0 = s4 + hmm_tprob_5st(4, 4); + t1 = s3 + hmm_tprob_5st(3, 4); + t2 = s2 + hmm_tprob_5st(2, 4); + if (t0 BETTER_THAN t1) { + if (t2 BETTER_THAN t0) { + s4 = t2; + hmm_history(hmm, 4) = hmm_history(hmm, 2); + } else + s4 = t0; + } else { + if (t2 BETTER_THAN t1) { + s4 = t2; + hmm_history(hmm, 4) = hmm_history(hmm, 2); + } else { + s4 = t1; + hmm_history(hmm, 4) = hmm_history(hmm, 3); + } + } + if (s4 WORSE_THAN WORST_SCORE) s4 = WORST_SCORE; + if (s4 BETTER_THAN bestScore) bestScore = s4; + hmm_score(hmm, 4) = s4; + } + + s1 = hmm_score(hmm, 1) + nonmpx_senscr(1); + /* All transitions into state 3 */ + if (s1 BETTER_THAN WORST_SCORE) { + t0 = s3 + hmm_tprob_5st(3, 3); + t1 = s2 + hmm_tprob_5st(2, 3); + t2 = s1 + hmm_tprob_5st(1, 3); + if (t0 BETTER_THAN t1) { + if (t2 BETTER_THAN t0) { + s3 = t2; + hmm_history(hmm, 3) = hmm_history(hmm, 1); + } else + s3 = t0; + } else { + if (t2 BETTER_THAN t1) { + s3 = t2; + hmm_history(hmm, 3) = hmm_history(hmm, 1); + } else { + s3 = t1; + hmm_history(hmm, 3) = hmm_history(hmm, 2); + } + } + if (s3 WORSE_THAN WORST_SCORE) s3 = WORST_SCORE; + if (s3 BETTER_THAN bestScore) bestScore = s3; + hmm_score(hmm, 3) = s3; + } + + s0 = hmm_in_score(hmm) + nonmpx_senscr(0); + /* All transitions into state 2 (state 0 is always active) */ + t0 = s2 + hmm_tprob_5st(2, 2); + t1 = 
s1 + hmm_tprob_5st(1, 2); + t2 = s0 + hmm_tprob_5st(0, 2); + if (t0 BETTER_THAN t1) { + if (t2 BETTER_THAN t0) { + s2 = t2; + hmm_history(hmm, 2) = hmm_in_history(hmm); + } else + s2 = t0; + } else { + if (t2 BETTER_THAN t1) { + s2 = t2; + hmm_history(hmm, 2) = hmm_in_history(hmm); + } else { + s2 = t1; + hmm_history(hmm, 2) = hmm_history(hmm, 1); + } + } + if (s2 WORSE_THAN WORST_SCORE) s2 = WORST_SCORE; + if (s2 BETTER_THAN bestScore) bestScore = s2; + hmm_score(hmm, 2) = s2; + + + /* All transitions into state 1 */ + t0 = s1 + hmm_tprob_5st(1, 1); + t1 = s0 + hmm_tprob_5st(0, 1); + if (t0 BETTER_THAN t1) { + s1 = t0; + } else { + s1 = t1; + hmm_history(hmm, 1) = hmm_in_history(hmm); + } + if (s1 WORSE_THAN WORST_SCORE) s1 = WORST_SCORE; + if (s1 BETTER_THAN bestScore) bestScore = s1; + hmm_score(hmm, 1) = s1; + + /* All transitions into state 0 */ + s0 = s0 + hmm_tprob_5st(0, 0); + if (s0 WORSE_THAN WORST_SCORE) s0 = WORST_SCORE; + if (s0 BETTER_THAN bestScore) bestScore = s0; + hmm_in_score(hmm) = s0; + + hmm_bestscore(hmm) = bestScore; + return bestScore; +} +/* Multiplex lookups: state st indexes the senone-sequence table with its own sequence ID ssid[st]; mpx_senscr() is the negated senone score for that state. Both expect locals named sseq, ssid and senscore. */ +#define mpx_senid(st) sseq[ssid[st]][st] +#define mpx_senscr(st) (-senscore[mpx_senid(st)]) +/** + * One Viterbi update of a 5-state left-to-right multiplex HMM. + * + * Every state carries its own senone-sequence ID in hmm->senid[]. A state + * (1..4) whose ID is BAD_SSID contributes WORST_SCORE. The exit state is + * updated first, then states 4, 3, 2, 1, 0; whenever a state is entered from + * a lower-numbered state, that state's history index and sequence ID are + * copied forward with the score. + * + * @return The best of the exit-state and emitting-state scores, which is + * also stored through hmm_bestscore(). + */ +static int32 +hmm_vit_eval_5st_lr_mpx(hmm_t * hmm) +{ + uint8 const *tp = hmm->ctx->tp[hmm->tmatid][0]; + int16 const *senscore = hmm->ctx->senscore; + uint16 * const *sseq = hmm->ctx->sseq; + uint16 *ssid = hmm->senid; + int32 bestScore; + int32 s5, s4, s3, s2, s1, s0, t2, t1, t0; + + /* Don't propagate WORST_SCORE */ + if (ssid[4] == BAD_SSID) + s4 = t1 = WORST_SCORE; + else { + s4 = hmm_score(hmm, 4) + mpx_senscr(4); + t1 = s4 + hmm_tprob_5st(4, 5); + } + if (ssid[3] == BAD_SSID) + s3 = t2 = WORST_SCORE; + else { + s3 = hmm_score(hmm, 3) + mpx_senscr(3); + t2 = s3 + hmm_tprob_5st(3, 5); + } + if (t1 BETTER_THAN t2) { + s5 = t1; + hmm_out_history(hmm) = hmm_history(hmm, 4); + } + else { + s5 = t2; + hmm_out_history(hmm) = hmm_history(hmm, 3); + } + if (s5 WORSE_THAN WORST_SCORE) s5 = WORST_SCORE; +
hmm_out_score(hmm) = s5; + bestScore = s5; + + /* Don't propagate WORST_SCORE */ + if (ssid[2] == BAD_SSID) + s2 = t2 = WORST_SCORE; + else { + s2 = hmm_score(hmm, 2) + mpx_senscr(2); + t2 = s2 + hmm_tprob_5st(2, 4); + } + /* All transitions into state 4 (from states 4, 3 and 2) */ + t0 = t1 = WORST_SCORE; + if (s4 != WORST_SCORE) + t0 = s4 + hmm_tprob_5st(4, 4); + if (s3 != WORST_SCORE) + t1 = s3 + hmm_tprob_5st(3, 4); + if (t0 BETTER_THAN t1) { + if (t2 BETTER_THAN t0) { + s4 = t2; + hmm_history(hmm, 4) = hmm_history(hmm, 2); + ssid[4] = ssid[2]; + } + else + s4 = t0; + } + else { + if (t2 BETTER_THAN t1) { + s4 = t2; + hmm_history(hmm, 4) = hmm_history(hmm, 2); + ssid[4] = ssid[2]; + } + else { + s4 = t1; + hmm_history(hmm, 4) = hmm_history(hmm, 3); + ssid[4] = ssid[3]; + } + } + if (s4 WORSE_THAN WORST_SCORE) s4 = WORST_SCORE; + if (s4 BETTER_THAN bestScore) + bestScore = s4; + hmm_score(hmm, 4) = s4; + + /* State 1 feeds state 3 below. Don't propagate WORST_SCORE */ + if (ssid[1] == BAD_SSID) + s1 = t2 = WORST_SCORE; + else { + s1 = hmm_score(hmm, 1) + mpx_senscr(1); + t2 = s1 + hmm_tprob_5st(1, 3); + } + t0 = t1 = WORST_SCORE; + if (s3 != WORST_SCORE) + t0 = s3 + hmm_tprob_5st(3, 3); + if (s2 != WORST_SCORE) + t1 = s2 + hmm_tprob_5st(2, 3); + if (t0 BETTER_THAN t1) { + if (t2 BETTER_THAN t0) { + s3 = t2; + hmm_history(hmm, 3) = hmm_history(hmm, 1); + ssid[3] = ssid[1]; + } + else + s3 = t0; + } + else { + if (t2 BETTER_THAN t1) { + s3 = t2; + hmm_history(hmm, 3) = hmm_history(hmm, 1); + ssid[3] = ssid[1]; + } + else { + s3 = t1; + hmm_history(hmm, 3) = hmm_history(hmm, 2); + ssid[3] = ssid[2]; + } + } + if (s3 WORSE_THAN WORST_SCORE) s3 = WORST_SCORE; + if (s3 BETTER_THAN bestScore) bestScore = s3; + hmm_score(hmm, 3) = s3; + + /* State 0 is always active */ + s0 = hmm_in_score(hmm) + mpx_senscr(0); + + /* All transitions into state 2. Don't propagate WORST_SCORE */ + t0 = t1 = WORST_SCORE; + if (s2 != WORST_SCORE) + t0 = s2 + hmm_tprob_5st(2, 2); + if (s1 != WORST_SCORE) + t1 = s1 + hmm_tprob_5st(1, 2); + t2 = s0 + hmm_tprob_5st(0, 2); + if (t0 BETTER_THAN t1) { + if (t2
BETTER_THAN t0) { + s2 = t2; + hmm_history(hmm, 2) = hmm_in_history(hmm); + ssid[2] = ssid[0]; + } + else + s2 = t0; + } + else { + if (t2 BETTER_THAN t1) { + s2 = t2; + hmm_history(hmm, 2) = hmm_in_history(hmm); + ssid[2] = ssid[0]; + } + else { + s2 = t1; + hmm_history(hmm, 2) = hmm_history(hmm, 1); + ssid[2] = ssid[1]; + } + } + if (s2 WORSE_THAN WORST_SCORE) s2 = WORST_SCORE; + if (s2 BETTER_THAN bestScore) bestScore = s2; + hmm_score(hmm, 2) = s2; + + /* All transitions into state 1. Don't propagate WORST_SCORE */ + t0 = WORST_SCORE; + if (s1 != WORST_SCORE) + t0 = s1 + hmm_tprob_5st(1, 1); + t1 = s0 + hmm_tprob_5st(0, 1); + if (t0 BETTER_THAN t1) { + s1 = t0; + } + else { + s1 = t1; + hmm_history(hmm, 1) = hmm_in_history(hmm); + ssid[1] = ssid[0]; + } + if (s1 WORSE_THAN WORST_SCORE) s1 = WORST_SCORE; + if (s1 BETTER_THAN bestScore) bestScore = s1; + hmm_score(hmm, 1) = s1; + /* Self-transition of state 0 */ + s0 += hmm_tprob_5st(0, 0); + if (s0 WORSE_THAN WORST_SCORE) s0 = WORST_SCORE; + if (s0 BETTER_THAN bestScore) bestScore = s0; + hmm_in_score(hmm) = s0; + + hmm_bestscore(hmm) = bestScore; + return bestScore; +} +/* Negated transition score from state i to state j; tp is read as a flat array with 4 entries per source state. Expects a local named tp. */ +#define hmm_tprob_3st(i, j) (-tp[(i)*4+(j)]) +/** + * One Viterbi update of a 3-state left-to-right HMM using the per-state + * senone IDs in hmm->senid[] (the non-multiplex case). + * + * Updates the exit state (only when state 1's score is better than + * WORST_SCORE), then states 2, 1 and 0, and stores the results in hmm. + * + * @return The best score found, also stored through hmm_bestscore(). + */ +static int32 +hmm_vit_eval_3st_lr(hmm_t * hmm) +{ + int16 const *senscore = hmm->ctx->senscore; + uint8 const *tp = hmm->ctx->tp[hmm->tmatid][0]; + uint16 const *sseq = hmm->senid; + int32 s3, s2, s1, s0, t2, t1, t0, bestScore; + + s2 = hmm_score(hmm, 2) + nonmpx_senscr(2); + s1 = hmm_score(hmm, 1) + nonmpx_senscr(1); + s0 = hmm_in_score(hmm) + nonmpx_senscr(0); + + /* It was the best of scores, it was the worst of scores.
*/ + bestScore = WORST_SCORE; + t2 = INT_MIN; /* Not used unless skipstate is true */ + + /* Transitions into non-emitting state 3 */ + if (s1 BETTER_THAN WORST_SCORE) { + t1 = s2 + hmm_tprob_3st(2, 3); + if (hmm_tprob_3st(1,3) BETTER_THAN TMAT_WORST_SCORE) + t2 = s1 + hmm_tprob_3st(1, 3); + if (t1 BETTER_THAN t2) { + s3 = t1; + hmm_out_history(hmm) = hmm_history(hmm, 2); + } else { + s3 = t2; + hmm_out_history(hmm) = hmm_history(hmm, 1); + } + if (s3 WORSE_THAN WORST_SCORE) s3 = WORST_SCORE; + hmm_out_score(hmm) = s3; + bestScore = s3; + } + + /* All transitions into state 2 (state 0 is always active). NOTE(review): t2 is not reset before this step, so when the 0->2 skip is disallowed it still holds whatever the exit-state step left in it (INT_MIN or the 1->3 candidate) - confirm that this cannot matter for the transition matrices in use. */ + t0 = s2 + hmm_tprob_3st(2, 2); + t1 = s1 + hmm_tprob_3st(1, 2); + if (hmm_tprob_3st(0, 2) BETTER_THAN TMAT_WORST_SCORE) + t2 = s0 + hmm_tprob_3st(0, 2); + if (t0 BETTER_THAN t1) { + if (t2 BETTER_THAN t0) { + s2 = t2; + hmm_history(hmm, 2) = hmm_in_history(hmm); + } else + s2 = t0; + } else { + if (t2 BETTER_THAN t1) { + s2 = t2; + hmm_history(hmm, 2) = hmm_in_history(hmm); + } else { + s2 = t1; + hmm_history(hmm, 2) = hmm_history(hmm, 1); + } + } + if (s2 WORSE_THAN WORST_SCORE) s2 = WORST_SCORE; + if (s2 BETTER_THAN bestScore) bestScore = s2; + hmm_score(hmm, 2) = s2; + + /* All transitions into state 1 */ + t0 = s1 + hmm_tprob_3st(1, 1); + t1 = s0 + hmm_tprob_3st(0, 1); + if (t0 BETTER_THAN t1) { + s1 = t0; + } else { + s1 = t1; + hmm_history(hmm, 1) = hmm_in_history(hmm); + } + if (s1 WORSE_THAN WORST_SCORE) s1 = WORST_SCORE; + if (s1 BETTER_THAN bestScore) bestScore = s1; + hmm_score(hmm, 1) = s1; + + /* All transitions into state 0 */ + s0 = s0 + hmm_tprob_3st(0, 0); + if (s0 WORSE_THAN WORST_SCORE) s0 = WORST_SCORE; + if (s0 BETTER_THAN bestScore) bestScore = s0; + hmm_in_score(hmm) = s0; + + hmm_bestscore(hmm) = bestScore; + return bestScore; +} +/** + * One Viterbi update of a 3-state left-to-right multiplex HMM. + * + * Every state carries its own senone-sequence ID in hmm->senid[]. States 1 + * and 2 contribute WORST_SCORE when their ID is BAD_SSID. The exit state is + * updated first, then states 2, 1 and 0; whenever a state is entered from a + * lower-numbered state, that state's history index and sequence ID are + * copied forward with the score. + * + * @return The best of the exit-state and emitting-state scores, which is + * also stored through hmm_bestscore(). + */ +static int32 +hmm_vit_eval_3st_lr_mpx(hmm_t * hmm) +{ + uint8 const *tp = hmm->ctx->tp[hmm->tmatid][0]; + int16 const *senscore = hmm->ctx->senscore; + uint16 * const *sseq = hmm->ctx->sseq; + uint16 *ssid =
hmm->senid; + int32 bestScore; + int32 s3, s2, s1, s0, t2, t1, t0; + + /* Transitions into the exit state. Don't propagate WORST_SCORE */ + t2 = INT_MIN; /* Not used unless skipstate is true */ + if (ssid[2] == BAD_SSID) + s2 = t1 = WORST_SCORE; + else { + s2 = hmm_score(hmm, 2) + mpx_senscr(2); + t1 = s2 + hmm_tprob_3st(2, 3); + } + if (ssid[1] == BAD_SSID) + s1 = t2 = WORST_SCORE; + else { + s1 = hmm_score(hmm, 1) + mpx_senscr(1); + if (hmm_tprob_3st(1,3) BETTER_THAN TMAT_WORST_SCORE) + t2 = s1 + hmm_tprob_3st(1, 3); + } + if (t1 BETTER_THAN t2) { + s3 = t1; + hmm_out_history(hmm) = hmm_history(hmm, 2); + } + else { + s3 = t2; + hmm_out_history(hmm) = hmm_history(hmm, 1); + } + if (s3 WORSE_THAN WORST_SCORE) s3 = WORST_SCORE; + hmm_out_score(hmm) = s3; + bestScore = s3; + + /* State 0 is always active */ + s0 = hmm_in_score(hmm) + mpx_senscr(0); + + /* All transitions into state 2. Don't propagate WORST_SCORE. NOTE(review): t2 still holds the exit-state candidate when the 0->2 skip is disallowed - confirm that is intended. */ + t0 = t1 = WORST_SCORE; + if (s2 != WORST_SCORE) + t0 = s2 + hmm_tprob_3st(2, 2); + if (s1 != WORST_SCORE) + t1 = s1 + hmm_tprob_3st(1, 2); + if (hmm_tprob_3st(0,2) BETTER_THAN TMAT_WORST_SCORE) + t2 = s0 + hmm_tprob_3st(0, 2); + if (t0 BETTER_THAN t1) { + if (t2 BETTER_THAN t0) { + s2 = t2; + hmm_history(hmm, 2) = hmm_in_history(hmm); + ssid[2] = ssid[0]; + } + else + s2 = t0; + } + else { + if (t2 BETTER_THAN t1) { + s2 = t2; + hmm_history(hmm, 2) = hmm_in_history(hmm); + ssid[2] = ssid[0]; + } + else { + s2 = t1; + hmm_history(hmm, 2) = hmm_history(hmm, 1); + ssid[2] = ssid[1]; + } + } + if (s2 WORSE_THAN WORST_SCORE) s2 = WORST_SCORE; + if (s2 BETTER_THAN bestScore) bestScore = s2; + hmm_score(hmm, 2) = s2; + + /* All transitions into state 1. Don't propagate WORST_SCORE */ + t0 = WORST_SCORE; + if (s1 != WORST_SCORE) + t0 = s1 + hmm_tprob_3st(1, 1); + t1 = s0 + hmm_tprob_3st(0, 1); + if (t0 BETTER_THAN t1) { + s1 = t0; + } + else { + s1 = t1; + hmm_history(hmm, 1) = hmm_in_history(hmm); + ssid[1] = ssid[0]; + } + if (s1 WORSE_THAN WORST_SCORE) s1 = WORST_SCORE; + if (s1 BETTER_THAN bestScore) bestScore = s1; + hmm_score(hmm, 1) = s1; + + /*
State 0 is always active */ + s0 += hmm_tprob_3st(0, 0); + if (s0 WORSE_THAN WORST_SCORE) s0 = WORST_SCORE; + if (s0 BETTER_THAN bestScore) bestScore = s0; + hmm_in_score(hmm) = s0; + + hmm_bestscore(hmm) = bestScore; + return bestScore; +} +/** + * Generic Viterbi update for HMMs that do not match a hard-wired topology. + * + * Only self-transitions and transitions from lower-numbered to + * higher-numbered states are considered (the inner loops run from to - 1 + * down to 0). Uses ctx->st_sen_scr as scratch space for the per-state + * "previous score + senone score" values. The exit state is evaluated + * first, then the emitting states from last to first. + * + * @return The best score found, also stored through hmm_bestscore(). + */ +static int32 +hmm_vit_eval_anytopo(hmm_t * hmm) +{ + hmm_context_t *ctx = hmm->ctx; + int32 to, from, bestfrom; + int32 newscr, scr, bestscr; + int final_state; + + /* Compute previous state-score + observation output prob for each emitting state */ + ctx->st_sen_scr[0] = hmm_in_score(hmm) + hmm_senscr(hmm, 0); + for (from = 1; from < hmm_n_emit_state(hmm); ++from) { + if ((ctx->st_sen_scr[from] = + hmm_score(hmm, from) + hmm_senscr(hmm, from)) WORSE_THAN WORST_SCORE) + ctx->st_sen_scr[from] = WORST_SCORE; + } + + /* FIXME/TODO: Use the BLAS for all this. */ + /* Evaluate final-state first, which does not have a self-transition */ + final_state = hmm_n_emit_state(hmm); + to = final_state; + scr = WORST_SCORE; + bestfrom = -1; + for (from = to - 1; from >= 0; --from) { + if ((hmm_tprob(hmm, from, to) BETTER_THAN TMAT_WORST_SCORE) && + ((newscr = ctx->st_sen_scr[from] + + hmm_tprob(hmm, from, to)) BETTER_THAN scr)) { + scr = newscr; + bestfrom = from; + } + } + hmm_out_score(hmm) = scr; + if (bestfrom >= 0) + hmm_out_history(hmm) = hmm_history(hmm, bestfrom); + bestscr = scr; + + /* Evaluate all other states, which might have self-transitions */ + for (to = final_state - 1; to >= 0; --to) { + /* Score from self-transition, if any */ + scr = + (hmm_tprob(hmm, to, to) BETTER_THAN TMAT_WORST_SCORE) + ?
ctx->st_sen_scr[to] + hmm_tprob(hmm, to, to) + : WORST_SCORE; + + /* Scores from transitions from other states; bestfrom stays -1 when the self-transition (or nothing) wins */ + bestfrom = -1; + for (from = to - 1; from >= 0; --from) { + if ((hmm_tprob(hmm, from, to) BETTER_THAN TMAT_WORST_SCORE) && + ((newscr = ctx->st_sen_scr[from] + + hmm_tprob(hmm, from, to)) BETTER_THAN scr)) { + scr = newscr; + bestfrom = from; + } + } + + /* Update new result for state to */ + if (to == 0) { + hmm_in_score(hmm) = scr; + if (bestfrom >= 0) + hmm_in_history(hmm) = hmm_history(hmm, bestfrom); + } + else { + hmm_score(hmm, to) = scr; + if (bestfrom >= 0) + hmm_history(hmm, to) = hmm_history(hmm, bestfrom); + } + /* Propagate ssid for multiplex HMMs */ + if (bestfrom >= 0 && hmm_is_mpx(hmm)) + hmm->senid[to] = hmm->senid[bestfrom]; + + if (bestscr WORSE_THAN scr) + bestscr = scr; + } + + hmm_bestscore(hmm) = bestscr; + return bestscr; +} +/** + * Viterbi-evaluate one HMM for the current frame. + * + * Dispatches to the hard-wired 5-state or 3-state evaluator (multiplex or + * not, according to hmm_is_mpx()) and to hmm_vit_eval_anytopo() for every + * other number of emitting states. + * + * @return The best score reported by the selected evaluator. + */ +int32 +hmm_vit_eval(hmm_t * hmm) +{ + if (hmm_is_mpx(hmm)) { + if (hmm_n_emit_state(hmm) == 5) + return hmm_vit_eval_5st_lr_mpx(hmm); + else if (hmm_n_emit_state(hmm) == 3) + return hmm_vit_eval_3st_lr_mpx(hmm); + else + return hmm_vit_eval_anytopo(hmm); + } + else { + if (hmm_n_emit_state(hmm) == 5) + return hmm_vit_eval_5st_lr(hmm); + else if (hmm_n_emit_state(hmm) == 3) + return hmm_vit_eval_3st_lr(hmm); + else + return hmm_vit_eval_anytopo(hmm); + } +} +/** + * Debugging wrapper around hmm_vit_eval(): when fp is non-NULL the HMM is + * dumped with hmm_dump() before and after the evaluation. + * + * @return The value returned by hmm_vit_eval(). + */ +int32 +hmm_dump_vit_eval(hmm_t * hmm, FILE * fp) +{ + int32 bs = 0; + + if (fp) { + fprintf(fp, "BEFORE:\n"); + hmm_dump(hmm, fp); + } + bs = hmm_vit_eval(hmm); + if (fp) { + fprintf(fp, "AFTER:\n"); + hmm_dump(hmm, fp); + } + + return bs; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/hmm.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/hmm.h new file mode 100644 index 0000000000000000000000000000000000000000..719e4030f39cbe4d9839ff5c8ce9b24e1885a8e7 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/hmm.h @@ -0,0 +1,309 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file hmm.h Hidden Markov Model base structures. + */ + +#ifndef __HMM_H__ +#define __HMM_H__ + +/* System headers. 
*/ +#include + +/* SphinxBase headers. */ +#include +#include + +/* PocketSphinx headers. */ +#include "bin_mdef.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * Type for frame index values. Used in HMM indexes and + * backpointers and affects memory required. Due to limitations of FSG + * search implementation this value needs to be signed. + */ +typedef int32 frame_idx_t; + +/** + * Maximum number of frames in index, should be in sync with above. + */ +#define MAX_N_FRAMES MAX_INT32 + + +/** Shift count for senone scores. */ +#define SENSCR_SHIFT 10 + +/** + * Large "bad" score. + * + * This number must be "bad" enough so that 4 times WORST_SCORE will + * not overflow. The reason for this is that the search doesn't check + * the scores in a model before evaluating the model and it may + * require as many as 4 plies before the new 'good' score can wipe + * out the initial WORST_SCORE initialization. + */ +#define WORST_SCORE ((int)0xE0000000) + +/** + * Watch out, though! Transition matrix entries that are supposed to + * be "zero" don't actually get that small due to quantization. + */ +#define TMAT_WORST_SCORE (-255) + +/** + * Is one score better than another? + */ +#define BETTER_THAN > + +/** + * Is one score worse than another? + */ +#define WORSE_THAN < + +/** \file hmm.h + * \brief HMM data structure and operation + * + * For efficiency, this version is hardwired for two possible HMM + * topologies, but will fall back to others: + * + * 5-state left-to-right HMMs: (0 is the *emitting* entry state and E + * is a non-emitting exit state; the x's indicate allowed transitions + * between source and destination states): + * + *
+ *               0   1   2   3   4   E (destination-states)
+ *           0   x   x   x
+ *           1       x   x   x
+ *           2           x   x   x
+ *           3               x   x   x
+ *           4                   x   x
+ *    (source-states)
+ * 
+ * + * 5-state topologies that contain a subset of the above transitions should work as well. + * + * 3-state left-to-right HMMs (similar notation as the 5-state topology above): + * + *
+ *               0   1   2   E (destination-states)
+ *           0   x   x   x
+ *           1       x   x   x
+ *           2           x   x 
+ *    (source-states)
+ * 
+ * + * 3-state topologies that contain a subset of the above transitions should work as well. + */ + +/** + * @struct hmm_context_t + * @brief Shared information between a set of HMMs. + * + * We assume that the initial state is emitting and that the + * transition matrix is n_emit_state x (n_emit_state+1), where the + * extra destination dimension corresponds to the non-emitting final or + * exit state. + */ +typedef struct hmm_context_s { + int32 n_emit_state; /**< Number of emitting states in this set of HMMs. */ + uint8 ** const *tp; /**< State transition scores tp[id][from][to] (logs3 values). */ + int16 const *senscore; /**< State emission scores senscore[senid] + (negated scaled logs3 values). */ + uint16 * const *sseq; /**< Senone sequence mapping. */ + int32 *st_sen_scr; /**< Temporary array of senone scores (for some topologies). */ + listelem_alloc_t *mpx_ssid_alloc; /**< Allocator for senone sequence ID arrays. */ + void *udata; /**< Whatever you feel like, gosh. */ +} hmm_context_t; + +/** + * Hard-coded limit on the number of emitting states. + */ +#define HMM_MAX_NSTATE 5 + +/** + * @struct hmm_t + * @brief An individual HMM among the HMM search space. + * + * An individual HMM among the HMM search space. An HMM with N + * emitting states consists of N+1 internal states including the + * non-emitting exit (out) state. + */ +typedef struct hmm_s { + hmm_context_t *ctx; /**< Shared context data for this HMM. */ + int32 score[HMM_MAX_NSTATE]; /**< State scores for emitting states. */ + int32 history[HMM_MAX_NSTATE]; /**< History indices for emitting states. */ + int32 out_score; /**< Score for non-emitting exit state. */ + int32 out_history; /**< History index for non-emitting exit state. */ + uint16 ssid; /**< Senone sequence ID (for non-MPX) */ + uint16 senid[HMM_MAX_NSTATE]; /**< Senone IDs (non-MPX) or sequence IDs (MPX) */ + int32 bestscore; /**< Best [emitting] state score in current frame (for pruning). 
*/ + int16 tmatid; /**< Transition matrix ID (see hmm_context_t). */ + frame_idx_t frame; /**< Frame in which this HMM was last active; <0 if inactive */ + uint8 mpx; /**< Is this HMM multiplex? (hoisted for speed) */ + uint8 n_emit_state; /**< Number of emitting states (hoisted for speed) */ +} hmm_t; + +/** Access macros. */ +#define hmm_context(h) (h)->ctx +#define hmm_is_mpx(h) (h)->mpx + +#define hmm_in_score(h) (h)->score[0] +#define hmm_score(h,st) (h)->score[st] +#define hmm_out_score(h) (h)->out_score + +#define hmm_in_history(h) (h)->history[0] +#define hmm_history(h,st) (h)->history[st] +#define hmm_out_history(h) (h)->out_history + +#define hmm_bestscore(h) (h)->bestscore +#define hmm_frame(h) (h)->frame +#define hmm_mpx_ssid(h,st) (h)->senid[st] +#define hmm_nonmpx_ssid(h) (h)->ssid +#define hmm_ssid(h,st) (hmm_is_mpx(h) \ + ? hmm_mpx_ssid(h,st) : hmm_nonmpx_ssid(h)) +#define hmm_mpx_senid(h,st) (hmm_mpx_ssid(h,st) == BAD_SENID \ + ? BAD_SENID : (h)->ctx->sseq[hmm_mpx_ssid(h,st)][st]) +#define hmm_nonmpx_senid(h,st) ((h)->senid[st]) +#define hmm_senid(h,st) (hmm_is_mpx(h) \ + ? hmm_mpx_senid(h,st) : hmm_nonmpx_senid(h,st)) +#define hmm_senscr(h,st) (hmm_senid(h,st) == BAD_SENID \ + ? WORST_SCORE \ + : -(h)->ctx->senscore[hmm_senid(h,st)]) +#define hmm_tmatid(h) (h)->tmatid +#define hmm_tprob(h,i,j) (-(h)->ctx->tp[hmm_tmatid(h)][i][j]) +#define hmm_n_emit_state(h) ((h)->n_emit_state) +#define hmm_n_state(h) ((h)->n_emit_state + 1) + +/** + * Create an HMM context. + **/ +hmm_context_t *hmm_context_init(int32 n_emit_state, + uint8 ** const *tp, + int16 const *senscore, + uint16 * const *sseq); + +/** + * Change the senone score array for a context. + **/ +#define hmm_context_set_senscore(ctx, senscr) ((ctx)->senscore = (senscr)) + +/** + * Free an HMM context. + * + * @note The transition matrices, senone scores, and senone sequence + * mapping are all assumed to be allocated externally, and will NOT be + * freed by this function. 
+ **/ +void hmm_context_free(hmm_context_t *ctx); + +/** + * Populate a previously-allocated HMM structure, allocating internal data. + **/ +void hmm_init(hmm_context_t *ctx, hmm_t *hmm, int mpx, int ssid, int tmatid); + +/** + * Free an HMM structure, releasing internal data (but not the HMM structure itself). + */ +void hmm_deinit(hmm_t *hmm); + +/** + * Reset the states of the HMM to the invalid condition. + + * i.e., scores to WORST_SCORE and hist to undefined. + */ +void hmm_clear(hmm_t *h); + +/** + * Reset the scores of the HMM. + */ +void hmm_clear_scores(hmm_t *h); + +/** + * Renormalize the scores in this HMM based on the given best score. + */ +void hmm_normalize(hmm_t *h, int32 bestscr); + +/** + * Enter an HMM with the given path score and history ID. + **/ +void hmm_enter(hmm_t *h, int32 score, + int32 histid, int frame); + +/** + * Viterbi evaluation of given HMM. + * + * @note If this module were being used for tracking state + * segmentations, the dummy, non-emitting exit state would have to be + * updated separately. In the Viterbi DP diagram, transitions to the + * exit state occur from the current time; they are vertical + * transitions. Hence they should be made only after the history has + * been logged for the emitting states. But we're not bothered with + * state segmentations, for now. So, we update the exit state as + * well. +*/ +int32 hmm_vit_eval(hmm_t *hmm); + + +/** + * Like hmm_vit_eval, but dump HMM state and relevant senscr to fp first, for debugging;. + */ +int32 hmm_dump_vit_eval(hmm_t *hmm, /**< In/Out: HMM being updated */ + FILE *fp /**< An output file pointer */ + ); + +/** + * For debugging, dump the whole HMM out. 
+ */ + +void hmm_dump(hmm_t *h, /**< In/Out: HMM being updated */ + FILE *fp /**< An output file pointer */ + ); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __HMM_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/kws_detections.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/kws_detections.c new file mode 100644 index 0000000000000000000000000000000000000000..1a5890e6a087e0b1aef42d00c7b9192ca8b151a0 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/kws_detections.c @@ -0,0 +1,120 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2014 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* +* kws_detections.c -- Object for storing keyphrase search results +*/ + +#include "kws_detections.h" +/** + * Free every stored detection record and the list that holds them, leaving + * detect_list NULL. The keyphrase strings the records point to are not + * freed here. + */ +void +kws_detections_reset(kws_detections_t *detections) +{ + gnode_t *gn; + + if (!detections->detect_list) + return; + + for (gn = detections->detect_list; gn; gn = gnode_next(gn)) + ckd_free(gnode_ptr(gn)); + glist_free(detections->detect_list); + detections->detect_list = NULL; +} +/** + * Record a detection of keyphrase spanning frames sf..ef. + * + * If a stored detection of the same keyphrase overlaps that frame range, no + * new entry is added: the stored one is overwritten only when the new prob + * is higher. Otherwise a new record is allocated and added to the list. + * The keyphrase pointer is stored as given, not copied. + */ +void +kws_detections_add(kws_detections_t *detections, const char* keyphrase, int sf, int ef, int prob, int ascr) +{ + gnode_t *gn; + kws_detection_t* detection; + for (gn = detections->detect_list; gn; gn = gnode_next(gn)) { + kws_detection_t *det = (kws_detection_t *)gnode_ptr(gn); + if (strcmp(keyphrase, det->keyphrase) == 0 && det->sf < ef && det->ef > sf) { + if (det->prob < prob) { + det->sf = sf; + det->ef = ef; + det->prob = prob; + det->ascr = ascr; + } + return; + } + } + + /* Nothing found */ + detection = (kws_detection_t *)ckd_calloc(1, sizeof(*detection)); + detection->sf = sf; + detection->ef = ef; + detection->keyphrase = keyphrase; + detection->prob = prob; + detection->ascr = ascr; + detections->detect_list = glist_add_ptr(detections->detect_list, detection); +} +/** + * Build a space-separated string of the keyphrases whose end frame is + * earlier than frame - delay, in list order. + * + * @return NULL when no detection qualifies; otherwise a string allocated + * with ckd_calloc() (presumably released by the caller with + * ckd_free() - confirm against callers). + */ +char * +kws_detections_hyp_str(kws_detections_t *detections, int frame, int delay) +{ + gnode_t *gn; + char *c; + int len; + char *hyp_str; + + len = 0; /* bytes needed: each phrase plus one byte for its separator/terminator */ + for (gn =
detections->detect_list; gn; gn = gnode_next(gn)) { + kws_detection_t *det = (kws_detection_t *)gnode_ptr(gn); + if (det->ef < frame - delay) { + len += strlen(det->keyphrase) + 1; + } + } + /* Nothing old enough to report */ + if (len == 0) { + return NULL; + } + /* Second pass: copy each qualifying phrase followed by a space */ + hyp_str = (char *)ckd_calloc(len, sizeof(char)); + c = hyp_str; + for (gn = detections->detect_list; gn; gn = gnode_next(gn)) { + kws_detection_t *det = (kws_detection_t *)gnode_ptr(gn); + if (det->ef < frame - delay) { + memcpy(c, det->keyphrase, strlen(det->keyphrase)); + c += strlen(det->keyphrase); + *c = ' '; + c++; + } + } /* The trailing space becomes the string terminator */ + if (c > hyp_str) { + c--; + *c = '\0'; + } + return hyp_str; +} + diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/kws_detections.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/kws_detections.h new file mode 100644 index 0000000000000000000000000000000000000000..e9f478867f916cadf1bb806950330efe0278a4fe --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/kws_detections.h @@ -0,0 +1,87 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2014 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution.
+ * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * kws_detections.h -- Structures for storing keyphrase spotting results. + */ + +#ifndef __KWS_DETECTIONS_H__ +#define __KWS_DETECTIONS_H__ + +/* SphinxBase headers. */ +#include + +/* Local headers. */ +#include "pocketsphinx_internal.h" +#include "hmm.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +typedef struct kws_detection_s { + const char* keyphrase; + frame_idx_t sf; + frame_idx_t ef; + int32 prob; + int32 ascr; +} kws_detection_t; + +typedef struct kws_detections_s { + glist_t detect_list; +} kws_detections_t; + +/** + * Reset history structure. + */ +void kws_detections_reset(kws_detections_t *detections); + +/** + * Add history entry. + */ +void kws_detections_add(kws_detections_t *detections, const char* keyphrase, int sf, int ef, int prob, int ascr); + +/** + * Compose hypothesis. 
+ */ +char* kws_detections_hyp_str(kws_detections_t *detections, int frame, int delay); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __KWS_DETECTIONS_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/kws_search.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/kws_search.c new file mode 100644 index 0000000000000000000000000000000000000000..83cc20efce58ac087d767710829ba57a985d0aaa --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/kws_search.c @@ -0,0 +1,705 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2013 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* +* kws_search.c -- Search object for key phrase spotting. +*/ + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "pocketsphinx_internal.h" +#include "kws_search.h" + +/** Access macros */ +#define hmm_is_active(hmm) ((hmm)->frame > 0) +#define kws_nth_hmm(keyphrase,n) (&((keyphrase)->hmms[n])) + +/* Value selected experimentally as maximum difference between triphone +score and phone loop score, used in confidence computation to make sure +that confidence value is less than 1. This might be different for +different models. 
Corresponds to threshold of about 1e+50 */ +#define KWS_MAX 1500 +/* Lattice hook of the search vtable: this search never returns a lattice. */ +static ps_lattice_t * +kws_search_lattice(ps_search_t * search) +{ + (void)search; + return NULL; +} +/* Probability hook of the search vtable: always returns 0. */ +static int +kws_search_prob(ps_search_t * search) +{ + (void)search; + return 0; +} +/* Release a segment iterator created by kws_search_seg_iter(). */ +static void +kws_seg_free(ps_seg_t *seg) +{ + kws_seg_t *itor = (kws_seg_t *)seg; + ckd_free(itor); +} +/* Copy the fields of the detection the iterator currently points at into its base segment; lscr is always 0. */ +static void +kws_seg_fill(kws_seg_t *itor) +{ + kws_detection_t* detection = (kws_detection_t*)gnode_ptr(itor->detection); + + itor->base.word = detection->keyphrase; + itor->base.sf = detection->sf; + itor->base.ef = detection->ef; + itor->base.prob = detection->prob; + itor->base.ascr = detection->ascr; + itor->base.lscr = 0; +} +/* Advance to the next detection whose end frame is not later than itor->last_frame. Frees the iterator and returns NULL when there is none. */ +static ps_seg_t * +kws_seg_next(ps_seg_t *seg) +{ + kws_seg_t *itor = (kws_seg_t *)seg; + + gnode_t *detect_head = gnode_next(itor->detection); + while (detect_head != NULL && ((kws_detection_t*)gnode_ptr(detect_head))->ef > itor->last_frame) + detect_head = gnode_next(detect_head); + itor->detection = detect_head; + + if (!itor->detection) { + kws_seg_free(seg); + return NULL; + } + + kws_seg_fill(itor); + + return seg; +} +/* Segment-iterator function table */ +static ps_segfuncs_t kws_segfuncs = { + /* seg_next */ kws_seg_next, + /* seg_free */ kws_seg_free +}; +/* Create an iterator over the detections whose end frame is not later than kwss->frame - kwss->delay; returns NULL when there are none. NOTE(review): kws_detections_hyp_str() uses a strict "<" for the same cut-off, so a detection ending exactly at frame - delay is iterated here but left out of the hypothesis string - confirm whether that is intended. */ +static ps_seg_t * +kws_search_seg_iter(ps_search_t * search) +{ + kws_search_t *kwss = (kws_search_t *)search; + kws_seg_t *itor; + gnode_t *detect_head = kwss->detections->detect_list; + + while (detect_head != NULL && ((kws_detection_t*)gnode_ptr(detect_head))->ef > kwss->frame - kwss->delay) + detect_head = gnode_next(detect_head); + + if (!detect_head) + return NULL; + + itor = (kws_seg_t *)ckd_calloc(1, sizeof(*itor)); + itor->base.vt = &kws_segfuncs; + itor->base.search = search; + itor->base.lwf = 1.0; + itor->detection = detect_head; + itor->last_frame = kwss->frame - kwss->delay; + kws_seg_fill(itor); + return (ps_seg_t *)itor; +} +/* Search function table for the keyphrase search */ +static ps_searchfuncs_t kws_funcs = { + /* start: */ kws_search_start, + /* step: */ kws_search_step, + /*
finish: */ kws_search_finish, + /* reinit: */ kws_search_reinit, + /* free: */ kws_search_free, + /* lattice: */ kws_search_lattice, + /* hyp: */ kws_search_hyp, + /* prob: */ kws_search_prob, + /* seg_iter: */ kws_search_seg_iter, +}; + + +/* Activate senones for scoring: clears the active set, then activates every phone-loop HMM and every keyphrase HMM that is currently active */ +static void +kws_search_sen_active(kws_search_t * kwss) +{ + int i; + gnode_t *gn; + + acmod_clear_active(ps_search_acmod(kwss)); + + /* activate phone loop hmms */ + for (i = 0; i < kwss->n_pl; i++) + acmod_activate_hmm(ps_search_acmod(kwss), &kwss->pl_hmms[i]); + + /* activate hmms in active nodes */ + for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) { + kws_keyphrase_t *keyphrase = gnode_ptr(gn); + for (i = 0; i < keyphrase->n_hmms; i++) { + if (hmm_is_active(kws_nth_hmm(keyphrase, i))) + acmod_activate_hmm(ps_search_acmod(kwss), kws_nth_hmm(keyphrase, i)); + } + } +} + +/* +* Evaluate all the active HMMs. +* (Executed once per frame.) +* All phone-loop HMMs are evaluated unconditionally; keyphrase HMMs only +* when active. The best score seen is stored in kwss->bestscore. +*/ +static void +kws_search_hmm_eval(kws_search_t * kwss, int16 const *senscr) +{ + int32 i; + gnode_t *gn; + int32 bestscore = WORST_SCORE; + + hmm_context_set_senscore(kwss->hmmctx, senscr); + + /* evaluate hmms from phone loop */ + for (i = 0; i < kwss->n_pl; ++i) { + hmm_t *hmm = &kwss->pl_hmms[i]; + int32 score; + + score = hmm_vit_eval(hmm); + if (score BETTER_THAN bestscore) + bestscore = score; + } + /* evaluate hmms for active nodes */ + for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) { + kws_keyphrase_t *keyphrase = gnode_ptr(gn); + for (i = 0; i < keyphrase->n_hmms; i++) { + hmm_t *hmm = kws_nth_hmm(keyphrase, i); + + if (hmm_is_active(hmm)) { + int32 score; + score = hmm_vit_eval(hmm); + if (score BETTER_THAN bestscore) + bestscore = score; + } + } + } + + kwss->bestscore = bestscore; +} + +/* +* (Beam) prune the just evaluated HMMs, determine which ones remain +* active. Executed once per frame.
+*/ +static void +kws_search_hmm_prune(kws_search_t * kwss) +{ + int32 thresh, i; + gnode_t *gn; + + thresh = kwss->bestscore + kwss->beam; + + for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) { + kws_keyphrase_t *keyphrase = gnode_ptr(gn); + for (i = 0; i < keyphrase->n_hmms; i++) { + hmm_t *hmm = kws_nth_hmm(keyphrase, i); + if (hmm_is_active(hmm) && hmm_bestscore(hmm) < thresh) + hmm_clear(hmm); + } + } +} + + +/** +* Do phone transitions +*/ +static void +kws_search_trans(kws_search_t * kwss) +{ + hmm_t *pl_best_hmm = NULL; + int32 best_out_score = WORST_SCORE; + int i; + gnode_t *gn; + + /* select best hmm in phone-loop to be a predecessor */ + for (i = 0; i < kwss->n_pl; i++) + if (hmm_out_score(&kwss->pl_hmms[i]) BETTER_THAN best_out_score) { + best_out_score = hmm_out_score(&kwss->pl_hmms[i]); + pl_best_hmm = &kwss->pl_hmms[i]; + } + + /* out probs are not ready yet */ + if (!pl_best_hmm) + return; + + /* Check whether keyphrase wasn't spotted yet */ + for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) { + kws_keyphrase_t *keyphrase = gnode_ptr(gn); + hmm_t *last_hmm; + + if (keyphrase->n_hmms < 1) + continue; + + last_hmm = kws_nth_hmm(keyphrase, keyphrase->n_hmms - 1); + + if (hmm_is_active(last_hmm) + && hmm_out_score(pl_best_hmm) BETTER_THAN WORST_SCORE) { + + if (hmm_out_score(last_hmm) - hmm_out_score(pl_best_hmm) + >= keyphrase->threshold) { + + int32 prob = hmm_out_score(last_hmm) - hmm_out_score(pl_best_hmm) - KWS_MAX; + kws_detections_add(kwss->detections, keyphrase->word, + hmm_out_history(last_hmm), + kwss->frame, prob, + hmm_out_score(last_hmm)); + } /* keyphrase is spotted */ + } /* last hmm of keyphrase is active */ + } /* keyphrase loop */ + + /* Make transition for all phone loop hmms */ + for (i = 0; i < kwss->n_pl; i++) { + if (hmm_out_score(pl_best_hmm) + kwss->plp BETTER_THAN + hmm_in_score(&kwss->pl_hmms[i])) { + hmm_enter(&kwss->pl_hmms[i], + hmm_out_score(pl_best_hmm) + kwss->plp, + hmm_out_history(pl_best_hmm), kwss->frame + 
1); + } + } + + /* Activate new keyphrase nodes, enter their hmms */ + for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) { + kws_keyphrase_t *keyphrase = gnode_ptr(gn); + + if (keyphrase->n_hmms < 1) + continue; + + for (i = keyphrase->n_hmms - 1; i > 0; i--) { + hmm_t *pred_hmm = kws_nth_hmm(keyphrase, i - 1); + hmm_t *hmm = kws_nth_hmm(keyphrase, i); + + if (hmm_is_active(pred_hmm)) { + if (!hmm_is_active(hmm) + || hmm_out_score(pred_hmm) BETTER_THAN + hmm_in_score(hmm)) + hmm_enter(hmm, hmm_out_score(pred_hmm), + hmm_out_history(pred_hmm), kwss->frame + 1); + } + } + + /* Enter keyphrase start node from phone loop */ + if (hmm_out_score(pl_best_hmm) BETTER_THAN + hmm_in_score(kws_nth_hmm(keyphrase, 0))) + hmm_enter(kws_nth_hmm(keyphrase, 0), hmm_out_score(pl_best_hmm), + kwss->frame, kwss->frame + 1); + } +} + +/** + * Read the keyphrase list from keyfile into kwss->keyphrases. + * + * Each non-empty line holds one keyphrase, optionally followed by a + * detection threshold written between slashes ("phrase /1e-20/"); + * lines without that suffix get kwss->def_threshold. + * + * @return 0 on success, -1 if the file cannot be opened. + */ +static int +kws_search_read_list(kws_search_t *kwss, const char* keyfile) +{ + FILE *list_file; + lineiter_t *li; + char *line; + + if ((list_file = fopen(keyfile, "r")) == NULL) { + E_ERROR_SYSTEM("Failed to open keyphrase file '%s'", keyfile); + return -1; + } + + kwss->keyphrases = NULL; + + /* read keyphrases */ + for (li = lineiter_start_clean(list_file); li; li = lineiter_next(li)) { + size_t end; + char *slash; + kws_keyphrase_t *keyphrase; + + line = li->buf; + /* Skip empty lines so that "end" below can never wrap around. */ + if (li->len == 0 || line[0] == '\0') + continue; + + keyphrase = ckd_calloc(1, sizeof(kws_keyphrase_t)); + keyphrase->threshold = kwss->def_threshold; + + end = strlen(line) - 1; + if (line[end] == '/') { + /* Cut the closing slash, then look for the opening one. */ + line[end] = 0; + slash = strrchr(line, '/'); + if (slash != NULL) { + *slash = 0; + keyphrase->threshold = (int32) logmath_log(kwss->base.acmod->lmath, atof_c(slash + 1)) + >> SENSCR_SHIFT; + } else { + /* No opening slash: the trailing '/' belongs to the phrase, + * so restore it and keep the default threshold. */ + line[end] = '/'; + } + } + + keyphrase->word = ckd_salloc(line); + + kwss->keyphrases = glist_add_ptr(kwss->keyphrases, keyphrase); + } + + fclose(list_file); + return 0; +} + +ps_search_t * +kws_search_init(const char *name, + const char *keyphrase, + const char *keyfile, + cmd_ln_t * config, + acmod_t
* acmod, dict_t * dict, dict2pid_t * d2p) +{ + kws_search_t *kwss = (kws_search_t *) ckd_calloc(1, sizeof(*kwss)); + ps_search_init(ps_search_base(kwss), &kws_funcs, PS_SEARCH_TYPE_KWS, name, config, acmod, dict, + d2p); + + kwss->detections = (kws_detections_t *)ckd_calloc(1, sizeof(*kwss->detections)); + + kwss->beam = + (int32) logmath_log(acmod->lmath, + cmd_ln_float64_r(config, + "-beam")) >> SENSCR_SHIFT; + + kwss->plp = + (int32) logmath_log(acmod->lmath, + cmd_ln_float32_r(config, + "-kws_plp")) >> SENSCR_SHIFT; + + + kwss->def_threshold = + (int32) logmath_log(acmod->lmath, + cmd_ln_float64_r(config, + "-kws_threshold")) >> + SENSCR_SHIFT; + + kwss->delay = (int32) cmd_ln_int32_r(config, "-kws_delay"); + + E_INFO("KWS(beam: %d, plp: %d, default threshold %d, delay %d)\n", + kwss->beam, kwss->plp, kwss->def_threshold, kwss->delay); + + if (keyfile) { + if (kws_search_read_list(kwss, keyfile) < 0) { + E_ERROR("Failed to create kws search\n"); + kws_search_free(ps_search_base(kwss)); + return NULL; + } + } else { + kws_keyphrase_t *k = ckd_calloc(1, sizeof(kws_keyphrase_t)); + k->threshold = kwss->def_threshold; + k->word = ckd_salloc(keyphrase); + kwss->keyphrases = glist_add_ptr(NULL, k); + } + + /* Reinit for provided keyphrase */ + if (kws_search_reinit(ps_search_base(kwss), + ps_search_dict(kwss), + ps_search_dict2pid(kwss)) < 0) { + ps_search_free(ps_search_base(kwss)); + return NULL; + } + + ptmr_init(&kwss->perf); + + return ps_search_base(kwss); +} + +void +kws_search_free(ps_search_t * search) +{ + kws_search_t *kwss; + double n_speech; + gnode_t *gn; + + kwss = (kws_search_t *) search; + + n_speech = (double)kwss->n_tot_frame + / cmd_ln_int32_r(ps_search_config(kwss), "-frate"); + + E_INFO("TOTAL kws %.2f CPU %.3f xRT\n", + kwss->perf.t_tot_cpu, + kwss->perf.t_tot_cpu / n_speech); + E_INFO("TOTAL kws %.2f wall %.3f xRT\n", + kwss->perf.t_tot_elapsed, + kwss->perf.t_tot_elapsed / n_speech); + + + ps_search_base_free(search); + 
hmm_context_free(kwss->hmmctx); + kws_detections_reset(kwss->detections); + ckd_free(kwss->detections); + + ckd_free(kwss->pl_hmms); + for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) { + kws_keyphrase_t *keyphrase = gnode_ptr(gn); + ckd_free(keyphrase->hmms); + ckd_free(keyphrase->word); + ckd_free(keyphrase); + } + glist_free(kwss->keyphrases); + ckd_free(kwss); +} + +int +kws_search_reinit(ps_search_t * search, dict_t * dict, dict2pid_t * d2p) +{ + char **wrdptr; + char *tmp_keyphrase; + int32 wid, pronlen, in_dict; + int32 n_hmms, n_wrds; + int32 ssid, tmatid; + int i, j, p; + kws_search_t *kwss = (kws_search_t *) search; + bin_mdef_t *mdef = search->acmod->mdef; + int32 silcipid = bin_mdef_silphone(mdef); + gnode_t *gn; + + /* Free old dict2pid, dict */ + ps_search_base_reinit(search, dict, d2p); + + /* Initialize HMM context. */ + if (kwss->hmmctx) + hmm_context_free(kwss->hmmctx); + kwss->hmmctx = + hmm_context_init(bin_mdef_n_emit_state(search->acmod->mdef), + search->acmod->tmat->tp, NULL, + search->acmod->mdef->sseq); + if (kwss->hmmctx == NULL) + return -1; + + /* Initialize phone loop HMMs. 
*/ + if (kwss->pl_hmms) { + for (i = 0; i < kwss->n_pl; ++i) + hmm_deinit((hmm_t *) & kwss->pl_hmms[i]); + ckd_free(kwss->pl_hmms); + } + kwss->n_pl = bin_mdef_n_ciphone(search->acmod->mdef); + kwss->pl_hmms = + (hmm_t *) ckd_calloc(kwss->n_pl, sizeof(*kwss->pl_hmms)); + for (i = 0; i < kwss->n_pl; ++i) { + hmm_init(kwss->hmmctx, (hmm_t *) & kwss->pl_hmms[i], + FALSE, + bin_mdef_pid2ssid(search->acmod->mdef, i), + bin_mdef_pid2tmatid(search->acmod->mdef, i)); + } + + for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) { + kws_keyphrase_t *keyphrase = gnode_ptr(gn); + + /* Initialize keyphrase HMMs */ + tmp_keyphrase = (char *) ckd_salloc(keyphrase->word); + n_wrds = str2words(tmp_keyphrase, NULL, 0); + wrdptr = (char **) ckd_calloc(n_wrds, sizeof(*wrdptr)); + str2words(tmp_keyphrase, wrdptr, n_wrds); + + /* count amount of hmms */ + n_hmms = 0; + in_dict = TRUE; + for (i = 0; i < n_wrds; i++) { + wid = dict_wordid(dict, wrdptr[i]); + if (wid == BAD_S3WID) { + E_ERROR("Word '%s' in phrase '%s' is missing in the dictionary\n", wrdptr[i], keyphrase->word); + in_dict = FALSE; + break; + } + pronlen = dict_pronlen(dict, wid); + n_hmms += pronlen; + } + + if (!in_dict) { + ckd_free(wrdptr); + ckd_free(tmp_keyphrase); + continue; + } + + /* allocate node array */ + if (keyphrase->hmms) + ckd_free(keyphrase->hmms); + keyphrase->hmms = (hmm_t *) ckd_calloc(n_hmms, sizeof(hmm_t)); + keyphrase->n_hmms = n_hmms; + + /* fill node array */ + j = 0; + for (i = 0; i < n_wrds; i++) { + wid = dict_wordid(dict, wrdptr[i]); + pronlen = dict_pronlen(dict, wid); + for (p = 0; p < pronlen; p++) { + int32 ci = dict_pron(dict, wid, p); + if (p == 0) { + /* first phone of word */ + int32 rc = + pronlen > 1 ? 
dict_pron(dict, wid, 1) : silcipid; + ssid = dict2pid_ldiph_lc(d2p, ci, rc, silcipid); + } + else if (p == pronlen - 1) { + /* last phone of the word */ + int32 lc = dict_pron(dict, wid, p - 1); + xwdssid_t *rssid = dict2pid_rssid(d2p, ci, lc); + int j = rssid->cimap[silcipid]; + ssid = rssid->ssid[j]; + } + else { + /* word internal phone */ + ssid = dict2pid_internal(d2p, wid, p); + } + tmatid = bin_mdef_pid2tmatid(mdef, ci); + hmm_init(kwss->hmmctx, &keyphrase->hmms[j], FALSE, ssid, + tmatid); + j++; + } + } + + ckd_free(wrdptr); + ckd_free(tmp_keyphrase); + } + + + + return 0; +} + +int +kws_search_start(ps_search_t * search) +{ + int i; + kws_search_t *kwss = (kws_search_t *) search; + + kwss->frame = 0; + kwss->bestscore = 0; + kws_detections_reset(kwss->detections); + + /* Reset and enter all phone-loop HMMs. */ + for (i = 0; i < kwss->n_pl; ++i) { + hmm_t *hmm = (hmm_t *) & kwss->pl_hmms[i]; + hmm_clear(hmm); + hmm_enter(hmm, 0, -1, 0); + } + + ptmr_reset(&kwss->perf); + ptmr_start(&kwss->perf); + + return 0; +} + +int +kws_search_step(ps_search_t * search, int frame_idx) +{ + int16 const *senscr; + kws_search_t *kwss = (kws_search_t *) search; + acmod_t *acmod = search->acmod; + + /* Activate senones */ + if (!acmod->compallsen) + kws_search_sen_active(kwss); + + /* Calculate senone scores for current frame. */ + senscr = acmod_score(acmod, &frame_idx); + + /* Evaluate hmms in phone loop and in active keyphrase nodes */ + kws_search_hmm_eval(kwss, senscr); + + /* Prune hmms with low prob */ + kws_search_hmm_prune(kwss); + + /* Do hmms transitions */ + kws_search_trans(kwss); + + ++kwss->frame; + return 0; +} + +int +kws_search_finish(ps_search_t * search) +{ + kws_search_t *kwss; + int32 cf; + + kwss = (kws_search_t *) search; + + kwss->n_tot_frame += kwss->frame; + + /* Print out some statistics. */ + ptmr_stop(&kwss->perf); + /* This is the number of frames processed. 
*/ + cf = ps_search_acmod(kwss)->output_frame; + if (cf > 0) { + double n_speech = (double) (cf + 1) + / cmd_ln_int32_r(ps_search_config(kwss), "-frate"); + E_INFO("kws %.2f CPU %.3f xRT\n", + kwss->perf.t_cpu, kwss->perf.t_cpu / n_speech); + E_INFO("kws %.2f wall %.3f xRT\n", + kwss->perf.t_elapsed, kwss->perf.t_elapsed / n_speech); + } + + return 0; +} + +char const * +kws_search_hyp(ps_search_t * search, int32 * out_score) +{ + kws_search_t *kwss = (kws_search_t *) search; + if (out_score) + *out_score = 0; + + if (search->hyp_str) + ckd_free(search->hyp_str); + search->hyp_str = kws_detections_hyp_str(kwss->detections, kwss->frame, kwss->delay); + + return search->hyp_str; +} + +/** + * Build a string listing the keyphrases of this search, joined with + * '\n' and without a trailing newline. The string is allocated with + * ckd_calloc(); an empty keyphrase list yields an empty string. + */ +char * +kws_search_get_keyphrases(ps_search_t * search) +{ + int c, len; + kws_search_t *kwss; + char* line; + gnode_t *gn; + + kwss = (kws_search_t *) search; + + /* Each keyphrase needs its own length plus one separator/terminator. */ + len = 0; + for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) + len += strlen(((kws_keyphrase_t *)gnode_ptr(gn))->word) + 1; + + /* Without this guard an empty list would make the final + * line[--c] below write one byte before a zero-sized buffer. */ + if (len == 0) + return (char *)ckd_calloc(1, sizeof(*line)); + + c = 0; + line = (char *)ckd_calloc(len, sizeof(*line)); + for (gn = kwss->keyphrases; gn; gn = gnode_next(gn)) { + const char *str = ((kws_keyphrase_t *)gnode_ptr(gn))->word; + memcpy(&line[c], str, strlen(str)); + c += strlen(str); + line[c++] = '\n'; + } + /* Turn the last separator into the terminator. */ + line[--c] = '\0'; + + return line; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/kws_search.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/kws_search.h new file mode 100644 index 0000000000000000000000000000000000000000..47a3373432aa5efd6504ac856955383e52480dcd --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/kws_search.h @@ -0,0 +1,153 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2013 Carnegie Mellon University. All rights + * reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * kws_search.h -- Search structures for keyphrase spotting. + */ + +#ifndef __KWS_SEARCH_H__ +#define __KWS_SEARCH_H__ + +/* SphinxBase headers. */ +#include +#include + +/* Local headers. */ +#include "pocketsphinx_internal.h" +#include "kws_detections.h" +#include "hmm.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * Segmentation "iterator" for KWS history. + */ +typedef struct kws_seg_s { + ps_seg_t base; /**< Base structure. */ + gnode_t *detection; /**< Keyphrase detection correspondent to segment. 
*/ + frame_idx_t last_frame; /**< Last frame to raise the detection */ +} kws_seg_t; + +typedef struct kws_keyphrase_s { + char* word; + int32 threshold; + hmm_t* hmms; + int32 n_hmms; +} kws_keyphrase_t; + +/** + * Implementation of KWS search structure. + */ +typedef struct kws_search_s { + ps_search_t base; + + hmm_context_t *hmmctx; /**< HMM context. */ + + glist_t keyphrases; /**< Keyphrases to spot */ + + kws_detections_t *detections; /**< Keyword spotting history */ + frame_idx_t frame; /**< Frame index */ + + int32 beam; + + int32 plp; /**< Phone loop probability */ + int32 bestscore; /**< For beam pruning */ + int32 def_threshold; /**< default threshold for p(hyp)/p(altern) ratio */ + int32 delay; /**< Delay to wait for best detection score */ + + int32 n_pl; /**< Number of CI phones */ + hmm_t *pl_hmms; /**< Phone loop hmms - hmms of CI phones */ + + ptmr_t perf; /**< Performance counter */ + int32 n_tot_frame; + +} kws_search_t; + +/** + * Create, initialize and return a search module. Gets keyphrases either + * from keyphrase or from a keyphrase file. + */ +ps_search_t *kws_search_init(const char *name, + const char *keyphrase, + const char *keyfile, + cmd_ln_t * config, + acmod_t * acmod, + dict_t * dict, dict2pid_t * d2p); + +/** + * Deallocate search structure. + */ +void kws_search_free(ps_search_t * search); + +/** + * Update KWS search module for new key phrase. + */ +int kws_search_reinit(ps_search_t * kwss, dict_t * dict, dict2pid_t * d2p); + +/** + * Prepare the KWS search structure for beginning decoding of the next + * utterance. + */ +int kws_search_start(ps_search_t * search); + +/** + * Step one frame forward through the Viterbi search. + */ +int kws_search_step(ps_search_t * search, int frame_idx); + +/** + * Windup and clean the KWS search structure after utterance. + */ +int kws_search_finish(ps_search_t * search); + +/** + * Get hypothesis string from the KWS search. 
+ */ +char const *kws_search_hyp(ps_search_t * search, int32 * out_score); + +/** + * Get active keyphrases + */ +char* kws_search_get_keyphrases(ps_search_t * search); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __KWS_SEARCH_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/_jsgf_scanner.l b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/_jsgf_scanner.l new file mode 100644 index 0000000000000000000000000000000000000000..43e29ba404721e2c6fce5bd784daf2f04cbbcbe8 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/_jsgf_scanner.l @@ -0,0 +1,87 @@ +/* -*- mode: text -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* YOU MUST USE FLEX 2.6.1 OR NEWER TO PROCESS THIS FILE!!! */ +%{ + +#include "jsgf_internal.h" +#include "jsgf_parser.h" + +%} + +%option 8bit reentrant bison-bridge noyywrap yylineno never-interactive nounput nounistd +%option header-file="jsgf_scanner.h" +%s COMMENT +%s DECL +%s DECLCOMMENT + +ws [ \t\r\n] +rulename \<[^<>]+\> +tag \{(\\.|[^\}]+)*\} +weight \/[0-9]*(\.[0-9]+)?(e-)?[0-9]*\/ +token [^ \t\r\n=;|*+<>()\[\]{}*/]+ +qstring \"(\\.|[^"]+)*\" +bom [\xEF][\xBB][\xBF] + +%% + +{ws} ; /* ignore whitespace */ +\/\/.*\n ; /* single-line comments */ +\/\* { BEGIN(COMMENT); } /* C-style comments */ +\*\/ { BEGIN(INITIAL); } +. ; /* Ignore stuff in comment mode */ + +\/\/.*\n ; /* single-line comments inside decl */ +\/\* { BEGIN(DECLCOMMENT); } /* C-style comments inside decl */ +\*\/ { BEGIN(DECL); } +. 
; /* Ignore stuff in comment mode */ + +{bom}?#JSGF {BEGIN(DECL); return HEADER;} +grammar {BEGIN(DECL); return GRAMMAR;} +import {BEGIN(DECL); return IMPORT;} +public {BEGIN(DECL); return PUBLIC;} + +{rulename} { BEGIN(DECL); yylval->name = strdup(yytext); return RULENAME; } +{rulename} { yylval->name = strdup(yytext); return RULENAME; } + +{tag} { yylval->name = strdup(yytext); return TAG; } +{token} { yylval->name = strdup(yytext); return TOKEN; } +; { BEGIN(INITIAL); return yytext[0]; } +{qstring} { yylval->name = strdup(yytext); return TOKEN; } +{weight} { yylval->weight = atof_c(yytext+1); return WEIGHT; } +. return yytext[0]; /* Single-character tokens */ + +%% diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/fsg_model.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/fsg_model.c new file mode 100644 index 0000000000000000000000000000000000000000..449578377de6449dbf2f18f52b9eee110ecbfec6 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/fsg_model.c @@ -0,0 +1,940 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#include +#include +#include + +/* SphinxBase headers. */ +#include "sphinxbase/err.h" +#include "sphinxbase/pio.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/prim_type.h" +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/hash_table.h" +#include "sphinxbase/fsg_model.h" +#include "sphinxbase/bitvec.h" + +/** + * Adjacency list (opaque) for a state in an FSG. + * + * Actually we use hash tables so that random access is a bit faster. + * Plus it allows us to make the lookup code a bit less ugly. + */ + +struct trans_list_s { + hash_table_t *null_trans; /* Null transitions keyed by state. */ + hash_table_t *trans; /* Lists of non-null transitions keyed by state. */ +}; + +/** + * Implementation of arc iterator. 
+ */ +struct fsg_arciter_s { + hash_iter_t *itor, *null_itor; + gnode_t *gn; +}; + +#define FSG_MODEL_BEGIN_DECL "FSG_BEGIN" +#define FSG_MODEL_END_DECL "FSG_END" +#define FSG_MODEL_N_DECL "N" +#define FSG_MODEL_NUM_STATES_DECL "NUM_STATES" +#define FSG_MODEL_S_DECL "S" +#define FSG_MODEL_START_STATE_DECL "START_STATE" +#define FSG_MODEL_F_DECL "F" +#define FSG_MODEL_FINAL_STATE_DECL "FINAL_STATE" +#define FSG_MODEL_T_DECL "T" +#define FSG_MODEL_TRANSITION_DECL "TRANSITION" +#define FSG_MODEL_COMMENT_CHAR '#' + + +static int32 +nextline_str2words(FILE * fp, int32 * lineno, + char **lineptr, char ***wordptr) +{ + for (;;) { + size_t len; + int32 n; + + ckd_free(*lineptr); + if ((*lineptr = fread_line(fp, &len)) == NULL) + return -1; + + (*lineno)++; + + if ((*lineptr)[0] == FSG_MODEL_COMMENT_CHAR) + continue; /* Skip comment lines */ + + n = str2words(*lineptr, NULL, 0); + if (n == 0) + continue; /* Skip blank lines */ + + /* Abuse of realloc(), but this doesn't have to be fast. */ + if (*wordptr == NULL) + *wordptr = ckd_calloc(n, sizeof(**wordptr)); + else + *wordptr = ckd_realloc(*wordptr, n * sizeof(**wordptr)); + return str2words(*lineptr, *wordptr, n); + } +} + +void +fsg_model_trans_add(fsg_model_t * fsg, + int32 from, int32 to, int32 logp, int32 wid) +{ + fsg_link_t *link; + glist_t gl; + gnode_t *gn; + + if (fsg->trans[from].trans == NULL) + fsg->trans[from].trans = hash_table_new(5, HASH_CASE_YES); + + /* Check for duplicate link (i.e., link already exists with label=wid) */ + for (gn = gl = fsg_model_trans(fsg, from, to); gn; gn = gnode_next(gn)) { + link = (fsg_link_t *) gnode_ptr(gn); + if (link->wid == wid) { + if (link->logs2prob < logp) + link->logs2prob = logp; + return; + } + } + + /* Create transition object */ + link = listelem_malloc(fsg->link_alloc); + link->from_state = from; + link->to_state = to; + link->logs2prob = logp; + link->wid = wid; + + /* Add it to the list of transitions and update the hash table */ + gl = glist_add_ptr(gl, (void 
*) link); + hash_table_replace_bkey(fsg->trans[from].trans, + (char const *) &link->to_state, + sizeof(link->to_state), gl); +} + +int32 +fsg_model_tag_trans_add(fsg_model_t * fsg, int32 from, int32 to, + int32 logp, int32 wid) +{ + fsg_link_t *link, *link2; + + (void)wid; + /* Check for transition probability */ + if (logp > 0) { + E_FATAL("Null transition prob must be <= 1.0 (state %d -> %d)\n", + from, to); + } + + /* Self-loop null transitions (with prob <= 1.0) are redundant */ + if (from == to) + return -1; + + if (fsg->trans[from].null_trans == NULL) + fsg->trans[from].null_trans = hash_table_new(5, HASH_CASE_YES); + + /* Check for a duplicate link; if found, keep the higher prob */ + link = fsg_model_null_trans(fsg, from, to); + if (link) { + if (link->logs2prob < logp) { + link->logs2prob = logp; + return 0; + } + else + return -1; + } + + /* Create null transition object */ + link = listelem_malloc(fsg->link_alloc); + link->from_state = from; + link->to_state = to; + link->logs2prob = logp; + link->wid = -1; + + link2 = (fsg_link_t *) + hash_table_enter_bkey(fsg->trans[from].null_trans, + (char const *) &link->to_state, + sizeof(link->to_state), link); + assert(link == link2); + + return 1; +} + +int32 +fsg_model_null_trans_add(fsg_model_t * fsg, int32 from, int32 to, + int32 logp) +{ + return fsg_model_tag_trans_add(fsg, from, to, logp, -1); +} + +glist_t +fsg_model_null_trans_closure(fsg_model_t * fsg, glist_t nulls) +{ + gnode_t *gn1; + int updated; + fsg_link_t *tl1, *tl2; + int32 k, n; + + E_INFO("Computing transitive closure for null transitions\n"); + + /* If our caller didn't give us a list of null-transitions, + make such a list. Just loop through all the FSG states, + and all the null-transitions in that state (which are kept in + their own hash table). 
*/ + if (nulls == NULL) { + int i; + for (i = 0; i < fsg->n_state; ++i) { + hash_iter_t *itor; + hash_table_t *null_trans = fsg->trans[i].null_trans; + if (null_trans == NULL) + continue; + for (itor = hash_table_iter(null_trans); + itor != NULL; itor = hash_table_iter_next(itor)) { + nulls = glist_add_ptr(nulls, hash_entry_val(itor->ent)); + } + } + } + + /* + * Probably not the most efficient closure implementation, in general, but + * probably reasonably efficient for a sparse null transition matrix. + */ + n = 0; + do { + updated = FALSE; + + for (gn1 = nulls; gn1; gn1 = gnode_next(gn1)) { + hash_iter_t *itor; + + tl1 = (fsg_link_t *) gnode_ptr(gn1); + assert(tl1->wid < 0); + + if (fsg->trans[tl1->to_state].null_trans == NULL) + continue; + + for (itor = + hash_table_iter(fsg->trans[tl1->to_state].null_trans); + itor; itor = hash_table_iter_next(itor)) { + + tl2 = (fsg_link_t *) hash_entry_val(itor->ent); + + k = fsg_model_null_trans_add(fsg, + tl1->from_state, + tl2->to_state, + tl1->logs2prob + + tl2->logs2prob); + if (k >= 0) { + updated = TRUE; + if (k > 0) { + nulls = glist_add_ptr(nulls, (void *) + fsg_model_null_trans + (fsg, tl1->from_state, + tl2->to_state)); + n++; + } + } + } + } + } while (updated); + + E_INFO("%d null transitions added\n", n); + + return nulls; +} + +glist_t +fsg_model_trans(fsg_model_t * fsg, int32 i, int32 j) +{ + void *val; + + if (fsg->trans[i].trans == NULL) + return NULL; + if (hash_table_lookup_bkey(fsg->trans[i].trans, (char const *) &j, + sizeof(j), &val) < 0) + return NULL; + return (glist_t) val; +} + +fsg_link_t * +fsg_model_null_trans(fsg_model_t * fsg, int32 i, int32 j) +{ + void *val; + + if (fsg->trans[i].null_trans == NULL) + return NULL; + if (hash_table_lookup_bkey(fsg->trans[i].null_trans, (char const *) &j, + sizeof(j), &val) < 0) + return NULL; + return (fsg_link_t *) val; +} + +fsg_arciter_t * +fsg_model_arcs(fsg_model_t * fsg, int32 i) +{ + fsg_arciter_t *itor; + + if (fsg->trans[i].trans == NULL && 
fsg->trans[i].null_trans == NULL) + return NULL; + itor = ckd_calloc(1, sizeof(*itor)); + if (fsg->trans[i].null_trans) + itor->null_itor = hash_table_iter(fsg->trans[i].null_trans); + if (fsg->trans[i].trans) + itor->itor = hash_table_iter(fsg->trans[i].trans); + if (itor->itor != NULL) + itor->gn = hash_entry_val(itor->itor->ent); + return itor; +} + +/* Return the arc the iterator currently points at: the current non-null + * arc if there is one, otherwise the current null arc, otherwise NULL. */ +fsg_link_t * +fsg_arciter_get(fsg_arciter_t * itor) +{ + /* Iterate over non-null arcs first. */ + if (itor->gn) + return (fsg_link_t *) gnode_ptr(itor->gn); + else if (itor->null_itor) + return (fsg_link_t *) hash_entry_val(itor->null_itor->ent); + else + return NULL; +} + +/* Advance the iterator by one arc. When no arcs remain the iterator is + * freed here and NULL is returned, so it must not be used afterwards. */ +fsg_arciter_t * +fsg_arciter_next(fsg_arciter_t * itor) +{ + /* Iterate over non-null arcs first. */ + if (itor->gn) { + itor->gn = gnode_next(itor->gn); + /* Move to the next destination arc. */ + if (itor->gn == NULL) { + itor->itor = hash_table_iter_next(itor->itor); + if (itor->itor != NULL) + itor->gn = hash_entry_val(itor->itor->ent); + else if (itor->null_itor == NULL) + goto stop_iteration; + } + } + else { + if (itor->null_itor == NULL) + goto stop_iteration; + itor->null_itor = hash_table_iter_next(itor->null_itor); + if (itor->null_itor == NULL) + goto stop_iteration; + } + return itor; + stop_iteration: + fsg_arciter_free(itor); + return NULL; + +} + +/* Release an arc iterator and both of its hash iterators; NULL is accepted. */ +void +fsg_arciter_free(fsg_arciter_t * itor) +{ + if (itor == NULL) + return; + hash_table_iter_free(itor->null_itor); + hash_table_iter_free(itor->itor); + ckd_free(itor); +} + +/* Look up word in the FSG vocabulary with a linear, case-sensitive scan. + * Returns its index, or -1 if it is absent; the vocabulary is not modified. */ +int +fsg_model_word_id(fsg_model_t * fsg, char const *word) +{ + int wid; + + /* Search for an existing word matching this. */ + for (wid = 0; wid < fsg->n_word; ++wid) { + if (0 == strcmp(fsg->vocab[wid], word)) + break; + } + /* If not found, report that with -1 (nothing is added here). */ + if (wid == fsg->n_word) + return -1; + return wid; +} + +int +fsg_model_word_add(fsg_model_t * fsg, char const *word) +{ + int wid, old_size; + + /* Search for an existing word matching this.
*/ + wid = fsg_model_word_id(fsg, word); + /* If not found, add this to the vocab. */ + if (wid == -1) { + wid = fsg->n_word; + if (fsg->n_word == fsg->n_word_alloc) { + old_size = fsg->n_word_alloc; + fsg->n_word_alloc += 10; + fsg->vocab = ckd_realloc(fsg->vocab, + fsg->n_word_alloc * + sizeof(*fsg->vocab)); + if (fsg->silwords) + fsg->silwords = + bitvec_realloc(fsg->silwords, old_size, + fsg->n_word_alloc); + if (fsg->altwords) + fsg->altwords = + bitvec_realloc(fsg->altwords, old_size, + fsg->n_word_alloc); + } + ++fsg->n_word; + fsg->vocab[wid] = ckd_salloc(word); + } + return wid; +} + +int +fsg_model_add_silence(fsg_model_t * fsg, char const *silword, + int state, float32 silprob) +{ + int32 logsilp; + int n_trans, silwid, src; + + E_INFO("Adding silence transitions for %s to FSG\n", silword); + + silwid = fsg_model_word_add(fsg, silword); + logsilp = (int32) (logmath_log(fsg->lmath, silprob) * fsg->lw); + if (fsg->silwords == NULL) + fsg->silwords = bitvec_alloc(fsg->n_word_alloc); + bitvec_set(fsg->silwords, silwid); + + n_trans = 0; + if (state == -1) { + for (src = 0; src < fsg->n_state; src++) { + fsg_model_trans_add(fsg, src, src, logsilp, silwid); + ++n_trans; + } + } + else { + fsg_model_trans_add(fsg, state, state, logsilp, silwid); + ++n_trans; + } + + E_INFO("Added %d silence word transitions\n", n_trans); + return n_trans; +} + +int +fsg_model_add_alt(fsg_model_t * fsg, char const *baseword, + char const *altword) +{ + int i, basewid, altwid; + int ntrans; + + /* FIXME: This will get slow, eventually... 
*/ + for (basewid = 0; basewid < fsg->n_word; ++basewid) + if (0 == strcmp(fsg->vocab[basewid], baseword)) + break; + if (basewid == fsg->n_word) { + E_ERROR("Base word %s not present in FSG vocabulary!\n", baseword); + return -1; + } + altwid = fsg_model_word_add(fsg, altword); + if (fsg->altwords == NULL) + fsg->altwords = bitvec_alloc(fsg->n_word_alloc); + bitvec_set(fsg->altwords, altwid); + if (fsg_model_is_filler(fsg, basewid)) { + if (fsg->silwords == NULL) + fsg->silwords = bitvec_alloc(fsg->n_word_alloc); + bitvec_set(fsg->silwords, altwid); + } + + E_DEBUG("Adding alternate word transitions (%s,%s) to FSG\n", + baseword, altword); + + /* Look for all transitions involving baseword and duplicate them. */ + /* FIXME: This will also get slow, eventually... */ + ntrans = 0; + for (i = 0; i < fsg->n_state; ++i) { + hash_iter_t *itor; + if (fsg->trans[i].trans == NULL) + continue; + for (itor = hash_table_iter(fsg->trans[i].trans); itor; + itor = hash_table_iter_next(itor)) { + glist_t trans; + gnode_t *gn; + + trans = hash_entry_val(itor->ent); + for (gn = trans; gn; gn = gnode_next(gn)) { + fsg_link_t *fl = gnode_ptr(gn); + if (fl->wid == basewid) { + fsg_link_t *link; + + /* Create transition object */ + link = listelem_malloc(fsg->link_alloc); + link->from_state = fl->from_state; + link->to_state = fl->to_state; + link->logs2prob = fl->logs2prob; /* FIXME!!!??? */ + link->wid = altwid; + + trans = glist_add_ptr(trans, (void *) link); + ++ntrans; + } + } + hash_entry_val(itor->ent) = trans; + } + } + + E_DEBUG("Added %d alternate word transitions\n", ntrans); + return ntrans; +} + + +fsg_model_t * +fsg_model_init(char const *name, logmath_t * lmath, float32 lw, + int32 n_state) +{ + fsg_model_t *fsg; + + /* Allocate basic stuff. */ + fsg = ckd_calloc(1, sizeof(*fsg)); + fsg->refcount = 1; + fsg->link_alloc = listelem_alloc_init(sizeof(fsg_link_t)); + fsg->lmath = lmath; + fsg->name = name ? 
ckd_salloc(name) : NULL; + fsg->n_state = n_state; + fsg->lw = lw; + + fsg->trans = ckd_calloc(fsg->n_state, sizeof(*fsg->trans)); + + return fsg; +} + +fsg_model_t * +fsg_model_read(FILE * fp, logmath_t * lmath, float32 lw) +{ + fsg_model_t *fsg; + hash_table_t *vocab; + hash_iter_t *itor; + int32 lastwid; + char **wordptr; + char *lineptr; + char *fsgname; + int32 lineno; + int32 n, i, j; + int n_state, n_trans, n_null_trans; + glist_t nulls; + float32 p; + + lineno = 0; + vocab = hash_table_new(32, FALSE); + wordptr = NULL; + lineptr = NULL; + nulls = NULL; + fsgname = NULL; + fsg = NULL; + + /* Scan upto FSG_BEGIN header */ + for (;;) { + n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); + if (n < 0) { + E_ERROR("%s declaration missing\n", FSG_MODEL_BEGIN_DECL); + goto parse_error; + } + + if ((strcmp(wordptr[0], FSG_MODEL_BEGIN_DECL) == 0)) { + if (n > 2) { + E_ERROR("Line[%d]: malformed FSG_BEGIN declaration\n", + lineno); + goto parse_error; + } + break; + } + } + /* Save FSG name, or it will get clobbered below :(. + * If name is missing, try the default. + */ + if (n == 2) { + fsgname = ckd_salloc(wordptr[1]); + } + else { + E_WARN("FSG name is missing\n"); + fsgname = ckd_salloc("unknown"); + } + + /* Read #states */ + n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); + if ((n != 2) + || ((strcmp(wordptr[0], FSG_MODEL_N_DECL) != 0) + && (strcmp(wordptr[0], FSG_MODEL_NUM_STATES_DECL) != 0)) + || (sscanf(wordptr[1], "%d", &n_state) != 1) + || (n_state <= 0)) { + E_ERROR + ("Line[%d]: #states declaration line missing or malformed\n", + lineno); + goto parse_error; + } + + /* Now create the FSG. 
*/ + fsg = fsg_model_init(fsgname, lmath, lw, n_state); + ckd_free(fsgname); + fsgname = NULL; + + /* Read start state */ + n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); + if ((n != 2) + || ((strcmp(wordptr[0], FSG_MODEL_S_DECL) != 0) + && (strcmp(wordptr[0], FSG_MODEL_START_STATE_DECL) != 0)) + || (sscanf(wordptr[1], "%d", &(fsg->start_state)) != 1) + || (fsg->start_state < 0) + || (fsg->start_state >= fsg->n_state)) { + E_ERROR + ("Line[%d]: start state declaration line missing or malformed\n", + lineno); + goto parse_error; + } + + /* Read final state */ + n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); + if ((n != 2) + || ((strcmp(wordptr[0], FSG_MODEL_F_DECL) != 0) + && (strcmp(wordptr[0], FSG_MODEL_FINAL_STATE_DECL) != 0)) + || (sscanf(wordptr[1], "%d", &(fsg->final_state)) != 1) + || (fsg->final_state < 0) + || (fsg->final_state >= fsg->n_state)) { + E_ERROR + ("Line[%d]: final state declaration line missing or malformed\n", + lineno); + goto parse_error; + } + + /* Read transitions */ + lastwid = 0; + n_trans = n_null_trans = 0; + for (;;) { + int32 wid, tprob; + + n = nextline_str2words(fp, &lineno, &lineptr, &wordptr); + if (n <= 0) { + E_ERROR("Line[%d]: transition or FSG_END statement expected\n", + lineno); + goto parse_error; + } + + if ((strcmp(wordptr[0], FSG_MODEL_END_DECL) == 0)) { + break; + } + + if ((strcmp(wordptr[0], FSG_MODEL_T_DECL) == 0) + || (strcmp(wordptr[0], FSG_MODEL_TRANSITION_DECL) == 0)) { + + + if (((n != 4) && (n != 5)) + || (sscanf(wordptr[1], "%d", &i) != 1) + || (sscanf(wordptr[2], "%d", &j) != 1) + || (i < 0) || (i >= fsg->n_state) + || (j < 0) || (j >= fsg->n_state)) { + E_ERROR + ("Line[%d]: transition spec malformed; Expecting: from-state to-state trans-prob [word]\n", + lineno); + goto parse_error; + } + + p = atof_c(wordptr[3]); + if ((p <= 0.0) || (p > 1.0)) { + E_ERROR + ("Line[%d]: transition spec malformed; Expecting float as transition probability\n", + lineno); + goto parse_error; + } + } + else 
{ + E_ERROR("Line[%d]: transition or FSG_END statement expected\n", + lineno); + goto parse_error; + } + + tprob = (int32) (logmath_log(lmath, p) * fsg->lw); + /* Add word to "dictionary". */ + if (n > 4) { + if (hash_table_lookup_int32(vocab, wordptr[4], &wid) < 0) { + (void) hash_table_enter_int32(vocab, + ckd_salloc(wordptr[4]), + lastwid); + wid = lastwid; + ++lastwid; + } + fsg_model_trans_add(fsg, i, j, tprob, wid); + ++n_trans; + } + else { + if (fsg_model_null_trans_add(fsg, i, j, tprob) == 1) { + ++n_null_trans; + nulls = + glist_add_ptr(nulls, fsg_model_null_trans(fsg, i, j)); + } + } + } + + E_INFO("FSG: %d states, %d unique words, %d transitions (%d null)\n", + fsg->n_state, hash_table_inuse(vocab), n_trans, n_null_trans); + + + /* Now create a string table from the "dictionary" */ + fsg->n_word = hash_table_inuse(vocab); + fsg->n_word_alloc = fsg->n_word + 10; /* Pad it a bit. */ + fsg->vocab = ckd_calloc(fsg->n_word_alloc, sizeof(*fsg->vocab)); + for (itor = hash_table_iter(vocab); itor; + itor = hash_table_iter_next(itor)) { + char const *word = hash_entry_key(itor->ent); + int32 wid = (int32) (long) hash_entry_val(itor->ent); + fsg->vocab[wid] = (char *) word; + } + hash_table_free(vocab); + + /* Do transitive closure on null transitions */ + nulls = fsg_model_null_trans_closure(fsg, nulls); + glist_free(nulls); + + ckd_free(lineptr); + ckd_free(wordptr); + + return fsg; + + parse_error: + for (itor = hash_table_iter(vocab); itor; + itor = hash_table_iter_next(itor)) + ckd_free((char *) hash_entry_key(itor->ent)); + glist_free(nulls); + hash_table_free(vocab); + ckd_free(fsgname); + ckd_free(lineptr); + ckd_free(wordptr); + fsg_model_free(fsg); + return NULL; +} + + +fsg_model_t * +fsg_model_readfile(const char *file, logmath_t * lmath, float32 lw) +{ + FILE *fp; + fsg_model_t *fsg; + + if ((fp = fopen(file, "r")) == NULL) { + E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file); + return NULL; + } + fsg = fsg_model_read(fp, lmath, lw); 
+ fclose(fp); + return fsg; +} + +fsg_model_t * +fsg_model_retain(fsg_model_t * fsg) +{ + ++fsg->refcount; + return fsg; +} + +static void +trans_list_free(fsg_model_t * fsg, int32 i) +{ + hash_iter_t *itor; + + /* FIXME (maybe): FSG links will all get freed when we call + * listelem_alloc_free() so don't bother freeing them explicitly + * here. */ + if (fsg->trans[i].trans) { + for (itor = hash_table_iter(fsg->trans[i].trans); + itor; itor = hash_table_iter_next(itor)) { + glist_t gl = (glist_t) hash_entry_val(itor->ent); + glist_free(gl); + } + } + hash_table_free(fsg->trans[i].trans); + hash_table_free(fsg->trans[i].null_trans); +} + +int +fsg_model_free(fsg_model_t * fsg) +{ + int i; + + if (fsg == NULL) + return 0; + + if (--fsg->refcount > 0) + return fsg->refcount; + + for (i = 0; i < fsg->n_word; ++i) + ckd_free(fsg->vocab[i]); + for (i = 0; i < fsg->n_state; ++i) + trans_list_free(fsg, i); + ckd_free(fsg->trans); + ckd_free(fsg->vocab); + listelem_alloc_free(fsg->link_alloc); + bitvec_free(fsg->silwords); + bitvec_free(fsg->altwords); + ckd_free(fsg->name); + ckd_free(fsg); + return 0; +} + + +void +fsg_model_write(fsg_model_t * fsg, FILE * fp) +{ + int32 i; + + fprintf(fp, "%s %s\n", FSG_MODEL_BEGIN_DECL, + fsg->name ? fsg->name : ""); + fprintf(fp, "%s %d\n", FSG_MODEL_NUM_STATES_DECL, fsg->n_state); + fprintf(fp, "%s %d\n", FSG_MODEL_START_STATE_DECL, fsg->start_state); + fprintf(fp, "%s %d\n", FSG_MODEL_FINAL_STATE_DECL, fsg->final_state); + + for (i = 0; i < fsg->n_state; i++) { + fsg_arciter_t *itor; + + for (itor = fsg_model_arcs(fsg, i); itor; + itor = fsg_arciter_next(itor)) { + fsg_link_t *tl = fsg_arciter_get(itor); + + fprintf(fp, "%s %d %d %f %s\n", FSG_MODEL_TRANSITION_DECL, + tl->from_state, tl->to_state, + logmath_exp(fsg->lmath, + (int32) (tl->logs2prob / fsg->lw)), + (tl->wid < 0) ? 
"" : fsg_model_word_str(fsg, tl->wid)); + } + } + + fprintf(fp, "%s\n", FSG_MODEL_END_DECL); + + fflush(fp); +} + +void +fsg_model_writefile(fsg_model_t * fsg, char const *file) +{ + FILE *fp; + + assert(fsg); + + E_INFO("Writing FSG file '%s'\n", file); + + if ((fp = fopen(file, "w")) == NULL) { + E_ERROR_SYSTEM("Failed to open FSG file '%s' for reading", file); + return; + } + + fsg_model_write(fsg, fp); + + fclose(fp); +} + +static void +fsg_model_write_fsm_trans(fsg_model_t * fsg, int i, FILE * fp) +{ + fsg_arciter_t *itor; + + for (itor = fsg_model_arcs(fsg, i); itor; + itor = fsg_arciter_next(itor)) { + fsg_link_t *tl = fsg_arciter_get(itor); + fprintf(fp, "%d %d %s %f\n", + tl->from_state, tl->to_state, + (tl->wid < 0) ? "" : fsg_model_word_str(fsg, tl->wid), + -logmath_log_to_ln(fsg->lmath, tl->logs2prob / fsg->lw)); + } +} + +void +fsg_model_write_fsm(fsg_model_t * fsg, FILE * fp) +{ + int i; + + /* Write transitions from initial state first. */ + fsg_model_write_fsm_trans(fsg, fsg_model_start_state(fsg), fp); + + /* Other states. */ + for (i = 0; i < fsg->n_state; i++) { + if (i == fsg_model_start_state(fsg)) + continue; + fsg_model_write_fsm_trans(fsg, i, fp); + } + + /* Final state. 
*/ + fprintf(fp, "%d 0\n", fsg_model_final_state(fsg)); + + fflush(fp); +} + +void +fsg_model_writefile_fsm(fsg_model_t * fsg, char const *file) +{ + FILE *fp; + + assert(fsg); + + E_INFO("Writing FSM file '%s'\n", file); + + if ((fp = fopen(file, "w")) == NULL) { + E_ERROR_SYSTEM("Failed to open fsm file '%s' for writing", file); + return; + } + + fsg_model_write_fsm(fsg, fp); + + fclose(fp); +} + +void +fsg_model_write_symtab(fsg_model_t * fsg, FILE * file) +{ + int i; + + fprintf(file, " 0\n"); + for (i = 0; i < fsg_model_n_word(fsg); ++i) { + fprintf(file, "%s %d\n", fsg_model_word_str(fsg, i), i + 1); + } + fflush(file); +} + +void +fsg_model_writefile_symtab(fsg_model_t * fsg, char const *file) +{ + FILE *fp; + + assert(fsg); + + E_INFO("Writing FSM symbol table '%s'\n", file); + + if ((fp = fopen(file, "w")) == NULL) { + E_ERROR("Failed to open symbol table '%s' for writing", file); + return; + } + + fsg_model_write_symtab(fsg, fp); + + fclose(fp); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf.c new file mode 100644 index 0000000000000000000000000000000000000000..ab123734e57c9d1e927eaafcf248de3488902f36 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf.c @@ -0,0 +1,966 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#include +#include + +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/hash_table.h" +#include "sphinxbase/filename.h" +#include "sphinxbase/err.h" +#include "sphinxbase/jsgf.h" + +#include "jsgf_internal.h" +#include "jsgf_parser.h" +#include "jsgf_scanner.h" + +extern int yyparse(void *scanner, jsgf_t * jsgf); + +/** + * \file jsgf.c + * + * This file implements the data structures for parsing JSGF grammars + * into Sphinx finite-state grammars. 
+ **/ + +static int expand_rule(jsgf_t * grammar, jsgf_rule_t * rule, + int rule_entry, int rule_exit); + +jsgf_atom_t * +jsgf_atom_new(char *name, float weight) +{ + jsgf_atom_t *atom; + + atom = ckd_calloc(1, sizeof(*atom)); + atom->name = ckd_salloc(name); + atom->weight = weight; + return atom; +} + +int +jsgf_atom_free(jsgf_atom_t * atom) +{ + if (atom == NULL) + return 0; + ckd_free(atom->name); + ckd_free(atom); + return 0; +} + +jsgf_t * +jsgf_grammar_new(jsgf_t * parent) +{ + jsgf_t *grammar; + + grammar = ckd_calloc(1, sizeof(*grammar)); + /* If this is an imported/subgrammar, then we will share a global + * namespace with the parent grammar. */ + if (parent) { + grammar->rules = parent->rules; + grammar->imports = parent->imports; + grammar->searchpath = parent->searchpath; + grammar->parent = parent; + } + else { + grammar->rules = hash_table_new(64, 0); + grammar->imports = hash_table_new(16, 0); + } + + return grammar; +} + +void +jsgf_grammar_free(jsgf_t * jsgf) +{ + /* FIXME: Probably should just use refcounting instead. 
*/ + if (jsgf->parent == NULL) { + hash_iter_t *itor; + gnode_t *gn; + + for (itor = hash_table_iter(jsgf->rules); itor; + itor = hash_table_iter_next(itor)) { + ckd_free((char *) itor->ent->key); + jsgf_rule_free((jsgf_rule_t *) itor->ent->val); + } + hash_table_free(jsgf->rules); + for (itor = hash_table_iter(jsgf->imports); itor; + itor = hash_table_iter_next(itor)) { + ckd_free((char *) itor->ent->key); + jsgf_grammar_free((jsgf_t *) itor->ent->val); + } + hash_table_free(jsgf->imports); + for (gn = jsgf->searchpath; gn; gn = gnode_next(gn)) + ckd_free(gnode_ptr(gn)); + glist_free(jsgf->searchpath); + for (gn = jsgf->links; gn; gn = gnode_next(gn)) + ckd_free(gnode_ptr(gn)); + glist_free(jsgf->links); + } + ckd_free(jsgf->name); + ckd_free(jsgf->version); + ckd_free(jsgf->charset); + ckd_free(jsgf->locale); + ckd_free(jsgf); +} + +static void +jsgf_rhs_free(jsgf_rhs_t * rhs) +{ + gnode_t *gn; + + if (rhs == NULL) + return; + + jsgf_rhs_free(rhs->alt); + for (gn = rhs->atoms; gn; gn = gnode_next(gn)) + jsgf_atom_free(gnode_ptr(gn)); + glist_free(rhs->atoms); + ckd_free(rhs); +} + +jsgf_atom_t * +jsgf_kleene_new(jsgf_t * jsgf, jsgf_atom_t * atom, int plus) +{ + jsgf_rule_t *rule; + jsgf_atom_t *rule_atom; + jsgf_rhs_t *rhs; + + /* Generate an "internal" rule of the form ( | ) */ + /* Or if plus is true, ( | ) */ + rhs = ckd_calloc(1, sizeof(*rhs)); + if (plus) + rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new(atom->name, 1.0)); + else + rhs->atoms = glist_add_ptr(NULL, jsgf_atom_new("", 1.0)); + rule = jsgf_define_rule(jsgf, NULL, rhs, 0); + rule_atom = jsgf_atom_new(rule->name, 1.0); + rhs = ckd_calloc(1, sizeof(*rhs)); + rhs->atoms = glist_add_ptr(NULL, rule_atom); + rhs->atoms = glist_add_ptr(rhs->atoms, atom); + rule->rhs->alt = rhs; + + return jsgf_atom_new(rule->name, 1.0); +} + +jsgf_rule_t * +jsgf_optional_new(jsgf_t * jsgf, jsgf_rhs_t * exp) +{ + jsgf_rhs_t *rhs = ckd_calloc(1, sizeof(*rhs)); + jsgf_atom_t *atom = jsgf_atom_new("", 1.0); + rhs->alt = exp; 
+ rhs->atoms = glist_add_ptr(NULL, atom); + return jsgf_define_rule(jsgf, NULL, rhs, 0); +} + +void +jsgf_add_link(jsgf_t * grammar, jsgf_atom_t * atom, int from, int to) +{ + jsgf_link_t *link; + + link = ckd_calloc(1, sizeof(*link)); + link->from = from; + link->to = to; + link->atom = atom; + grammar->links = glist_add_ptr(grammar->links, link); +} + +static char * +extract_grammar_name(char *rule_name) +{ + char *dot_pos; + char *grammar_name = ckd_salloc(rule_name + 1); + if ((dot_pos = strrchr(grammar_name + 1, '.')) == NULL) { + ckd_free(grammar_name); + return NULL; + } + *dot_pos = '\0'; + return grammar_name; +} + +char const * +jsgf_grammar_name(jsgf_t * jsgf) +{ + return jsgf->name; +} + +static char * +jsgf_fullname(jsgf_t * jsgf, const char *name) +{ + char *fullname; + + /* Check if it is already qualified */ + if (strchr(name + 1, '.')) + return ckd_salloc(name); + + /* Skip leading < in name */ + fullname = ckd_malloc(strlen(jsgf->name) + strlen(name) + 4); + sprintf(fullname, "<%s.%s", jsgf->name, name + 1); + return fullname; +} + +static char * +jsgf_fullname_from_rule(jsgf_rule_t * rule, const char *name) +{ + char *fullname, *grammar_name; + + /* Check if it is already qualified */ + if (strchr(name + 1, '.')) + return ckd_salloc(name); + + /* Skip leading < in name */ + if ((grammar_name = extract_grammar_name(rule->name)) == NULL) + return ckd_salloc(name); + fullname = ckd_malloc(strlen(grammar_name) + strlen(name) + 4); + sprintf(fullname, "<%s.%s", grammar_name, name + 1); + ckd_free(grammar_name); + + return fullname; +} + +/* Extract as rulename everything after the secondlast dot, if existent. + * Because everything before the secondlast dot is the path-specification. 
*/ +static char * +importname2rulename(char *importname) +{ + char *rulename = ckd_salloc(importname); + char *last_dotpos; + char *secondlast_dotpos; + + if ((last_dotpos = strrchr(rulename + 1, '.')) != NULL) { + *last_dotpos = '\0'; + if ((secondlast_dotpos = strrchr(rulename + 1, '.')) != NULL) { + *last_dotpos = '.'; + *secondlast_dotpos = '<'; + secondlast_dotpos = ckd_salloc(secondlast_dotpos); + ckd_free(rulename); + return secondlast_dotpos; + } + else { + *last_dotpos = '.'; + return rulename; + } + } + else { + return rulename; + } +} + +#define NO_NODE -1 +#define RECURSIVE_NODE -2 + +/** + * + * Expand a right-hand-side of a rule (i.e. a single alternate). + * + * @returns the FSG state at the end of this rule, NO_NODE if there's an + * error, and RECURSIVE_NODE if the right-hand-side ended in right-recursion (i.e. + * a link to an earlier FSG state). + */ +static int +expand_rhs(jsgf_t * grammar, jsgf_rule_t * rule, jsgf_rhs_t * rhs, + int rule_entry, int rule_exit) +{ + gnode_t *gn; + int lastnode; + + /* Last node expanded in this sequence. */ + lastnode = rule_entry; + + /* Iterate over atoms in rhs and generate links/nodes */ + for (gn = rhs->atoms; gn; gn = gnode_next(gn)) { + jsgf_atom_t *atom = gnode_ptr(gn); + + if (jsgf_atom_is_rule(atom)) { + jsgf_rule_t *subrule; + char *fullname; + gnode_t *subnode; + jsgf_rule_stack_t *rule_stack_entry = NULL; + + /* Special case for and pseudo-rules + If this is the only atom in the rhs, and it's the + first rhs in the rule, then emit a null transition, + creating an exit state if needed. 
*/ + if (0 == strcmp(atom->name, "")) { + if (gn == rhs->atoms && gnode_next(gn) == NULL) { + if (rule_exit == NO_NODE) { + jsgf_add_link(grammar, atom, + lastnode, grammar->nstate); + rule_exit = lastnode = grammar->nstate; + ++grammar->nstate; + } + else { + jsgf_add_link(grammar, atom, lastnode, rule_exit); + } + } + continue; + } + else if (0 == strcmp(atom->name, "")) { + /* Make this entire RHS unspeakable */ + return NO_NODE; + } + + fullname = jsgf_fullname_from_rule(rule, atom->name); + if (hash_table_lookup + (grammar->rules, fullname, (void **) &subrule) == -1) { + E_ERROR("Undefined rule in RHS: %s\n", fullname); + ckd_free(fullname); + return NO_NODE; + } + ckd_free(fullname); + + /* Look for this subrule in the stack of expanded rules */ + for (subnode = grammar->rulestack; subnode; + subnode = gnode_next(subnode)) { + rule_stack_entry = + (jsgf_rule_stack_t *) gnode_ptr(subnode); + if (rule_stack_entry->rule == subrule) + break; + } + + if (subnode != NULL) { + /* Allow right-recursion only. */ + if (gnode_next(gn) != NULL) { + E_ERROR + ("Only right-recursion is permitted (in %s.%s)\n", + grammar->name, rule->name); + return NO_NODE; + } + /* Add a link back to the beginning of this rule instance */ + E_INFO("Right recursion %s %d => %d\n", atom->name, + lastnode, rule_stack_entry->entry); + jsgf_add_link(grammar, atom, lastnode, + rule_stack_entry->entry); + + /* Let our caller know that this rhs didn't reach an + end state. */ + lastnode = RECURSIVE_NODE; + } + else { + /* If this is the last atom in this rhs, link its + expansion to the parent rule's exit state. + Otherwise, create a new exit state for it. 
*/ + int subruleexit = NO_NODE; + if (gnode_next(gn) == NULL && rule_exit >= 0) + subruleexit = rule_exit; + + /* Expand the subrule */ + lastnode = + expand_rule(grammar, subrule, lastnode, subruleexit); + + if (lastnode == NO_NODE) + return NO_NODE; + } + } + else { + /* An exit-state is created if this isn't the last atom + in the rhs, or if the containing rule doesn't have an + exit state yet. + Otherwise, the rhs's exit state becomes the containing + rule's exit state. */ + int exitstate; + if (gnode_next(gn) == NULL && rule_exit >= 0) { + exitstate = rule_exit; + } + else { + exitstate = grammar->nstate; + ++grammar->nstate; + } + + /* Add a link for this token */ + jsgf_add_link(grammar, atom, lastnode, exitstate); + lastnode = exitstate; + } + } + + return lastnode; +} + +static int +expand_rule(jsgf_t * grammar, jsgf_rule_t * rule, int rule_entry, + int rule_exit) +{ + jsgf_rule_stack_t *rule_stack_entry; + jsgf_rhs_t *rhs; + + /* Push this rule onto the stack */ + rule_stack_entry = + (jsgf_rule_stack_t *) ckd_calloc(1, sizeof(jsgf_rule_stack_t)); + rule_stack_entry->rule = rule; + rule_stack_entry->entry = rule_entry; + grammar->rulestack = glist_add_ptr(grammar->rulestack, + rule_stack_entry); + + for (rhs = rule->rhs; rhs; rhs = rhs->alt) { + int lastnode; + + lastnode = expand_rhs(grammar, rule, rhs, rule_entry, rule_exit); + + if (lastnode == NO_NODE) { + return NO_NODE; + } + else if (lastnode == RECURSIVE_NODE) { + /* The rhs ended with right-recursion, i.e. a transition to + an earlier state. Nothing needs to happen at this level. */ + ; + } + else if (rule_exit == NO_NODE) { + /* If this rule doesn't have an exit state yet, use the exit + state of its first right-hand-side. + All other right-hand-sides will use this exit state. */ + assert(lastnode >= 0); + rule_exit = lastnode; + } + } + + /* If no exit-state was created, use the entry-state. 
*/ + if (rule_exit == NO_NODE) { + rule_exit = rule_entry; + } + + /* Pop this rule from the rule stack */ + ckd_free(gnode_ptr(grammar->rulestack)); + grammar->rulestack = gnode_free(grammar->rulestack, NULL); + + return rule_exit; +} + +jsgf_rule_iter_t * +jsgf_rule_iter(jsgf_t * grammar) +{ + return hash_table_iter(grammar->rules); +} + +jsgf_rule_t * +jsgf_get_rule(jsgf_t * grammar, char const *name) +{ + void *val; + char *fullname; + + fullname = string_join("<", name, ">", NULL); + if (hash_table_lookup(grammar->rules, fullname, &val) < 0) { + ckd_free(fullname); + return NULL; + } + ckd_free(fullname); + return (jsgf_rule_t *) val; +} + +jsgf_rule_t * +jsgf_get_public_rule(jsgf_t * grammar) +{ + jsgf_rule_iter_t *itor; + jsgf_rule_t *public_rule = NULL; + + for (itor = jsgf_rule_iter(grammar); itor; + itor = jsgf_rule_iter_next(itor)) { + jsgf_rule_t *rule = jsgf_rule_iter_rule(itor); + if (jsgf_rule_public(rule)) { + const char *rule_name = jsgf_rule_name(rule); + char *dot_pos; + if ((dot_pos = strrchr(rule_name + 1, '.')) == NULL) { + public_rule = rule; + jsgf_rule_iter_free(itor); + break; + } + if (0 == + strncmp(rule_name + 1, jsgf_grammar_name(grammar), + dot_pos - rule_name - 1)) { + public_rule = rule; + jsgf_rule_iter_free(itor); + break; + } + } + } + return public_rule; +} + +char const * +jsgf_rule_name(jsgf_rule_t * rule) +{ + return rule->name; +} + +int +jsgf_rule_public(jsgf_rule_t * rule) +{ + return rule->is_public; +} + +static fsg_model_t * +jsgf_build_fsg_internal(jsgf_t * grammar, jsgf_rule_t * rule, + logmath_t * lmath, float32 lw, int do_closure) +{ + fsg_model_t *fsg; + glist_t nulls; + gnode_t *gn; + int rule_entry, rule_exit; + + if (grammar == NULL || rule == NULL) + return NULL; + + /* Clear previous links */ + for (gn = grammar->links; gn; gn = gnode_next(gn)) { + ckd_free(gnode_ptr(gn)); + } + glist_free(grammar->links); + grammar->links = NULL; + grammar->nstate = 0; + + /* Create the top-level entry state, and expand the + 
top-level rule. */ + rule_entry = grammar->nstate++; + rule_exit = expand_rule(grammar, rule, rule_entry, NO_NODE); + + /* If no exit-state was created, create one. */ + if (rule_exit == NO_NODE) { + rule_exit = grammar->nstate++; + jsgf_add_link(grammar, NULL, rule_entry, rule_exit); + } + + fsg = fsg_model_init(rule->name, lmath, lw, grammar->nstate); + fsg->start_state = rule_entry; + fsg->final_state = rule_exit; + grammar->links = glist_reverse(grammar->links); + for (gn = grammar->links; gn; gn = gnode_next(gn)) { + jsgf_link_t *link = gnode_ptr(gn); + + if (link->atom) { + if (jsgf_atom_is_rule(link->atom)) { + fsg_model_null_trans_add(fsg, link->from, link->to, + logmath_log(lmath, + link->atom->weight)); + } + else { + int wid = fsg_model_word_add(fsg, link->atom->name); + fsg_model_trans_add(fsg, link->from, link->to, + logmath_log(lmath, link->atom->weight), + wid); + } + } + else { + fsg_model_null_trans_add(fsg, link->from, link->to, 0); + } + } + if (do_closure) { + nulls = fsg_model_null_trans_closure(fsg, NULL); + glist_free(nulls); + } + + return fsg; +} + +fsg_model_t * +jsgf_build_fsg(jsgf_t * grammar, jsgf_rule_t * rule, + logmath_t * lmath, float32 lw) +{ + return jsgf_build_fsg_internal(grammar, rule, lmath, lw, TRUE); +} + +fsg_model_t * +jsgf_build_fsg_raw(jsgf_t * grammar, jsgf_rule_t * rule, + logmath_t * lmath, float32 lw) +{ + return jsgf_build_fsg_internal(grammar, rule, lmath, lw, FALSE); +} + +fsg_model_t * +jsgf_read_file(const char *file, logmath_t * lmath, float32 lw) +{ + fsg_model_t *fsg; + jsgf_rule_t *rule; + jsgf_t *jsgf; + jsgf_rule_iter_t *itor; + + if ((jsgf = jsgf_parse_file(file, NULL)) == NULL) { + E_ERROR("Error parsing file: %s\n", file); + return NULL; + } + + rule = NULL; + for (itor = jsgf_rule_iter(jsgf); itor; + itor = jsgf_rule_iter_next(itor)) { + rule = jsgf_rule_iter_rule(itor); + if (jsgf_rule_public(rule)) { + jsgf_rule_iter_free(itor); + break; + } + } + if (rule == NULL) { + E_ERROR("No public rules found 
in %s\n", file); + return NULL; + } + fsg = jsgf_build_fsg(jsgf, rule, lmath, lw); + jsgf_grammar_free(jsgf); + return fsg; +} + +fsg_model_t * +jsgf_read_string(const char *string, logmath_t * lmath, float32 lw) +{ + fsg_model_t *fsg; + jsgf_rule_t *rule; + jsgf_t *jsgf; + jsgf_rule_iter_t *itor; + + if ((jsgf = jsgf_parse_string(string, NULL)) == NULL) { + E_ERROR("Error parsing input string\n"); + return NULL; + } + + rule = NULL; + for (itor = jsgf_rule_iter(jsgf); itor; + itor = jsgf_rule_iter_next(itor)) { + rule = jsgf_rule_iter_rule(itor); + if (jsgf_rule_public(rule)) { + jsgf_rule_iter_free(itor); + break; + } + } + if (rule == NULL) { + jsgf_grammar_free(jsgf); + E_ERROR("No public rules found in input string\n"); + return NULL; + } + fsg = jsgf_build_fsg(jsgf, rule, lmath, lw); + jsgf_grammar_free(jsgf); + return fsg; +} + + +int +jsgf_write_fsg(jsgf_t * grammar, jsgf_rule_t * rule, FILE * outfh) +{ + fsg_model_t *fsg; + logmath_t *lmath = logmath_init(1.0001, 0, 0); + + if ((fsg = jsgf_build_fsg_raw(grammar, rule, lmath, 1.0)) == NULL) + goto error_out; + + fsg_model_write(fsg, outfh); + logmath_free(lmath); + return 0; + + error_out: + logmath_free(lmath); + return -1; +} + +jsgf_rule_t * +jsgf_define_rule(jsgf_t * jsgf, char *name, jsgf_rhs_t * rhs, + int is_public) +{ + jsgf_rule_t *rule; + void *val; + + if (name == NULL) { + name = ckd_malloc(strlen(jsgf->name) + 16); + sprintf(name, "<%s.g%05d>", jsgf->name, + hash_table_inuse(jsgf->rules)); + } + else { + char *newname; + + newname = jsgf_fullname(jsgf, name); + name = newname; + } + + rule = ckd_calloc(1, sizeof(*rule)); + rule->refcnt = 1; + rule->name = ckd_salloc(name); + rule->rhs = rhs; + rule->is_public = is_public; + + E_INFO("Defined rule: %s%s\n", + rule->is_public ? 
"PUBLIC " : "", rule->name); + val = hash_table_enter(jsgf->rules, name, rule); + if (val != (void *) rule) { + E_WARN("Multiply defined symbol: %s\n", name); + } + return rule; +} + +jsgf_rule_t * +jsgf_rule_retain(jsgf_rule_t * rule) +{ + ++rule->refcnt; + return rule; +} + +int +jsgf_rule_free(jsgf_rule_t * rule) +{ + if (rule == NULL) + return 0; + if (--rule->refcnt > 0) + return rule->refcnt; + jsgf_rhs_free(rule->rhs); + ckd_free(rule->name); + ckd_free(rule); + return 0; +} + + +/* FIXME: This should go in libsphinxutil */ +static char * +path_list_search(glist_t paths, char *path) +{ + gnode_t *gn; + + for (gn = paths; gn; gn = gnode_next(gn)) { + char *fullpath; + FILE *tmp; + + fullpath = string_join(gnode_ptr(gn), "/", path, NULL); + tmp = fopen(fullpath, "r"); + if (tmp != NULL) { + fclose(tmp); + return fullpath; + } + else { + ckd_free(fullpath); + } + } + return NULL; +} + +jsgf_rule_t * +jsgf_import_rule(jsgf_t * jsgf, char *name) +{ + char *c, *path, *newpath; + size_t namelen, packlen; + void *val; + jsgf_t *imp; + int import_all; + + /* Trim the leading and trailing <> */ + namelen = strlen(name); + path = ckd_malloc(namelen - 2 + 6); /* room for a trailing .gram */ + strcpy(path, name + 1); + /* Split off the first part of the name */ + c = strrchr(path, '.'); + if (c == NULL) { + E_ERROR("Imported rule is not qualified: %s\n", name); + ckd_free(path); + return NULL; + } + packlen = c - path; + *c = '\0'; + + /* Look for import foo.* */ + import_all = (strlen(name) > 2 + && 0 == strcmp(name + namelen - 3, ".*>")); + + /* Construct a filename. 
*/ + for (c = path; *c; ++c) + if (*c == '.') + *c = '/'; + strcat(path, ".gram"); + newpath = path_list_search(jsgf->searchpath, path); + if (newpath == NULL) { + E_ERROR("Failed to find grammar %s\n", path); + ckd_free(path); + return NULL; + } + ckd_free(path); + + path = newpath; + E_INFO("Importing %s from %s to %s\n", name, path, jsgf->name); + + /* FIXME: Also, we need to make sure that path is fully qualified + * here, by adding any prefixes from jsgf->name to it. */ + /* See if we have parsed it already */ + if (hash_table_lookup(jsgf->imports, path, &val) == 0) { + E_INFO("Already imported %s\n", path); + imp = val; + ckd_free(path); + } + else { + /* If not, parse it. */ + imp = jsgf_parse_file(path, jsgf); + val = hash_table_enter(jsgf->imports, path, imp); + if (val != (void *) imp) { + E_WARN("Multiply imported file: %s\n", path); + } + } + if (imp != NULL) { + hash_iter_t *itor; + /* Look for public rules matching rulename. */ + for (itor = hash_table_iter(imp->rules); itor; + itor = hash_table_iter_next(itor)) { + hash_entry_t *he = itor->ent; + jsgf_rule_t *rule = hash_entry_val(he); + int rule_matches; + char *rule_name = importname2rulename(name); + + if (import_all) { + /* Match package name (symbol table is shared) */ + rule_matches = + !strncmp(rule_name, rule->name, packlen + 1); + } + else { + /* Exact match */ + rule_matches = !strcmp(rule_name, rule->name); + } + ckd_free(rule_name); + if (rule->is_public && rule_matches) { + void *val; + char *newname; + + /* Link this rule into the current namespace. 
*/ + c = strrchr(rule->name, '.'); + assert(c != NULL); + newname = jsgf_fullname(jsgf, c); + + E_INFO("Imported %s\n", newname); + val = hash_table_enter(jsgf->rules, newname, + jsgf_rule_retain(rule)); + if (val != (void *) rule) { + E_WARN("Multiply defined symbol: %s\n", newname); + } + if (!import_all) { + hash_table_iter_free(itor); + return rule; + } + } + } + } + + return NULL; +} + +static void +jsgf_set_search_path(jsgf_t * jsgf, const char *filename) +{ + char *jsgf_path; + +#if !defined(_WIN32_WCE) + if ((jsgf_path = getenv("JSGF_PATH")) != NULL) { + char *word, *c; + /* FIXME: This should be a function in libsphinxbase. */ + word = jsgf_path = ckd_salloc(jsgf_path); + while ((c = strchr(word, ':'))) { + *c = '\0'; + jsgf->searchpath = glist_add_ptr(jsgf->searchpath, word); + word = c + 1; + } + jsgf->searchpath = glist_add_ptr(jsgf->searchpath, word); + jsgf->searchpath = glist_reverse(jsgf->searchpath); + return; + } +#endif + + if (!filename) { + jsgf->searchpath = + glist_add_ptr(jsgf->searchpath, ckd_salloc(".")); + return; + } + + jsgf_path = ckd_salloc(filename); + path2dirname(filename, jsgf_path); + jsgf->searchpath = glist_add_ptr(jsgf->searchpath, jsgf_path); +} + +jsgf_t * +jsgf_parse_file(const char *filename, jsgf_t * parent) +{ + yyscan_t yyscanner; + jsgf_t *jsgf; + int yyrv; + FILE *in = NULL; + + yylex_init(&yyscanner); + if (filename == NULL) { + yyset_in(stdin, yyscanner); + } + else { + in = fopen(filename, "r"); + if (in == NULL) { + E_ERROR_SYSTEM("Failed to open %s for parsing", filename); + return NULL; + } + yyset_in(in, yyscanner); + } + + jsgf = jsgf_grammar_new(parent); + + if (!parent) + jsgf_set_search_path(jsgf, filename); + + yyrv = yyparse(yyscanner, jsgf); + if (yyrv != 0) { + E_ERROR("Failed to parse JSGF grammar from '%s'\n", + filename ? 
filename : "(stdin)"); + jsgf_grammar_free(jsgf); + yylex_destroy(yyscanner); + return NULL; + } + if (in) + fclose(in); + yylex_destroy(yyscanner); + + return jsgf; +} + +jsgf_t * +jsgf_parse_string(const char *string, jsgf_t * parent) +{ + yyscan_t yyscanner; + jsgf_t *jsgf; + int yyrv; + YY_BUFFER_STATE buf; + + yylex_init(&yyscanner); + buf = yy_scan_string(string, yyscanner); + + jsgf = jsgf_grammar_new(parent); + if (!parent) + jsgf_set_search_path(jsgf, NULL); + + yyrv = yyparse(yyscanner, jsgf); + if (yyrv != 0) { + E_ERROR("Failed to parse JSGF grammar from input string\n"); + jsgf_grammar_free(jsgf); + yy_delete_buffer(buf, yyscanner); + yylex_destroy(yyscanner); + return NULL; + } + yy_delete_buffer(buf, yyscanner); + yylex_destroy(yyscanner); + + return jsgf; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_internal.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..a5cbc98336b2653de040888689917721d346bb13 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_internal.h @@ -0,0 +1,140 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef __JSGF_INTERNAL_H__ +#define __JSGF_INTERNAL_H__ + +/** + * @file jsgf_internal.h Internal definitions for JSGF grammar compiler + */ + +#include + +#include +#include +#include +#include +#include +#include + + +/* Flex uses strdup which is missing on WinCE */ +#if defined(_WIN32) || defined(_WIN32_WCE) +#define strdup _strdup +#endif + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +/* Fool Emacs. */ +} +#endif + +#define YY_NO_INPUT /* Silence a compiler warning. 
*/ + +typedef struct jsgf_rhs_s jsgf_rhs_t; +typedef struct jsgf_atom_s jsgf_atom_t; +typedef struct jsgf_link_s jsgf_link_t; +typedef struct jsgf_rule_stack_s jsgf_rule_stack_t; + +struct jsgf_s { + char *version; /**< JSGF version (from header) */ + char *charset; /**< JSGF charset (default UTF-8) */ + char *locale; /**< JSGF locale (default C) */ + char *name; /**< Grammar name */ + + hash_table_t *rules; /**< Defined or imported rules in this grammar. */ + hash_table_t *imports; /**< Pointers to imported grammars. */ + jsgf_t *parent; /**< Parent grammar (if this is an imported one) */ + glist_t searchpath; /**< List of directories to search for grammars. */ + + /* Scratch variables for FSG conversion. */ + int nstate; /**< Number of generated states. */ + glist_t links; /**< Generated FSG links. */ + glist_t rulestack; /**< Stack of currently expanded rules. */ +}; + +/* A type to keep track of the stack of rules currently being expanded. */ +struct jsgf_rule_stack_s { + jsgf_rule_t *rule; /**< The rule being expanded */ + int entry; /**< The entry-state for this expansion */ +}; + +struct jsgf_rule_s { + int refcnt; /**< Reference count. */ + char *name; /**< Rule name (NULL for an alternation/grouping) */ + int is_public; /**< Is this rule marked 'public'? 
*/ + jsgf_rhs_t *rhs; /**< Expansion */ +}; + +struct jsgf_rhs_s { + glist_t atoms; /**< Sequence of items */ + jsgf_rhs_t *alt; /**< Linked list of alternates */ +}; + +struct jsgf_atom_s { + char *name; /**< Rule or token name */ + glist_t tags; /**< Tags, if any (glist_t of char *) */ + float weight; /**< Weight (default 1) */ +}; + +struct jsgf_link_s { + jsgf_atom_t *atom; /**< Name, tags, weight */ + int from; /**< From state */ + int to; /**< To state */ +}; + +#define jsgf_atom_is_rule(atom) ((atom)->name[0] == '<') + +void jsgf_add_link(jsgf_t *grammar, jsgf_atom_t *atom, int from, int to); +jsgf_atom_t *jsgf_atom_new(char *name, float weight); +jsgf_atom_t *jsgf_kleene_new(jsgf_t *jsgf, jsgf_atom_t *atom, int plus); +jsgf_rule_t *jsgf_optional_new(jsgf_t *jsgf, jsgf_rhs_t *exp); +jsgf_rule_t *jsgf_define_rule(jsgf_t *jsgf, char *name, jsgf_rhs_t *rhs, int is_public); +jsgf_rule_t *jsgf_import_rule(jsgf_t *jsgf, char *name); + +int jsgf_atom_free(jsgf_atom_t *atom); +int jsgf_rule_free(jsgf_rule_t *rule); +jsgf_rule_t *jsgf_rule_retain(jsgf_rule_t *rule); + +#ifdef __cplusplus +} +#endif + + +#endif /* __JSGF_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_parser.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_parser.c new file mode 100644 index 0000000000000000000000000000000000000000..9314afb5df9f45130a323ad0a47c44a39a65c94a --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_parser.c @@ -0,0 +1,1800 @@ + +/* A Bison parser, made by GNU Bison 2.4.1. */ + +/* Skeleton implementation for Bison's Yacc-like parsers in C + + Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 + Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* C LALR(1) parser skeleton written by Richard Stallman, by + simplifying the original so-called "semantic" parser. */ + +/* All symbols defined below should begin with yy or YY, to avoid + infringing on user name space. This should be done even for local + variables, as they might otherwise be expanded by user macros. + There are some unavoidable exceptions within include files to + define necessary library symbols; they are noted "INFRINGES ON + USER NAME SPACE" below. */ + +/* Identify Bison output. */ +#define YYBISON 1 + +/* Bison version. */ +#define YYBISON_VERSION "2.4.1" + +/* Skeleton name. */ +#define YYSKELETON_NAME "yacc.c" + +/* Pure parsers. 
*/ +#define YYPURE 1 + +/* Push parsers. */ +#define YYPUSH 0 + +/* Pull parsers. */ +#define YYPULL 1 + +/* Using locations. */ +#define YYLSP_NEEDED 0 + + + +/* Copy the first part of user declarations. */ + +/* Line 189 of yacc.c */ +#line 37 "jsgf_parser.y" + +#define YYERROR_VERBOSE + +#include +#include + +#include +#include +#include + +#include "jsgf_internal.h" +#include "jsgf_parser.h" +#include "jsgf_scanner.h" + +/* Suppress warnings from generated code */ +#if defined _MSC_VER +#pragma warning(disable: 4273) +#endif + +void yyerror(yyscan_t lex, jsgf_t *jsgf, const char *s); + + + +/* Line 189 of yacc.c */ +#line 97 "jsgf_parser.c" + +/* Enabling traces. */ +#ifndef YYDEBUG +# define YYDEBUG 0 +#endif + +/* Enabling verbose error messages. */ +#ifdef YYERROR_VERBOSE +# undef YYERROR_VERBOSE +# define YYERROR_VERBOSE 1 +#else +# define YYERROR_VERBOSE 0 +#endif + +/* Enabling the token table. */ +#ifndef YYTOKEN_TABLE +# define YYTOKEN_TABLE 0 +#endif + + +/* Tokens. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + /* Put the tokens into the symbol table, so that GDB and other debuggers + know about them. */ + enum yytokentype { + HEADER = 258, + GRAMMAR = 259, + IMPORT = 260, + PUBLIC = 261, + TOKEN = 262, + RULENAME = 263, + TAG = 264, + WEIGHT = 265 + }; +#endif +/* Tokens. */ +#define HEADER 258 +#define GRAMMAR 259 +#define IMPORT 260 +#define PUBLIC 261 +#define TOKEN 262 +#define RULENAME 263 +#define TAG 264 +#define WEIGHT 265 + + + + +#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED +typedef union YYSTYPE +{ + +/* Line 214 of yacc.c */ +#line 65 "jsgf_parser.y" + + char *name; + float weight; + jsgf_rule_t *rule; + jsgf_rhs_t *rhs; + jsgf_atom_t *atom; + + + +/* Line 214 of yacc.c */ +#line 163 "jsgf_parser.c" +} YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define yystype YYSTYPE /* obsolescent; will be withdrawn */ +# define YYSTYPE_IS_DECLARED 1 +#endif + + +/* Copy the second part of user declarations. 
*/ + + +/* Line 264 of yacc.c */ +#line 175 "jsgf_parser.c" + +#ifdef short +# undef short +#endif + +#ifdef YYTYPE_UINT8 +typedef YYTYPE_UINT8 yytype_uint8; +#else +typedef unsigned char yytype_uint8; +#endif + +#ifdef YYTYPE_INT8 +typedef YYTYPE_INT8 yytype_int8; +#elif (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +typedef signed char yytype_int8; +#else +typedef short int yytype_int8; +#endif + +#ifdef YYTYPE_UINT16 +typedef YYTYPE_UINT16 yytype_uint16; +#else +typedef unsigned short int yytype_uint16; +#endif + +#ifdef YYTYPE_INT16 +typedef YYTYPE_INT16 yytype_int16; +#else +typedef short int yytype_int16; +#endif + +#ifndef YYSIZE_T +# ifdef __SIZE_TYPE__ +# define YYSIZE_T __SIZE_TYPE__ +# elif defined size_t +# define YYSIZE_T size_t +# elif ! defined YYSIZE_T && (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +# include /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# else +# define YYSIZE_T unsigned int +# endif +#endif + +#define YYSIZE_MAXIMUM ((YYSIZE_T) -1) + +#ifndef YY_ +# if YYENABLE_NLS +# if ENABLE_NLS +# include /* INFRINGES ON USER NAME SPACE */ +# define YY_(msgid) dgettext ("bison-runtime", msgid) +# endif +# endif +# ifndef YY_ +# define YY_(msgid) msgid +# endif +#endif + +/* Suppress unused-variable warnings by "using" E. */ +#if ! defined lint || defined __GNUC__ +# define YYUSE(e) ((void) (e)) +#else +# define YYUSE(e) /* empty */ +#endif + +/* Identity function, used to suppress warnings about constant conditions. */ +#ifndef lint +# define YYID(n) (n) +#else +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static int +YYID (int yyi) +#else +static int +YYID (yyi) + int yyi; +#endif +{ + return yyi; +} +#endif + +#if ! defined yyoverflow || YYERROR_VERBOSE + +/* The parser invokes alloca or malloc; define the necessary symbols. 
*/ + +# ifdef YYSTACK_USE_ALLOCA +# if YYSTACK_USE_ALLOCA +# ifdef __GNUC__ +# define YYSTACK_ALLOC __builtin_alloca +# elif defined __BUILTIN_VA_ARG_INCR +# include /* INFRINGES ON USER NAME SPACE */ +# elif defined _AIX +# define YYSTACK_ALLOC __alloca +# elif defined _MSC_VER +# include /* INFRINGES ON USER NAME SPACE */ +# define alloca _alloca +# else +# define YYSTACK_ALLOC alloca +# if ! defined _ALLOCA_H && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +# include /* INFRINGES ON USER NAME SPACE */ +# ifndef _STDLIB_H +# define _STDLIB_H 1 +# endif +# endif +# endif +# endif +# endif + +# ifdef YYSTACK_ALLOC + /* Pacify GCC's `empty if-body' warning. */ +# define YYSTACK_FREE(Ptr) do { /* empty */; } while (YYID (0)) +# ifndef YYSTACK_ALLOC_MAXIMUM + /* The OS might guarantee only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + invoke alloca (N) if N exceeds 4096. Use a slightly smaller number + to allow for a few compiler-allocated temporary stack slots. */ +# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ +# endif +# else +# define YYSTACK_ALLOC YYMALLOC +# define YYSTACK_FREE YYFREE +# ifndef YYSTACK_ALLOC_MAXIMUM +# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM +# endif +# if (defined __cplusplus && ! defined _STDLIB_H \ + && ! ((defined YYMALLOC || defined malloc) \ + && (defined YYFREE || defined free))) +# include /* INFRINGES ON USER NAME SPACE */ +# ifndef _STDLIB_H +# define _STDLIB_H 1 +# endif +# endif +# ifndef YYMALLOC +# define YYMALLOC malloc +# if ! defined malloc && ! defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +void *malloc (YYSIZE_T); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# ifndef YYFREE +# define YYFREE free +# if ! defined free && ! 
defined _STDLIB_H && (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +void free (void *); /* INFRINGES ON USER NAME SPACE */ +# endif +# endif +# endif +#endif /* ! defined yyoverflow || YYERROR_VERBOSE */ + + +#if (! defined yyoverflow \ + && (! defined __cplusplus \ + || (defined YYSTYPE_IS_TRIVIAL && YYSTYPE_IS_TRIVIAL))) + +/* A type that is properly aligned for any stack member. */ +union yyalloc +{ + yytype_int16 yyss_alloc; + YYSTYPE yyvs_alloc; +}; + +/* The size of the maximum gap between one aligned stack and the next. */ +# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) + +/* The size of an array large to enough to hold all stacks, each with + N elements. */ +# define YYSTACK_BYTES(N) \ + ((N) * (sizeof (yytype_int16) + sizeof (YYSTYPE)) \ + + YYSTACK_GAP_MAXIMUM) + +/* Copy COUNT objects from FROM to TO. The source and destination do + not overlap. */ +# ifndef YYCOPY +# if defined __GNUC__ && 1 < __GNUC__ +# define YYCOPY(To, From, Count) \ + __builtin_memcpy (To, From, (Count) * sizeof (*(From))) +# else +# define YYCOPY(To, From, Count) \ + do \ + { \ + YYSIZE_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (To)[yyi] = (From)[yyi]; \ + } \ + while (YYID (0)) +# endif +# endif + +/* Relocate STACK from its old location to the new one. The + local variables YYSIZE and YYSTACKSIZE give the old and new number of + elements in the stack, and YYPTR gives the new location of the + stack. Advance YYPTR to a properly aligned location for the next + stack. */ +# define YYSTACK_RELOCATE(Stack_alloc, Stack) \ + do \ + { \ + YYSIZE_T yynewbytes; \ + YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ + Stack = &yyptr->Stack_alloc; \ + yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ + yyptr += yynewbytes / sizeof (*yyptr); \ + } \ + while (YYID (0)) + +#endif + +/* YYFINAL -- State number of the termination state. */ +#define YYFINAL 7 +/* YYLAST -- Last index in YYTABLE. 
*/ +#define YYLAST 54 + +/* YYNTOKENS -- Number of terminals. */ +#define YYNTOKENS 20 +/* YYNNTS -- Number of nonterminals. */ +#define YYNNTS 16 +/* YYNRULES -- Number of rules. */ +#define YYNRULES 33 +/* YYNRULES -- Number of states. */ +#define YYNSTATES 58 + +/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ +#define YYUNDEFTOK 2 +#define YYMAXUTOK 265 + +#define YYTRANSLATE(YYX) \ + ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) + +/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. */ +static const yytype_uint8 yytranslate[] = +{ + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 14, 15, 18, 19, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 11, + 2, 12, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 16, 2, 17, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 13, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10 +}; + +#if YYDEBUG +/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in + YYRHS. */ +static const yytype_uint8 yyprhs[] = +{ + 0, 0, 3, 5, 8, 12, 15, 18, 22, 27, + 33, 37, 39, 42, 46, 48, 51, 56, 62, 64, + 68, 70, 73, 75, 78, 80, 83, 87, 91, 93, + 95, 97, 99, 102 +}; + +/* YYRHS -- A `-1'-separated list of the rules' RHS. 
*/ +static const yytype_int8 yyrhs[] = +{ + 21, 0, -1, 22, -1, 22, 27, -1, 22, 25, + 27, -1, 23, 24, -1, 3, 11, -1, 3, 7, + 11, -1, 3, 7, 7, 11, -1, 3, 7, 7, + 7, 11, -1, 4, 7, 11, -1, 26, -1, 25, + 26, -1, 5, 8, 11, -1, 28, -1, 27, 28, + -1, 8, 12, 29, 11, -1, 6, 8, 12, 29, + 11, -1, 30, -1, 29, 13, 30, -1, 31, -1, + 30, 31, -1, 32, -1, 31, 9, -1, 35, -1, + 10, 35, -1, 14, 29, 15, -1, 16, 29, 17, + -1, 7, -1, 8, -1, 33, -1, 34, -1, 35, + 18, -1, 35, 19, -1 +}; + +/* YYRLINE[YYN] -- source line where rule number YYN was defined. */ +static const yytype_uint8 yyrline[] = +{ + 0, 82, 82, 83, 84, 87, 90, 91, 92, 93, + 97, 100, 101, 104, 107, 108, 111, 112, 115, 116, + 121, 123, 127, 128, 132, 133, 136, 139, 142, 143, + 144, 145, 146, 147 +}; +#endif + +#if YYDEBUG || YYERROR_VERBOSE || YYTOKEN_TABLE +/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at YYNTOKENS, nonterminals. */ +static const char *const yytname[] = +{ + "$end", "error", "$undefined", "HEADER", "GRAMMAR", "IMPORT", "PUBLIC", + "TOKEN", "RULENAME", "TAG", "WEIGHT", "';'", "'='", "'|'", "'('", "')'", + "'['", "']'", "'*'", "'+'", "$accept", "grammar", "header", + "jsgf_header", "grammar_header", "import_header", "import_statement", + "rule_list", "rule", "alternate_list", "rule_expansion", + "tagged_rule_item", "rule_item", "rule_group", "rule_optional", + "rule_atom", 0 +}; +#endif + +# ifdef YYPRINT +/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to + token YYLEX-NUM. */ +static const yytype_uint16 yytoknum[] = +{ + 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, + 265, 59, 61, 124, 40, 41, 91, 93, 42, 43 +}; +# endif + +/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. 
*/ +static const yytype_uint8 yyr1[] = +{ + 0, 20, 21, 21, 21, 22, 23, 23, 23, 23, + 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, + 30, 30, 31, 31, 32, 32, 33, 34, 35, 35, + 35, 35, 35, 35 +}; + +/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. */ +static const yytype_uint8 yyr2[] = +{ + 0, 2, 1, 2, 3, 2, 2, 3, 4, 5, + 3, 1, 2, 3, 1, 2, 4, 5, 1, 3, + 1, 2, 1, 2, 1, 2, 3, 3, 1, 1, + 1, 1, 2, 2 +}; + +/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state + STATE-NUM when YYTABLE doesn't specify something else to do. Zero + means the default is an error. */ +static const yytype_uint8 yydefact[] = +{ + 0, 0, 0, 2, 0, 0, 6, 1, 0, 0, + 0, 0, 11, 3, 14, 0, 5, 0, 7, 0, + 0, 0, 12, 4, 15, 0, 0, 8, 13, 0, + 28, 29, 0, 0, 0, 0, 18, 20, 22, 30, + 31, 24, 10, 9, 0, 25, 0, 0, 16, 0, + 21, 23, 32, 33, 17, 26, 27, 19 +}; + +/* YYDEFGOTO[NTERM-NUM]. */ +static const yytype_int8 yydefgoto[] = +{ + -1, 2, 3, 4, 16, 11, 12, 13, 14, 35, + 36, 37, 38, 39, 40, 41 +}; + +/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. */ +#define YYPACT_NINF -37 +static const yytype_int8 yypact[] = +{ + -1, -2, 36, 22, 35, 8, -37, -37, 32, 33, + 30, 22, -37, 17, -37, 37, -37, 13, -37, 34, + 31, -4, -37, 17, -37, 38, 39, -37, -37, -4, + -37, -37, 0, -4, -4, 18, -4, 42, -37, -37, + -37, 19, -37, -37, 21, 19, 20, 9, -37, -4, + 42, -37, -37, -37, -37, -37, -37, -4 +}; + +/* YYPGOTO[NTERM-NUM]. */ +static const yytype_int8 yypgoto[] = +{ + -37, -37, -37, -37, -37, -37, 41, 43, -12, -16, + -3, -36, -37, -37, -37, 15 +}; + +/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule which + number is the opposite. If zero, do what YYDEFACT says. + If YYTABLE_NINF, syntax error. 
*/ +#define YYTABLE_NINF -1 +static const yytype_uint8 yytable[] = +{ + 50, 24, 1, 30, 31, 5, 32, 30, 31, 6, + 33, 24, 34, 44, 33, 17, 34, 46, 47, 18, + 26, 50, 49, 9, 27, 10, 56, 8, 9, 48, + 10, 49, 54, 49, 49, 55, 7, 52, 53, 15, + 19, 20, 21, 29, 25, 28, 57, 45, 0, 42, + 43, 51, 22, 0, 23 +}; + +static const yytype_int8 yycheck[] = +{ + 36, 13, 3, 7, 8, 7, 10, 7, 8, 11, + 14, 23, 16, 29, 14, 7, 16, 33, 34, 11, + 7, 57, 13, 6, 11, 8, 17, 5, 6, 11, + 8, 13, 11, 13, 13, 15, 0, 18, 19, 4, + 8, 8, 12, 12, 7, 11, 49, 32, -1, 11, + 11, 9, 11, -1, 11 +}; + +/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing + symbol of state STATE-NUM. */ +static const yytype_uint8 yystos[] = +{ + 0, 3, 21, 22, 23, 7, 11, 0, 5, 6, + 8, 25, 26, 27, 28, 4, 24, 7, 11, 8, + 8, 12, 26, 27, 28, 7, 7, 11, 11, 12, + 7, 8, 10, 14, 16, 29, 30, 31, 32, 33, + 34, 35, 11, 11, 29, 35, 29, 29, 11, 13, + 31, 9, 18, 19, 11, 15, 17, 30 +}; + +#define yyerrok (yyerrstatus = 0) +#define yyclearin (yychar = YYEMPTY) +#define YYEMPTY (-2) +#define YYEOF 0 + +#define YYACCEPT goto yyacceptlab +#define YYABORT goto yyabortlab +#define YYERROR goto yyerrorlab + + +/* Like YYERROR except do call yyerror. This remains here temporarily + to ease the transition to the new meaning of YYERROR, for GCC. + Once GCC version 2 has supplanted version 1, this can go. */ + +#define YYFAIL goto yyerrlab + +#define YYRECOVERING() (!!yyerrstatus) + +#define YYBACKUP(Token, Value) \ +do \ + if (yychar == YYEMPTY && yylen == 1) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + yytoken = YYTRANSLATE (yychar); \ + YYPOPSTACK (1); \ + goto yybackup; \ + } \ + else \ + { \ + yyerror (yyscanner, jsgf, YY_("syntax error: cannot back up")); \ + YYERROR; \ + } \ +while (YYID (0)) + + +#define YYTERROR 1 +#define YYERRCODE 256 + + +/* YYLLOC_DEFAULT -- Set CURRENT to span from RHS[1] to RHS[N]. + If N is 0, then set CURRENT to the empty location which ends + the previous symbol: RHS[0] (always defined). 
*/ + +#define YYRHSLOC(Rhs, K) ((Rhs)[K]) +#ifndef YYLLOC_DEFAULT +# define YYLLOC_DEFAULT(Current, Rhs, N) \ + do \ + if (YYID (N)) \ + { \ + (Current).first_line = YYRHSLOC (Rhs, 1).first_line; \ + (Current).first_column = YYRHSLOC (Rhs, 1).first_column; \ + (Current).last_line = YYRHSLOC (Rhs, N).last_line; \ + (Current).last_column = YYRHSLOC (Rhs, N).last_column; \ + } \ + else \ + { \ + (Current).first_line = (Current).last_line = \ + YYRHSLOC (Rhs, 0).last_line; \ + (Current).first_column = (Current).last_column = \ + YYRHSLOC (Rhs, 0).last_column; \ + } \ + while (YYID (0)) +#endif + + +/* YY_LOCATION_PRINT -- Print the location on the stream. + This macro was not mandated originally: define only if we know + we won't break user code: when these are the locations we know. */ + +#ifndef YY_LOCATION_PRINT +# if YYLTYPE_IS_TRIVIAL +# define YY_LOCATION_PRINT(File, Loc) \ + fprintf (File, "%d.%d-%d.%d", \ + (Loc).first_line, (Loc).first_column, \ + (Loc).last_line, (Loc).last_column) +# else +# define YY_LOCATION_PRINT(File, Loc) ((void) 0) +# endif +#endif + + +/* YYLEX -- calling `yylex' with the right arguments. */ + +#ifdef YYLEX_PARAM +# define YYLEX yylex (&yylval, YYLEX_PARAM) +#else +# define YYLEX yylex (&yylval, yyscanner) +#endif + +/* Enable debugging if requested. */ +#if YYDEBUG + +# ifndef YYFPRINTF +# include /* INFRINGES ON USER NAME SPACE */ +# define YYFPRINTF fprintf +# endif + +# define YYDPRINTF(Args) \ +do { \ + if (yydebug) \ + YYFPRINTF Args; \ +} while (YYID (0)) + +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) \ +do { \ + if (yydebug) \ + { \ + YYFPRINTF (stderr, "%s ", Title); \ + yy_symbol_print (stderr, \ + Type, Value, yyscanner, jsgf); \ + YYFPRINTF (stderr, "\n"); \ + } \ +} while (YYID (0)) + + +/*--------------------------------. +| Print this symbol on YYOUTPUT. 
| +`--------------------------------*/ + +/*ARGSUSED*/ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yy_symbol_value_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep, void* yyscanner, jsgf_t *jsgf) +#else +static void +yy_symbol_value_print (yyoutput, yytype, yyvaluep, yyscanner, jsgf) + FILE *yyoutput; + int yytype; + YYSTYPE const * const yyvaluep; + void* yyscanner; + jsgf_t *jsgf; +#endif +{ + if (!yyvaluep) + return; + YYUSE (yyscanner); + YYUSE (jsgf); +# ifdef YYPRINT + if (yytype < YYNTOKENS) + YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); +# else + YYUSE (yyoutput); +# endif + switch (yytype) + { + default: + break; + } +} + + +/*--------------------------------. +| Print this symbol on YYOUTPUT. | +`--------------------------------*/ + +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yy_symbol_print (FILE *yyoutput, int yytype, YYSTYPE const * const yyvaluep, void* yyscanner, jsgf_t *jsgf) +#else +static void +yy_symbol_print (yyoutput, yytype, yyvaluep, yyscanner, jsgf) + FILE *yyoutput; + int yytype; + YYSTYPE const * const yyvaluep; + void* yyscanner; + jsgf_t *jsgf; +#endif +{ + if (yytype < YYNTOKENS) + YYFPRINTF (yyoutput, "token %s (", yytname[yytype]); + else + YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]); + + yy_symbol_value_print (yyoutput, yytype, yyvaluep, yyscanner, jsgf); + YYFPRINTF (yyoutput, ")"); +} + +/*------------------------------------------------------------------. +| yy_stack_print -- Print the state stack from its BOTTOM up to its | +| TOP (included). 
| +`------------------------------------------------------------------*/ + +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yy_stack_print (yytype_int16 *yybottom, yytype_int16 *yytop) +#else +static void +yy_stack_print (yybottom, yytop) + yytype_int16 *yybottom; + yytype_int16 *yytop; +#endif +{ + YYFPRINTF (stderr, "Stack now"); + for (; yybottom <= yytop; yybottom++) + { + int yybot = *yybottom; + YYFPRINTF (stderr, " %d", yybot); + } + YYFPRINTF (stderr, "\n"); +} + +# define YY_STACK_PRINT(Bottom, Top) \ +do { \ + if (yydebug) \ + yy_stack_print ((Bottom), (Top)); \ +} while (YYID (0)) + + +/*------------------------------------------------. +| Report that the YYRULE is going to be reduced. | +`------------------------------------------------*/ + +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yy_reduce_print (YYSTYPE *yyvsp, int yyrule, void* yyscanner, jsgf_t *jsgf) +#else +static void +yy_reduce_print (yyvsp, yyrule, yyscanner, jsgf) + YYSTYPE *yyvsp; + int yyrule; + void* yyscanner; + jsgf_t *jsgf; +#endif +{ + int yynrhs = yyr2[yyrule]; + int yyi; + unsigned long int yylno = yyrline[yyrule]; + YYFPRINTF (stderr, "Reducing stack by rule %d (line %lu):\n", + yyrule - 1, yylno); + /* The symbols being reduced. */ + for (yyi = 0; yyi < yynrhs; yyi++) + { + YYFPRINTF (stderr, " $%d = ", yyi + 1); + yy_symbol_print (stderr, yyrhs[yyprhs[yyrule] + yyi], + &(yyvsp[(yyi + 1) - (yynrhs)]) + , yyscanner, jsgf); + YYFPRINTF (stderr, "\n"); + } +} + +# define YY_REDUCE_PRINT(Rule) \ +do { \ + if (yydebug) \ + yy_reduce_print (yyvsp, Rule, yyscanner, jsgf); \ +} while (YYID (0)) + +/* Nonzero means print parse trace. It is left uninitialized so that + multiple parsers can coexist. 
*/ +int yydebug; +#else /* !YYDEBUG */ +# define YYDPRINTF(Args) +# define YY_SYMBOL_PRINT(Title, Type, Value, Location) +# define YY_STACK_PRINT(Bottom, Top) +# define YY_REDUCE_PRINT(Rule) +#endif /* !YYDEBUG */ + + +/* YYINITDEPTH -- initial size of the parser's stacks. */ +#ifndef YYINITDEPTH +# define YYINITDEPTH 200 +#endif + +/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only + if the built-in stack extension method is used). + + Do not make this value too large; the results are undefined if + YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) + evaluated with infinite-precision integer arithmetic. */ + +#ifndef YYMAXDEPTH +# define YYMAXDEPTH 10000 +#endif + + + +#if YYERROR_VERBOSE + +# ifndef yystrlen +# if defined __GLIBC__ && defined _STRING_H +# define yystrlen strlen +# else +/* Return the length of YYSTR. */ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static YYSIZE_T +yystrlen (const char *yystr) +#else +static YYSIZE_T +yystrlen (yystr) + const char *yystr; +#endif +{ + YYSIZE_T yylen; + for (yylen = 0; yystr[yylen]; yylen++) + continue; + return yylen; +} +# endif +# endif + +# ifndef yystpcpy +# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE +# define yystpcpy stpcpy +# else +/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in + YYDEST. */ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static char * +yystpcpy (char *yydest, const char *yysrc) +#else +static char * +yystpcpy (yydest, yysrc) + char *yydest; + const char *yysrc; +#endif +{ + char *yyd = yydest; + const char *yys = yysrc; + + while ((*yyd++ = *yys++) != '\0') + continue; + + return yyd - 1; +} +# endif +# endif + +# ifndef yytnamerr +/* Copy to YYRES the contents of YYSTR after stripping away unnecessary + quotes and backslashes, so that it's suitable for yyerror. 
The + heuristic is that double-quoting is unnecessary unless the string + contains an apostrophe, a comma, or backslash (other than + backslash-backslash). YYSTR is taken from yytname. If YYRES is + null, do not copy; instead, return the length of what the result + would have been. */ +static YYSIZE_T +yytnamerr (char *yyres, const char *yystr) +{ + if (*yystr == '"') + { + YYSIZE_T yyn = 0; + char const *yyp = yystr; + + for (;;) + switch (*++yyp) + { + case '\'': + case ',': + goto do_not_strip_quotes; + + case '\\': + if (*++yyp != '\\') + goto do_not_strip_quotes; + /* Fall through. */ + default: + if (yyres) + yyres[yyn] = *yyp; + yyn++; + break; + + case '"': + if (yyres) + yyres[yyn] = '\0'; + return yyn; + } + do_not_strip_quotes: ; + } + + if (! yyres) + return yystrlen (yystr); + + return yystpcpy (yyres, yystr) - yyres; +} +# endif + +/* Copy into YYRESULT an error message about the unexpected token + YYCHAR while in state YYSTATE. Return the number of bytes copied, + including the terminating null byte. If YYRESULT is null, do not + copy anything; just return the number of bytes that would be + copied. As a special case, return 0 if an ordinary "syntax error" + message will do. Return YYSIZE_MAXIMUM if overflow occurs during + size calculation. */ +static YYSIZE_T +yysyntax_error (char *yyresult, int yystate, int yychar) +{ + int yyn = yypact[yystate]; + + if (! (YYPACT_NINF < yyn && yyn <= YYLAST)) + return 0; + else + { + int yytype = YYTRANSLATE (yychar); + YYSIZE_T yysize0 = yytnamerr (0, yytname[yytype]); + YYSIZE_T yysize = yysize0; + YYSIZE_T yysize1; + int yysize_overflow = 0; + enum { YYERROR_VERBOSE_ARGS_MAXIMUM = 5 }; + char const *yyarg[YYERROR_VERBOSE_ARGS_MAXIMUM]; + int yyx; + +# if 0 + /* This is so xgettext sees the translatable formats that are + constructed on the fly. 
*/ + YY_("syntax error, unexpected %s"); + YY_("syntax error, unexpected %s, expecting %s"); + YY_("syntax error, unexpected %s, expecting %s or %s"); + YY_("syntax error, unexpected %s, expecting %s or %s or %s"); + YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s"); +# endif + char *yyfmt; + char const *yyf; + static char const yyunexpected[] = "syntax error, unexpected %s"; + static char const yyexpecting[] = ", expecting %s"; + static char const yyor[] = " or %s"; + char yyformat[sizeof yyunexpected + + sizeof yyexpecting - 1 + + ((YYERROR_VERBOSE_ARGS_MAXIMUM - 2) + * (sizeof yyor - 1))]; + char const *yyprefix = yyexpecting; + + /* Start YYX at -YYN if negative to avoid negative indexes in + YYCHECK. */ + int yyxbegin = yyn < 0 ? -yyn : 0; + + /* Stay within bounds of both yycheck and yytname. */ + int yychecklim = YYLAST - yyn + 1; + int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; + int yycount = 1; + + yyarg[0] = yytname[yytype]; + yyfmt = yystpcpy (yyformat, yyunexpected); + + for (yyx = yyxbegin; yyx < yyxend; ++yyx) + if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR) + { + if (yycount == YYERROR_VERBOSE_ARGS_MAXIMUM) + { + yycount = 1; + yysize = yysize0; + yyformat[sizeof yyunexpected - 1] = '\0'; + break; + } + yyarg[yycount++] = yytname[yyx]; + yysize1 = yysize + yytnamerr (0, yytname[yyx]); + yysize_overflow |= (yysize1 < yysize); + yysize = yysize1; + yyfmt = yystpcpy (yyfmt, yyprefix); + yyprefix = yyor; + } + + yyf = YY_(yyformat); + yysize1 = yysize + yystrlen (yyf); + yysize_overflow |= (yysize1 < yysize); + yysize = yysize1; + + if (yysize_overflow) + return YYSIZE_MAXIMUM; + + if (yyresult) + { + /* Avoid sprintf, as that infringes on the user's name space. + Don't have undefined behavior even if the translation + produced a string with the wrong number of "%s"s. 
*/ + char *yyp = yyresult; + int yyi = 0; + while ((*yyp = *yyf) != '\0') + { + if (*yyp == '%' && yyf[1] == 's' && yyi < yycount) + { + yyp += yytnamerr (yyp, yyarg[yyi++]); + yyf += 2; + } + else + { + yyp++; + yyf++; + } + } + } + return yysize; + } +} +#endif /* YYERROR_VERBOSE */ + + +/*-----------------------------------------------. +| Release the memory associated to this symbol. | +`-----------------------------------------------*/ + +/*ARGSUSED*/ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +static void +yydestruct (const char *yymsg, int yytype, YYSTYPE *yyvaluep, void* yyscanner, jsgf_t *jsgf) +#else +static void +yydestruct (yymsg, yytype, yyvaluep, yyscanner, jsgf) + const char *yymsg; + int yytype; + YYSTYPE *yyvaluep; + void* yyscanner; + jsgf_t *jsgf; +#endif +{ + YYUSE (yyvaluep); + YYUSE (yyscanner); + YYUSE (jsgf); + + if (!yymsg) + yymsg = "Deleting"; + YY_SYMBOL_PRINT (yymsg, yytype, yyvaluep, yylocationp); + + switch (yytype) + { + + default: + break; + } +} + +/* Prevent warnings from -Wmissing-prototypes. */ +#ifdef YYPARSE_PARAM +#if defined __STDC__ || defined __cplusplus +int yyparse (void *YYPARSE_PARAM); +#else +int yyparse (); +#endif +#else /* ! YYPARSE_PARAM */ +#if defined __STDC__ || defined __cplusplus +int yyparse (void* yyscanner, jsgf_t *jsgf); +#else +int yyparse (); +#endif +#endif /* ! YYPARSE_PARAM */ + + + + + +/*-------------------------. +| yyparse or yypush_parse. | +`-------------------------*/ + +#ifdef YYPARSE_PARAM +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +int +yyparse (void *YYPARSE_PARAM) +#else +int +yyparse (YYPARSE_PARAM) + void *YYPARSE_PARAM; +#endif +#else /* ! 
YYPARSE_PARAM */ +#if (defined __STDC__ || defined __C99__FUNC__ \ + || defined __cplusplus || defined _MSC_VER) +int +yyparse (void* yyscanner, jsgf_t *jsgf) +#else +int +yyparse (yyscanner, jsgf) + void* yyscanner; + jsgf_t *jsgf; +#endif +#endif +{ +/* The lookahead symbol. */ +int yychar; + +/* The semantic value of the lookahead symbol. */ +YYSTYPE yylval; + + /* Number of syntax errors so far. */ + int yynerrs; + + int yystate; + /* Number of tokens to shift before error messages enabled. */ + int yyerrstatus; + + /* The stacks and their tools: + `yyss': related to states. + `yyvs': related to semantic values. + + Refer to the stacks thru separate pointers, to allow yyoverflow + to reallocate them elsewhere. */ + + /* The state stack. */ + yytype_int16 yyssa[YYINITDEPTH]; + yytype_int16 *yyss; + yytype_int16 *yyssp; + + /* The semantic value stack. */ + YYSTYPE yyvsa[YYINITDEPTH]; + YYSTYPE *yyvs; + YYSTYPE *yyvsp; + + YYSIZE_T yystacksize; + + int yyn; + int yyresult; + /* Lookahead token as an internal (translated) token number. */ + int yytoken; + /* The variables used to return semantic value and location from the + action routines. */ + YYSTYPE yyval; + +#if YYERROR_VERBOSE + /* Buffer for error messages, and its allocated size. */ + char yymsgbuf[128]; + char *yymsg = yymsgbuf; + YYSIZE_T yymsg_alloc = sizeof yymsgbuf; +#endif + +#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) + + /* The number of symbols on the RHS of the reduced rule. + Keep to zero when no symbol should be popped. */ + int yylen = 0; + + yytoken = 0; + yyss = yyssa; + yyvs = yyvsa; + yystacksize = YYINITDEPTH; + + YYDPRINTF ((stderr, "Starting parse\n")); + + yystate = 0; + yyerrstatus = 0; + yynerrs = 0; + yychar = YYEMPTY; /* Cause a token to be read. */ + + /* Initialize stack pointers. + Waste one element of value and location stack + so that they stay on the same level as the state stack. + The wasted elements are never initialized. 
*/ + yyssp = yyss; + yyvsp = yyvs; + + goto yysetstate; + +/*------------------------------------------------------------. +| yynewstate -- Push a new state, which is found in yystate. | +`------------------------------------------------------------*/ + yynewstate: + /* In all cases, when you get here, the value and location stacks + have just been pushed. So pushing a state here evens the stacks. */ + yyssp++; + + yysetstate: + *yyssp = yystate; + + if (yyss + yystacksize - 1 <= yyssp) + { + /* Get the current used size of the three stacks, in elements. */ + YYSIZE_T yysize = yyssp - yyss + 1; + +#ifdef yyoverflow + { + /* Give user a chance to reallocate the stack. Use copies of + these so that the &'s don't force the real ones into + memory. */ + YYSTYPE *yyvs1 = yyvs; + yytype_int16 *yyss1 = yyss; + + /* Each stack pointer address is followed by the size of the + data in use in that stack, in bytes. This used to be a + conditional around just the two extra args, but that might + be undefined if yyoverflow is a macro. */ + yyoverflow (YY_("memory exhausted"), + &yyss1, yysize * sizeof (*yyssp), + &yyvs1, yysize * sizeof (*yyvsp), + &yystacksize); + + yyss = yyss1; + yyvs = yyvs1; + } +#else /* no yyoverflow */ +# ifndef YYSTACK_RELOCATE + goto yyexhaustedlab; +# else + /* Extend the stack our own way. */ + if (YYMAXDEPTH <= yystacksize) + goto yyexhaustedlab; + yystacksize *= 2; + if (YYMAXDEPTH < yystacksize) + yystacksize = YYMAXDEPTH; + + { + yytype_int16 *yyss1 = yyss; + union yyalloc *yyptr = + (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); + if (! 
yyptr) + goto yyexhaustedlab; + YYSTACK_RELOCATE (yyss_alloc, yyss); + YYSTACK_RELOCATE (yyvs_alloc, yyvs); +# undef YYSTACK_RELOCATE + if (yyss1 != yyssa) + YYSTACK_FREE (yyss1); + } +# endif +#endif /* no yyoverflow */ + + yyssp = yyss + yysize - 1; + yyvsp = yyvs + yysize - 1; + + YYDPRINTF ((stderr, "Stack size increased to %lu\n", + (unsigned long int) yystacksize)); + + if (yyss + yystacksize - 1 <= yyssp) + YYABORT; + } + + YYDPRINTF ((stderr, "Entering state %d\n", yystate)); + + if (yystate == YYFINAL) + YYACCEPT; + + goto yybackup; + +/*-----------. +| yybackup. | +`-----------*/ +yybackup: + + /* Do appropriate processing given the current state. Read a + lookahead token if we need one and don't already have one. */ + + /* First try to decide what to do without reference to lookahead token. */ + yyn = yypact[yystate]; + if (yyn == YYPACT_NINF) + goto yydefault; + + /* Not known => get a lookahead token if don't already have one. */ + + /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */ + if (yychar == YYEMPTY) + { + YYDPRINTF ((stderr, "Reading a token: ")); + yychar = YYLEX; + } + + if (yychar <= YYEOF) + { + yychar = yytoken = YYEOF; + YYDPRINTF ((stderr, "Now at end of input.\n")); + } + else + { + yytoken = YYTRANSLATE (yychar); + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); + } + + /* If the proper action on seeing token YYTOKEN is to reduce or to + detect an error, take that action. */ + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yydefault; + yyn = yytable[yyn]; + if (yyn <= 0) + { + if (yyn == 0 || yyn == YYTABLE_NINF) + goto yyerrlab; + yyn = -yyn; + goto yyreduce; + } + + /* Count tokens shifted since error; after three, turn off error + status. */ + if (yyerrstatus) + yyerrstatus--; + + /* Shift the lookahead token. */ + YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); + + /* Discard the shifted token. 
*/ + yychar = YYEMPTY; + + yystate = yyn; + *++yyvsp = yylval; + + goto yynewstate; + + +/*-----------------------------------------------------------. +| yydefault -- do the default action for the current state. | +`-----------------------------------------------------------*/ +yydefault: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyerrlab; + goto yyreduce; + + +/*-----------------------------. +| yyreduce -- Do a reduction. | +`-----------------------------*/ +yyreduce: + /* yyn is the number of a rule to reduce with. */ + yylen = yyr2[yyn]; + + /* If YYLEN is nonzero, implement the default value of the action: + `$$ = $1'. + + Otherwise, the following line sets YYVAL to garbage. + This behavior is undocumented and Bison + users should not rely upon it. Assigning to YYVAL + unconditionally makes the parser a bit smaller, and it avoids a + GCC warning that YYVAL may be used uninitialized. */ + yyval = yyvsp[1-yylen]; + + + YY_REDUCE_PRINT (yyn); + switch (yyn) + { + case 5: + +/* Line 1455 of yacc.c */ +#line 87 "jsgf_parser.y" + { jsgf->name = (yyvsp[(2) - (2)].name); } + break; + + case 7: + +/* Line 1455 of yacc.c */ +#line 91 "jsgf_parser.y" + { jsgf->version = (yyvsp[(2) - (3)].name); } + break; + + case 8: + +/* Line 1455 of yacc.c */ +#line 92 "jsgf_parser.y" + { jsgf->version = (yyvsp[(2) - (4)].name); jsgf->charset = (yyvsp[(3) - (4)].name); } + break; + + case 9: + +/* Line 1455 of yacc.c */ +#line 93 "jsgf_parser.y" + { jsgf->version = (yyvsp[(2) - (5)].name); jsgf->charset = (yyvsp[(3) - (5)].name); + jsgf->locale = (yyvsp[(4) - (5)].name); } + break; + + case 10: + +/* Line 1455 of yacc.c */ +#line 97 "jsgf_parser.y" + { (yyval.name) = (yyvsp[(2) - (3)].name); } + break; + + case 13: + +/* Line 1455 of yacc.c */ +#line 104 "jsgf_parser.y" + { jsgf_import_rule(jsgf, (yyvsp[(2) - (3)].name)); ckd_free((yyvsp[(2) - (3)].name)); } + break; + + case 16: + +/* Line 1455 of yacc.c */ +#line 111 "jsgf_parser.y" + { jsgf_define_rule(jsgf, (yyvsp[(1) - 
(4)].name), (yyvsp[(3) - (4)].rhs), 0); ckd_free((yyvsp[(1) - (4)].name)); } + break; + + case 17: + +/* Line 1455 of yacc.c */ +#line 112 "jsgf_parser.y" + { jsgf_define_rule(jsgf, (yyvsp[(2) - (5)].name), (yyvsp[(4) - (5)].rhs), 1); ckd_free((yyvsp[(2) - (5)].name)); } + break; + + case 18: + +/* Line 1455 of yacc.c */ +#line 115 "jsgf_parser.y" + { (yyval.rhs) = (yyvsp[(1) - (1)].rhs); (yyval.rhs)->atoms = glist_reverse((yyval.rhs)->atoms); } + break; + + case 19: + +/* Line 1455 of yacc.c */ +#line 116 "jsgf_parser.y" + { (yyval.rhs) = (yyvsp[(3) - (3)].rhs); + (yyval.rhs)->atoms = glist_reverse((yyval.rhs)->atoms); + (yyval.rhs)->alt = (yyvsp[(1) - (3)].rhs); } + break; + + case 20: + +/* Line 1455 of yacc.c */ +#line 121 "jsgf_parser.y" + { (yyval.rhs) = ckd_calloc(1, sizeof(*(yyval.rhs))); + (yyval.rhs)->atoms = glist_add_ptr((yyval.rhs)->atoms, (yyvsp[(1) - (1)].atom)); } + break; + + case 21: + +/* Line 1455 of yacc.c */ +#line 123 "jsgf_parser.y" + { (yyval.rhs) = (yyvsp[(1) - (2)].rhs); + (yyval.rhs)->atoms = glist_add_ptr((yyval.rhs)->atoms, (yyvsp[(2) - (2)].atom)); } + break; + + case 23: + +/* Line 1455 of yacc.c */ +#line 128 "jsgf_parser.y" + { (yyval.atom) = (yyvsp[(1) - (2)].atom); + (yyval.atom)->tags = glist_add_ptr((yyval.atom)->tags, (yyvsp[(2) - (2)].name)); } + break; + + case 25: + +/* Line 1455 of yacc.c */ +#line 133 "jsgf_parser.y" + { (yyval.atom) = (yyvsp[(2) - (2)].atom); (yyval.atom)->weight = (yyvsp[(1) - (2)].weight); } + break; + + case 26: + +/* Line 1455 of yacc.c */ +#line 136 "jsgf_parser.y" + { (yyval.rule) = jsgf_define_rule(jsgf, NULL, (yyvsp[(2) - (3)].rhs), 0); } + break; + + case 27: + +/* Line 1455 of yacc.c */ +#line 139 "jsgf_parser.y" + { (yyval.rule) = jsgf_optional_new(jsgf, (yyvsp[(2) - (3)].rhs)); } + break; + + case 28: + +/* Line 1455 of yacc.c */ +#line 142 "jsgf_parser.y" + { (yyval.atom) = jsgf_atom_new((yyvsp[(1) - (1)].name), 1.0); ckd_free((yyvsp[(1) - (1)].name)); } + break; + + case 29: + +/* Line 1455 
of yacc.c */ +#line 143 "jsgf_parser.y" + { (yyval.atom) = jsgf_atom_new((yyvsp[(1) - (1)].name), 1.0); ckd_free((yyvsp[(1) - (1)].name)); } + break; + + case 30: + +/* Line 1455 of yacc.c */ +#line 144 "jsgf_parser.y" + { (yyval.atom) = jsgf_atom_new((yyvsp[(1) - (1)].rule)->name, 1.0); } + break; + + case 31: + +/* Line 1455 of yacc.c */ +#line 145 "jsgf_parser.y" + { (yyval.atom) = jsgf_atom_new((yyvsp[(1) - (1)].rule)->name, 1.0); } + break; + + case 32: + +/* Line 1455 of yacc.c */ +#line 146 "jsgf_parser.y" + { (yyval.atom) = jsgf_kleene_new(jsgf, (yyvsp[(1) - (2)].atom), 0); } + break; + + case 33: + +/* Line 1455 of yacc.c */ +#line 147 "jsgf_parser.y" + { (yyval.atom) = jsgf_kleene_new(jsgf, (yyvsp[(1) - (2)].atom), 1); } + break; + + + +/* Line 1455 of yacc.c */ +#line 1580 "jsgf_parser.c" + default: break; + } + YY_SYMBOL_PRINT ("-> $$ =", yyr1[yyn], &yyval, &yyloc); + + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + + *++yyvsp = yyval; + + /* Now `shift' the result of the reduction. Determine what state + that goes to, based on the state we popped back to and the rule + number reduced by. */ + + yyn = yyr1[yyn]; + + yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; + if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) + yystate = yytable[yystate]; + else + yystate = yydefgoto[yyn - YYNTOKENS]; + + goto yynewstate; + + +/*------------------------------------. +| yyerrlab -- here on detecting error | +`------------------------------------*/ +yyerrlab: + /* If not already recovering from an error, report this error. */ + if (!yyerrstatus) + { + ++yynerrs; +#if ! YYERROR_VERBOSE + yyerror (yyscanner, jsgf, YY_("syntax error")); +#else + { + YYSIZE_T yysize = yysyntax_error (0, yystate, yychar); + if (yymsg_alloc < yysize && yymsg_alloc < YYSTACK_ALLOC_MAXIMUM) + { + YYSIZE_T yyalloc = 2 * yysize; + if (! 
(yysize <= yyalloc && yyalloc <= YYSTACK_ALLOC_MAXIMUM)) + yyalloc = YYSTACK_ALLOC_MAXIMUM; + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); + yymsg = (char *) YYSTACK_ALLOC (yyalloc); + if (yymsg) + yymsg_alloc = yyalloc; + else + { + yymsg = yymsgbuf; + yymsg_alloc = sizeof yymsgbuf; + } + } + + if (0 < yysize && yysize <= yymsg_alloc) + { + (void) yysyntax_error (yymsg, yystate, yychar); + yyerror (yyscanner, jsgf, yymsg); + } + else + { + yyerror (yyscanner, jsgf, YY_("syntax error")); + if (yysize != 0) + goto yyexhaustedlab; + } + } +#endif + } + + + + if (yyerrstatus == 3) + { + /* If just tried and failed to reuse lookahead token after an + error, discard it. */ + + if (yychar <= YYEOF) + { + /* Return failure if at end of input. */ + if (yychar == YYEOF) + YYABORT; + } + else + { + yydestruct ("Error: discarding", + yytoken, &yylval, yyscanner, jsgf); + yychar = YYEMPTY; + } + } + + /* Else will try to reuse lookahead token after shifting the error + token. */ + goto yyerrlab1; + + +/*---------------------------------------------------. +| yyerrorlab -- error raised explicitly by YYERROR. | +`---------------------------------------------------*/ +yyerrorlab: + + /* Pacify compilers like GCC when the user code never invokes + YYERROR and the label yyerrorlab therefore never appears in user + code. */ + if (/*CONSTCOND*/ 0) + goto yyerrorlab; + + /* Do not reclaim the symbols of the rule which action triggered + this YYERROR. */ + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + yystate = *yyssp; + goto yyerrlab1; + + +/*-------------------------------------------------------------. +| yyerrlab1 -- common code for both syntax error and YYERROR. | +`-------------------------------------------------------------*/ +yyerrlab1: + yyerrstatus = 3; /* Each real token shifted decrements this. 
*/ + + for (;;) + { + yyn = yypact[yystate]; + if (yyn != YYPACT_NINF) + { + yyn += YYTERROR; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) + { + yyn = yytable[yyn]; + if (0 < yyn) + break; + } + } + + /* Pop the current state because it cannot handle the error token. */ + if (yyssp == yyss) + YYABORT; + + + yydestruct ("Error: popping", + yystos[yystate], yyvsp, yyscanner, jsgf); + YYPOPSTACK (1); + yystate = *yyssp; + YY_STACK_PRINT (yyss, yyssp); + } + + *++yyvsp = yylval; + + + /* Shift the error token. */ + YY_SYMBOL_PRINT ("Shifting", yystos[yyn], yyvsp, yylsp); + + yystate = yyn; + goto yynewstate; + + +/*-------------------------------------. +| yyacceptlab -- YYACCEPT comes here. | +`-------------------------------------*/ +yyacceptlab: + yyresult = 0; + goto yyreturn; + +/*-----------------------------------. +| yyabortlab -- YYABORT comes here. | +`-----------------------------------*/ +yyabortlab: + yyresult = 1; + goto yyreturn; + +#if !defined(yyoverflow) || YYERROR_VERBOSE +/*-------------------------------------------------. +| yyexhaustedlab -- memory exhaustion comes here. | +`-------------------------------------------------*/ +yyexhaustedlab: + yyerror (yyscanner, jsgf, YY_("memory exhausted")); + yyresult = 2; + /* Fall through. */ +#endif + +yyreturn: + if (yychar != YYEMPTY) + yydestruct ("Cleanup: discarding lookahead", + yytoken, &yylval, yyscanner, jsgf); + /* Do not reclaim the symbols of the rule which action triggered + this YYABORT or YYACCEPT. */ + YYPOPSTACK (yylen); + YY_STACK_PRINT (yyss, yyssp); + while (yyssp != yyss) + { + yydestruct ("Cleanup: popping", + yystos[*yyssp], yyvsp, yyscanner, jsgf); + YYPOPSTACK (1); + } +#ifndef yyoverflow + if (yyss != yyssa) + YYSTACK_FREE (yyss); +#endif +#if YYERROR_VERBOSE + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); +#endif + /* Make sure YYID is used. 
*/ + return YYID (yyresult); +} + + + +/* Line 1675 of yacc.c */ +#line 150 "jsgf_parser.y" + +/* Parser error callback: logs message S via E_ERROR together with the current line number and token text of scanner LEX. JSGF is unused. */ +void +yyerror(yyscan_t lex, jsgf_t *jsgf, const char *s) +{ + (void)jsgf; + E_ERROR("%s at line %d current token '%s'\n", s, yyget_lineno(lex), yyget_text(lex)); +} + diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_parser.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_parser.h new file mode 100644 index 0000000000000000000000000000000000000000..95f68e329581ad4c0d95740da694c2cfc2f0657e --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_parser.h @@ -0,0 +1,90 @@ + +/* A Bison parser, made by GNU Bison 2.4.1. */ + +/* Skeleton interface for Bison's Yacc-like parsers in C + + Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006 + Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton.
Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + + +/* Tokens. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + /* Put the tokens into the symbol table, so that GDB and other debuggers + know about them. */ + enum yytokentype { + HEADER = 258, + GRAMMAR = 259, + IMPORT = 260, + PUBLIC = 261, + TOKEN = 262, + RULENAME = 263, + TAG = 264, + WEIGHT = 265 + }; +#endif +/* Tokens. */ +#define HEADER 258 +#define GRAMMAR 259 +#define IMPORT 260 +#define PUBLIC 261 +#define TOKEN 262 +#define RULENAME 263 +#define TAG 264 +#define WEIGHT 265 + + + + +#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED +typedef union YYSTYPE +{ + +/* Line 1676 of yacc.c */ +#line 65 "jsgf_parser.y" + + char *name; + float weight; + jsgf_rule_t *rule; + jsgf_rhs_t *rhs; + jsgf_atom_t *atom; + + + +/* Line 1676 of yacc.c */ +#line 82 "jsgf_parser.h" +} YYSTYPE; +# define YYSTYPE_IS_TRIVIAL 1 +# define yystype YYSTYPE /* obsolescent; will be withdrawn */ +# define YYSTYPE_IS_DECLARED 1 +#endif + + + + diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_parser.y b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_parser.y new file mode 100644 index 0000000000000000000000000000000000000000..eedaa76d9b752eb4825d1ff326168043b6c9a0c8 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_parser.y @@ -0,0 +1,156 @@ +/* -*- c-basic-offset:4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +%{ +#define YYERROR_VERBOSE + +#include +#include + +#include +#include +#include + +#include "jsgf_internal.h" +#include "jsgf_parser.h" +#include "jsgf_scanner.h" + +/* Suppress warnings from generated code */ +#if defined _MSC_VER +#pragma warning(disable: 4273) +#endif + +void yyerror(yyscan_t lex, jsgf_t *jsgf, const char *s); + +%} + +%pure-parser +%lex-param { void* yyscanner } +%parse-param { void* yyscanner } +%parse-param { jsgf_t *jsgf } + +%union { + char *name; + float weight; + jsgf_rule_t *rule; + jsgf_rhs_t *rhs; + jsgf_atom_t *atom; +} +/* Each <tag> names the %union member that carries the symbol's semantic value; the four keyword tokens carry none. */ +%token HEADER GRAMMAR IMPORT PUBLIC +%token <name> TOKEN RULENAME TAG +%token <weight> WEIGHT +%type <atom> rule_atom rule_item tagged_rule_item +%type <rhs> rule_expansion alternate_list +%type <name> grammar_header +%type <rule> rule_group rule_optional +%% + +grammar: header + | header rule_list + | header import_header rule_list + ; + +header: jsgf_header grammar_header { jsgf->name = $2; } + ; + +jsgf_header: HEADER ';' + | HEADER TOKEN ';' { jsgf->version = $2; } + | HEADER TOKEN TOKEN ';' { jsgf->version = $2; jsgf->charset = $3; } + | HEADER TOKEN TOKEN TOKEN ';' { jsgf->version = $2; jsgf->charset = $3; + jsgf->locale = $4; } + ; + +grammar_header: GRAMMAR TOKEN ';' { $$ = $2; } + ; + +import_header: import_statement + | import_header import_statement + ; + +import_statement: IMPORT RULENAME ';' { jsgf_import_rule(jsgf, $2); ckd_free($2); } + ; + +rule_list: rule + | rule_list rule + ; + +rule: RULENAME '=' alternate_list ';' { jsgf_define_rule(jsgf, $1, $3, 0); ckd_free($1); } +| PUBLIC RULENAME '=' alternate_list ';' { jsgf_define_rule(jsgf, $2, $4, 1); ckd_free($2); } + ; + +alternate_list: rule_expansion { $$ = $1; $$->atoms = glist_reverse($$->atoms); } + | alternate_list '|' rule_expansion { $$ = $3; + $$->atoms = glist_reverse($$->atoms); + $$->alt = $1; } + ; + +rule_expansion: tagged_rule_item { $$ = ckd_calloc(1, sizeof(*$$)); + $$->atoms =
glist_add_ptr($$->atoms, $1); } + | rule_expansion tagged_rule_item { $$ = $1; + $$->atoms = glist_add_ptr($$->atoms, $2); } + ; + +tagged_rule_item: rule_item + | tagged_rule_item TAG { $$ = $1; + $$->tags = glist_add_ptr($$->tags, $2); } + ; + +rule_item: rule_atom + | WEIGHT rule_atom { $$ = $2; $$->weight = $1; } + ; + +rule_group: '(' alternate_list ')' { $$ = jsgf_define_rule(jsgf, NULL, $2, 0); } + ; + +rule_optional: '[' alternate_list ']' { $$ = jsgf_optional_new(jsgf, $2); } + ; + +rule_atom: TOKEN { $$ = jsgf_atom_new($1, 1.0); ckd_free($1); } + | RULENAME { $$ = jsgf_atom_new($1, 1.0); ckd_free($1); } + | rule_group { $$ = jsgf_atom_new($1->name, 1.0); } + | rule_optional { $$ = jsgf_atom_new($1->name, 1.0); } + | rule_atom '*' { $$ = jsgf_kleene_new(jsgf, $1, 0); } + | rule_atom '+' { $$ = jsgf_kleene_new(jsgf, $1, 1); } + ; + +%% + +void +yyerror(yyscan_t lex, jsgf_t *jsgf, const char *s) +{ + E_ERROR("%s at line %d current token '%s'\n", s, yyget_lineno(lex), yyget_text(lex)); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_scanner.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_scanner.c new file mode 100644 index 0000000000000000000000000000000000000000..5123fa8b2f3aa4dd401d45fbba39b92a661164f8 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_scanner.c @@ -0,0 +1,2221 @@ +#line 2 "jsgf_scanner.c" + +#line 4 "jsgf_scanner.c" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 6 +#define YY_FLEX_SUBMINOR_VERSION 1 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. */ +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +/* end standard C headers.
*/ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ + +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. + */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include <inttypes.h> +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#endif /* ! C99 */ + +#endif /* ! FLEXINT_H */ + +/* TODO: this is always defined, so inline it */ +#define yyconst const + +#if defined(__GNUC__) && __GNUC__ >= 3 +#define yynoreturn __attribute__((__noreturn__)) +#else +#define yynoreturn +#endif + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an unsigned + * integer for use as an array index. If the signed char is negative, + * we want to instead treat it as an 8-bit unsigned char, hence the + * double cast. + */ +#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) + +/* An opaque pointer.
*/ +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void* yyscan_t; +#endif + +/* For convenience, these vars (plus the bison vars far below) + are macros in the reentrant scanner. */ +#define yyin yyg->yyin_r +#define yyout yyg->yyout_r +#define yyextra yyg->yyextra_r +#define yyleng yyg->yyleng_r +#define yytext yyg->yytext_r +#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) +#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) +#define yy_flex_debug yyg->yy_flex_debug_r + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN yyg->yy_start = 1 + 2 * + +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. + */ +#define YY_START ((yyg->yy_start - 1) / 2) +#define YYSTATE YY_START + +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) + +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE yyrestart(yyin ,yyscanner ) + +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#ifndef YY_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k. + * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. + * Ditto for the __ia64__ case accordingly. + */ +#define YY_BUF_SIZE 32768 +#else +#define YY_BUF_SIZE 16384 +#endif /* __ia64__ */ +#endif + +/* The state buf must be large enough to hold one state per character in the main buffer. 
+ */ +#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + + /* Note: We specifically omit the test for yy_rule_can_match_eol because it requires + * access to the local variable yy_act. Since yyless() is a macro, it would break + * existing scanners that call yyless() from OUTSIDE yylex. + * One obvious solution it to make yy_act a global. I tried that, and saw + * a 5% performance hit in a non-yylineno scanner, because yy_act is + * normally declared as a register variable-- so it is not worth it. + */ + #define YY_LESS_LINENO(n) \ + do { \ + int yyl;\ + for ( yyl = n; yyl < yyleng; ++yyl )\ + if ( yytext[yyl] == '\n' )\ + --yylineno;\ + }while(0) + #define YY_LINENO_REWIND_TO(dst) \ + do {\ + const char *p;\ + for ( p = yy_cp-1; p >= (dst); --p)\ + if ( *p == '\n' )\ + --yylineno;\ + }while(0) + +/* Return all but the first "n" matched characters back to the input stream. */ +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + *yy_cp = yyg->yy_hold_char; \ + YY_RESTORE_YY_MORE_OFFSET \ + yyg->yy_c_buf_p = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) + +#define unput(c) yyunput( c, yyg->yytext_ptr , yyscanner ) + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. 
+ */ + int yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; + +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. + * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + * + * Returns the top of the stack, or NULL. + */ +#define YY_CURRENT_BUFFER ( yyg->yy_buffer_stack \ + ? yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] \ + : NULL) + +/* Same as previous macro, but useful when we know that the buffer stack is not + * NULL or when we need an lvalue. For internal use only. 
+ */ +#define YY_CURRENT_BUFFER_LVALUE yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] + +void yyrestart (FILE *input_file ,yyscan_t yyscanner ); +void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); +YY_BUFFER_STATE yy_create_buffer (FILE *file,int size ,yyscan_t yyscanner ); +void yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); +void yy_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); +void yypush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); +void yypop_buffer_state (yyscan_t yyscanner ); + +static void yyensure_buffer_stack (yyscan_t yyscanner ); +static void yy_load_buffer_state (yyscan_t yyscanner ); +static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file ,yyscan_t yyscanner ); + +#define YY_FLUSH_BUFFER yy_flush_buffer(YY_CURRENT_BUFFER ,yyscanner) + +YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner ); +YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); +YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner ); + +void *yyalloc (yy_size_t ,yyscan_t yyscanner ); +void *yyrealloc (void *,yy_size_t ,yyscan_t yyscanner ); +void yyfree (void * ,yyscan_t yyscanner ); + +#define yy_new_buffer yy_create_buffer + +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! YY_CURRENT_BUFFER ){ \ + yyensure_buffer_stack (yyscanner); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ + } + +#define yy_set_bol(at_bol) \ + { \ + if ( ! 
YY_CURRENT_BUFFER ){\ + yyensure_buffer_stack (yyscanner); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ + } + +#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) + +/* Begin user sect3 */ + +#define yywrap(yyscanner) (/*CONSTCOND*/1) +#define YY_SKIP_YYWRAP + +typedef unsigned char YY_CHAR; + +typedef int yy_state_type; + +#define yytext_ptr yytext_r + +static yy_state_type yy_get_previous_state (yyscan_t yyscanner ); +static yy_state_type yy_try_NUL_trans (yy_state_type current_state ,yyscan_t yyscanner); +static int yy_get_next_buffer (yyscan_t yyscanner ); +static void yynoreturn yy_fatal_error (yyconst char* msg ,yyscan_t yyscanner ); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. + */ +#define YY_DO_BEFORE_ACTION \ + yyg->yytext_ptr = yy_bp; \ + yyleng = (int) (yy_cp - yy_bp); \ + yyg->yy_hold_char = *yy_cp; \ + *yy_cp = '\0'; \ + yyg->yy_c_buf_p = yy_cp; + +#define YY_NUM_RULES 22 +#define YY_END_OF_BUFFER 23 +/* This struct is not used in this scanner, + but its presence is necessary. 
*/ +struct yy_trans_info + { + flex_int32_t yy_verify; + flex_int32_t yy_nxt; + }; +static yyconst flex_int16_t yy_accept[98] = + { 0, + 0, 0, 0, 0, 0, 0, 0, 0, 23, 22, + 1, 22, 22, 22, 22, 22, 22, 22, 5, 1, + 5, 17, 1, 17, 21, 21, 18, 21, 21, 9, + 1, 9, 0, 3, 0, 0, 0, 0, 0, 0, + 4, 17, 17, 0, 17, 17, 7, 0, 20, 0, + 0, 0, 0, 0, 16, 8, 0, 0, 2, 14, + 0, 0, 0, 0, 19, 0, 17, 0, 17, 17, + 0, 0, 6, 20, 0, 15, 0, 0, 16, 0, + 0, 0, 0, 0, 19, 0, 0, 0, 10, 0, + 0, 0, 0, 12, 13, 11, 0 + + } ; + +static yyconst YY_CHAR yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 1, 4, 5, 1, 1, 1, 1, 6, + 6, 7, 6, 1, 8, 9, 10, 11, 11, 11, + 11, 11, 11, 11, 11, 11, 11, 1, 12, 13, + 6, 14, 1, 1, 1, 1, 1, 1, 1, 15, + 16, 1, 1, 17, 1, 1, 1, 1, 1, 1, + 1, 1, 18, 1, 1, 1, 1, 1, 1, 1, + 6, 19, 6, 1, 1, 1, 20, 21, 22, 1, + + 23, 1, 24, 1, 25, 1, 1, 26, 27, 1, + 28, 29, 1, 30, 1, 31, 32, 1, 1, 1, + 1, 1, 33, 6, 34, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 35, 1, 1, 1, + 36, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 37, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +static yyconst YY_CHAR yy_meta[38] = + { 0, + 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, + 1, 2, 3, 3, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 2, 2, 1, 1, 1 + } ; + +static yyconst flex_uint16_t yy_base[113] = + { 0, + 0, 36, 4, 12, 72, 105, 14, 20, 135, 312, + 312, 117, 2, 0, 103, 105, 99, 95, 312, 312, + 119, 0, 312, 138, 312, 21, 312, 0, 1, 312, + 312, 118, 109, 312, 123, 111, 104, 94, 101, 85, + 312, 0, 171, 14, 0, 204, 312, 109, 113, 41, + 106, 96, 21, 23, 312, 312, 88, 98, 312, 312, + 73, 71, 70, 89, 312, 44, 0, 39, 0, 237, + 43, 90, 312, 312, 57, 312, 37, 69, 43, 77, + 64, 57, 58, 
64, 76, 94, 79, 59, 312, 39, + 14, 14, 4, 312, 312, 312, 312, 271, 274, 277, + + 280, 283, 0, 285, 288, 290, 293, 296, 299, 302, + 305, 308 + } ; + +static yyconst flex_int16_t yy_def[113] = + { 0, + 98, 98, 99, 99, 100, 100, 101, 101, 97, 97, + 97, 97, 97, 102, 97, 97, 97, 97, 97, 97, + 97, 103, 97, 104, 97, 97, 97, 105, 106, 97, + 97, 97, 97, 97, 107, 102, 97, 97, 97, 97, + 97, 103, 104, 108, 103, 109, 97, 97, 110, 97, + 97, 105, 106, 111, 97, 97, 97, 107, 97, 97, + 97, 97, 97, 97, 97, 112, 43, 108, 43, 109, + 97, 110, 97, 97, 97, 97, 106, 111, 106, 97, + 97, 97, 97, 97, 108, 112, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 0, 97, 97, 97, + + 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97 + } ; + +static yyconst flex_uint16_t yy_nxt[350] = + { 0, + 42, 11, 11, 97, 12, 20, 11, 97, 34, 13, + 21, 35, 14, 20, 11, 31, 11, 65, 21, 54, + 32, 31, 11, 15, 16, 53, 32, 47, 17, 48, + 49, 50, 66, 96, 55, 95, 18, 11, 11, 54, + 12, 78, 65, 51, 94, 13, 44, 85, 14, 48, + 74, 50, 74, 87, 55, 54, 79, 66, 93, 15, + 16, 54, 86, 51, 17, 51, 74, 88, 74, 88, + 55, 53, 18, 23, 11, 24, 55, 25, 25, 65, + 33, 26, 92, 27, 28, 25, 91, 78, 74, 87, + 90, 89, 73, 84, 66, 83, 44, 85, 82, 81, + + 59, 51, 79, 80, 29, 25, 23, 11, 24, 76, + 25, 25, 86, 75, 26, 73, 27, 28, 25, 71, + 64, 63, 62, 61, 60, 59, 57, 56, 41, 40, + 39, 38, 37, 33, 97, 97, 97, 29, 25, 44, + 44, 45, 97, 44, 44, 97, 97, 44, 97, 44, + 44, 44, 97, 97, 97, 97, 46, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 44, 44, 44, 44, 45, 97, 44, 44, 97, 97, + 44, 97, 44, 44, 44, 97, 97, 97, 97, 46, + 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + + 97, 97, 97, 44, 44, 68, 44, 69, 97, 68, + 68, 97, 97, 68, 97, 68, 68, 68, 97, 97, + 97, 97, 70, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 68, 68, 68, 44, + 69, 97, 68, 68, 97, 97, 68, 97, 68, 68, + 68, 97, 97, 97, 97, 70, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97, 68, + 68, 10, 10, 10, 19, 19, 19, 22, 22, 22, + 30, 30, 30, 36, 36, 43, 43, 43, 52, 52, + 53, 53, 53, 58, 
58, 58, 44, 44, 44, 67, + + 67, 67, 72, 72, 72, 77, 77, 77, 68, 68, + 68, 9, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97 + } ; + +static yyconst flex_int16_t yy_chk[350] = + { 0, + 103, 1, 1, 0, 1, 3, 3, 0, 13, 1, + 3, 13, 1, 4, 4, 7, 7, 44, 4, 29, + 7, 8, 8, 1, 1, 54, 8, 26, 1, 26, + 26, 26, 44, 93, 29, 92, 1, 2, 2, 53, + 2, 54, 68, 26, 91, 2, 66, 66, 2, 50, + 50, 50, 71, 71, 53, 77, 54, 68, 90, 2, + 2, 79, 66, 50, 2, 71, 75, 75, 88, 88, + 77, 78, 2, 5, 5, 5, 79, 5, 5, 85, + 84, 5, 83, 5, 5, 5, 82, 78, 87, 87, + 81, 80, 72, 64, 85, 63, 86, 86, 62, 61, + + 58, 87, 78, 57, 5, 5, 6, 6, 6, 52, + 6, 6, 86, 51, 6, 49, 6, 6, 6, 48, + 40, 39, 38, 37, 36, 35, 33, 32, 21, 18, + 17, 16, 15, 12, 9, 0, 0, 6, 6, 24, + 24, 24, 0, 24, 24, 0, 0, 24, 0, 24, + 24, 24, 0, 0, 0, 0, 24, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 24, 24, 43, 43, 43, 0, 43, 43, 0, 0, + 43, 0, 43, 43, 43, 0, 0, 0, 0, 43, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + + 0, 0, 0, 43, 43, 46, 46, 46, 0, 46, + 46, 0, 0, 46, 0, 46, 46, 46, 0, 0, + 0, 0, 46, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 46, 46, 70, 70, + 70, 0, 70, 70, 0, 0, 70, 0, 70, 70, + 70, 0, 0, 0, 0, 70, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 70, + 70, 98, 98, 98, 99, 99, 99, 100, 100, 100, + 101, 101, 101, 102, 102, 104, 104, 104, 105, 105, + 106, 106, 106, 107, 107, 107, 108, 108, 108, 109, + + 109, 109, 110, 110, 110, 111, 111, 111, 112, 112, + 112, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97, 97, + 97, 97, 97, 97, 97, 97, 97, 97, 97 + } ; + +/* Table of booleans, true if rule could match eol. */ +static yyconst flex_int32_t yy_rule_can_match_eol[23] = + { 0, +1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, + 0, 0, 0, }; + +/* The intent behind this definition is that it'll catch + * any uses of REJECT which flex missed. 
+ */ +#define REJECT reject_used_but_not_detected +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +#line 1 "_jsgf_scanner.l" +/* -*- mode: text -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +/* YOU MUST USE FLEX 2.6.1 OR NEWER TO PROCESS THIS FILE!!! */ +#line 39 "_jsgf_scanner.l" + +#include "jsgf_internal.h" +#include "jsgf_parser.h" + +#define YY_NO_UNISTD_H 1 + + + +#line 612 "jsgf_scanner.c" + +#define INITIAL 0 +#define COMMENT 1 +#define DECL 2 +#define DECLCOMMENT 3 + +#ifndef YY_EXTRA_TYPE +#define YY_EXTRA_TYPE void * +#endif + +/* Holds the entire state of the reentrant scanner. */ +struct yyguts_t + { + + /* User-defined. Not touched by flex. */ + YY_EXTRA_TYPE yyextra_r; + + /* The rest are the same as the globals declared in the non-reentrant scanner. */ + FILE *yyin_r, *yyout_r; + size_t yy_buffer_stack_top; /**< index of top of stack. */ + size_t yy_buffer_stack_max; /**< capacity of stack. */ + YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. */ + char yy_hold_char; + int yy_n_chars; + int yyleng_r; + char *yy_c_buf_p; + int yy_init; + int yy_start; + int yy_did_buffer_switch_on_eof; + int yy_start_stack_ptr; + int yy_start_stack_depth; + int *yy_start_stack; + yy_state_type yy_last_accepting_state; + char* yy_last_accepting_cpos; + + int yylineno_r; + int yy_flex_debug_r; + + char *yytext_r; + int yy_more_flag; + int yy_more_len; + + YYSTYPE * yylval_r; + + }; /* end struct yyguts_t */ + +static int yy_init_globals (yyscan_t yyscanner ); + + /* This must go here because YYSTYPE and YYLTYPE are included + * from bison output in section 1.*/ + # define yylval yyg->yylval_r + +int yylex_init (yyscan_t* scanner); + +int yylex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner); + +/* Accessor methods to globals. + These are made visible to non-reentrant scanners for convenience. 
*/ + +int yylex_destroy (yyscan_t yyscanner ); + +int yyget_debug (yyscan_t yyscanner ); + +void yyset_debug (int debug_flag ,yyscan_t yyscanner ); + +YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner ); + +void yyset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner ); + +FILE *yyget_in (yyscan_t yyscanner ); + +void yyset_in (FILE * _in_str ,yyscan_t yyscanner ); + +FILE *yyget_out (yyscan_t yyscanner ); + +void yyset_out (FILE * _out_str ,yyscan_t yyscanner ); + + int yyget_leng (yyscan_t yyscanner ); + +char *yyget_text (yyscan_t yyscanner ); + +int yyget_lineno (yyscan_t yyscanner ); + +void yyset_lineno (int _line_number ,yyscan_t yyscanner ); + +int yyget_column (yyscan_t yyscanner ); + +void yyset_column (int _column_no ,yyscan_t yyscanner ); + +YYSTYPE * yyget_lval (yyscan_t yyscanner ); + +void yyset_lval (YYSTYPE * yylval_param ,yyscan_t yyscanner ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. + */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap (yyscan_t yyscanner ); +#else +extern int yywrap (yyscan_t yyscanner ); +#endif +#endif + +#ifndef YY_NO_UNPUT + +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner); +#endif + +#ifndef YY_NO_INPUT + +#ifdef __cplusplus +static int yyinput (yyscan_t yyscanner ); +#else +static int input (yyscan_t yyscanner ); +#endif + +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k */ +#define YY_READ_BUF_SIZE 16384 +#else +#define YY_READ_BUF_SIZE 8192 +#endif /* __ia64__ */ +#endif + +/* Copy whatever the last rule matched to the standard output. */ +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). 
+ */ +#define ECHO do { if (fwrite( yytext, (size_t) yyleng, 1, yyout )) {} } while (0) +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". + */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ + { \ + int c = '*'; \ + size_t n; \ + for ( n = 0; n < (size_t)max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else \ + { \ + errno=0; \ + while ( (result = (int) fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \ + { \ + if( errno != EINTR) \ + { \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + break; \ + } \ + errno=0; \ + clearerr(yyin); \ + } \ + }\ +\ + +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg , yyscanner) +#endif + +/* end tables serialization structures and prototypes */ + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int yylex \ + (YYSTYPE * yylval_param ,yyscan_t yyscanner); + +#define YY_DECL int yylex \ + (YYSTYPE * yylval_param , yyscan_t yyscanner) +#endif /* !YY_DECL */ + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. 
+ */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. */ +#ifndef YY_BREAK +#define YY_BREAK /*LINTED*/break; +#endif + +#define YY_RULE_SETUP \ + YY_USER_ACTION + +/** The main scanner function which does all the work. + */ +YY_DECL +{ + yy_state_type yy_current_state; + char *yy_cp, *yy_bp; + int yy_act; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + yylval = yylval_param; + + if ( !yyg->yy_init ) + { + yyg->yy_init = 1; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! yyg->yy_start ) + yyg->yy_start = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( ! YY_CURRENT_BUFFER ) { + yyensure_buffer_stack (yyscanner); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); + } + + yy_load_buffer_state(yyscanner ); + } + + { +#line 59 "_jsgf_scanner.l" + + +#line 883 "jsgf_scanner.c" + + while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ + { + yy_cp = yyg->yy_c_buf_p; + + /* Support of yytext. */ + *yy_cp = yyg->yy_hold_char; + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. 
+ */ + yy_bp = yy_cp; + + yy_current_state = yyg->yy_start; +yy_match: + do + { + YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ; + if ( yy_accept[yy_current_state] ) + { + yyg->yy_last_accepting_state = yy_current_state; + yyg->yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 98 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (flex_int16_t) yy_c]; + ++yy_cp; + } + while ( yy_current_state != 97 ); + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + +yy_find_action: + yy_act = yy_accept[yy_current_state]; + + YY_DO_BEFORE_ACTION; + + if ( yy_act != YY_END_OF_BUFFER && yy_rule_can_match_eol[yy_act] ) + { + yy_size_t yyl; + for ( yyl = 0; yyl < (yy_size_t)yyleng; ++yyl ) + if ( yytext[yyl] == '\n' ) + + do{ yylineno++; + yycolumn=0; + }while(0) +; + } + +do_action: /* This label is used only to access EOF actions. 
*/ + + switch ( yy_act ) + { /* beginning of action switch */ + case 0: /* must back up */ + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = yyg->yy_hold_char; + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + goto yy_find_action; + +case 1: +/* rule 1 can match eol */ +YY_RULE_SETUP +#line 61 "_jsgf_scanner.l" +; /* ignore whitespace */ + YY_BREAK +case 2: +/* rule 2 can match eol */ +YY_RULE_SETUP +#line 62 "_jsgf_scanner.l" +; /* single-line comments */ + YY_BREAK +case 3: +YY_RULE_SETUP +#line 63 "_jsgf_scanner.l" +{ BEGIN(COMMENT); } /* C-style comments */ + YY_BREAK +case 4: +YY_RULE_SETUP +#line 64 "_jsgf_scanner.l" +{ BEGIN(INITIAL); } + YY_BREAK +case 5: +YY_RULE_SETUP +#line 65 "_jsgf_scanner.l" +; /* Ignore stuff in comment mode */ + YY_BREAK +case 6: +/* rule 6 can match eol */ +YY_RULE_SETUP +#line 67 "_jsgf_scanner.l" +; /* single-line comments inside decl */ + YY_BREAK +case 7: +YY_RULE_SETUP +#line 68 "_jsgf_scanner.l" +{ BEGIN(DECLCOMMENT); } /* C-style comments inside decl */ + YY_BREAK +case 8: +YY_RULE_SETUP +#line 69 "_jsgf_scanner.l" +{ BEGIN(DECL); } + YY_BREAK +case 9: +YY_RULE_SETUP +#line 70 "_jsgf_scanner.l" +; /* Ignore stuff in comment mode */ + YY_BREAK +case 10: +YY_RULE_SETUP +#line 72 "_jsgf_scanner.l" +{BEGIN(DECL); return HEADER;} + YY_BREAK +case 11: +YY_RULE_SETUP +#line 73 "_jsgf_scanner.l" +{BEGIN(DECL); return GRAMMAR;} + YY_BREAK +case 12: +YY_RULE_SETUP +#line 74 "_jsgf_scanner.l" +{BEGIN(DECL); return IMPORT;} + YY_BREAK +case 13: +YY_RULE_SETUP +#line 75 "_jsgf_scanner.l" +{BEGIN(DECL); return PUBLIC;} + YY_BREAK +case 14: +/* rule 14 can match eol */ +YY_RULE_SETUP +#line 77 "_jsgf_scanner.l" +{ BEGIN(DECL); yylval->name = strdup(yytext); return RULENAME; } + YY_BREAK +case 15: +/* rule 15 can match eol */ +YY_RULE_SETUP +#line 78 "_jsgf_scanner.l" +{ yylval->name = strdup(yytext); return RULENAME; } + YY_BREAK +case 16: +/* rule 16 can match eol */ +YY_RULE_SETUP +#line 
80 "_jsgf_scanner.l" +{ yylval->name = strdup(yytext); return TAG; } + YY_BREAK +case 17: +YY_RULE_SETUP +#line 81 "_jsgf_scanner.l" +{ yylval->name = strdup(yytext); return TOKEN; } + YY_BREAK +case 18: +YY_RULE_SETUP +#line 82 "_jsgf_scanner.l" +{ BEGIN(INITIAL); return yytext[0]; } + YY_BREAK +case 19: +/* rule 19 can match eol */ +YY_RULE_SETUP +#line 83 "_jsgf_scanner.l" +{ yylval->name = strdup(yytext); return TOKEN; } + YY_BREAK +case 20: +YY_RULE_SETUP +#line 84 "_jsgf_scanner.l" +{ yylval->weight = atof_c(yytext+1); return WEIGHT; } + YY_BREAK +case 21: +YY_RULE_SETUP +#line 85 "_jsgf_scanner.l" +return yytext[0]; /* Single-character tokens */ + YY_BREAK +case 22: +YY_RULE_SETUP +#line 87 "_jsgf_scanner.l" +ECHO; + YY_BREAK +#line 1065 "jsgf_scanner.c" +case YY_STATE_EOF(INITIAL): +case YY_STATE_EOF(COMMENT): +case YY_STATE_EOF(DECL): +case YY_STATE_EOF(DECLCOMMENT): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - yyg->yytext_ptr) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = yyg->yy_hold_char; + YY_RESTORE_YY_MORE_OFFSET + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between YY_CURRENT_BUFFER and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. 
+ */ + yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). + */ + if ( yyg->yy_c_buf_p <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) + { /* This was really a NUL. */ + yy_state_type yy_next_state; + + yyg->yy_c_buf_p = yyg->yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( yyscanner ); + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state , yyscanner); + + yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. */ + yy_cp = ++yyg->yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer( yyscanner ) ) + { + case EOB_ACT_END_OF_FILE: + { + yyg->yy_did_buffer_switch_on_eof = 0; + + if ( yywrap(yyscanner ) ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. 
+ */ + yyg->yy_c_buf_p = yyg->yytext_ptr + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! yyg->yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + yyg->yy_c_buf_p = + yyg->yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( yyscanner ); + + yy_cp = yyg->yy_c_buf_p; + yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + yyg->yy_c_buf_p = + &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars]; + + yy_current_state = yy_get_previous_state( yyscanner ); + + yy_cp = yyg->yy_c_buf_p; + yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ + } /* end of user's declarations */ +} /* end of yylex */ + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ +static int yy_get_next_buffer (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; + char *source = yyg->yytext_ptr; + yy_size_t number_to_move, i; + int ret_val; + + if ( yyg->yy_c_buf_p > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( yyg->yy_c_buf_p - yyg->yytext_ptr - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. 
+ */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. */ + number_to_move = (yy_size_t) (yyg->yy_c_buf_p - yyg->yytext_ptr) - 1; + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars = 0; + + else + { + int num_to_read = + YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE; + + int yy_c_buf_p_offset = + (int) (yyg->yy_c_buf_p - b->yy_ch_buf); + + if ( b->yy_is_our_buffer ) + { + int new_size = b->yy_buf_size * 2; + + if ( new_size <= 0 ) + b->yy_buf_size += b->yy_buf_size / 8; + else + b->yy_buf_size *= 2; + + b->yy_ch_buf = (char *) + /* Include room in for 2 EOB chars. */ + yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ,yyscanner ); + } + else + /* Can't grow it, we don't own it. */ + b->yy_ch_buf = NULL; + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); + + yyg->yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - + number_to_move - 1; + + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. 
*/ + YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), + yyg->yy_n_chars, num_to_read ); + + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; + } + + if ( yyg->yy_n_chars == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart(yyin ,yyscanner); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + if ((int) (yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { + /* Extend the array by 50%, plus the number we really need. */ + int new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1); + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc((void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf,new_size ,yyscanner ); + if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); + } + + yyg->yy_n_chars += number_to_move; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] = YY_END_OF_BUFFER_CHAR; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + yyg->yytext_ptr = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; + + return ret_val; +} + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + + static yy_state_type yy_get_previous_state (yyscan_t yyscanner) +{ + yy_state_type yy_current_state; + char *yy_cp; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + yy_current_state = yyg->yy_start; + + for ( yy_cp = yyg->yytext_ptr + YY_MORE_ADJ; yy_cp < yyg->yy_c_buf_p; ++yy_cp ) + { + YY_CHAR yy_c = (*yy_cp ? 
yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); + if ( yy_accept[yy_current_state] ) + { + yyg->yy_last_accepting_state = yy_current_state; + yyg->yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 98 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (flex_int16_t) yy_c]; + } + + return yy_current_state; +} + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner) +{ + int yy_is_jam; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */ + char *yy_cp = yyg->yy_c_buf_p; + + YY_CHAR yy_c = 1; + if ( yy_accept[yy_current_state] ) + { + yyg->yy_last_accepting_state = yy_current_state; + yyg->yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 98 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (flex_int16_t) yy_c]; + yy_is_jam = (yy_current_state == 97); + + (void)yyg; + return yy_is_jam ? 0 : yy_current_state; +} + +#ifndef YY_NO_UNPUT + +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus + static int yyinput (yyscan_t yyscanner) +#else + static int input (yyscan_t yyscanner) +#endif + +{ + int c; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + *yyg->yy_c_buf_p = yyg->yy_hold_char; + + if ( *yyg->yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. 
+ */ + if ( yyg->yy_c_buf_p < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) + /* This was really a NUL. */ + *yyg->yy_c_buf_p = '\0'; + + else + { /* need more input */ + int offset = yyg->yy_c_buf_p - yyg->yytext_ptr; + ++yyg->yy_c_buf_p; + + switch ( yy_get_next_buffer( yyscanner ) ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart(yyin ,yyscanner); + + /*FALLTHROUGH*/ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap(yyscanner ) ) + return 0; + + if ( ! yyg->yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(yyscanner); +#else + return input(yyscanner); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + yyg->yy_c_buf_p = yyg->yytext_ptr + offset; + break; + } + } + } + + c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */ + *yyg->yy_c_buf_p = '\0'; /* preserve yytext */ + yyg->yy_hold_char = *++yyg->yy_c_buf_p; + + if ( c == '\n' ) + + do{ yylineno++; + yycolumn=0; + }while(0) +; + + return c; +} +#endif /* ifndef YY_NO_INPUT */ + +/** Immediately switch to a different input stream. + * @param input_file A readable stream. + * @param yyscanner The scanner object. + * @note This function does not reset the start condition to @c INITIAL . + */ + void yyrestart (FILE * input_file , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if ( ! YY_CURRENT_BUFFER ){ + yyensure_buffer_stack (yyscanner); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer(yyin,YY_BUF_SIZE ,yyscanner); + } + + yy_init_buffer(YY_CURRENT_BUFFER,input_file ,yyscanner); + yy_load_buffer_state(yyscanner ); +} + +/** Switch to a different input buffer. + * @param new_buffer The new input buffer. 
+ * @param yyscanner The scanner object. + */ + void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* TODO. We should be able to replace this entire function body + * with + * yypop_buffer_state(); + * yypush_buffer_state(new_buffer); + */ + yyensure_buffer_stack (yyscanner); + if ( YY_CURRENT_BUFFER == new_buffer ) + return; /* already the current buffer: nothing to switch */ + + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *yyg->yy_c_buf_p = yyg->yy_hold_char; + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p; + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; + } + + YY_CURRENT_BUFFER_LVALUE = new_buffer; + yy_load_buffer_state(yyscanner ); + + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. + */ + yyg->yy_did_buffer_switch_on_eof = 1; +} +/* Copy the current buffer's character count, scan position and input file into the scanner state. */ +static void yy_load_buffer_state (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; + yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; + yyg->yy_hold_char = *yyg->yy_c_buf_p; /* remember the character at the scan position */ +} + +/** Allocate and initialize an input buffer state. + * @param file A readable stream. + * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. + * @param yyscanner The scanner object. + * @return the allocated buffer state; allocation failure is reported through YY_FATAL_ERROR. + */ + YY_BUFFER_STATE yy_create_buffer (FILE * file, int size , yyscan_t yyscanner) +{ + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) ,yyscanner ); + if ( ! 
b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = (yy_size_t)size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2 ,yyscanner ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + yy_init_buffer(b,file ,yyscanner); + + return b; +} + +/** Destroy the buffer. A NULL @a b is ignored. + * @param b a buffer created with yy_create_buffer() + * @param yyscanner The scanner object. + */ + void yy_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if ( ! b ) + return; + + if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */ + YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) /* free the character storage only when the scanner owns it */ + yyfree((void *) b->yy_ch_buf ,yyscanner ); + + yyfree((void *) b ,yyscanner ); +} + +/* Initializes or reinitializes a buffer. + * This function is sometimes called more than once on the same buffer, + * such as during a yyrestart() or at EOF. + */ + static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file , yyscan_t yyscanner) + +{ + int oerrno = errno; /* saved so that errno is left unchanged on return */ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + yy_flush_buffer(b ,yyscanner); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + + /* If b is the current buffer, then yy_init_buffer was _probably_ + * called from yyrestart() or through yy_get_next_buffer. + * In that case, we don't want to reset the lineno or column. + */ + if (b != YY_CURRENT_BUFFER){ + b->yy_bs_lineno = 1; + b->yy_bs_column = 0; + } + + b->yy_is_interactive = 0; + + errno = oerrno; +} + +/** Discard all buffered characters. On the next scan, YY_INPUT will be called. + * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. + * @param yyscanner The scanner object. 
+ */ + void yy_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + if ( ! b ) + return; /* no buffer given: nothing to flush */ + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == YY_CURRENT_BUFFER ) /* flushed the active buffer: reload the scanner state from it */ + yy_load_buffer_state(yyscanner ); +} + +/** Pushes the new state onto the stack. The new state becomes + * the current state. This function will allocate the stack + * if necessary. + * @param new_buffer The new state. A NULL value is ignored. + * @param yyscanner The scanner object. + */ +void yypush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + if (new_buffer == NULL) + return; + + yyensure_buffer_stack(yyscanner); + + /* This block is copied from yy_switch_to_buffer. */ + if ( YY_CURRENT_BUFFER ) + { + /* Flush out information for old buffer. */ + *yyg->yy_c_buf_p = yyg->yy_hold_char; + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p; + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; + } + + /* Only push if top exists. Otherwise, replace top. */ + if (YY_CURRENT_BUFFER) + yyg->yy_buffer_stack_top++; + YY_CURRENT_BUFFER_LVALUE = new_buffer; + + /* copied from yy_switch_to_buffer. */ + yy_load_buffer_state(yyscanner ); + yyg->yy_did_buffer_switch_on_eof = 1; +} + +/** Removes and deletes the top of the stack, if present. + * The next element becomes the new top. + * @param yyscanner The scanner object. 
+ */ +void yypop_buffer_state (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + if (!YY_CURRENT_BUFFER) + return; /* no current buffer: nothing to pop */ + + yy_delete_buffer(YY_CURRENT_BUFFER ,yyscanner); + YY_CURRENT_BUFFER_LVALUE = NULL; + if (yyg->yy_buffer_stack_top > 0) + --yyg->yy_buffer_stack_top; + + if (YY_CURRENT_BUFFER) { + yy_load_buffer_state(yyscanner ); + yyg->yy_did_buffer_switch_on_eof = 1; + } +} + +/* Allocates the stack if it does not exist. + * Guarantees space for at least one push. + */ +static void yyensure_buffer_stack (yyscan_t yyscanner) +{ + int num_to_alloc; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if (!yyg->yy_buffer_stack) { + + /* First allocation is for a single element, since we don't know if this + * scanner will even need a stack. More room is added by the growth + * branch below once the top reaches the last slot. + */ + num_to_alloc = 1; /* one slot, for the current buffer */ + yyg->yy_buffer_stack = (struct yy_buffer_state**)yyalloc + (num_to_alloc * sizeof(struct yy_buffer_state*) + , yyscanner); + if ( ! yyg->yy_buffer_stack ) + YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); + + memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*)); + + yyg->yy_buffer_stack_max = num_to_alloc; + yyg->yy_buffer_stack_top = 0; + return; + } + + if (yyg->yy_buffer_stack_top >= (yyg->yy_buffer_stack_max) - 1){ + + /* Increase the buffer to prepare for a possible push. */ + yy_size_t grow_size = 8 /* arbitrary grow size */; + + num_to_alloc = yyg->yy_buffer_stack_max + grow_size; + yyg->yy_buffer_stack = (struct yy_buffer_state**)yyrealloc + (yyg->yy_buffer_stack, + num_to_alloc * sizeof(struct yy_buffer_state*) + , yyscanner); + if ( ! 
yyg->yy_buffer_stack ) + YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); + + /* zero only the new slots.*/ + memset(yyg->yy_buffer_stack + yyg->yy_buffer_stack_max, 0, grow_size * sizeof(struct yy_buffer_state*)); + yyg->yy_buffer_stack_max = num_to_alloc; + } +} + +/** Setup the input buffer state to scan directly from a user-specified character buffer. + * @param base the character buffer + * @param size the size in bytes of the character buffer, including its two trailing YY_END_OF_BUFFER_CHAR bytes + * @param yyscanner The scanner object. + * @return the newly allocated buffer state object, or NULL if @a base does not end in two YY_END_OF_BUFFER_CHAR bytes. + */ +YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner) +{ + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. */ + return NULL; + + b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) ,yyscanner ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); + + b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = NULL; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + yy_switch_to_buffer(b ,yyscanner ); + + return b; +} + +/** Setup the input buffer state to scan a string. The next call to yylex() will + * scan from a @e copy of @a yystr. + * @param yystr a NUL-terminated string to scan + * @param yyscanner The scanner object. + * @return the newly allocated buffer state object. + * @note If you want to scan bytes that may contain NUL values, then use + * yy_scan_bytes() instead. + */ +YY_BUFFER_STATE yy_scan_string (yyconst char * yystr , yyscan_t yyscanner) +{ + + return yy_scan_bytes(yystr,(int) strlen(yystr) ,yyscanner); +} + +/** Setup the input buffer state to scan the given bytes. 
The next call to yylex() will + * scan from a @e copy of @a yybytes. + * @param yybytes the byte buffer to scan + * @param _yybytes_len the number of bytes in the buffer pointed to by @a yybytes. + * @param yyscanner The scanner object. + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE yy_scan_bytes (yyconst char * yybytes, int _yybytes_len , yyscan_t yyscanner) +{ + YY_BUFFER_STATE b; + char *buf; + yy_size_t n; + yy_size_t i; + + /* Get memory for full buffer, including space for trailing EOB's. */ + n = (yy_size_t) _yybytes_len + 2; + buf = (char *) yyalloc(n ,yyscanner ); + if ( ! buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); + + for ( i = 0; i < (yy_size_t)_yybytes_len; ++i ) + buf[i] = yybytes[i]; + + buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; + + b = yy_scan_buffer(buf,n ,yyscanner); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); + + /* It's okay to grow etc. this buffer, and we should throw it + * away when we're done. + */ + b->yy_is_our_buffer = 1; + + return b; +} + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif +/* Print msg and a newline to stderr, then exit with status YY_EXIT_FAILURE. */ +static void yynoreturn yy_fatal_error (yyconst char* msg , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + (void)yyg; + (void) fprintf( stderr, "%s\n", msg ); + exit( YY_EXIT_FAILURE ); +} + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + yytext[yyleng] = yyg->yy_hold_char; \ + yyg->yy_c_buf_p = yytext + yyless_macro_arg; \ + yyg->yy_hold_char = *yyg->yy_c_buf_p; \ + *yyg->yy_c_buf_p = '\0'; \ + yyleng = yyless_macro_arg; \ + } \ + while ( 0 ) + +/* Accessor methods (get/set functions) to struct members. */ + +/** Get the user-defined data for this scanner. + * @param yyscanner The scanner object. 
+ */ +YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyextra; +} + +/** Get the current line number (0 when there is no current buffer). + * @param yyscanner The scanner object. + */ +int yyget_lineno (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if (! YY_CURRENT_BUFFER) + return 0; + + return yylineno; +} + +/** Get the current column number (0 when there is no current buffer). + * @param yyscanner The scanner object. + */ +int yyget_column (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if (! YY_CURRENT_BUFFER) + return 0; + + return yycolumn; +} + +/** Get the input stream. + * @param yyscanner The scanner object. + */ +FILE *yyget_in (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyin; +} + +/** Get the output stream. + * @param yyscanner The scanner object. + */ +FILE *yyget_out (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyout; +} + +/** Get the length of the current token. + * @param yyscanner The scanner object. + */ +int yyget_leng (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyleng; +} + +/** Get the current token. + * @param yyscanner The scanner object. + */ + +char *yyget_text (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yytext; +} + +/** Set the user-defined data. This data is never touched by the scanner. + * @param user_defined The data to be associated with this scanner. + * @param yyscanner The scanner object. + */ +void yyset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyextra = user_defined ; +} + +/** Set the current line number. + * @param _line_number line number + * @param yyscanner The scanner object. 
+ */ +void yyset_lineno (int _line_number , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* lineno is only valid if an input buffer exists. */ + if (! YY_CURRENT_BUFFER ) + YY_FATAL_ERROR( "yyset_lineno called with no buffer" ); + + yylineno = _line_number; +} + +/** Set the current column. + * @param _column_no column number + * @param yyscanner The scanner object. + */ +void yyset_column (int _column_no , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* column is only valid if an input buffer exists. */ + if (! YY_CURRENT_BUFFER ) + YY_FATAL_ERROR( "yyset_column called with no buffer" ); + + yycolumn = _column_no; +} + +/** Set the input stream. This does not discard the current + * input buffer. + * @param _in_str A readable stream. + * @param yyscanner The scanner object. + * @see yy_switch_to_buffer + */ +void yyset_in (FILE * _in_str , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyin = _in_str ; +} +/* Set the output stream to _out_str. */ +void yyset_out (FILE * _out_str , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyout = _out_str ; +} +/* Get the scanner's debug flag (yy_flex_debug). */ +int yyget_debug (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yy_flex_debug; +} +/* Set the scanner's debug flag (yy_flex_debug) to _bdebug. */ +void yyset_debug (int _bdebug , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yy_flex_debug = _bdebug ; +} + +/* Accessor methods for yylval (no yylloc accessors are defined in this section) */ +/* Return the scanner's yylval pointer. */ +YYSTYPE * yyget_lval (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yylval; +} +/* Set the scanner's yylval pointer to yylval_param. */ +void yyset_lval (YYSTYPE * yylval_param , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yylval = yylval_param; +} + +/* User-visible API */ + +/* yylex_init is special because it creates the scanner itself, so it is + * the ONLY reentrant function that doesn't take the scanner as the last argument. 
+ * That's why we explicitly handle the declaration, instead of using our macros. + */ + +int yylex_init(yyscan_t* ptr_yy_globals) + +{ + if (ptr_yy_globals == NULL){ + errno = EINVAL; + return 1; + } + + *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), NULL ); + + if (*ptr_yy_globals == NULL){ + errno = ENOMEM; + return 1; + } + + /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. */ + memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); + + return yy_init_globals ( *ptr_yy_globals ); +} + +/* yylex_init_extra has the same functionality as yylex_init, but follows the + * convention of taking the scanner as the last argument. Note however, that + * this is a *pointer* to a scanner, as it will be allocated by this call (and + * is the reason, too, why this function also must handle its own declaration). + * The user defined value in the first argument will be available to yyalloc in + * the yyextra field. + */ + +int yylex_init_extra(YY_EXTRA_TYPE yy_user_defined,yyscan_t* ptr_yy_globals ) + +{ + struct yyguts_t dummy_yyguts; + + yyset_extra (yy_user_defined, &dummy_yyguts); + + if (ptr_yy_globals == NULL){ + errno = EINVAL; + return 1; + } + + *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), &dummy_yyguts ); + + if (*ptr_yy_globals == NULL){ + errno = ENOMEM; + return 1; + } + + /* By setting to 0xAA, we expose bugs in + yy_init_globals. Leave at 0x00 for releases. */ + memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); + + yyset_extra (yy_user_defined, *ptr_yy_globals); + + return yy_init_globals ( *ptr_yy_globals ); +} + +static int yy_init_globals (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + /* Initialization is the same as for the non-reentrant scanner. + * This function is called from yylex_destroy(), so don't allocate here. 
+ */ + + yyg->yy_buffer_stack = NULL; + yyg->yy_buffer_stack_top = 0; + yyg->yy_buffer_stack_max = 0; + yyg->yy_c_buf_p = NULL; + yyg->yy_init = 0; + yyg->yy_start = 0; + + yyg->yy_start_stack_ptr = 0; + yyg->yy_start_stack_depth = 0; + yyg->yy_start_stack = NULL; + +/* Defined in main.c */ +#ifdef YY_STDINIT + yyin = stdin; + yyout = stdout; +#else + yyin = NULL; + yyout = NULL; +#endif + + /* For future reference: Set errno on error, since we are called by + * yylex_init() + */ + return 0; +} + +/* yylex_destroy is for both reentrant and non-reentrant scanners. */ +int yylex_destroy (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* Pop the buffer stack, destroying each element. */ + while(YY_CURRENT_BUFFER){ + yy_delete_buffer(YY_CURRENT_BUFFER ,yyscanner ); + YY_CURRENT_BUFFER_LVALUE = NULL; + yypop_buffer_state(yyscanner); + } + + /* Destroy the stack itself. */ + yyfree(yyg->yy_buffer_stack ,yyscanner); + yyg->yy_buffer_stack = NULL; + + /* Destroy the start condition stack. */ + yyfree(yyg->yy_start_stack ,yyscanner ); + yyg->yy_start_stack = NULL; + + /* Reset the globals. This is important in a non-reentrant scanner so the next time + * yylex() is called, initialization will occur. */ + yy_init_globals( yyscanner); + + /* Destroy the main struct (reentrant only). */ + yyfree ( yyscanner , yyscanner ); + yyscanner = NULL; + return 0; +} + +/* + * Internal utility routines. 
+ */ + +#ifndef yytext_ptr +static void yy_flex_strncpy (char* s1, yyconst char * s2, int n , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + (void)yyg; + + int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; +} +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (yyconst char * s , yyscan_t yyscanner) +{ + int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; +} +#endif +/* Default allocator: forwards to malloc(); the scanner argument is not used. */ +void *yyalloc (yy_size_t size , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + (void)yyg; + return malloc(size); +} +/* Default reallocator: forwards to realloc(); the scanner argument is not used. */ +void *yyrealloc (void * ptr, yy_size_t size , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + (void)yyg; + + /* The cast to (char *) in the following accommodates both + * implementations that use char* generic pointers, and those + * that use void* generic pointers. It works with the latter + * because both ANSI C and C++ allow castless assignment from + * any pointer type to void*, and deal with argument conversions + * as though doing an assignment. 
+ */ + return realloc(ptr, size); +} +/* Default deallocator: forwards to free(); the scanner argument is not used. */ +void yyfree (void * ptr , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + (void)yyg; + free( (char *) ptr ); /* see yyrealloc() for (char *) cast */ +} + +#define YYTABLES_NAME "yytables" + +#line 87 "_jsgf_scanner.l" + + + diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_scanner.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_scanner.h new file mode 100644 index 0000000000000000000000000000000000000000..8ccc56e3d7ae52f4a0fbd2279174f6be0906708c --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/jsgf_scanner.h @@ -0,0 +1,342 @@ +#ifndef yyHEADER_H +#define yyHEADER_H 1 +#define yyIN_HEADER 1 + +#line 6 "jsgf_scanner.h" + +#line 8 "jsgf_scanner.h" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 6 +#define YY_FLEX_SUBMINOR_VERSION 1 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. */ +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +/* end standard C headers. */ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ + +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. 
+ */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include <inttypes.h> +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#endif /* ! C99 */ + +#endif /* ! FLEXINT_H */ + +/* TODO: this is always defined, so inline it */ +#define yyconst const + +#if defined(__GNUC__) && __GNUC__ >= 3 +#define yynoreturn __attribute__((__noreturn__)) +#else +#define yynoreturn +#endif + +/* An opaque pointer. */ +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void* yyscan_t; +#endif + +/* For convenience, these vars (plus the bison vars far below) + are macros in the reentrant scanner. */ +#define yyin yyg->yyin_r +#define yyout yyg->yyout_r +#define yyextra yyg->yyextra_r +#define yyleng yyg->yyleng_r +#define yytext yyg->yytext_r +#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) +#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) +#define yy_flex_debug yyg->yy_flex_debug_r + +/* Size of default input buffer. */ +#ifndef YY_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k. 
+ * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. + * Ditto for the __ia64__ case accordingly. + */ +#define YY_BUF_SIZE 32768 +#else +#define YY_BUF_SIZE 16384 +#endif /* __ia64__ */ +#endif + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + int yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ + + /* Whether to try to fill the input buffer when we reach the + * end of it. 
+ */ + int yy_fill_buffer; + + int yy_buffer_status; + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +void yyrestart (FILE *input_file ,yyscan_t yyscanner ); +void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); +YY_BUFFER_STATE yy_create_buffer (FILE *file,int size ,yyscan_t yyscanner ); +void yy_delete_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); +void yy_flush_buffer (YY_BUFFER_STATE b ,yyscan_t yyscanner ); +void yypush_buffer_state (YY_BUFFER_STATE new_buffer ,yyscan_t yyscanner ); +void yypop_buffer_state (yyscan_t yyscanner ); + +YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size ,yyscan_t yyscanner ); +YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str ,yyscan_t yyscanner ); +YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,int len ,yyscan_t yyscanner ); + +void *yyalloc (yy_size_t ,yyscan_t yyscanner ); +void *yyrealloc (void *,yy_size_t ,yyscan_t yyscanner ); +void yyfree (void * ,yyscan_t yyscanner ); + +/* Begin user sect3 */ + +#define yywrap(yyscanner) (/*CONSTCOND*/1) +#define YY_SKIP_YYWRAP + +#define yytext_ptr yytext_r + +#ifdef YY_HEADER_EXPORT_START_CONDITIONS +#define INITIAL 0 +#define COMMENT 1 +#define DECL 2 +#define DECLCOMMENT 3 + +#endif + +#ifndef YY_EXTRA_TYPE +#define YY_EXTRA_TYPE void * +#endif + +int yylex_init (yyscan_t* scanner); + +int yylex_init_extra (YY_EXTRA_TYPE user_defined,yyscan_t* scanner); + +/* Accessor methods to globals. + These are made visible to non-reentrant scanners for convenience. 
*/ + +int yylex_destroy (yyscan_t yyscanner ); + +int yyget_debug (yyscan_t yyscanner ); + +void yyset_debug (int debug_flag ,yyscan_t yyscanner ); + +YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner ); + +void yyset_extra (YY_EXTRA_TYPE user_defined ,yyscan_t yyscanner ); + +FILE *yyget_in (yyscan_t yyscanner ); + +void yyset_in (FILE * _in_str ,yyscan_t yyscanner ); + +FILE *yyget_out (yyscan_t yyscanner ); + +void yyset_out (FILE * _out_str ,yyscan_t yyscanner ); + + int yyget_leng (yyscan_t yyscanner ); + +char *yyget_text (yyscan_t yyscanner ); + +int yyget_lineno (yyscan_t yyscanner ); + +void yyset_lineno (int _line_number ,yyscan_t yyscanner ); + +int yyget_column (yyscan_t yyscanner ); + +void yyset_column (int _column_no ,yyscan_t yyscanner ); + +YYSTYPE * yyget_lval (yyscan_t yyscanner ); + +void yyset_lval (YYSTYPE * yylval_param ,yyscan_t yyscanner ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. + */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap (yyscan_t yyscanner ); +#else +extern int yywrap (yyscan_t yyscanner ); +#endif +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy (char *,yyconst char *,int ,yyscan_t yyscanner); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (yyconst char * ,yyscan_t yyscanner); +#endif + +#ifndef YY_NO_INPUT + +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k */ +#define YY_READ_BUF_SIZE 16384 +#else +#define YY_READ_BUF_SIZE 8192 +#endif /* __ia64__ */ +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. 
+ */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int yylex \ + (YYSTYPE * yylval_param ,yyscan_t yyscanner); + +#define YY_DECL int yylex \ + (YYSTYPE * yylval_param , yyscan_t yyscanner) +#endif /* !YY_DECL */ + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + +#undef YY_NEW_FILE +#undef YY_FLUSH_BUFFER +#undef yy_set_bol +#undef yy_new_buffer +#undef yy_set_interactive +#undef YY_DO_BEFORE_ACTION + +#ifdef YY_DECL_IS_OURS +#undef YY_DECL_IS_OURS +#undef YY_DECL +#endif + +#line 87 "_jsgf_scanner.l" + + +#line 341 "jsgf_scanner.h" +#undef yyIN_HEADER +#endif /* yyHEADER_H */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/lm_trie.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/lm_trie.c new file mode 100644 index 0000000000000000000000000000000000000000..f91dc898d4ee15c7d64fea989366ddf259eccf7a --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/lm_trie.c @@ -0,0 +1,914 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2015 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#include +#include +#include + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include + +#include "lm_trie.h" +#include "lm_trie_quant.h" + +static void lm_trie_alloc_ngram(lm_trie_t * trie, uint32 * counts, int order); + +static uint32 +base_size(uint32 entries, uint32 max_vocab, uint8 remaining_bits) +{ + uint8 total_bits = bitarr_required_bits(max_vocab) + remaining_bits; + /* Extra entry for next pointer at the end. + * +7 then / 8 to round up bits and convert to bytes + * +sizeof(uint64) so that ReadInt57 etc don't go segfault. 
+ * Note that this waste is O(order), not O(number of ngrams).*/ + return ((1 + entries) * total_bits + 7) / 8 + sizeof(uint64); +} + +uint32 +middle_size(uint8 quant_bits, uint32 entries, uint32 max_vocab, + uint32 max_ptr) +{ + return base_size(entries, max_vocab, + quant_bits + bitarr_required_bits(max_ptr)); +} + +uint32 +longest_size(uint8 quant_bits, uint32 entries, uint32 max_vocab) +{ + return base_size(entries, max_vocab, quant_bits); +} + +static void +base_init(base_t * base, void *base_mem, uint32 max_vocab, + uint8 remaining_bits) +{ + base->word_bits = bitarr_required_bits(max_vocab); + base->word_mask = (1U << base->word_bits) - 1U; + if (base->word_bits > 25) + E_ERROR + ("Sorry, word indices more than %d are not implemented. Edit util/bit_packing.hh and fix the bit packing functions\n", + (1U << 25)); + base->total_bits = base->word_bits + remaining_bits; + + base->base = (uint8 *) base_mem; + base->insert_index = 0; + base->max_vocab = max_vocab; +} + +void +middle_init(middle_t * middle, void *base_mem, uint8 quant_bits, + uint32 entries, uint32 max_vocab, uint32 max_next, + void *next_source) +{ + middle->quant_bits = quant_bits; + bitarr_mask_from_max(&middle->next_mask, max_next); + middle->next_source = next_source; + if (entries + 1 >= (1U << 25) || (max_next >= (1U << 25))) + E_ERROR + ("Sorry, this does not support more than %d n-grams of a particular order. 
Edit util/bit_packing.hh and fix the bit packing functions\n", + (1U << 25)); + base_init(&middle->base, base_mem, max_vocab, + quant_bits + middle->next_mask.bits); +} + +void +longest_init(longest_t * longest, void *base_mem, uint8 quant_bits, + uint32 max_vocab) +{ + base_init(&longest->base, base_mem, max_vocab, quant_bits); +} + +static bitarr_address_t +middle_insert(middle_t * middle, uint32 word, int order, int max_order) +{ + uint32 at_pointer; + uint32 next; + bitarr_address_t address; + assert(word <= middle->base.word_mask); + address.base = middle->base.base; + address.offset = middle->base.insert_index * middle->base.total_bits; + bitarr_write_int25(address, middle->base.word_bits, word); + address.offset += middle->base.word_bits; + at_pointer = address.offset; + address.offset += middle->quant_bits; + if (order == max_order - 1) { + next = ((longest_t *) middle->next_source)->base.insert_index; + } + else { + next = ((middle_t *) middle->next_source)->base.insert_index; + } + + bitarr_write_int25(address, middle->next_mask.bits, next); + middle->base.insert_index++; + address.offset = at_pointer; + return address; +} + +static bitarr_address_t +longest_insert(longest_t * longest, uint32 index) +{ + bitarr_address_t address; + assert(index <= longest->base.word_mask); + address.base = longest->base.base; + address.offset = longest->base.insert_index * longest->base.total_bits; + bitarr_write_int25(address, longest->base.word_bits, index); + address.offset += longest->base.word_bits; + longest->base.insert_index++; + return address; +} + +static void +middle_finish_loading(middle_t * middle, uint32 next_end) +{ + bitarr_address_t address; + address.base = middle->base.base; + address.offset = + (middle->base.insert_index + 1) * middle->base.total_bits - + middle->next_mask.bits; + bitarr_write_int25(address, middle->next_mask.bits, next_end); +} + +static uint32 +unigram_next(lm_trie_t * trie, int order) +{ + return order == + 2 ? 
trie->longest->base.insert_index : trie->middle_begin->base. + insert_index; +} + +void +lm_trie_fix_counts(ngram_raw_t ** raw_ngrams, uint32 * counts, + uint32 * fixed_counts, int order) +{ + priority_queue_t *ngrams = + priority_queue_create(order - 1, &ngram_ord_comparator); + uint32 raw_ngram_ptrs[NGRAM_MAX_ORDER - 1]; + uint32 words[NGRAM_MAX_ORDER]; + int i; + + memset(words, -1, sizeof(words)); + memcpy(fixed_counts, counts, order * sizeof(*fixed_counts)); + for (i = 2; i <= order; i++) { + ngram_raw_t *tmp_ngram; + + if (counts[i - 1] <= 0) + continue; + + raw_ngram_ptrs[i - 2] = 0; + + tmp_ngram = + (ngram_raw_t *) ckd_calloc(1, sizeof(*tmp_ngram)); + *tmp_ngram = raw_ngrams[i - 2][0]; + tmp_ngram->order = i; + priority_queue_add(ngrams, tmp_ngram); + } + + for (;;) { + int32 to_increment = TRUE; + ngram_raw_t *top; + if (priority_queue_size(ngrams) == 0) { + break; + } + top = (ngram_raw_t *) priority_queue_poll(ngrams); + if (top->order == 2) { + memcpy(words, top->words, 2 * sizeof(*words)); + } + else { + for (i = 0; (uint32)i < top->order - 1; i++) { + if (words[i] != top->words[i]) { + int num; + num = (i == 0) ? 
1 : i; + memcpy(words, top->words, + (num + 1) * sizeof(*words)); + fixed_counts[num]++; + to_increment = FALSE; + break; + } + } + words[top->order - 1] = top->words[top->order - 1]; + } + if (to_increment) { + raw_ngram_ptrs[top->order - 2]++; + } + if (raw_ngram_ptrs[top->order - 2] < counts[top->order - 1]) { + *top = raw_ngrams[top->order - 2][raw_ngram_ptrs[top->order - 2]]; + priority_queue_add(ngrams, top); + } + else { + ckd_free(top); + } + } + + assert(priority_queue_size(ngrams) == 0); + priority_queue_free(ngrams, NULL); +} + + +static void +recursive_insert(lm_trie_t * trie, ngram_raw_t ** raw_ngrams, + uint32 * counts, int order) +{ + uint32 unigram_idx = 0; + uint32 *words; + float *probs; + const uint32 unigram_count = (uint32) counts[0]; + priority_queue_t *ngrams = + priority_queue_create(order, &ngram_ord_comparator); + ngram_raw_t *ngram; + uint32 *raw_ngrams_ptr; + int i; + + words = (uint32 *) ckd_calloc(order, sizeof(*words)); + probs = (float *) ckd_calloc(order - 1, sizeof(*probs)); + ngram = (ngram_raw_t *) ckd_calloc(1, sizeof(*ngram)); + ngram->order = 1; + ngram->words = &unigram_idx; + priority_queue_add(ngrams, ngram); + raw_ngrams_ptr = + (uint32 *) ckd_calloc(order - 1, sizeof(*raw_ngrams_ptr)); + for (i = 2; i <= order; ++i) { + ngram_raw_t *tmp_ngram; + + if (counts[i - 1] <= 0) + continue; + + raw_ngrams_ptr[i - 2] = 0; + tmp_ngram = + (ngram_raw_t *) ckd_calloc(1, sizeof(*tmp_ngram)); + *tmp_ngram = raw_ngrams[i - 2][0]; + tmp_ngram->order = i; + + priority_queue_add(ngrams, tmp_ngram); + } + + for (;;) { + ngram_raw_t *top = + (ngram_raw_t *) priority_queue_poll(ngrams); + + if (top->order == 1) { + trie->unigrams[unigram_idx].next = unigram_next(trie, order); + words[0] = unigram_idx; + probs[0] = trie->unigrams[unigram_idx].prob; + if (++unigram_idx == unigram_count + 1) { + ckd_free(top); + break; + } + priority_queue_add(ngrams, top); + } + else { + for (i = 0; (uint32)i < top->order - 1; i++) { + if (words[i] != 
top->words[i]) { + /* need to insert dummy suffixes to make ngram of higher order reachable */ + int j; + assert(i > 0); /* unigrams are not pruned without removing ngrams that contains them */ + for (j = i; (uint32)j < top->order - 1; j++) { + middle_t *middle = &trie->middle_begin[j - 1]; + bitarr_address_t address = + middle_insert(middle, top->words[j], + j + 1, order); + /* calculate prob for blank */ + float calc_prob = + probs[j - 1] + + trie->unigrams[top->words[j]].bo; + probs[j] = calc_prob; + lm_trie_quant_mwrite(trie->quant, address, j - 1, + calc_prob, 0.0f); + } + } + } + memcpy(words, top->words, + top->order * sizeof(*words)); + if (top->order == (uint32)order) { + bitarr_address_t address = + longest_insert(trie->longest, + top->words[top->order - 1]); + lm_trie_quant_lwrite(trie->quant, address, top->prob); + } + else { + middle_t *middle = &trie->middle_begin[top->order - 2]; + bitarr_address_t address = + middle_insert(middle, + top->words[top->order - 1], + top->order, order); + /* write prob and backoff */ + probs[top->order - 1] = top->prob; + lm_trie_quant_mwrite(trie->quant, address, top->order - 2, + top->prob, top->backoff); + } + raw_ngrams_ptr[top->order - 2]++; + if (raw_ngrams_ptr[top->order - 2] < counts[top->order - 1]) { + *top = raw_ngrams[top->order - + 2][raw_ngrams_ptr[top->order - 2]]; + + priority_queue_add(ngrams, top); + } + else { + ckd_free(top); + } + } + } + assert(priority_queue_size(ngrams) == 0); + priority_queue_free(ngrams, NULL); + ckd_free(raw_ngrams_ptr); + ckd_free(words); + ckd_free(probs); +} + +static lm_trie_t * +lm_trie_init(uint32 unigram_count) +{ + lm_trie_t *trie; + + trie = (lm_trie_t *) ckd_calloc(1, sizeof(*trie)); + memset(trie->hist_cache, -1, sizeof(trie->hist_cache)); /* prepare request history */ + memset(trie->backoff_cache, 0, sizeof(trie->backoff_cache)); + trie->unigrams = + (unigram_t *) ckd_calloc((unigram_count + 1), + sizeof(*trie->unigrams)); + trie->ngram_mem = NULL; + return trie; +} 
+ +lm_trie_t * +lm_trie_create(uint32 unigram_count, int order) +{ + lm_trie_t *trie = lm_trie_init(unigram_count); + trie->quant = + (order > 1) ? lm_trie_quant_create(order) : 0; + return trie; +} + +static size_t +lm_trie_read_ug(lm_trie_t * trie, uint32 * counts, FILE * fp) +{ + size_t rv = fread(trie->unigrams, sizeof(*trie->unigrams), + (counts[0] + 1), fp); + if (SWAP_LM_TRIE) { + int i; + for (i = 0; (uint32)i < counts[0] + 1; ++i) { + SWAP_FLOAT32(&trie->unigrams[i].prob); + SWAP_FLOAT32(&trie->unigrams[i].bo); + SWAP_INT32(&trie->unigrams[i].next); + } + } + return rv; +} + +lm_trie_t * +lm_trie_read_bin(uint32 * counts, int order, FILE * fp) +{ + lm_trie_t *trie = lm_trie_init(counts[0]); + trie->quant = (order > 1) ? lm_trie_quant_read_bin(fp, order) : NULL; + E_INFO("pos after quant: %ld\n", ftell(fp)); + lm_trie_read_ug(trie, counts, fp); + E_INFO("pos after ug: %ld\n", ftell(fp)); + if (order > 1) { + lm_trie_alloc_ngram(trie, counts, order); + fread(trie->ngram_mem, 1, trie->ngram_mem_size, fp); + E_INFO("#ngram_mem: %ld\n", trie->ngram_mem_size); + } + return trie; +} + +static size_t +lm_trie_write_ug(lm_trie_t * trie, uint32 unigram_count, FILE * fp) +{ + if (SWAP_LM_TRIE) { + int i; + for (i = 0; (uint32)i < unigram_count + 1; ++i) { + unigram_t ug = trie->unigrams[i]; + SWAP_FLOAT32(&ug.prob); + SWAP_FLOAT32(&ug.bo); + SWAP_INT32(&ug.next); + if (fwrite(&ug, sizeof(ug), 1, fp) != 1) + return -1; + } + return (size_t)i; + } + else + return fwrite(trie->unigrams, sizeof(*trie->unigrams), + (unigram_count + 1), fp); +} + +void +lm_trie_write_bin(lm_trie_t * trie, uint32 unigram_count, FILE * fp) +{ + + if (trie->quant) + lm_trie_quant_write_bin(trie->quant, fp); + E_INFO("pos after quant: %ld\n", ftell(fp)); + lm_trie_write_ug(trie, unigram_count, fp); + E_INFO("pos after ug: %ld\n", ftell(fp)); + if (trie->ngram_mem) { + fwrite(trie->ngram_mem, 1, trie->ngram_mem_size, fp); + E_INFO("#ngram_mem: %ld\n", trie->ngram_mem_size); + } +} + +void 
+lm_trie_free(lm_trie_t * trie) +{ + if (trie->ngram_mem) { + ckd_free(trie->ngram_mem); + ckd_free(trie->middle_begin); + ckd_free(trie->longest); + } + if (trie->quant) + lm_trie_quant_free(trie->quant); + ckd_free(trie->unigrams); + ckd_free(trie); +} + +static void +lm_trie_alloc_ngram(lm_trie_t * trie, uint32 * counts, int order) +{ + int i; + uint8 *mem_ptr; + uint8 **middle_starts; + + trie->ngram_mem_size = 0; + for (i = 1; i < order - 1; i++) { + trie->ngram_mem_size += + middle_size(lm_trie_quant_msize(trie->quant), counts[i], + counts[0], counts[i + 1]); + } + trie->ngram_mem_size += + longest_size(lm_trie_quant_lsize(trie->quant), counts[order - 1], + counts[0]); + trie->ngram_mem = + (uint8 *) ckd_calloc(trie->ngram_mem_size, + sizeof(*trie->ngram_mem)); + mem_ptr = trie->ngram_mem; + trie->middle_begin = + (middle_t *) ckd_calloc(order - 2, sizeof(*trie->middle_begin)); + trie->middle_end = trie->middle_begin + (order - 2); + middle_starts = + (uint8 **) ckd_calloc(order - 2, sizeof(*middle_starts)); + for (i = 2; i < order; i++) { + middle_starts[i - 2] = mem_ptr; + mem_ptr += + middle_size(lm_trie_quant_msize(trie->quant), counts[i - 1], + counts[0], counts[i]); + } + trie->longest = (longest_t *) ckd_calloc(1, sizeof(*trie->longest)); + /* Crazy backwards thing so we initialize using pointers to ones that have already been initialized */ + for (i = order - 1; i >= 2; --i) { + middle_t *middle_ptr = &trie->middle_begin[i - 2]; + middle_init(middle_ptr, middle_starts[i - 2], + lm_trie_quant_msize(trie->quant), counts[i - 1], + counts[0], counts[i], + (i == + order - + 1) ? 
(void *) trie->longest : (void *) &trie-> + middle_begin[i - 1]); + } + ckd_free(middle_starts); + longest_init(trie->longest, mem_ptr, lm_trie_quant_lsize(trie->quant), + counts[0]); +} + +void +lm_trie_build(lm_trie_t * trie, ngram_raw_t ** raw_ngrams, uint32 * counts, uint32 *out_counts, + int order) +{ + int i; + + lm_trie_fix_counts(raw_ngrams, counts, out_counts, order); + lm_trie_alloc_ngram(trie, out_counts, order); + + if (order > 1) + E_INFO("Training quantizer\n"); + for (i = 2; i < order; i++) { + lm_trie_quant_train(trie->quant, i, counts[i - 1], + raw_ngrams[i - 2]); + } + lm_trie_quant_train_prob(trie->quant, order, counts[order - 1], + raw_ngrams[order - 2]); + + E_INFO("Building LM trie\n"); + recursive_insert(trie, raw_ngrams, counts, order); + /* Set ending offsets so the last entry will be sized properly */ + /* Last entry for unigrams was already set. */ + if (trie->middle_begin != trie->middle_end) { + middle_t *middle_ptr; + for (middle_ptr = trie->middle_begin; + middle_ptr != trie->middle_end - 1; ++middle_ptr) { + middle_t *next_middle_ptr = middle_ptr + 1; + middle_finish_loading(middle_ptr, + next_middle_ptr->base.insert_index); + } + middle_ptr = trie->middle_end - 1; + middle_finish_loading(middle_ptr, + trie->longest->base.insert_index); + } +} + +unigram_t * +unigram_find(unigram_t * u, uint32 word, node_range_t * next) +{ + unigram_t *ptr = &u[word]; + next->begin = ptr->next; + next->end = (ptr + 1)->next; + return ptr; +} + +static size_t +calc_pivot(uint32 off, uint32 range, uint32 width) +{ + return (size_t) ((off * width) / (range + 1)); +} + +static uint8 +uniform_find(void *base, uint8 total_bits, uint8 key_bits, uint32 key_mask, + uint32 before_it, uint32 before_v, + uint32 after_it, uint32 after_v, uint32 key, uint32 * out) +{ + bitarr_address_t address; + address.base = base; + + /* If we look for unigram added later */ + if (key > after_v) + return FALSE; + + while (after_it - before_it > 1) { + uint32 mid; + uint32 pivot 
= + before_it + (1 + + calc_pivot(key - before_v, after_v - before_v, + after_it - before_it - 1)); + /* access by pivot */ + address.offset = pivot * (uint32) total_bits; + mid = bitarr_read_int25(address, key_bits, key_mask); + if (mid < key) { + before_it = pivot; + before_v = mid; + } + else if (mid > key) { + after_it = pivot; + after_v = mid; + } + else { + *out = pivot; + return TRUE; + } + } + return FALSE; +} + +static bitarr_address_t +middle_find(middle_t * middle, uint32 word, node_range_t * range) +{ + uint32 at_pointer; + bitarr_address_t address; + + /* finding BitPacked with uniform find */ + if (!uniform_find + ((void *) middle->base.base, middle->base.total_bits, + middle->base.word_bits, middle->base.word_mask, range->begin - 1, + 0, range->end, middle->base.max_vocab, word, &at_pointer)) { + address.base = NULL; + address.offset = 0; + return address; + } + + address.base = middle->base.base; + at_pointer *= middle->base.total_bits; + at_pointer += middle->base.word_bits; + address.offset = at_pointer + middle->quant_bits; + range->begin = + bitarr_read_int25(address, middle->next_mask.bits, + middle->next_mask.mask); + address.offset += middle->base.total_bits; + range->end = + bitarr_read_int25(address, middle->next_mask.bits, + middle->next_mask.mask); + address.offset = at_pointer; + + return address; +} + +static bitarr_address_t +longest_find(longest_t * longest, uint32 word, node_range_t * range) +{ + uint32 at_pointer; + bitarr_address_t address; + + /* finding BitPacked with uniform find */ + if (!uniform_find + ((void *) longest->base.base, longest->base.total_bits, + longest->base.word_bits, longest->base.word_mask, + range->begin - 1, 0, range->end, longest->base.max_vocab, word, + &at_pointer)) { + address.base = NULL; + address.offset = 0; + return address; + } + address.base = longest->base.base; + address.offset = + at_pointer * longest->base.total_bits + longest->base.word_bits; + return address; +} + +static float 
+get_available_prob(lm_trie_t * trie, int32 wid, int32 * hist, + int max_order, int32 n_hist, int32 * n_used) +{ + float prob; + node_range_t node; + bitarr_address_t address; + int order_minus_2; + uint8 independent_left; + int32 *hist_iter, *hist_end; + + *n_used = 1; + prob = unigram_find(trie->unigrams, wid, &node)->prob; + if (n_hist == 0) { + return prob; + } + + /* find ngrams of higher order if any */ + order_minus_2 = 0; + independent_left = (node.begin == node.end); + hist_iter = hist; + hist_end = hist + n_hist; + for (;; order_minus_2++, hist_iter++) { + if (hist_iter == hist_end) + return prob; + if (independent_left) + return prob; + if (order_minus_2 == max_order - 2) + break; + + address = + middle_find(&trie->middle_begin[order_minus_2], *hist_iter, + &node); + independent_left = (address.base == NULL) + || (node.begin == node.end); + + /* didn't find entry */ + if (address.base == NULL) + return prob; + prob = lm_trie_quant_mpread(trie->quant, address, order_minus_2); + *n_used = order_minus_2 + 2; + } + + address = longest_find(trie->longest, *hist_iter, &node); + if (address.base != NULL) { + prob = lm_trie_quant_lpread(trie->quant, address); + *n_used = max_order; + } + return prob; +} + +static float +get_available_backoff(lm_trie_t * trie, int32 start, int32 * hist, + int32 n_hist) +{ + float backoff = 0.0f; + int order_minus_2; + int32 *hist_iter; + node_range_t node; + unigram_t *first_hist = unigram_find(trie->unigrams, hist[0], &node); + if (start <= 1) { + backoff += first_hist->bo; + start = 2; + } + order_minus_2 = start - 2; + for (hist_iter = hist + start - 1; hist_iter < hist + n_hist; + hist_iter++, order_minus_2++) { + bitarr_address_t address = + middle_find(&trie->middle_begin[order_minus_2], *hist_iter, + &node); + if (address.base == NULL) + break; + backoff += + lm_trie_quant_mboread(trie->quant, address, order_minus_2); + } + return backoff; +} + +static float +lm_trie_nobo_score(lm_trie_t * trie, int32 wid, int32 * hist, + 
int max_order, int32 n_hist, int32 * n_used) +{ + float prob = + get_available_prob(trie, wid, hist, max_order, n_hist, n_used); + if (n_hist < *n_used) + return prob; + return prob + get_available_backoff(trie, *n_used, hist, n_hist); +} + +static float +lm_trie_hist_score(lm_trie_t * trie, int32 wid, int32 * hist, int32 n_hist, + int32 * n_used) +{ + float prob; + int i, j; + node_range_t node; + bitarr_address_t address; + + *n_used = 1; + prob = unigram_find(trie->unigrams, wid, &node)->prob; + if (n_hist == 0) + return prob; + for (i = 0; i < n_hist - 1; i++) { + address = middle_find(&trie->middle_begin[i], hist[i], &node); + if (address.base == NULL) { + for (j = i; j < n_hist; j++) { + prob += trie->backoff_cache[j]; + } + return prob; + } + else { + (*n_used)++; + prob = lm_trie_quant_mpread(trie->quant, address, i); + } + } + address = longest_find(trie->longest, hist[n_hist - 1], &node); + if (address.base == NULL) { + return prob + trie->backoff_cache[n_hist - 1]; + } + else { + (*n_used)++; + return lm_trie_quant_lpread(trie->quant, address); + } +} + +static uint8 +history_matches(int32 * hist, int32 * prev_hist, int32 n_hist) +{ + int i; + for (i = 0; i < n_hist; i++) { + if (hist[i] != prev_hist[i]) { + return FALSE; + } + } + return TRUE; +} + +static void +update_backoff(lm_trie_t * trie, int32 * hist, int32 n_hist) +{ + int i; + node_range_t node; + bitarr_address_t address; + + memset(trie->backoff_cache, 0, sizeof(trie->backoff_cache)); + trie->backoff_cache[0] = unigram_find(trie->unigrams, hist[0], &node)->bo; + for (i = 1; i < n_hist; i++) { + address = middle_find(&trie->middle_begin[i - 1], hist[i], &node); + if (address.base == NULL) { + break; + } + trie->backoff_cache[i] = + lm_trie_quant_mboread(trie->quant, address, i - 1); + } + memcpy(trie->hist_cache, hist, n_hist * sizeof(*hist)); +} + +float +lm_trie_score(lm_trie_t * trie, int order, int32 wid, int32 * hist, + int32 n_hist, int32 * n_used) +{ + if (n_hist < order - 1) { + return 
lm_trie_nobo_score(trie, wid, hist, order, n_hist, n_used); + } + else { + assert(n_hist == order - 1); + if (!history_matches(hist, (int32 *) trie->hist_cache, n_hist)) { + update_backoff(trie, hist, n_hist); + } + return lm_trie_hist_score(trie, wid, hist, n_hist, n_used); + } +} + +void +lm_trie_fill_raw_ngram(lm_trie_t * trie, + ngram_raw_t * raw_ngrams, uint32 * raw_ngram_idx, + uint32 * counts, node_range_t range, uint32 * hist, + int n_hist, int order, int max_order) +{ + if (n_hist > 0 && range.begin == range.end) { + return; + } + if (n_hist == 0) { + uint32 i; + for (i = 0; i < counts[0]; i++) { + node_range_t node; + unigram_find(trie->unigrams, i, &node); + hist[0] = i; + lm_trie_fill_raw_ngram(trie, raw_ngrams, raw_ngram_idx, counts, + node, hist, 1, order, max_order); + } + } + else if (n_hist < order - 1) { + uint32 ptr; + node_range_t node; + bitarr_address_t address; + uint32 new_word; + middle_t *middle = &trie->middle_begin[n_hist - 1]; + for (ptr = range.begin; ptr < range.end; ptr++) { + address.base = middle->base.base; + address.offset = ptr * middle->base.total_bits; + new_word = + bitarr_read_int25(address, middle->base.word_bits, + middle->base.word_mask); + hist[n_hist] = new_word; + address.offset += middle->base.word_bits + middle->quant_bits; + node.begin = + bitarr_read_int25(address, middle->next_mask.bits, + middle->next_mask.mask); + address.offset = + (ptr + 1) * middle->base.total_bits + + middle->base.word_bits + middle->quant_bits; + node.end = + bitarr_read_int25(address, middle->next_mask.bits, + middle->next_mask.mask); + lm_trie_fill_raw_ngram(trie, raw_ngrams, raw_ngram_idx, counts, + node, hist, n_hist + 1, order, max_order); + } + } + else { + bitarr_address_t address; + uint32 ptr; + float prob, backoff; + int i; + assert(n_hist == order - 1); + for (ptr = range.begin; ptr < range.end; ptr++) { + ngram_raw_t *raw_ngram = &raw_ngrams[*raw_ngram_idx]; + if (order == max_order) { + longest_t *longest = trie->longest; + 
address.base = longest->base.base; + address.offset = ptr * longest->base.total_bits; + hist[n_hist] = + bitarr_read_int25(address, longest->base.word_bits, + longest->base.word_mask); + address.offset += longest->base.word_bits; + prob = lm_trie_quant_lpread(trie->quant, address); + } + else { + middle_t *middle = &trie->middle_begin[n_hist - 1]; + address.base = middle->base.base; + address.offset = ptr * middle->base.total_bits; + hist[n_hist] = + bitarr_read_int25(address, middle->base.word_bits, + middle->base.word_mask); + address.offset += middle->base.word_bits; + prob = + lm_trie_quant_mpread(trie->quant, address, n_hist - 1); + backoff = + lm_trie_quant_mboread(trie->quant, address, + n_hist - 1); + raw_ngram->backoff = backoff; + } + raw_ngram->prob = prob; + raw_ngram->words = + (uint32 *) ckd_calloc(order, sizeof(*raw_ngram->words)); + for (i = 0; i <= n_hist; i++) { + raw_ngram->words[i] = hist[n_hist - i]; + } + (*raw_ngram_idx)++; + } + } +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/lm_trie.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/lm_trie.h new file mode 100644 index 0000000000000000000000000000000000000000..d39df6c10f53a0b009f74d8c2c8e7baea54c2df9 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/lm_trie.h @@ -0,0 +1,114 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2015 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ + +#ifndef __LM_TRIE_H__ +#define __LM_TRIE_H__ + +#include +#include + +#include "ngram_model_internal.h" +#include "lm_trie_quant.h" + +typedef struct unigram_s { + float prob; + float bo; + uint32 next; +} unigram_t; + +typedef struct node_range_s { + uint32 begin; + uint32 end; +} node_range_t; + +typedef struct base_s { + uint8 word_bits; + uint8 total_bits; + uint32 word_mask; + uint8 *base; + uint32 insert_index; + uint32 max_vocab; +} base_t; + +typedef struct middle_s { + base_t base; + bitarr_mask_t next_mask; + uint8 quant_bits; + void *next_source; +} middle_t; + +typedef struct longest_s { + base_t base; + uint8 quant_bits; +} longest_t; + +typedef struct lm_trie_s { + uint8 *ngram_mem; /*<< This appears to be a bitarr.h bit array */ + size_t ngram_mem_size; + unigram_t *unigrams; + middle_t *middle_begin; + middle_t *middle_end; + longest_t *longest; + lm_trie_quant_t *quant; + + float backoff_cache[NGRAM_MAX_ORDER]; + uint32 hist_cache[NGRAM_MAX_ORDER - 1]; +} lm_trie_t; + +/** + * Creates lm_trie structure. 
Fills it if binary file with correspondent data is provided + */ +lm_trie_t *lm_trie_create(uint32 unigram_count, int order); + +lm_trie_t *lm_trie_read_bin(uint32 * counts, int order, FILE * fp); + +void lm_trie_write_bin(lm_trie_t * trie, uint32 unigram_count, FILE * fp); + +void lm_trie_free(lm_trie_t * trie); + +void lm_trie_build(lm_trie_t * trie, ngram_raw_t ** raw_ngrams, + uint32 * counts, uint32 *out_counts, int order); + +void lm_trie_fill_raw_ngram(lm_trie_t * trie, + ngram_raw_t * raw_ngrams, uint32 * raw_ngram_idx, + uint32 * counts, node_range_t range, uint32 * hist, + int n_hist, int order, int max_order); + +float lm_trie_score(lm_trie_t * trie, int order, int32 wid, int32 * hist, + int32 n_hist, int32 * n_used); + +#endif /* __LM_TRIE_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/lm_trie_quant.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/lm_trie_quant.c new file mode 100644 index 0000000000000000000000000000000000000000..4c14ff7f533aed5d0a3890d5872e6ad175f24c90 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/lm_trie_quant.c @@ -0,0 +1,353 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2015 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#include + +#include +#include +#include +#include + +#include "ngram_model_internal.h" +#include "lm_trie_quant.h" + +/* FIXME: WTF, no, that's not how this works!!! 
*/ +#define FLOAT_INF (0x7f800000) + +typedef struct bins_s { + float32 *begin; + const float32 *end; +} bins_t; + +struct lm_trie_quant_s { + bins_t tables[NGRAM_MAX_ORDER - 1][2]; + bins_t *longest; + float32 *values; + size_t nvalues; + uint8 prob_bits; + uint8 bo_bits; + uint32 prob_mask; + uint32 bo_mask; +}; + +static void +bins_create(bins_t * bins, uint8 bits, float32 *begin) +{ + bins->begin = begin; + bins->end = bins->begin + (1ULL << bits); +} + +static float32 * +lower_bound(float32 *first, const float32 *last, float32 val) +{ + int count, step; + float32 *it; + + count = last - first; + while (count > 0) { + it = first; + step = count / 2; + it += step; + if (*it < val) { + first = ++it; + count -= step + 1; + } + else { + count = step; + } + } + return first; +} + +static uint64 +bins_encode(bins_t * bins, float32 value) +{ + float32 *above = lower_bound(bins->begin, bins->end, value); + if (above == bins->begin) + return 0; + if (above == bins->end) + return bins->end - bins->begin - 1; + return above - bins->begin - (value - *(above - 1) < *above - value); +} + +static float32 +bins_decode(bins_t * bins, size_t off) +{ + return bins->begin[off]; +} + +static size_t +quant_size(int order) +{ + int prob_bits = 16; + int bo_bits = 16; + size_t longest_table = (1U << prob_bits); + size_t middle_table = (1U << bo_bits) + longest_table; + /* unigrams are currently not quantized so no need for a table. 
*/ + return (order - 2) * middle_table + longest_table; +} +/* Allocate a quantizer for an N-gram model of the given order and carve its values array into prob/backoff tables for the middle orders plus a prob-only table for the longest order. NOTE(review): this indexes tables[order - 2], so order is presumably expected to lie in [2, NGRAM_MAX_ORDER] - confirm against callers. */ +lm_trie_quant_t * +lm_trie_quant_create(int order) +{ + float32 *start; + int i; + lm_trie_quant_t *quant = + (lm_trie_quant_t *) ckd_calloc(1, sizeof(*quant)); + quant->nvalues = quant_size(order); + quant->values = + (float32 *) ckd_calloc(quant->nvalues, sizeof(*quant->values)); + + quant->prob_bits = 16; + quant->bo_bits = 16; + quant->prob_mask = (1U << quant->prob_bits) - 1; + quant->bo_mask = (1U << quant->bo_bits) - 1; + + start = (float32 *) (quant->values); + for (i = 0; i < order - 2; i++) { + bins_create(&quant->tables[i][0], quant->prob_bits, start); + start += (1ULL << quant->prob_bits); + bins_create(&quant->tables[i][1], quant->bo_bits, start); + start += (1ULL << quant->bo_bits); + } + bins_create(&quant->tables[order - 2][0], quant->prob_bits, start); + quant->longest = &quant->tables[order - 2][0]; + return quant; +} + +/* Read a quantizer from fp in the layout produced by lm_trie_quant_write_bin(). Returns NULL if the leading header word or the full value table cannot be read. */ +lm_trie_quant_t * +lm_trie_quant_read_bin(FILE * fp, int order) +{ + int dummy; + lm_trie_quant_t *quant; + + /* The leading word is a placeholder whose value is ignored, but a short read here means the file is truncated. */ + if (fread(&dummy, sizeof(dummy), 1, fp) != 1) { + E_ERROR("Failed to read quantization header\n"); + return NULL; + } + quant = lm_trie_quant_create(order); + if (fread(quant->values, sizeof(*quant->values), + quant->nvalues, fp) != quant->nvalues) { + /* nvalues is a size_t, so it must not be passed to %d. */ + E_ERROR("Failed to read %lu quantization values\n", + (unsigned long) quant->nvalues); + lm_trie_quant_free(quant); + return NULL; + } + if (SWAP_LM_TRIE) { + size_t i; + for (i = 0; i < quant->nvalues; ++i) + SWAP_FLOAT32(&quant->values[i]); + } + + return quant; +} +/* Write the placeholder header word followed by all quantization values to fp, byte-swapping each value when SWAP_LM_TRIE is set. Failures are logged; nothing is returned. */ +void +lm_trie_quant_write_bin(lm_trie_quant_t * quant, FILE * fp) +{ + /* Before it was quantization type */ + int dummy = 1; + if (fwrite(&dummy, sizeof(dummy), 1, fp) != 1) { + E_ERROR("Failed to write quantization header\n"); + return; + } + if (SWAP_LM_TRIE) { + size_t i; + for (i = 0; i < quant->nvalues; ++i) { + float32 value = quant->values[i]; + SWAP_FLOAT32(&value); + if (fwrite(&value, sizeof(value), 1, fp) != 1) { + E_ERROR("Failed to write quantization value\n"); + return; /* WTF, FIXME */ + } + } + } + else { + if (fwrite(quant->values, sizeof(*quant->values), + quant->nvalues, fp) != quant->nvalues) { +
E_ERROR("Failed to write %d quantization values\n", + quant->nvalues); + } + } +} + +void +lm_trie_quant_free(lm_trie_quant_t * quant) +{ + if (quant->values) + ckd_free(quant->values); + ckd_free(quant); +} + +uint8 +lm_trie_quant_msize(lm_trie_quant_t * quant) +{ + (void)quant; + return 32; +} + +uint8 +lm_trie_quant_lsize(lm_trie_quant_t * quant) +{ + (void)quant; + return 16; +} + +static int +weights_comparator(const void *a, const void *b) +{ + return (int) (*(float32 *) a - *(float32 *) b); +} + +static void +make_bins(float32 *values, uint32 values_num, float32 *centers, uint32 bins) +{ + float32 *finish, *start; + uint32 i; + + qsort(values, values_num, sizeof(*values), &weights_comparator); + start = values; + for (i = 0; i < bins; i++, centers++, start = finish) { + finish = values + (size_t) ((uint64) values_num * (i + 1) / bins); + if (finish == start) { + /* zero length bucket. */ + *centers = i ? *(centers - 1) : -FLOAT_INF; + } + else { + float32 sum = 0.0f; + float32 *ptr; + for (ptr = start; ptr != finish; ptr++) { + sum += *ptr; + } + *centers = sum / (float32) (finish - start); + } + } +} + +void +lm_trie_quant_train(lm_trie_quant_t * quant, int order, uint32 counts, + ngram_raw_t * raw_ngrams) +{ + float32 *probs; + float32 *backoffs; + float32 *centers; + uint32 backoff_num; + uint32 prob_num; + ngram_raw_t *raw_ngrams_end; + + probs = (float32 *) ckd_calloc(counts, sizeof(*probs)); + backoffs = (float32 *) ckd_calloc(counts, sizeof(*backoffs)); + raw_ngrams_end = raw_ngrams + counts; + + for (backoff_num = 0, prob_num = 0; raw_ngrams != raw_ngrams_end; + raw_ngrams++) { + probs[prob_num++] = raw_ngrams->prob; + backoffs[backoff_num++] = raw_ngrams->backoff; + } + + make_bins(probs, prob_num, quant->tables[order - 2][0].begin, + 1ULL << quant->prob_bits); + centers = quant->tables[order - 2][1].begin; + make_bins(backoffs, backoff_num, centers, (1ULL << quant->bo_bits)); + ckd_free(probs); + ckd_free(backoffs); +} + +void 
+lm_trie_quant_train_prob(lm_trie_quant_t * quant, int order, uint32 counts, + ngram_raw_t * raw_ngrams) +{ + float32 *probs; + uint32 prob_num; + ngram_raw_t *raw_ngrams_end; + + probs = (float32 *) ckd_calloc(counts, sizeof(*probs)); + raw_ngrams_end = raw_ngrams + counts; + + for (prob_num = 0; raw_ngrams != raw_ngrams_end; raw_ngrams++) { + probs[prob_num++] = raw_ngrams->prob; + } + + make_bins(probs, prob_num, quant->tables[order - 2][0].begin, + 1ULL << quant->prob_bits); + ckd_free(probs); +} +/* Quantize prob and backoff for a middle-order n-gram and write them as one (prob_bits + bo_bits)-bit field: the prob code shifted left by bo_bits, OR-ed with the backoff code. */ +void +lm_trie_quant_mwrite(lm_trie_quant_t * quant, bitarr_address_t address, + int order_minus_2, float32 prob, float32 backoff) +{ + bitarr_write_int57(address, quant->prob_bits + quant->bo_bits, + (uint64) ((bins_encode + (&quant->tables[order_minus_2][0], + prob) << quant-> + bo_bits) | bins_encode(&quant-> + tables + [order_minus_2] + [1], + backoff))); +} +/* Quantize prob for a longest-order n-gram and write its prob_bits-bit code. */ +void +lm_trie_quant_lwrite(lm_trie_quant_t * quant, bitarr_address_t address, + float32 prob) +{ + bitarr_write_int25(address, quant->prob_bits, + (uint32) bins_encode(quant->longest, prob)); +} +/* Read the backoff code at address and return its center from the backoff table of the given order. */ +float32 +lm_trie_quant_mboread(lm_trie_quant_t * quant, bitarr_address_t address, + int order_minus_2) +{ + return bins_decode(&quant->tables[order_minus_2][1], + bitarr_read_int25(address, quant->bo_bits, + quant->bo_mask)); +} +/* Read the prob code located bo_bits past address and return its center from the prob table of the given order. */ +float32 +lm_trie_quant_mpread(lm_trie_quant_t * quant, bitarr_address_t address, + int order_minus_2) +{ + address.offset += quant->bo_bits; + return bins_decode(&quant->tables[order_minus_2][0], + bitarr_read_int25(address, quant->prob_bits, + quant->prob_mask)); +} +/* Read the prob code at address and return its center from the longest-order table. */ +float32 +lm_trie_quant_lpread(lm_trie_quant_t * quant, bitarr_address_t address) +{ + return bins_decode(quant->longest, + bitarr_read_int25(address, quant->prob_bits, + quant->prob_mask)); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/lm_trie_quant.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/lm_trie_quant.h new file mode 100644
index 0000000000000000000000000000000000000000..2ccbb66154da5ed8c2d4597162ced01d604eaa88 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/lm_trie_quant.h @@ -0,0 +1,134 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2015 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +#ifndef __LM_TRIE_QUANT_H__ +#define __LM_TRIE_QUANT_H__ + +#include + +#include "ngrams_raw.h" + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#if defined(DEBUG_ENDIAN) || defined(WORDS_BIGENDIAN) +/* For some reason nobody ever considered the endianness of + this file. I declare it to be canonically little-endian. */ +#define SWAP_LM_TRIE 1 +#else +#define SWAP_LM_TRIE 0 +#endif + + +typedef struct lm_trie_quant_s lm_trie_quant_t; + +/** + * Create qunatizing + */ +lm_trie_quant_t *lm_trie_quant_create(int order); + +/** + * Write quant data to binary file + */ +lm_trie_quant_t *lm_trie_quant_read_bin(FILE * fp, int order); + +/** + * Write quant data to binary file + */ +void lm_trie_quant_write_bin(lm_trie_quant_t * quant, FILE * fp); + +/** + * Free quant + */ +void lm_trie_quant_free(lm_trie_quant_t * quant); + +/** + * Memory required for storing weights of middle-order ngrams. + * Both backoff and probability should be stored + */ +uint8 lm_trie_quant_msize(lm_trie_quant_t * quant); + +/** + * Memory required for storing weights of largest-order ngrams. + * Only probability should be stored + */ +uint8 lm_trie_quant_lsize(lm_trie_quant_t * quant); + +/** + * Trains prob and backoff quantizer for specified ngram order on provided raw ngram list + */ +void lm_trie_quant_train(lm_trie_quant_t * quant, int order, uint32 counts, + ngram_raw_t * raw_ngrams); + +/** + * Trains only prob quantizer for specified ngram order on provided raw ngram list + */ +void lm_trie_quant_train_prob(lm_trie_quant_t * quant, int order, + uint32 counts, ngram_raw_t * raw_ngrams); + +/** + * Writes specified weight for middle-order ngram. Quantize it if needed + */ +void lm_trie_quant_mwrite(lm_trie_quant_t * quant, + bitarr_address_t address, int order_minus_2, + float prob, float backoff); + +/** + * Writes specified weight for largest-order ngram. 
Quantize it if needed + */ +void lm_trie_quant_lwrite(lm_trie_quant_t * quant, + bitarr_address_t address, float prob); + +/** + * Reads and decodes if needed backoff for middle-order ngram + */ +float lm_trie_quant_mboread(lm_trie_quant_t * quant, + bitarr_address_t address, int order_minus_2); + +/** + * Reads and decodes if needed prob for middle-order ngram + */ +float lm_trie_quant_mpread(lm_trie_quant_t * quant, + bitarr_address_t address, int order_minus_2); + +/** + * Reads and decodes if needed prob for largest-order ngram + */ +float lm_trie_quant_lpread(lm_trie_quant_t * quant, + bitarr_address_t address); + +#endif /* __LM_TRIE_QUANT_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model.c new file mode 100644 index 0000000000000000000000000000000000000000..5e4f2853de0fdaf288747bfdc767874c0d1e16ef --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model.c @@ -0,0 +1,1063 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * \file ngram_model.c N-Gram language models. 
+ * + * Author: David Huggins-Daines, much code taken from sphinx3/src/libs3decoder/liblm + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#include "sphinxbase/ngram_model.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/filename.h" +#include "sphinxbase/pio.h" +#include "sphinxbase/err.h" +#include "sphinxbase/logmath.h" +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/case.h" + +#include "ngram_model_internal.h" +#include "ngram_model_trie.h" +/* Guess the language-model file type from the extension of file_name, looking through a trailing .gz or .bz2 to the extension before it. Returns NGRAM_INVALID when no recognized extension is found. */ +ngram_file_type_t +ngram_file_name_to_type(const char *file_name) +{ + const char *ext; + + ext = strrchr(file_name, '.'); + if (ext == NULL) { + return NGRAM_INVALID; + } + if (0 == strcmp_nocase(ext, ".gz")) { + while (--ext >= file_name) { + if (*ext == '.') + break; + } + if (ext < file_name) { + return NGRAM_INVALID; + } + } + else if (0 == strcmp_nocase(ext, ".bz2")) { + while (--ext >= file_name) { + if (*ext == '.') + break; + } + if (ext < file_name) { + return NGRAM_INVALID; + } + } + /* We use strncmp because there might be a .gz on the end.
*/ + if (0 == strncmp_nocase(ext, ".ARPA", 5)) + return NGRAM_ARPA; + if (0 == strncmp_nocase(ext, ".DMP", 4) + || 0 == strncmp_nocase(ext, ".BIN", 4)) + return NGRAM_BIN; + return NGRAM_INVALID; +} +/* Map a type name ("arpa", "dmp" or "bin", compared without regard to case) to a file type, or NGRAM_INVALID for anything else. */ +ngram_file_type_t +ngram_str_to_type(const char *str_name) +{ + if (0 == strcmp_nocase(str_name, "arpa")) + return NGRAM_ARPA; + if (0 == strcmp_nocase(str_name, "dmp") + || 0 == strcmp_nocase(str_name, "bin")) + return NGRAM_BIN; + return NGRAM_INVALID; +} +/* Name of a file type, or NULL when type is neither NGRAM_ARPA nor NGRAM_BIN. */ +char const * +ngram_type_to_str(int type) +{ + switch (type) { + case NGRAM_ARPA: + return "arpa"; + case NGRAM_BIN: + return "dmp/bin"; + default: + return NULL; + } +} + +/* Read a language model from file_name. NGRAM_AUTO tries the binary trie, ARPA and DMP readers in that order; NGRAM_BIN tries the binary trie reader and then the DMP reader. Returns NULL when every reader in those chains fails or the type is unsupported. */ +ngram_model_t * +ngram_model_read(cmd_ln_t * config, + const char *file_name, + ngram_file_type_t file_type, logmath_t * lmath) +{ + ngram_model_t *model = NULL; + switch (file_type) { + case NGRAM_AUTO:{ + if ((model = + ngram_model_trie_read_bin(config, file_name, + lmath)) != NULL) + break; + if ((model = + ngram_model_trie_read_arpa(config, file_name, + lmath)) != NULL) + break; + if ((model = + ngram_model_trie_read_dmp(config, file_name, + lmath)) != NULL) + break; + return NULL; + } + case NGRAM_ARPA: + model = ngram_model_trie_read_arpa(config, file_name, lmath); + break; + case NGRAM_BIN: + if ((model = + ngram_model_trie_read_bin(config, file_name, lmath)) != NULL) + break; + if ((model = + ngram_model_trie_read_dmp(config, file_name, lmath)) != NULL) + break; + return NULL; + default: + E_ERROR("language model file type not supported\n"); + return NULL; + } + + /* Now set weights based on config if present.
*/ + /* The NGRAM_ARPA case reaches this point with model == NULL when reading failed, so only apply weights to a model that was actually loaded. */ + if (model != NULL && config != NULL) { + float32 lw = 1.0; + float32 wip = 1.0; + + if (cmd_ln_exists_r(config, "-lw")) + lw = cmd_ln_float32_r(config, "-lw"); + if (cmd_ln_exists_r(config, "-wip")) + wip = cmd_ln_float32_r(config, "-wip"); + + ngram_model_apply_weights(model, lw, wip); + } + + return model; +} +/* Write model to file_name. NGRAM_AUTO picks the type from the file extension and falls back to ARPA. Returns the selected writer's result, or -1 for an unsupported type. */ +int +ngram_model_write(ngram_model_t * model, const char *file_name, + ngram_file_type_t file_type) +{ + switch (file_type) { + case NGRAM_AUTO:{ + file_type = ngram_file_name_to_type(file_name); + /* Default to ARPA (catches .lm and other things) */ + if (file_type == NGRAM_INVALID) + file_type = NGRAM_ARPA; + return ngram_model_write(model, file_name, file_type); + } + case NGRAM_ARPA: + return ngram_model_trie_write_arpa(model, file_name); + case NGRAM_BIN: + return ngram_model_trie_write_bin(model, file_name); + default: + E_ERROR("language model file type not supported\n"); + return -1; + } + E_ERROR("language model file type not supported\n"); + return -1; +} +/* Initialize or re-initialize the common part of a model: set refcount to 1, allocate n_counts if it is unset, reset the weights when lmath changes, and size word_str and the word-ID hash for n_unigram words. Always returns 0. */ +int32 +ngram_model_init(ngram_model_t * base, + ngram_funcs_t * funcs, + logmath_t * lmath, int32 n, int32 n_unigram) +{ + base->refcount = 1; + base->funcs = funcs; + base->n = n; + /* If this was previously initialized... */ + if (base->n_counts == NULL) + base->n_counts = (uint32 *) ckd_calloc(n, sizeof(*base->n_counts)); + /* Don't reset weights if logmath object hasn't changed. */ + if (base->lmath != lmath) { + /* Set default values for weights. */ + base->lw = 1.0; + base->log_wip = 0; /* i.e. 1.0 */ + base->log_zero = logmath_get_zero(lmath); + base->lmath = lmath; + } + /* Allocate or reallocate space for word strings. */ + if (base->word_str) { + /* Free all previous word strings if they were allocated.
*/ + if (base->writable) { + int32 i; + for (i = 0; i < base->n_words; ++i) { + ckd_free(base->word_str[i]); + base->word_str[i] = NULL; + } + } + base->word_str = + (char **) ckd_realloc(base->word_str, + n_unigram * sizeof(char *)); + } + else { + base->word_str = (char **) ckd_calloc(n_unigram, sizeof(char *)); + } + /* NOTE: They are no longer case-insensitive since we are allowing + * other encodings for word strings. Beware. */ + if (base->wid) + hash_table_empty(base->wid); + else + base->wid = hash_table_new(n_unigram, FALSE); + base->n_counts[0] = base->n_1g_alloc = base->n_words = n_unigram; + + return 0; +} +/* Increment the reference count and return model. */ +ngram_model_t * +ngram_model_retain(ngram_model_t * model) +{ + ++model->refcount; + return model; +} +/* Invoke the implementation's flush hook, if it has one. */ +void +ngram_model_flush(ngram_model_t * model) +{ + if (model->funcs && model->funcs->flush) + (*model->funcs->flush) (model); +} +/* Drop one reference. Returns the remaining count while references remain; otherwise frees the model and returns 0. A NULL model is accepted and returns 0. */ +int +ngram_model_free(ngram_model_t * model) +{ + int i; + + if (model == NULL) + return 0; + if (--model->refcount > 0) + return model->refcount; + if (model->funcs && model->funcs->free) + (*model->funcs->free) (model); + if (model->writable) { + /* Free all words. */ + for (i = 0; i < model->n_words; ++i) { + ckd_free(model->word_str[i]); + } + } + else { + /* Free all class words.
*/ + for (i = 0; i < model->n_classes; ++i) { + ngram_class_t *lmclass; + int32 j; + + lmclass = model->classes[i]; + for (j = 0; j < lmclass->n_words; ++j) { + ckd_free(model->word_str[lmclass->start_wid + j]); + } + for (j = 0; j < lmclass->n_hash; ++j) { + if (lmclass->nword_hash[j].wid != -1) { + /* Hash entries hold class-tagged word IDs, while word_str is indexed by the base ID (as in ngram_word()). */ + ckd_free(model->word_str[NGRAM_BASEWID(lmclass->nword_hash[j].wid)]); + } + } + } + } + for (i = 0; i < model->n_classes; ++i) { + ngram_class_free(model->classes[i]); + } + ckd_free(model->classes); + hash_table_free(model->wid); + ckd_free(model->word_str); + ckd_free(model->n_counts); + ckd_free(model); + return 0; +} +/* Case-fold every word to upper or lower case according to kase, skipping words that start with '<' or '[', then rebuild the word-ID hash and mark the model writable. Always returns 0. */ +int +ngram_model_casefold(ngram_model_t * model, int kase) +{ + int writable, i; + hash_table_t *new_wid; + + /* Were word strings already allocated? */ + writable = model->writable; + /* Either way, we are going to allocate some word strings. */ + model->writable = TRUE; + + /* And, don't forget, we need to rebuild the word to unigram ID + * mapping. */ + new_wid = hash_table_new(model->n_words, FALSE); + for (i = 0; i < model->n_words; ++i) { + char *outstr; + if (writable) { + outstr = model->word_str[i]; + } + else { + outstr = ckd_salloc(model->word_str[i]); + } + /* Don't case-fold <tags> or [classes] */ + if (outstr[0] == '<' || outstr[0] == '[') { + } + else { + switch (kase) { + case NGRAM_UPPER: + ucase(outstr); + break; + case NGRAM_LOWER: + lcase(outstr); + break; + default: + ; + } + } + model->word_str[i] = outstr; + + /* Now update the hash table. We might have terrible + * collisions here, so warn about them. */ + if (hash_table_enter_int32(new_wid, model->word_str[i], i) != i) { + E_WARN("Duplicate word in dictionary after conversion: %s\n", + model->word_str[i]); + } + } + /* Swap out the hash table.
*/ + hash_table_free(model->wid); + model->wid = new_wid; + return 0; +} +/* Delegate to the implementation's apply_weights hook and return its result. */ +int +ngram_model_apply_weights(ngram_model_t * model, float32 lw, float32 wip) +{ + return (*model->funcs->apply_weights) (model, lw, wip); +} +/* Return the language weight; also store log_wip in *out_log_wip when that pointer is non-NULL. */ +float32 +ngram_model_get_weights(ngram_model_t * model, int32 * out_log_wip) +{ + if (out_log_wip) + *out_log_wip = model->log_wip; + return model->lw; +} + +/* Score wid given n_hist history words through the implementation's score hook. Class words are replaced by their class tag (the history array is rewritten in place) and the in-class weight is added to the result. Returns log_zero for NGRAM_INVALID_WID or for a word not found in its class. */ +int32 +ngram_ng_score(ngram_model_t * model, int32 wid, int32 * history, + int32 n_hist, int32 * n_used) +{ + int32 score, class_weight = 0; + int i; + + /* Closed vocabulary, OOV word probability is zero */ + if (wid == NGRAM_INVALID_WID) + return model->log_zero; + + /* "Declassify" wid and history */ + if (NGRAM_IS_CLASSWID(wid)) { + ngram_class_t *lmclass = model->classes[NGRAM_CLASSID(wid)]; + + class_weight = ngram_class_prob(lmclass, wid); + if (class_weight == 1) /* Meaning, not found in class. */ + return model->log_zero; + wid = lmclass->tag_wid; + } + for (i = 0; i < n_hist; ++i) { + if (history[i] != NGRAM_INVALID_WID + && NGRAM_IS_CLASSWID(history[i])) + history[i] = + model->classes[NGRAM_CLASSID(history[i])]->tag_wid; + } + score = (*model->funcs->score) (model, wid, history, n_hist, n_used); + + /* Multiply by unigram in-class weight. */ + return score + class_weight; +} +/* Varargs front end to ngram_ng_score(): word is followed by a NULL-terminated list of history word strings. */ +int32 +ngram_score(ngram_model_t * model, const char *word, ...)
+{ + va_list history; + const char *hword; + int32 *histid; + int32 n_hist; + int32 n_used; + int32 prob; + + va_start(history, word); + n_hist = 0; + while ((hword = va_arg(history, const char *)) != NULL) + ++n_hist; + va_end(history); + + histid = ckd_calloc(n_hist, sizeof(*histid)); + va_start(history, word); + n_hist = 0; + while ((hword = va_arg(history, const char *)) != NULL) { + histid[n_hist] = ngram_wid(model, hword); + ++n_hist; + } + va_end(history); + + prob = ngram_ng_score(model, ngram_wid(model, word), + histid, n_hist, &n_used); + ckd_free(histid); + return prob; +} +/* Trigram score of w3 given the history (w2, w1). */ +int32 +ngram_tg_score(ngram_model_t * model, int32 w3, int32 w2, int32 w1, + int32 * n_used) +{ + int32 hist[2]; + hist[0] = w2; + hist[1] = w1; + return ngram_ng_score(model, w3, hist, 2, n_used); +} +/* Bigram score of w2 given the history w1. */ +int32 +ngram_bg_score(ngram_model_t * model, int32 w2, int32 w1, int32 * n_used) +{ + return ngram_ng_score(model, w2, &w1, 1, n_used); +} +/* Same lookup as ngram_ng_score() but through the implementation's raw_score hook. Class words are replaced by their class tag (the history array is rewritten in place) and the in-class weight is added. Returns log_zero for NGRAM_INVALID_WID or for a word not found in its class. */ +int32 +ngram_ng_prob(ngram_model_t * model, int32 wid, int32 * history, + int32 n_hist, int32 * n_used) +{ + int32 prob, class_weight = 0; + int i; + + /* Closed vocabulary, OOV word probability is zero */ + if (wid == NGRAM_INVALID_WID) + return model->log_zero; + + /* "Declassify" wid and history */ + if (NGRAM_IS_CLASSWID(wid)) { + ngram_class_t *lmclass = model->classes[NGRAM_CLASSID(wid)]; + + class_weight = ngram_class_prob(lmclass, wid); + /* 1 is ngram_class_prob()'s "not found" sentinel, not a log probability; report log_zero as ngram_ng_score() does. */ + if (class_weight == 1) /* Meaning, not found in class. */ + return model->log_zero; + wid = lmclass->tag_wid; + } + for (i = 0; i < n_hist; ++i) { + if (history[i] != NGRAM_INVALID_WID + && NGRAM_IS_CLASSWID(history[i])) + history[i] = + model->classes[NGRAM_CLASSID(history[i])]->tag_wid; + } + prob = (*model->funcs->raw_score) (model, wid, history, + n_hist, n_used); + /* Multiply by unigram in-class weight. */ + return prob + class_weight; +} +/* Varargs front end to ngram_ng_prob(): word is followed by a NULL-terminated list of history word strings. */ +int32 +ngram_probv(ngram_model_t * model, const char *word, ...)
+{ + va_list history; + const char *hword; + int32 *histid; + int32 n_hist; + int32 n_used; + int32 prob; + + va_start(history, word); + n_hist = 0; + while ((hword = va_arg(history, const char *)) != NULL) + ++n_hist; + va_end(history); + + histid = ckd_calloc(n_hist, sizeof(*histid)); + va_start(history, word); + n_hist = 0; + while ((hword = va_arg(history, const char *)) != NULL) { + histid[n_hist] = ngram_wid(model, hword); + ++n_hist; + } + va_end(history); + + prob = ngram_ng_prob(model, ngram_wid(model, word), + histid, n_hist, &n_used); + ckd_free(histid); + return prob; +} +/* Raw probability of words[0] given the n - 1 context words words[1 .. n-1]. Returns log_zero when n < 1. */ +int32 +ngram_prob(ngram_model_t * model, const char* const *words, int32 n) +{ + int32 *ctx_id; + int32 nused; + int32 prob; + int32 wid; + uint32 i; + + /* At least the target word is required: n < 1 would request a negative-sized context array and read words[0]. */ + if (n < 1) + return model->log_zero; + + ctx_id = (int32 *) ckd_calloc(n - 1, sizeof(*ctx_id)); + for (i = 1; i < (uint32) n; ++i) + ctx_id[i - 1] = ngram_wid(model, words[i]); + + wid = ngram_wid(model, *words); + prob = ngram_ng_prob(model, wid, ctx_id, n - 1, &nused); + ckd_free(ctx_id); + + return prob; +} +/* Convert a score back to a probability by subtracting log_wip and dividing by the language weight. */ +int32 +ngram_score_to_prob(ngram_model_t * base, int32 score) +{ + int32 prob; + + /* Undo insertion penalty. */ + prob = score - base->log_wip; + /* Undo language weight. */ + prob = (int32) (prob / base->lw); + + return prob; +} +/* Word ID of the model's unknown-word entry, or NGRAM_INVALID_WID when it has none. */ +int32 +ngram_unknown_wid(ngram_model_t * model) +{ + int32 val; + + /* FIXME: This could be memoized for speed if necessary. */ + /* Look up , if not found return NGRAM_INVALID_WID.
*/ + /* The unknown-word entry is keyed "<UNK>"; the previous empty key looks like a stripped token and would not match it (confirm against the upstream pocketsphinx source). */ + if (hash_table_lookup_int32(model->wid, "<UNK>", &val) == -1) + return NGRAM_INVALID_WID; + else + return val; +} +/* Log value the model uses for zero probability. */ +int32 +ngram_zero(ngram_model_t * model) +{ + return model->log_zero; +} +/* Order N of the model, or 0 for a NULL model. */ +int32 +ngram_model_get_size(ngram_model_t * model) +{ + if (model != NULL) + return model->n; + return 0; +} +/* The model's n_counts array, or NULL for a NULL model. */ +uint32 const * +ngram_model_get_counts(ngram_model_t * model) +{ + if (model != NULL) + return model->n_counts; + return NULL; +} +/* Word ID of word, falling back to ngram_unknown_wid() when the word is not in the model. */ +int32 +ngram_wid(ngram_model_t * model, const char *word) +{ + int32 val; + + if (hash_table_lookup_int32(model->wid, word, &val) == -1) + return ngram_unknown_wid(model); + else + return val; +} +/* String for a word ID (class tag bits are ignored), or NULL when the base ID is not below n_words. */ +const char * +ngram_word(ngram_model_t * model, int32 wid) +{ + /* Remove any class tag */ + wid = NGRAM_BASEWID(wid); + if (wid >= model->n_words) + return NULL; + return model->word_str[wid]; +} + +/** + * Add a word to the word string and ID mapping. + */ +int32 +ngram_add_word_internal(ngram_model_t * model, + const char *word, int32 classid) +{ + + /* Check for hash collisions. */ + int32 wid; + if (hash_table_lookup_int32(model->wid, word, &wid) == 0) { + E_WARN("Omit duplicate word '%s'\n", word); + return wid; + } + + /* Take the next available word ID */ + wid = model->n_words; + if (classid >= 0) { + wid = NGRAM_CLASSWID(wid, classid); + } + + /* Reallocate word_str if necessary. */ + if (model->n_words >= model->n_1g_alloc) { + model->n_1g_alloc += UG_ALLOC_STEP; + model->word_str = ckd_realloc(model->word_str, + sizeof(*model->word_str) * + model->n_1g_alloc); + } + /* Add the word string in the appropriate manner. */ + /* Class words are always dynamically allocated. */ + model->word_str[model->n_words] = ckd_salloc(word); + /* Now enter it into the hash table. */ + if (hash_table_enter_int32 + (model->wid, model->word_str[model->n_words], wid) != wid) { + E_ERROR + ("Hash insertion failed for word %s => %p (should not happen)\n", + model->word_str[model->n_words], (void *) (long) (wid)); + } + /* Increment number of words.
*/ + ++model->n_words; + return wid; +} +/* Add word as a unigram with the given weight. Returns the word ID, or -1 when the model is read-only or the add_ug hook returns 0. */ +int32 +ngram_model_add_word(ngram_model_t * model, + const char *word, float32 weight) +{ + int32 wid, prob = model->log_zero; + + /* Words cannot be added to a read-only model: warn and fail. */ + if (!model->writable) { + E_WARN("Can't add word '%s' to read-only language model. " + "Disable mmap with '-mmap no' to make it writable\n", word); + return -1; + } + + wid = ngram_add_word_internal(model, word, -1); + if (wid == NGRAM_INVALID_WID) + return wid; + + /* Do what needs to be done to add the word to the unigram. */ + if (model->funcs && model->funcs->add_ug) + prob = + (*model->funcs->add_ug) (model, wid, + logmath_log(model->lmath, weight)); + if (prob == 0) + return -1; + + return wid; +} +/* Build a class for tag_wid whose in-class log probabilities come from the classwords weight list; the weights are normalized first when their sum falls outside [0.9, 1.1]. */ +ngram_class_t * +ngram_class_new(ngram_model_t * model, int32 tag_wid, int32 start_wid, + glist_t classwords) +{ + ngram_class_t *lmclass; + gnode_t *gn; + float32 tprob; + int i; + + lmclass = ckd_calloc(1, sizeof(*lmclass)); + lmclass->tag_wid = tag_wid; + /* start_wid is the wid (minus class tag) of the first word in the list.
*/ + lmclass->start_wid = start_wid; + lmclass->n_words = glist_count(classwords); + lmclass->prob1 = ckd_calloc(lmclass->n_words, sizeof(*lmclass->prob1)); + lmclass->nword_hash = NULL; + lmclass->n_hash = 0; + tprob = 0.0; + for (gn = classwords; gn; gn = gnode_next(gn)) { + tprob += gnode_float32(gn); + } + if (tprob > 1.1 || tprob < 0.9) { + E_INFO("Total class probability is %f, will normalize\n", tprob); + for (gn = classwords; gn; gn = gnode_next(gn)) { + gn->data.fl /= tprob; + } + } + for (i = 0, gn = classwords; gn; ++i, gn = gnode_next(gn)) { + lmclass->prob1[i] = logmath_log(model->lmath, gnode_float32(gn)); + } + + return lmclass; +} +/* Insert wid with log weight lweight into the class's chained hash table, allocating the table on first use and doubling it when every bucket is in use. Returns the index of the bucket used. */ +int32 +ngram_class_add_word(ngram_class_t * lmclass, int32 wid, int32 lweight) +{ + int32 hash; + + if (lmclass->nword_hash == NULL) { + /* Initialize everything in it to -1 */ + lmclass->nword_hash = + ckd_malloc(NGRAM_HASH_SIZE * sizeof(*lmclass->nword_hash)); + memset(lmclass->nword_hash, 0xff, + NGRAM_HASH_SIZE * sizeof(*lmclass->nword_hash)); + lmclass->n_hash = NGRAM_HASH_SIZE; + lmclass->n_hash_inuse = 0; + } + /* Stupidest possible hash function. This will work pretty well + * when this function is called repeatedly with contiguous word + * IDs, though... NOTE(review): the mask depends on n_hash, and existing entries are not re-bucketed when the table doubles - confirm that lookups still find them after growth. */ + hash = wid & (lmclass->n_hash - 1); + if (lmclass->nword_hash[hash].wid == -1) { + /* Good, no collision. */ + lmclass->nword_hash[hash].wid = wid; + lmclass->nword_hash[hash].prob1 = lweight; + ++lmclass->n_hash_inuse; + return hash; + } + else { + int32 next; /**< Next available bucket. */ + /* Collision... Find the end of the hash chain. */ + while (lmclass->nword_hash[hash].next != -1) + hash = lmclass->nword_hash[hash].next; + assert(hash != -1); + /* Does we has any more bukkit? */ + if (lmclass->n_hash_inuse == lmclass->n_hash) { + /* Oh noes! Ok, we makes more.
*/ + lmclass->nword_hash = ckd_realloc(lmclass->nword_hash, + lmclass->n_hash * 2 * + sizeof(*lmclass-> + nword_hash)); + memset(lmclass->nword_hash + lmclass->n_hash, 0xff, + lmclass->n_hash * sizeof(*lmclass->nword_hash)); + /* Just use the next allocated one (easy) */ + next = lmclass->n_hash; + lmclass->n_hash *= 2; + } + else { + /* Look for any available bucket. We hope this doesn't happen. */ + for (next = 0; next < lmclass->n_hash; ++next) + if (lmclass->nword_hash[next].wid == -1) + break; + /* This should absolutely not happen. */ + assert(next != lmclass->n_hash); + } + lmclass->nword_hash[next].wid = wid; + lmclass->nword_hash[next].prob1 = lweight; + lmclass->nword_hash[hash].next = next; + ++lmclass->n_hash_inuse; + return next; + } +} +/* Free a class together with its hash table and probability array. */ +void +ngram_class_free(ngram_class_t * lmclass) +{ + ckd_free(lmclass->nword_hash); + ckd_free(lmclass->prob1); + ckd_free(lmclass); +} +/* Add word to the existing class tagged classname with the given relative weight, rescaling the other members' probabilities by (1 - fprob). Returns the hash bucket index from ngram_class_add_word(), or NGRAM_INVALID_WID on failure. */ +int32 +ngram_model_add_class_word(ngram_model_t * model, + const char *classname, + const char *word, float32 weight) +{ + ngram_class_t *lmclass; + int32 classid, tag_wid, wid, i, scale; + float32 fprob; + + /* Find the class corresponding to classname. Linear search + * probably okay here since there won't be very many classes, and + * this doesn't have to be fast. */ + tag_wid = ngram_wid(model, classname); + if (tag_wid == NGRAM_INVALID_WID) { + E_ERROR("No such word or class tag: %s\n", classname); + return tag_wid; + } + for (classid = 0; classid < model->n_classes; ++classid) { + if (model->classes[classid]->tag_wid == tag_wid) + break; + } + /* Hmm, no such class. It's probably not a good idea to create one. */ + if (classid == model->n_classes) { + E_ERROR + ("Word %s is not a class tag (call ngram_model_add_class() first)\n", + classname); + return NGRAM_INVALID_WID; + } + lmclass = model->classes[classid]; + + /* Add this word to the model's set of words.
*/ + wid = ngram_add_word_internal(model, word, classid); + if (wid == NGRAM_INVALID_WID) + return wid; + + /* This is the fixed probability of the new word. */ + fprob = weight * 1.0f / (lmclass->n_words + lmclass->n_hash_inuse + 1); + /* Now normalize everything else to fit it in. This is + * accomplished by simply scaling all the other probabilities + * by (1-fprob). */ + scale = logmath_log(model->lmath, 1.0 - fprob); + for (i = 0; i < lmclass->n_words; ++i) + lmclass->prob1[i] += scale; + for (i = 0; i < lmclass->n_hash; ++i) + if (lmclass->nword_hash[i].wid != -1) + lmclass->nword_hash[i].prob1 += scale; + + /* Now add it to the class hash table. */ + return ngram_class_add_word(lmclass, wid, + logmath_log(model->lmath, fprob)); +} +/* Create a new class named classname from n_words words and their weights, first adding classname as a word with classweight if it is not already known. Returns the new class ID, or -1 on failure (including when 128 classes already exist). */ +int32 +ngram_model_add_class(ngram_model_t * model, + const char *classname, + float32 classweight, + char **words, const float32 * weights, int32 n_words) +{ + ngram_class_t *lmclass; + glist_t classwords = NULL; + int32 i, start_wid = -1; + int32 classid, tag_wid; + + /* Check if classname already exists in model. If not, add it.
*/ + if ((tag_wid = + ngram_wid(model, classname)) == ngram_unknown_wid(model)) { + tag_wid = ngram_model_add_word(model, classname, classweight); + if (tag_wid == NGRAM_INVALID_WID) + return -1; + } + + if (model->n_classes == 128) { + E_ERROR("Number of classes cannot exceed 128 (sorry)\n"); + return -1; + } + classid = model->n_classes; + for (i = 0; i < n_words; ++i) { + int32 wid; + + wid = ngram_add_word_internal(model, words[i], classid); + if (wid == NGRAM_INVALID_WID) + return -1; + if (start_wid == -1) + start_wid = NGRAM_BASEWID(wid); + classwords = glist_add_float32(classwords, weights[i]); + } + classwords = glist_reverse(classwords); + lmclass = ngram_class_new(model, tag_wid, start_wid, classwords); + glist_free(classwords); + if (lmclass == NULL) + return -1; + + ++model->n_classes; + if (model->classes == NULL) + model->classes = ckd_calloc(1, sizeof(*model->classes)); + else + model->classes = ckd_realloc(model->classes, + model->n_classes * + sizeof(*model->classes)); + model->classes[classid] = lmclass; + return classid; +} + +int32 +ngram_class_prob(ngram_class_t * lmclass, int32 wid) +{ + int32 base_wid = NGRAM_BASEWID(wid); + + if (base_wid < lmclass->start_wid + || base_wid > lmclass->start_wid + lmclass->n_words) { + int32 hash; + + /* Look it up in the hash table. */ + hash = wid & (lmclass->n_hash - 1); + while (hash != -1 && lmclass->nword_hash[hash].wid != wid) + hash = lmclass->nword_hash[hash].next; + if (hash == -1) + return 1; + return lmclass->nword_hash[hash].prob1; + } + else { + return lmclass->prob1[base_wid - lmclass->start_wid]; + } +} + +int32 +read_classdef_file(hash_table_t * classes, const char *file_name) +{ + FILE *fp; + int32 is_pipe; + int inclass; /**< Are we currently reading a list of class words? 
*/ + int32 rv = -1; + gnode_t *gn; + glist_t classwords = NULL; + glist_t classprobs = NULL; + char *classname = NULL; + + if ((fp = fopen_comp(file_name, "r", &is_pipe)) == NULL) { + E_ERROR("File %s not found\n", file_name); + return -1; + } + + inclass = FALSE; + while (!feof(fp)) { + char line[512]; + char *wptr[2]; + int n_words; + + if (fgets(line, sizeof(line), fp) == NULL) + break; + + n_words = str2words(line, wptr, 2); + if (n_words <= 0) + continue; + + if (inclass) { + /* Look for an end of class marker. */ + if (n_words == 2 && 0 == strcmp(wptr[0], "END")) { + classdef_t *classdef; + gnode_t *word, *weight; + int32 i; + + if (classname == NULL || 0 != strcmp(wptr[1], classname)) + goto error_out; + inclass = FALSE; + + /* Construct a class from the list of words collected. */ + classdef = ckd_calloc(1, sizeof(*classdef)); + classwords = glist_reverse(classwords); + classprobs = glist_reverse(classprobs); + classdef->n_words = glist_count(classwords); + classdef->words = ckd_calloc(classdef->n_words, + sizeof(*classdef->words)); + classdef->weights = ckd_calloc(classdef->n_words, + sizeof(*classdef->weights)); + word = classwords; + weight = classprobs; + for (i = 0; i < classdef->n_words; ++i) { + classdef->words[i] = gnode_ptr(word); + classdef->weights[i] = gnode_float32(weight); + word = gnode_next(word); + weight = gnode_next(weight); + } + + /* Add this class to the hash table. */ + if (hash_table_enter(classes, classname, classdef) != + classdef) { + classdef_free(classdef); + goto error_out; + } + + /* Reset everything. */ + glist_free(classwords); + glist_free(classprobs); + classwords = NULL; + classprobs = NULL; + classname = NULL; + } + else { + float32 fprob; + + if (n_words == 2) + fprob = atof_c(wptr[1]); + else + fprob = 1.0f; + /* Add it to the list of words for this class. 
*/ + classwords = + glist_add_ptr(classwords, ckd_salloc(wptr[0])); + classprobs = glist_add_float32(classprobs, fprob); + } + } + else { + /* Start a new LM class if the LMCLASS marker is seen */ + if (n_words == 2 && 0 == strcmp(wptr[0], "LMCLASS")) { + if (inclass) + goto error_out; + inclass = TRUE; + classname = ckd_salloc(wptr[1]); + } + /* Otherwise, just ignore whatever junk we got */ + } + } + rv = 0; /* Success. */ + + error_out: + /* Free all the stuff we might have allocated. */ + fclose_comp(fp, is_pipe); + for (gn = classwords; gn; gn = gnode_next(gn)) + ckd_free(gnode_ptr(gn)); + glist_free(classwords); + glist_free(classprobs); + ckd_free(classname); + + return rv; +} + +void +classdef_free(classdef_t * classdef) +{ + int32 i; + for (i = 0; i < classdef->n_words; ++i) + ckd_free(classdef->words[i]); + ckd_free(classdef->words); + ckd_free(classdef->weights); + ckd_free(classdef); +} + + +int32 +ngram_model_read_classdef(ngram_model_t * model, const char *file_name) +{ + hash_table_t *classes; + glist_t hl = NULL; + gnode_t *gn; + int32 rv = -1; + + classes = hash_table_new(0, FALSE); + if (read_classdef_file(classes, file_name) < 0) { + hash_table_free(classes); + return -1; + } + + /* Create a new class in the language model for each classdef. 
*/ + hl = hash_table_tolist(classes, NULL); + for (gn = hl; gn; gn = gnode_next(gn)) { + hash_entry_t *he = gnode_ptr(gn); + classdef_t *classdef = he->val; + + if (ngram_model_add_class(model, he->key, 1.0, + classdef->words, + classdef->weights, + classdef->n_words) < 0) + goto error_out; + } + rv = 0; + + error_out: + for (gn = hl; gn; gn = gnode_next(gn)) { + hash_entry_t *he = gnode_ptr(gn); + ckd_free((char *) he->key); + classdef_free(he->val); + } + glist_free(hl); + hash_table_free(classes); + return rv; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model_internal.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..45616782fa6dfe701affcc27c52967c183b51046 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model_internal.h @@ -0,0 +1,197 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * \file ngram_model_internal.h Internal structures for N-Gram models + * + * Author: David Huggins-Daines + */ + +#ifndef __NGRAM_MODEL_INTERNAL_H__ +#define __NGRAM_MODEL_INTERNAL_H__ + +#include "sphinxbase/ngram_model.h" +#include "sphinxbase/hash_table.h" + +/** + * Common implementation of ngram_model_t. + * + * The details of bigram, trigram, and higher-order N-gram storage, if any, can + * vary somewhat depending on the file format in use. + */ +struct ngram_model_s { + int refcount; /**< Reference count */ + uint32 *n_counts; /**< Counts for 1, 2, 3, ... grams */ + int32 n_1g_alloc; /**< Number of allocated word strings (for new word addition) */ + int32 n_words; /**< Number of actual word strings (NOT the same as the + number of unigrams, due to class words). */ + + uint8 n; /**< This is an n-gram model (1, 2, 3, ...). */ + uint8 n_classes; /**< Number of classes (maximum 128) */ + uint8 writable; /**< Are word strings writable? 
*/ + uint8 flags; /**< Any other flags we might care about + (FIXME: Merge this and writable) */ + logmath_t *lmath; /**< Log-math object */ + float32 lw; /**< Language model scaling factor */ + int32 log_wip; /**< Log of word insertion penalty */ + int32 log_zero; /**< Zero probability, cached here for quick lookup */ + char **word_str; /**< Unigram names */ + hash_table_t *wid; /**< Mapping of unigram names to word IDs. */ + int32 *tmp_wids; /**< Temporary array of word IDs for ngram_model_get_ngram() */ + struct ngram_class_s **classes; /**< Word class definitions. */ + struct ngram_funcs_s *funcs; /**< Implementation-specific methods. */ +}; + +/** + * Implementation of ngram_class_t. + */ +struct ngram_class_s { + int32 tag_wid; /**< Base word ID for this class tag */ + int32 start_wid; /**< Starting base word ID for this class' words */ + int32 n_words; /**< Number of base words for this class */ + int32 *prob1; /**< Probability table for base words */ + /** + * Custom hash table for additional words. + */ + struct ngram_hash_s { + int32 wid; /**< Word ID of this bucket */ + int32 prob1; /**< Probability for this word */ + int32 next; /**< Index of next bucket (or -1 for no collision) */ + } *nword_hash; + int32 n_hash; /**< Number of buckets in nword_hash (power of 2) */ + int32 n_hash_inuse; /**< Number of words in nword_hash */ +}; + +#define NGRAM_MAX_ORDER 5 + +#define NGRAM_HASH_SIZE 128 + +#define NGRAM_BASEWID(wid) ((wid)&0xffffff) +#define NGRAM_CLASSID(wid) (((wid)>>24) & 0x7f) +#define NGRAM_CLASSWID(wid,classid) (((classid)<<24) | 0x80000000 | (wid)) +#define NGRAM_IS_CLASSWID(wid) ((wid)&0x80000000) + +#define UG_ALLOC_STEP 10 + +/** Implementation-specific functions for operating on ngram_model_t objects */ +typedef struct ngram_funcs_s { + /** + * Implementation-specific function for freeing an ngram_model_t. + */ + void (*free) (ngram_model_t * model); + /** + * Implementation-specific function for applying language model weights. 
+ */ + int (*apply_weights) (ngram_model_t * model, float32 lw, float32 wip); + /** + * Implementation-specific function for querying language model score. + */ + int32(*score) (ngram_model_t * model, + int32 wid, + int32 * history, int32 n_hist, int32 * n_used); + /** + * Implementation-specific function for querying raw language + * model probability. + */ + int32(*raw_score) (ngram_model_t * model, + int32 wid, + int32 * history, int32 n_hist, int32 * n_used); + /** + * Implementation-specific function for adding unigrams. + * + * This function updates the internal structures of a language + * model to add the given unigram with the given weight (defined + * as a log-factor applied to the uniform distribution). This + * includes reallocating or otherwise resizing the set of unigrams. + * + * @return The language model score (not raw log-probability) of + * the new word, or 0 for failure. + */ + int32(*add_ug) (ngram_model_t * model, int32 wid, int32 lweight); + + /** + * Implementation-specific function for purging N-Gram cache + */ + void (*flush) (ngram_model_t * model); +} ngram_funcs_t; + +/** + * One class definition from a classdef file. + */ +typedef struct classdef_s { + char **words; + float32 *weights; + int32 n_words; +} classdef_t; + +/** + * Initialize the base ngram_model_t structure. + */ +int32 +ngram_model_init(ngram_model_t * model, + ngram_funcs_t * funcs, + logmath_t * lmath, int32 n, int32 n_unigram); + +/** + * Read a probdef file. + */ +int32 read_classdef_file(hash_table_t * classes, + const char *classdef_file); + +/** + * Free a class definition. + */ +void classdef_free(classdef_t * classdef); + +/** + * Allocate and initialize an N-Gram class. + */ +ngram_class_t *ngram_class_new(ngram_model_t * model, int32 tag_wid, + int32 start_wid, glist_t classwords); + +/** + * Deallocate an N-Gram class. + */ +void ngram_class_free(ngram_class_t * lmclass); + +/** + * Get the in-class log probability for a word in an N-Gram class. 
+ * + * @return This probability, or 1 if word not found. + */ +int32 ngram_class_prob(ngram_class_t * lmclass, int32 wid); + +#endif /* __NGRAM_MODEL_INTERNAL_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model_set.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model_set.c new file mode 100644 index 0000000000000000000000000000000000000000..c3ff13474709707518037d0855531225a200e6bd --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model_set.c @@ -0,0 +1,868 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file ngram_model_set.c Set of language models. + * @author David Huggins-Daines + */ + +#include +#include + +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/filename.h" + +#include "ngram_model_set.h" + +static ngram_funcs_t ngram_model_set_funcs; + +static int +my_compare(const void *a, const void *b) +{ + /* Make sure floats to the beginning. */ + if (strcmp(*(char *const *) a, "") == 0) + return -1; + else if (strcmp(*(char *const *) b, "") == 0) + return 1; + else + return strcmp(*(char *const *) a, *(char *const *) b); +} + +static void +build_widmap(ngram_model_t * base, logmath_t * lmath, int32 n) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + ngram_model_t **models = set->lms; + hash_table_t *vocab; + glist_t hlist; + gnode_t *gn; + int32 i; + + /* Construct a merged vocabulary and a set of word-ID mappings. */ + vocab = hash_table_new(models[0]->n_words, FALSE); + /* Create the set of merged words. */ + for (i = 0; i < set->n_models; ++i) { + int32 j; + for (j = 0; j < models[i]->n_words; ++j) { + /* Ignore collisions. */ + (void) hash_table_enter_int32(vocab, models[i]->word_str[j], + j); + } + } + /* Create the array of words, then sort it. 
*/ + if (hash_table_lookup(vocab, "", NULL) != 0) + (void) hash_table_enter_int32(vocab, "", 0); + /* Now we know the number of unigrams, initialize the base model. */ + ngram_model_init(base, &ngram_model_set_funcs, lmath, n, + hash_table_inuse(vocab)); + base->writable = FALSE; /* We will reuse the pointers from the submodels. */ + i = 0; + hlist = hash_table_tolist(vocab, NULL); + for (gn = hlist; gn; gn = gnode_next(gn)) { + hash_entry_t *ent = gnode_ptr(gn); + base->word_str[i++] = (char *) ent->key; + } + glist_free(hlist); + qsort(base->word_str, base->n_words, sizeof(*base->word_str), + my_compare); + + /* Now create the word ID mappings. */ + if (set->widmap) + ckd_free_2d((void **) set->widmap); + set->widmap = (int32 **) ckd_calloc_2d(base->n_words, set->n_models, + sizeof(**set->widmap)); + for (i = 0; i < base->n_words; ++i) { + int32 j; + /* Also create the master wid mapping. */ + (void) hash_table_enter_int32(base->wid, base->word_str[i], i); + /* printf("%s: %d => ", base->word_str[i], i); */ + for (j = 0; j < set->n_models; ++j) { + set->widmap[i][j] = ngram_wid(models[j], base->word_str[i]); + /* printf("%d ", set->widmap[i][j]); */ + } + /* printf("\n"); */ + } + hash_table_free(vocab); +} + +ngram_model_t * +ngram_model_set_init(cmd_ln_t * config, + ngram_model_t ** models, + char **names, const float32 * weights, int32 n_models) +{ + ngram_model_set_t *model; + ngram_model_t *base; + logmath_t *lmath; + int32 i, n; + + (void)config; + if (n_models == 0) /* WTF */ + return NULL; + + /* Do consistency checking on the models. They must all use the + * same logbase and shift. */ + lmath = models[0]->lmath; + for (i = 1; i < n_models; ++i) { + if (logmath_get_base(models[i]->lmath) != logmath_get_base(lmath) + || logmath_get_shift(models[i]->lmath) != + logmath_get_shift(lmath)) { + E_ERROR + ("Log-math parameters don't match, will not create LM set\n"); + return NULL; + } + } + + /* Allocate the combined model, initialize it. 
*/ + model = ckd_calloc(1, sizeof(*model)); + base = &model->base; + model->n_models = n_models; + model->lms = ckd_calloc(n_models, sizeof(*model->lms)); + model->names = ckd_calloc(n_models, sizeof(*model->names)); + /* Initialize weights to a uniform distribution */ + model->lweights = ckd_calloc(n_models, sizeof(*model->lweights)); + { + int32 uniform = logmath_log(lmath, 1.0 / n_models); + for (i = 0; i < n_models; ++i) + model->lweights[i] = uniform; + } + /* Default to interpolate if weights were given. */ + if (weights) + model->cur = -1; + + n = 0; + for (i = 0; i < n_models; ++i) { + model->lms[i] = ngram_model_retain(models[i]); + model->names[i] = ckd_salloc(names[i]); + if (weights) + model->lweights[i] = logmath_log(lmath, weights[i]); + /* N is the maximum of all merged models. */ + if (models[i]->n > n) + n = models[i]->n; + } + /* Allocate the history mapping table. */ + model->maphist = ckd_calloc(n - 1, sizeof(*model->maphist)); + + /* Now build the word-ID mapping and merged vocabulary. */ + build_widmap(base, lmath, n); + return base; +} + +ngram_model_t * +ngram_model_set_read(cmd_ln_t * config, + const char *lmctlfile, logmath_t * lmath) +{ + FILE *ctlfp; + glist_t lms = NULL; + glist_t lmnames = NULL; + __BIGSTACKVARIABLE__ char str[1024]; + ngram_model_t *set = NULL; + hash_table_t *classes; + char *basedir, *c; + + /* Read all the class definition files to accumulate a mapping of + * classnames to definitions. */ + classes = hash_table_new(0, FALSE); + if ((ctlfp = fopen(lmctlfile, "r")) == NULL) { + E_ERROR_SYSTEM("Failed to open %s", lmctlfile); + return NULL; + } + + /* Try to find the base directory to append to relative paths in + * the lmctl file. */ + if ((c = strrchr(lmctlfile, '/')) || (c = strrchr(lmctlfile, '\\'))) { + /* Include the trailing slash. 
*/ + basedir = ckd_calloc(c - lmctlfile + 2, 1); + memcpy(basedir, lmctlfile, c - lmctlfile + 1); + } + else { + basedir = NULL; + } + E_INFO("Reading LM control file '%s'\n", lmctlfile); + if (basedir) + E_INFO("Will prepend '%s' to unqualified paths\n", basedir); + + if (fscanf(ctlfp, "%1023s", str) == 1) { + if (strcmp(str, "{") == 0) { + /* Load LMclass files */ + while ((fscanf(ctlfp, "%1023s", str) == 1) + && (strcmp(str, "}") != 0)) { + char *deffile; + if (basedir && !path_is_absolute(str)) + deffile = string_join(basedir, str, NULL); + else + deffile = ckd_salloc(str); + E_INFO("Reading classdef from '%s'\n", deffile); + if (read_classdef_file(classes, deffile) < 0) { + ckd_free(deffile); + goto error_out; + } + ckd_free(deffile); + } + + if (strcmp(str, "}") != 0) { + E_ERROR("Unexpected EOF in %s\n", lmctlfile); + goto error_out; + } + + /* This might be the first LM name. */ + if (fscanf(ctlfp, "%1023s", str) != 1) + str[0] = '\0'; + } + } + else + str[0] = '\0'; + + /* Read in one LM at a time and add classes to them as necessary. 
*/ + while (str[0] != '\0') { + char *lmfile; + ngram_model_t *lm; + + if (basedir && str[0] != '/' && str[0] != '\\') + lmfile = string_join(basedir, str, NULL); + else + lmfile = ckd_salloc(str); + E_INFO("Reading lm from '%s'\n", lmfile); + lm = ngram_model_read(config, lmfile, NGRAM_AUTO, lmath); + if (lm == NULL) { + ckd_free(lmfile); + goto error_out; + } + if (fscanf(ctlfp, "%1023s", str) != 1) { + E_ERROR("LMname missing after LMFileName '%s'\n", lmfile); + ckd_free(lmfile); + goto error_out; + } + ckd_free(lmfile); + lms = glist_add_ptr(lms, lm); + lmnames = glist_add_ptr(lmnames, ckd_salloc(str)); + + if (fscanf(ctlfp, "%1023s", str) == 1) { + if (strcmp(str, "{") == 0) { + /* LM uses classes; read their names */ + while ((fscanf(ctlfp, "%1023s", str) == 1) && + (strcmp(str, "}") != 0)) { + void *val; + classdef_t *classdef; + + if (hash_table_lookup(classes, str, &val) == -1) { + E_ERROR("Unknown class %s in control file\n", str); + goto error_out; + } + classdef = val; + if (ngram_model_add_class(lm, str, 1.0, + classdef->words, + classdef->weights, + classdef->n_words) < 0) { + goto error_out; + } + E_INFO("Added class %s containing %d words\n", + str, classdef->n_words); + } + if (strcmp(str, "}") != 0) { + E_ERROR("Unexpected EOF in %s\n", lmctlfile); + goto error_out; + } + if (fscanf(ctlfp, "%1023s", str) != 1) + str[0] = '\0'; + } + } + else + str[0] = '\0'; + } + fclose(ctlfp); + + /* Now construct arrays out of lms and lmnames, and build an + * ngram_model_set. 
*/ + lms = glist_reverse(lms); + lmnames = glist_reverse(lmnames); + { + int32 n_models; + ngram_model_t **lm_array; + char **name_array; + gnode_t *lm_node, *name_node; + int32 i; + + n_models = glist_count(lms); + lm_array = ckd_calloc(n_models, sizeof(*lm_array)); + name_array = ckd_calloc(n_models, sizeof(*name_array)); + lm_node = lms; + name_node = lmnames; + for (i = 0; i < n_models; ++i) { + lm_array[i] = gnode_ptr(lm_node); + name_array[i] = gnode_ptr(name_node); + lm_node = gnode_next(lm_node); + name_node = gnode_next(name_node); + } + set = ngram_model_set_init(config, lm_array, name_array, + NULL, n_models); + + for (i = 0; i < n_models; ++i) { + ngram_model_free(lm_array[i]); + } + ckd_free(lm_array); + ckd_free(name_array); + } + error_out: + { + gnode_t *gn; + glist_t hlist; + + if (set == NULL) { + for (gn = lms; gn; gn = gnode_next(gn)) { + ngram_model_free(gnode_ptr(gn)); + } + } + glist_free(lms); + for (gn = lmnames; gn; gn = gnode_next(gn)) { + ckd_free(gnode_ptr(gn)); + } + glist_free(lmnames); + hlist = hash_table_tolist(classes, NULL); + for (gn = hlist; gn; gn = gnode_next(gn)) { + hash_entry_t *he = gnode_ptr(gn); + ckd_free((char *) he->key); + classdef_free(he->val); + } + glist_free(hlist); + hash_table_free(classes); + ckd_free(basedir); + } + return set; +} + +int32 +ngram_model_set_count(ngram_model_t * base) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + return set->n_models; +} + +ngram_model_set_iter_t * +ngram_model_set_iter(ngram_model_t * base) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + ngram_model_set_iter_t *itor; + + if (set == NULL || set->n_models == 0) + return NULL; + itor = ckd_calloc(1, sizeof(*itor)); + itor->set = set; + return itor; +} + +ngram_model_set_iter_t * +ngram_model_set_iter_next(ngram_model_set_iter_t * itor) +{ + if (++itor->cur == itor->set->n_models) { + ngram_model_set_iter_free(itor); + return NULL; + } + return itor; +} + +void 
+ngram_model_set_iter_free(ngram_model_set_iter_t * itor) +{ + ckd_free(itor); +} + +ngram_model_t * +ngram_model_set_iter_model(ngram_model_set_iter_t * itor, + char const **lmname) +{ + if (lmname) + *lmname = itor->set->names[itor->cur]; + return itor->set->lms[itor->cur]; +} + +ngram_model_t * +ngram_model_set_lookup(ngram_model_t * base, const char *name) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + int32 i; + + if (name == NULL) { + if (set->cur == -1) + return NULL; + else + return set->lms[set->cur]; + } + + /* There probably won't be very many submodels. */ + for (i = 0; i < set->n_models; ++i) + if (0 == strcmp(set->names[i], name)) + break; + if (i == set->n_models) + return NULL; + return set->lms[i]; +} + +ngram_model_t * +ngram_model_set_select(ngram_model_t * base, const char *name) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + int32 i; + + /* There probably won't be very many submodels. */ + for (i = 0; i < set->n_models; ++i) + if (0 == strcmp(set->names[i], name)) + break; + if (i == set->n_models) + return NULL; + set->cur = i; + return set->lms[set->cur]; +} + +const char * +ngram_model_set_current(ngram_model_t * base) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + + if (set->cur == -1) + return NULL; + else + return set->names[set->cur]; +} + +int32 +ngram_model_set_current_wid(ngram_model_t * base, int32 set_wid) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + + if (set->cur == -1 || set_wid >= base->n_words) + return NGRAM_INVALID_WID; + else + return set->widmap[set_wid][set->cur]; +} + +int32 +ngram_model_set_known_wid(ngram_model_t * base, int32 set_wid) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + + if (set_wid >= base->n_words) + return FALSE; + else if (set->cur == -1) { + int32 i; + for (i = 0; i < set->n_models; ++i) { + if (set->widmap[set_wid][i] != ngram_unknown_wid(set->lms[i])) + return TRUE; + } + return FALSE; + } + else + return 
(set->widmap[set_wid][set->cur] + != ngram_unknown_wid(set->lms[set->cur])); +} + +ngram_model_t * +ngram_model_set_interp(ngram_model_t * base, + const char **names, const float32 * weights) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + + /* If we have a set of weights here, then set them. */ + if (names && weights) { + int32 i, j; + + /* We hope there aren't many models. */ + for (i = 0; i < set->n_models; ++i) { + for (j = 0; j < set->n_models; ++j) + if (0 == strcmp(names[i], set->names[j])) + break; + if (j == set->n_models) { + E_ERROR("Unknown LM name %s\n", names[i]); + return NULL; + } + set->lweights[j] = logmath_log(base->lmath, weights[i]); + } + } + else if (weights) { + memcpy(set->lweights, weights, + set->n_models * sizeof(*set->lweights)); + } + /* Otherwise just enable existing weights. */ + set->cur = -1; + return base; +} + +ngram_model_t * +ngram_model_set_add(ngram_model_t * base, + ngram_model_t * model, + const char *name, float32 weight, int reuse_widmap) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + float32 fprob; + int32 scale, i; + + /* Add it to the array of lms. */ + ++set->n_models; + set->lms = ckd_realloc(set->lms, set->n_models * sizeof(*set->lms)); + set->lms[set->n_models - 1] = model; + set->names = + ckd_realloc(set->names, set->n_models * sizeof(*set->names)); + set->names[set->n_models - 1] = ckd_salloc(name); + /* Expand the history mapping table if necessary. */ + if (model->n > base->n) { + base->n = model->n; + set->maphist = ckd_realloc(set->maphist, + (model->n - 1) * sizeof(*set->maphist)); + } + + /* Renormalize the interpolation weights. */ + fprob = weight * 1.0f / set->n_models; + set->lweights = ckd_realloc(set->lweights, + set->n_models * sizeof(*set->lweights)); + set->lweights[set->n_models - 1] = logmath_log(base->lmath, fprob); + /* Now normalize everything else to fit it in. This is + * accomplished by simply scaling all the other probabilities + * by (1-fprob). 
*/ + scale = logmath_log(base->lmath, 1.0 - fprob); + for (i = 0; i < set->n_models - 1; ++i) + set->lweights[i] += scale; + + /* Reuse the old word ID mapping if requested. */ + if (reuse_widmap) { + int32 **new_widmap; + + /* Tack another column onto the widmap array. */ + new_widmap = (int32 **) ckd_calloc_2d(base->n_words, set->n_models, + sizeof(**new_widmap)); + for (i = 0; i < base->n_words; ++i) { + /* Copy all the existing mappings. */ + memcpy(new_widmap[i], set->widmap[i], + (set->n_models - 1) * sizeof(**new_widmap)); + /* Create the new mapping. */ + new_widmap[i][set->n_models - 1] = + ngram_wid(model, base->word_str[i]); + } + ckd_free_2d((void **) set->widmap); + set->widmap = new_widmap; + } + else { + build_widmap(base, base->lmath, base->n); + } + return model; +} + +ngram_model_t * +ngram_model_set_remove(ngram_model_t * base, + const char *name, int reuse_widmap) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + ngram_model_t *submodel; + int32 lmidx, scale, n, i; + float32 fprob; + + for (lmidx = 0; lmidx < set->n_models; ++lmidx) + if (0 == strcmp(name, set->names[lmidx])) + break; + if (lmidx == set->n_models) + return NULL; + submodel = set->lms[lmidx]; + + /* Renormalize the interpolation weights by scaling them by + * 1/(1-fprob) */ + fprob = (float32) logmath_exp(base->lmath, set->lweights[lmidx]); + scale = logmath_log(base->lmath, 1.0 - fprob); + + /* Remove it from the array of lms, renormalize remaining weights, + * and recalcluate n. */ + --set->n_models; + n = 0; + ckd_free(set->names[lmidx]); + set->names[lmidx] = NULL; + for (i = 0; i < set->n_models; ++i) { + if (i >= lmidx) { + set->lms[i] = set->lms[i + 1]; + set->names[i] = set->names[i + 1]; + set->lweights[i] = set->lweights[i + 1]; + } + set->lweights[i] -= scale; + if (set->lms[i]->n > n) + n = set->lms[i]->n; + } + /* There's no need to shrink these arrays. 
*/ + set->lms[set->n_models] = NULL; + set->lweights[set->n_models] = base->log_zero; + /* No need to shrink maphist either. */ + + /* Reuse the existing word ID mapping if requested. */ + if (reuse_widmap) { + /* Just go through and shrink each row. */ + for (i = 0; i < base->n_words; ++i) { + memmove(set->widmap[i] + lmidx, set->widmap[i] + lmidx + 1, + (set->n_models - lmidx) * sizeof(**set->widmap)); + } + } + else { + build_widmap(base, base->lmath, n); + } + return submodel; +} + +void +ngram_model_set_map_words(ngram_model_t * base, + const char **words, int32 n_words) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + int32 i; + + /* Recreate the word mapping. */ + if (base->writable) { + for (i = 0; i < base->n_words; ++i) { + ckd_free(base->word_str[i]); + } + } + ckd_free(base->word_str); + ckd_free_2d((void **) set->widmap); + base->writable = TRUE; + base->n_words = base->n_1g_alloc = n_words; + base->word_str = ckd_calloc(n_words, sizeof(*base->word_str)); + set->widmap = + (int32 **) ckd_calloc_2d(n_words, set->n_models, + sizeof(**set->widmap)); + hash_table_empty(base->wid); + for (i = 0; i < n_words; ++i) { + int32 j; + base->word_str[i] = ckd_salloc(words[i]); + (void) hash_table_enter_int32(base->wid, base->word_str[i], i); + for (j = 0; j < set->n_models; ++j) { + set->widmap[i][j] = ngram_wid(set->lms[j], base->word_str[i]); + } + } +} + +static int +ngram_model_set_apply_weights(ngram_model_t * base, float32 lw, + float32 wip) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + int32 i; + + /* Apply weights to each sub-model. */ + for (i = 0; i < set->n_models; ++i) + ngram_model_apply_weights(set->lms[i], lw, wip); + return 0; +} + +static int32 +ngram_model_set_score(ngram_model_t * base, int32 wid, + int32 * history, int32 n_hist, int32 * n_used) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + int32 mapwid; + int32 score; + int32 i; + + /* Truncate the history. 
*/ + if (n_hist > base->n - 1) + n_hist = base->n - 1; + + /* Interpolate if there is no current. */ + if (set->cur == -1) { + score = base->log_zero; + for (i = 0; i < set->n_models; ++i) { + int32 j; + /* Map word and history IDs for each model. */ + mapwid = set->widmap[wid][i]; + for (j = 0; j < n_hist; ++j) { + if (history[j] == NGRAM_INVALID_WID) + set->maphist[j] = NGRAM_INVALID_WID; + else + set->maphist[j] = set->widmap[history[j]][i]; + } + score = logmath_add(base->lmath, score, + set->lweights[i] + + ngram_ng_score(set->lms[i], + mapwid, set->maphist, + n_hist, n_used)); + } + } + else { + int32 j; + /* Map word and history IDs (FIXME: do this in a function?) */ + mapwid = set->widmap[wid][set->cur]; + for (j = 0; j < n_hist; ++j) { + if (history[j] == NGRAM_INVALID_WID) + set->maphist[j] = NGRAM_INVALID_WID; + else + set->maphist[j] = set->widmap[history[j]][set->cur]; + } + score = ngram_ng_score(set->lms[set->cur], + mapwid, set->maphist, n_hist, n_used); + } + + return score; +} + +static int32 +ngram_model_set_raw_score(ngram_model_t * base, int32 wid, + int32 * history, int32 n_hist, int32 * n_used) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + int32 mapwid; + int32 score; + int32 i; + + /* Truncate the history. */ + if (n_hist > base->n - 1) + n_hist = base->n - 1; + + /* Interpolate if there is no current. */ + if (set->cur == -1) { + score = base->log_zero; + for (i = 0; i < set->n_models; ++i) { + int32 j; + /* Map word and history IDs for each model. */ + mapwid = set->widmap[wid][i]; + for (j = 0; j < n_hist; ++j) { + if (history[j] == NGRAM_INVALID_WID) + set->maphist[j] = NGRAM_INVALID_WID; + else + set->maphist[j] = set->widmap[history[j]][i]; + } + score = logmath_add(base->lmath, score, + set->lweights[i] + + ngram_ng_prob(set->lms[i], + mapwid, set->maphist, n_hist, + n_used)); + } + } + else { + int32 j; + /* Map word and history IDs (FIXME: do this in a function?) 
*/ + mapwid = set->widmap[wid][set->cur]; + for (j = 0; j < n_hist; ++j) { + if (history[j] == NGRAM_INVALID_WID) + set->maphist[j] = NGRAM_INVALID_WID; + else + set->maphist[j] = set->widmap[history[j]][set->cur]; + } + score = ngram_ng_prob(set->lms[set->cur], + mapwid, set->maphist, n_hist, n_used); + } + + return score; +} + +static int32 +ngram_model_set_add_ug(ngram_model_t * base, int32 wid, int32 lweight) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + int32 *newwid; + int32 i, prob; + + /* At this point the word has already been added to the master + model and we have a new word ID for it. Add it to active + submodels and track the word IDs. */ + newwid = ckd_calloc(set->n_models, sizeof(*newwid)); + prob = base->log_zero; + for (i = 0; i < set->n_models; ++i) { + int32 wprob, n_hist; + + /* Only add to active models. */ + if (set->cur == -1 || set->cur == i) { + /* Did this word already exist? */ + newwid[i] = ngram_wid(set->lms[i], base->word_str[wid]); + if (newwid[i] == NGRAM_INVALID_WID) { + /* Add it to the submodel. */ + newwid[i] = + ngram_model_add_word(set->lms[i], base->word_str[wid], + (float32) logmath_exp(base->lmath, + lweight)); + if (newwid[i] == NGRAM_INVALID_WID) { + ckd_free(newwid); + return base->log_zero; + } + } + /* Now get the unigram probability for the new word and either + * interpolate it or use it (if this is the current model). */ + wprob = + ngram_ng_prob(set->lms[i], newwid[i], NULL, 0, &n_hist); + if (set->cur == i) + prob = wprob; + else if (set->cur == -1) + prob = + logmath_add(base->lmath, prob, + set->lweights[i] + wprob); + } + else { + newwid[i] = NGRAM_INVALID_WID; + } + } + /* Okay we have the word IDs for this in all the submodels. Now + do some complicated memory mangling to add this to the + widmap. 
*/ + set->widmap = + ckd_realloc(set->widmap, base->n_words * sizeof(*set->widmap)); + set->widmap[0] = + ckd_realloc(set->widmap[0], + base->n_words * set->n_models * sizeof(**set->widmap)); + for (i = 0; i < base->n_words; ++i) + set->widmap[i] = set->widmap[0] + i * set->n_models; + memcpy(set->widmap[wid], newwid, set->n_models * sizeof(*newwid)); + ckd_free(newwid); + return prob; +} + +static void +ngram_model_set_free(ngram_model_t * base) +{ + ngram_model_set_t *set = (ngram_model_set_t *) base; + int32 i; + + for (i = 0; i < set->n_models; ++i) + ngram_model_free(set->lms[i]); + ckd_free(set->lms); + for (i = 0; i < set->n_models; ++i) + ckd_free(set->names[i]); + ckd_free(set->names); + ckd_free(set->lweights); + ckd_free(set->maphist); + ckd_free_2d((void **) set->widmap); +} + +static ngram_funcs_t ngram_model_set_funcs = { + ngram_model_set_free, /* free */ + ngram_model_set_apply_weights, /* apply_weights */ + ngram_model_set_score, /* score */ + ngram_model_set_raw_score, /* raw_score */ + ngram_model_set_add_ug, /* add_ug */ + NULL /* flush */ +}; diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model_set.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model_set.h new file mode 100644 index 0000000000000000000000000000000000000000..09fbdabda044077cb1bd58b57d1a0510b4aa64b3 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model_set.h @@ -0,0 +1,70 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file ngram_model_set.h Set of language models. + * @author David Huggins-Daines + */ + +#ifndef __NGRAM_MODEL_SET_H__ +#define __NGRAM_MODEL_SET_H__ + +#include "ngram_model_internal.h" + +/** + * Subclass of ngram_model for grouping language models. + * + * The base structure is the first member; the set implementation casts + * ngram_model_t pointers to ngram_model_set_t and back. + */ +typedef struct ngram_model_set_s { + ngram_model_t base; /**< Base ngram_model_t structure (must stay first, see above). */ + + int32 n_models; /**< Number of models in this set. */ + int32 cur; /**< Index of the currently selected model, or -1 for none;
 with -1 the scoring functions interpolate over all models using lweights. */ + ngram_model_t **lms; /**< Language models in this set (n_models entries). */ + char **names; /**< Names for language models, kept parallel to lms. */ + int32 *lweights; /**< Log interpolation weights, one per model. */ + int32 **widmap; /**< Word ID mapping for submodels, indexed widmap[set word ID][model index]. */ + int32 *maphist; /**< Scratch buffer: N-Gram history mapped to one submodel's word IDs while scoring. */ +} ngram_model_set_t; + +/** + * Iterator over a model set. + */ +struct ngram_model_set_iter_s { + ngram_model_set_t *set; + int32 cur; +}; + +#endif /* __NGRAM_MODEL_SET_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model_trie.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model_trie.c new file mode 100644 index 0000000000000000000000000000000000000000..1e79ac359745aa611a2aa626d594e58f7a55ee0d --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model_trie.c @@ -0,0 +1,711 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2015 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#include +#include + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include +#include + +#include "ngram_model_trie.h" + +static const char trie_hdr[] = "Trie Language Model"; +static const char dmp_hdr[] = "Darpa Trigram LM"; +static ngram_funcs_t ngram_model_trie_funcs; + +/* + * Read and return #unigrams, #bigrams, #trigrams as stated in input file. + */ +static int +read_counts_arpa(lineiter_t ** li, uint32 * counts, int *order) +{ + int32 ngram, prev_ngram; + uint32 ngram_cnt; + + /* skip file until past the '\data\' marker */ + while (*li) { + if (strcmp((*li)->buf, "\\data\\") == 0) + break; + *li = lineiter_next(*li); + } + + if (*li == NULL || strcmp((*li)->buf, "\\data\\") != 0) { + E_INFO("No \\data\\ mark in LM file\n"); + return -1; + } + + prev_ngram = 0; + *order = 0; + while ((*li = lineiter_next(*li))) { + if (sscanf((*li)->buf, "ngram %d=%d", &ngram, &ngram_cnt) != 2) + break; + if (ngram != prev_ngram + 1) { + E_ERROR + ("Ngram counts in LM file is not in order. 
%d goes after %d\n", + ngram, prev_ngram); + return -1; + } + prev_ngram = ngram; + counts[*order] = ngram_cnt; + (*order)++; + } + + if (*li == NULL) { + E_ERROR("EOF while reading ngram counts\n"); + return -1; + } + + return 0; +} + +static int +read_1grams_arpa(lineiter_t ** li, uint32 count, ngram_model_t * base, + unigram_t * unigrams) +{ + uint32 i; + int n; + int n_parts; + char *wptr[3]; + + while (*li && strcmp((*li)->buf, "\\1-grams:") != 0) { + *li = lineiter_next(*li); + } + if (*li == NULL) { + E_ERROR_SYSTEM("Failed to read \\1-grams: mark"); + return -1; + } + + n_parts = 2; + for (i = 0; i < count; i++) { + unigram_t *unigram; + + *li = lineiter_next(*li); + if (*li == NULL) { + E_ERROR + ("Unexpected end of ARPA file. Failed to read unigram %d\n", + i + 1); + return -1; + } + if ((n = str2words((*li)->buf, wptr, 3)) < n_parts) { + E_ERROR("Format error at line %d, Failed to read unigrams\n", (*li)->lineno); + return -1; + } + + unigram = &unigrams[i]; + unigram->prob = + logmath_log10_to_log_float(base->lmath, atof_c(wptr[0])); + if (unigram->prob > 0) { + E_WARN("Unigram '%s' has positive probability\n", wptr[1]); + unigram->prob = 0; + } + if (n == n_parts + 1) { + unigram->bo = + logmath_log10_to_log_float(base->lmath, + atof_c(wptr[2])); + } + else { + unigram->bo = 0.0f; + } + + /* TODO: classify float with fpclassify and warn if bad value occurred */ + base->word_str[i] = ckd_salloc(wptr[1]); + } + + /* fill hash-table that maps unigram names to their word ids */ + for (i = 0; i < count; i++) { + if ((hash_table_enter + (base->wid, base->word_str[i], + (void *) (long) i)) != (void *) (long) i) { + E_WARN("Duplicate word in dictionary: %s\n", + base->word_str[i]); + } + } + return 0; +} + +ngram_model_t * +ngram_model_trie_read_arpa(cmd_ln_t * config, + const char *path, logmath_t * lmath) +{ + FILE *fp; + lineiter_t *li; + ngram_model_trie_t *model; + ngram_model_t *base; + ngram_raw_t **raw_ngrams; + int32 is_pipe; + uint32 
counts[NGRAM_MAX_ORDER]; + int order; + int i; + + (void)config; + E_INFO("Trying to read LM in arpa format\n"); + if ((fp = fopen_comp(path, "r", &is_pipe)) == NULL) { + E_ERROR("File %s not found\n", path); + return NULL; + } + + model = (ngram_model_trie_t *) ckd_calloc(1, sizeof(*model)); + li = lineiter_start_clean(fp); + /* Read n-gram counts from file */ + if (read_counts_arpa(&li, counts, &order) == -1) { + ckd_free(model); + lineiter_free(li); + fclose_comp(fp, is_pipe); + return NULL; + } + + E_INFO("LM of order %d\n", order); + for (i = 0; i < order; i++) { + E_INFO("#%d-grams: %d\n", i + 1, counts[i]); + } + + base = &model->base; + ngram_model_init(base, &ngram_model_trie_funcs, lmath, order, + (int32) counts[0]); + base->writable = TRUE; + + model->trie = lm_trie_create(counts[0], order); + if (read_1grams_arpa(&li, counts[0], base, model->trie->unigrams) < 0) { + ngram_model_free(base); + lineiter_free(li); + fclose_comp(fp, is_pipe); + return NULL; + } + + if (order > 1) { + raw_ngrams = + ngrams_raw_read_arpa(&li, base->lmath, counts, order, + base->wid); + if (raw_ngrams == NULL) { + ngram_model_free(base); + lineiter_free(li); + fclose_comp(fp, is_pipe); + return NULL; + } + lm_trie_build(model->trie, raw_ngrams, counts, base->n_counts, order); + ngrams_raw_free(raw_ngrams, counts, order); + } + + lineiter_free(li); + fclose_comp(fp, is_pipe); + + return base; +} + +int +ngram_model_trie_write_arpa(ngram_model_t * base, const char *path) +{ + int i; + uint32 j; + ngram_model_trie_t *model = (ngram_model_trie_t *) base; + FILE *fp = fopen(path, "w"); + if (!fp) { + E_ERROR("Unable to open %s to write arpa LM from trie\n", path); + return -1; + } + fprintf(fp, + "This is an ARPA-format language model file, generated by CMU Sphinx\n"); + /* Write N-gram counts. 
*/ + fprintf(fp, "\\data\\\n"); + for (i = 0; i < base->n; ++i) { + fprintf(fp, "ngram %d=%d\n", i + 1, base->n_counts[i]); + } + /* Write 1-grams */ + fprintf(fp, "\n\\1-grams:\n"); + for (j = 0; j < base->n_counts[0]; j++) { + unigram_t *unigram = &model->trie->unigrams[j]; + fprintf(fp, "%.4f\t%s", + logmath_log_float_to_log10(base->lmath, unigram->prob), + base->word_str[j]); + if (base->n > 1) { + fprintf(fp, "\t%.4f", + logmath_log_float_to_log10(base->lmath, unigram->bo)); + } + fprintf(fp, "\n"); + } + /* Write ngrams */ + if (base->n > 1) { + for (i = 2; i <= base->n; ++i) { + ngram_raw_t *raw_ngrams = + (ngram_raw_t *) ckd_calloc((size_t) base->n_counts[i - 1], + sizeof(*raw_ngrams)); + uint32 raw_ngram_idx; + uint32 j; + uint32 hist[NGRAM_MAX_ORDER]; + node_range_t range; + raw_ngram_idx = 0; + range.begin = range.end = 0; + + /* we need to iterate over a trie here. recursion should do the job */ + lm_trie_fill_raw_ngram(model->trie, raw_ngrams, + &raw_ngram_idx, base->n_counts, range, hist, 0, + i, base->n); + assert(raw_ngram_idx == base->n_counts[i - 1]); + qsort(raw_ngrams, (size_t) base->n_counts[i - 1], + sizeof(ngram_raw_t), &ngram_ord_comparator); + + fprintf(fp, "\n\\%d-grams:\n", i); + for (j = 0; j < base->n_counts[i - 1]; j++) { + int k; + fprintf(fp, "%.4f", logmath_log_float_to_log10(base->lmath, raw_ngrams[j].prob)); + for (k = 0; k < i; k++) { + fprintf(fp, "\t%s", + base->word_str[raw_ngrams[j].words[k]]); + } + ckd_free(raw_ngrams[j].words); + if (i < base->n) { + fprintf(fp, "\t%.4f", logmath_log_float_to_log10(base->lmath, raw_ngrams[j].backoff)); + } + fprintf(fp, "\n"); + } + ckd_free(raw_ngrams); + } + } + fprintf(fp, "\n\\end\\\n"); + return fclose(fp); +} + +static void +read_word_str(ngram_model_t * base, FILE * fp, int do_swap) +{ + int32 k; + uint32 i, j; + char *tmp_word_str; + /* read ascii word strings */ + base->writable = TRUE; + fread(&k, sizeof(k), 1, fp); + if (do_swap) + SWAP_INT32(&k); + E_INFO("#word_str: %d\n", k); 
+ tmp_word_str = (char *) ckd_calloc((size_t) k, 1); + fread(tmp_word_str, 1, (size_t) k, fp); + + /* First make sure string just read contains n_counts[0] words (PARANOIA!!) */ + for (i = 0, j = 0; i < (uint32) k; i++) + if (tmp_word_str[i] == '\0') + j++; + if (j != base->n_counts[0]) { + E_ERROR + ("Error reading word strings (%d doesn't match n_unigrams %d)\n", + j, base->n_counts[0]); + } + + /* Break up string just read into words */ + j = 0; + for (i = 0; i < base->n_counts[0]; i++) { + base->word_str[i] = ckd_salloc(tmp_word_str + j); + if (hash_table_enter(base->wid, base->word_str[i], + (void *) (long) i) != (void *) (long) i) { + E_WARN("Duplicate word in dictionary: %s\n", + base->word_str[i]); + } + j += strlen(base->word_str[i]) + 1; + } + free(tmp_word_str); +} + +ngram_model_t * +ngram_model_trie_read_bin(cmd_ln_t * config, + const char *path, logmath_t * lmath) +{ + int32 is_pipe; + FILE *fp; + size_t hdr_size; + char *hdr; + int cmp_res; + uint8 i, order; + uint32 counts[NGRAM_MAX_ORDER]; + ngram_model_trie_t *model; + ngram_model_t *base; + + (void)config; + E_INFO("Trying to read LM in trie binary format\n"); + if ((fp = fopen_comp(path, "rb", &is_pipe)) == NULL) { + E_ERROR("File %s not found\n", path); + return NULL; + } + hdr_size = strlen(trie_hdr); + hdr = (char *) ckd_calloc(hdr_size + 1, sizeof(*hdr)); + fread(hdr, sizeof(*hdr), hdr_size, fp); + cmp_res = strcmp(hdr, trie_hdr); + ckd_free(hdr); + if (cmp_res) { + E_INFO("Header doesn't match\n"); + fclose_comp(fp, is_pipe); + return NULL; + } + model = (ngram_model_trie_t *) ckd_calloc(1, sizeof(*model)); + base = &model->base; + fread(&order, sizeof(order), 1, fp); + for (i = 0; i < order; i++) { + fread(&counts[i], sizeof(counts[i]), 1, fp); + if (SWAP_LM_TRIE) + SWAP_INT32(&counts[i]); + E_INFO("#%d-grams: %d\n", i + 1, counts[i]); + } + ngram_model_init(base, &ngram_model_trie_funcs, lmath, order, + (int32) counts[0]); + for (i = 0; i < order; i++) { + base->n_counts[i] = counts[i]; + 
} + + model->trie = lm_trie_read_bin(counts, order, fp); + read_word_str(base, fp, SWAP_LM_TRIE); + fclose_comp(fp, is_pipe); + + return base; +} + +static void +write_word_str(FILE * fp, ngram_model_t * model, int do_swap) +{ + int32 k; + uint32 i; + + k = 0; + for (i = 0; i < model->n_counts[0]; i++) + k += strlen(model->word_str[i]) + 1; + E_INFO("#word_str: %d\n", k); + if (do_swap) + SWAP_INT32(&k); + fwrite(&k, sizeof(k), 1, fp); + for (i = 0; i < model->n_counts[0]; i++) + fwrite(model->word_str[i], 1, strlen(model->word_str[i]) + 1, fp); +} + +int +ngram_model_trie_write_bin(ngram_model_t * base, const char *path) +{ + int i; + int32 is_pipe; + ngram_model_trie_t *model = (ngram_model_trie_t *) base; + FILE *fp = fopen_comp(path, "wb", &is_pipe); + if (!fp) { + E_ERROR("Unable to open %s to write binary trie LM\n", path); + return -1; + } + + fwrite(trie_hdr, sizeof(*trie_hdr), strlen(trie_hdr), fp); + fwrite(&model->base.n, sizeof(model->base.n), 1, fp); + for (i = 0; i < model->base.n; i++) { + uint32 count = model->base.n_counts[i]; + if (SWAP_LM_TRIE) + SWAP_INT32(&count); + fwrite(&count, sizeof(count), 1, fp); + } + lm_trie_write_bin(model->trie, base->n_counts[0], fp); + write_word_str(fp, base, SWAP_LM_TRIE); + fclose_comp(fp, is_pipe); + return 0; +} + +ngram_model_t * +ngram_model_trie_read_dmp(cmd_ln_t * config, + const char *file_name, logmath_t * lmath) +{ + uint8 do_swap; + int32 is_pipe; + int32 k; + uint32 j; + int32 vn, ts; + int32 count; + uint32 counts[3]; + uint32 *unigram_next; + int order; + char str[1024]; + FILE *fp; + ngram_model_trie_t *model; + ngram_model_t *base; + ngram_raw_t **raw_ngrams; + + (void)config; + E_INFO("Trying to read LM in dmp format\n"); + if ((fp = fopen_comp(file_name, "rb", &is_pipe)) == NULL) { + E_ERROR("Dump file %s not found\n", file_name); + return NULL; + } + + do_swap = FALSE; + fread(&k, sizeof(k), 1, fp); + if (k != strlen(dmp_hdr) + 1) { + SWAP_INT32(&k); + if (k != strlen(dmp_hdr) + 1) { + E_ERROR 
+ ("Wrong magic header size number %x: %s is not a dump file\n", + k, file_name); + return NULL; + } + do_swap = 1; + } + if (fread(str, 1, k, fp) != (size_t) k) { + E_ERROR("Cannot read header\n"); + return NULL; + } + if (strncmp(str, dmp_hdr, k) != 0) { + E_ERROR("Wrong header %s: %s is not a dump file\n", dmp_hdr); + return NULL; + } + + if (fread(&k, sizeof(k), 1, fp) != 1) + return NULL; + if (do_swap) + SWAP_INT32(&k); + if (fread(str, 1, k, fp) != (size_t) k) { + E_ERROR("Cannot read LM filename in header\n"); + return NULL; + } + + /* read version#, if present (must be <= 0) */ + if (fread(&vn, sizeof(vn), 1, fp) != 1) + return NULL; + if (do_swap) + SWAP_INT32(&vn); + if (vn <= 0) { + /* read and don't compare timestamps (we don't care) */ + if (fread(&ts, sizeof(ts), 1, fp) != 1) + return NULL; + if (do_swap) + SWAP_INT32(&ts); + + /* read and skip format description */ + for (;;) { + if (fread(&k, sizeof(k), 1, fp) != 1) + return NULL; + if (do_swap) + SWAP_INT32(&k); + if (k == 0) + break; + if (fread(str, 1, k, fp) != (size_t) k) { + E_ERROR("Failed to read word\n"); + return NULL; + } + } + /* read model->ucount */ + if (fread(&count, sizeof(count), 1, fp) != 1) + return NULL; + if (do_swap) + SWAP_INT32(&count); + counts[0] = count; + } + else { + counts[0] = vn; + } + /* read model->bcount, tcount */ + if (fread(&count, sizeof(count), 1, fp) != 1) + return NULL; + if (do_swap) + SWAP_INT32(&count); + counts[1] = count; + if (fread(&count, sizeof(count), 1, fp) != 1) + return NULL; + if (do_swap) + SWAP_INT32(&count); + counts[2] = count; + E_INFO("ngrams 1=%d, 2=%d, 3=%d\n", counts[0], counts[1], counts[2]); + + model = (ngram_model_trie_t *) ckd_calloc(1, sizeof(*model)); + base = &model->base; + if (counts[2] > 0) + order = 3; + else if (counts[1] > 0) + order = 2; + else + order = 1; + ngram_model_init(base, &ngram_model_trie_funcs, lmath, order, + (int32) counts[0]); + + model->trie = lm_trie_create(counts[0], order); + + unigram_next = + 
(uint32 *) ckd_calloc((int32) counts[0] + 1, sizeof(unigram_next)); + for (j = 0; j <= counts[0]; j++) { + int32 bigrams; + int32 mapid; + dmp_weight_t weightp; + dmp_weight_t weightb; + + /* Skip over the mapping ID, we don't care about it. */ + /* Read the weights from actual unigram structure. */ + fread(&mapid, sizeof(int32), 1, fp); + fread(&weightp, sizeof(weightp), 1, fp); + fread(&weightb, sizeof(weightb), 1, fp); + fread(&bigrams, sizeof(int32), 1, fp); + if (do_swap) { + SWAP_INT32(&weightp.l); + SWAP_INT32(&weightb.l); + SWAP_INT32(&bigrams); + } + model->trie->unigrams[j].prob = logmath_log10_to_log_float(lmath, weightp.f); + model->trie->unigrams[j].bo = logmath_log10_to_log_float(lmath, weightb.f); + model->trie->unigrams[j].next = bigrams; + unigram_next[j] = bigrams; + } + + if (order > 1) { + raw_ngrams = + ngrams_raw_read_dmp(fp, lmath, counts, order, unigram_next, + do_swap); + if (raw_ngrams == NULL) { + ngram_model_free(base); + ckd_free(unigram_next); + fclose_comp(fp, is_pipe); + return NULL; + } + lm_trie_build(model->trie, raw_ngrams, counts, base->n_counts, order); + ngrams_raw_free(raw_ngrams, counts, order); + } + + /* Sentinel unigram and bigrams read before */ + ckd_free(unigram_next); + + /* read ascii word strings */ + read_word_str(base, fp, do_swap); + + fclose_comp(fp, is_pipe); + return base; +} + +static void +ngram_model_trie_free(ngram_model_t * base) +{ + ngram_model_trie_t *model = (ngram_model_trie_t *) base; + lm_trie_free(model->trie); +} + +static int +trie_apply_weights(ngram_model_t * base, float32 lw, float32 wip) +{ + /* just update weights that are going to be used on score calculation */ + base->lw = lw; + base->log_wip = logmath_log(base->lmath, wip); + return 0; +} + +static int32 +weight_score(ngram_model_t * base, int32 score) +{ + return (int32) (score * base->lw + base->log_wip); +} + +static int32 +ngram_model_trie_raw_score(ngram_model_t * base, int32 wid, int32 * hist, + int32 n_hist, int32 * n_used) +{ + 
int32 i; + ngram_model_trie_t *model = (ngram_model_trie_t *) base; + + if (n_hist > model->base.n - 1) + n_hist = model->base.n - 1; + for (i = 0; i < n_hist; i++) { + if (hist[i] < 0) { + n_hist = i; + break; + } + } + + return (int32) lm_trie_score(model->trie, model->base.n, wid, hist, + n_hist, n_used); +} + +static int32 +ngram_model_trie_score(ngram_model_t * base, int32 wid, int32 * hist, + int32 n_hist, int32 * n_used) +{ + return weight_score(base, + ngram_model_trie_raw_score(base, wid, hist, n_hist, + n_used)); +} + +static int32 +lm_trie_add_ug(ngram_model_t * base, int32 wid, int32 lweight) +{ + ngram_model_trie_t *model = (ngram_model_trie_t *) base; + + /* This would be very bad if this happened! */ + assert(!NGRAM_IS_CLASSWID(wid)); + + /* Reallocate unigram array. */ + model->trie->unigrams = + (unigram_t *) ckd_realloc(model->trie->unigrams, + sizeof(*model->trie->unigrams) * + (base->n_1g_alloc + 1)); + memset(model->trie->unigrams + (base->n_counts[0] + 1), 0, + (size_t) (base->n_1g_alloc - + base->n_counts[0]) * sizeof(*model->trie->unigrams)); + ++base->n_counts[0]; + lweight += logmath_log(base->lmath, 1.0 / base->n_counts[0]); + model->trie->unigrams[wid + 1].next = model->trie->unigrams[wid].next; + model->trie->unigrams[wid].prob = (float) lweight; + /* This unigram by definition doesn't participate in any bigrams, + * so its backoff weight is undefined and next pointer same as in finish unigram*/ + model->trie->unigrams[wid].bo = 0; + /* Finally, increase the unigram count */ + /* FIXME: Note that this can actually be quite bogus due to the + * presence of class words. 
If wid falls outside the unigram + * count, increase it to compensate, at the cost of no longer + * really knowing how many unigrams we have :( */ + if ((uint32) wid >= base->n_counts[0]) + base->n_counts[0] = wid + 1; + + return (int32) weight_score(base, lweight); +} + +static void +lm_trie_flush(ngram_model_t * base) +{ + ngram_model_trie_t *model = (ngram_model_trie_t *) base; + lm_trie_t *trie = model->trie; + memset(trie->hist_cache, -1, sizeof(trie->hist_cache)); + memset(trie->backoff_cache, 0, sizeof(trie->backoff_cache)); + return; +} + +static ngram_funcs_t ngram_model_trie_funcs = { + ngram_model_trie_free, /* free */ + trie_apply_weights, /* apply_weights */ + ngram_model_trie_score, /* score */ + ngram_model_trie_raw_score, /* raw_score */ + lm_trie_add_ug, /* add_ug */ + lm_trie_flush /* flush */ +}; diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model_trie.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model_trie.h new file mode 100644 index 0000000000000000000000000000000000000000..bc73df6921f876d9f805c8ac51cebb12308298bf --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngram_model_trie.h @@ -0,0 +1,82 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2015 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +#ifndef __NGRAM_MODEL_TRIE_H__ +#define __NGRAM_MODEL_TRIE_H__ + +#include +#include + +#include "ngram_model_internal.h" +#include "lm_trie.h" + +typedef struct ngram_model_trie_s { + ngram_model_t base; /**< Base ngram_model_t structure */ + lm_trie_t *trie; /**< Trie structure that stores ngram relations and weights */ +} ngram_model_trie_t; + +/** + * Read N-Gram model from an ARPABO text file and arrange it in a trie structure. + * + * The config argument is not used by the implementation. + * Returns NULL if the file cannot be opened or parsed. + */ +ngram_model_t *ngram_model_trie_read_arpa(cmd_ln_t * config, + const char *path, + logmath_t * lmath); + +/** + * Write N-Gram model stored in trie structure in ARPABO format. + * + * Returns -1 if path cannot be opened for writing, otherwise the + * result of closing the file. + */ +int ngram_model_trie_write_arpa(ngram_model_t * base, const char *path); + +/** + * Read N-Gram model from the binary file and arrange it in a trie structure. + * + * The config argument is not used by the implementation. + * Returns NULL if the file cannot be opened or does not start with + * the trie header. + */ +ngram_model_t *ngram_model_trie_read_bin(cmd_ln_t * config, + const char *path, + logmath_t * lmath); + +/** + * Write trie to binary file. + * + * Returns -1 if path cannot be opened for writing, 0 otherwise. + */ +int ngram_model_trie_write_bin(ngram_model_t * model, const char *path); + +/** + * Read N-Gram model from DMP file and arrange it in a trie structure. + * + * The config argument is not used by the implementation. + * Returns NULL on failure. + */ +ngram_model_t *ngram_model_trie_read_dmp(cmd_ln_t * config, + const char *file_name, + logmath_t * lmath); + +#endif /* __NGRAM_MODEL_TRIE_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngrams_raw.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngrams_raw.c new file mode 100644 index 0000000000000000000000000000000000000000..0ff9f9a19b640c14d999c9b129d6bed0c4e0cd85 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngrams_raw.c @@ -0,0 +1,387 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2015 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ + +#include + +#include +#include +#include +#include +#include + +#include "ngram_model_internal.h" +#include "ngrams_raw.h" + +int +ngram_ord_comparator(const void *a_raw, const void *b_raw) +{ + ngram_raw_t *a = (ngram_raw_t *) a_raw; + ngram_raw_t *b = (ngram_raw_t *) b_raw; + int a_w_ptr = 0; + int b_w_ptr = 0; + while ((uint32)a_w_ptr < a->order && (uint32)b_w_ptr < b->order) { + if (a->words[a_w_ptr] == b->words[b_w_ptr]) { + a_w_ptr++; + b_w_ptr++; + continue; + } + if (a->words[a_w_ptr] < b->words[b_w_ptr]) + return -1; + else + return 1; + } + return a->order - b->order; +} + +static int +ngrams_raw_read_line(lineiter_t *li, hash_table_t *wid, + logmath_t *lmath, int order, int order_max, + ngram_raw_t *raw_ngram) +{ + int n, i; + int words_expected; + char *wptr[NGRAM_MAX_ORDER + 1]; + uint32 *word_out; + + words_expected = order + 1; + if ((n = + str2words(li->buf, wptr, + NGRAM_MAX_ORDER + 1)) < words_expected) { + E_ERROR("Format error; %d-gram ignored at line %d\n", order, li->lineno); + return -1; + } + + raw_ngram->order = order; + + if (order == order_max) { + raw_ngram->prob = atof_c(wptr[0]); + if (raw_ngram->prob > 0) { + E_WARN("%d-gram '%s' has positive probability\n", order, wptr[1]); + raw_ngram->prob = 0.0f; + } + raw_ngram->prob = + logmath_log10_to_log_float(lmath, raw_ngram->prob); + } + else { + float weight, backoff; + + weight = atof_c(wptr[0]); + if (weight > 0) { + E_WARN("%d-gram '%s' has positive probability\n", order, wptr[1]); + raw_ngram->prob = 0.0f; + } + else { + raw_ngram->prob = + logmath_log10_to_log_float(lmath, weight); + } + + if (n == order + 1) { + raw_ngram->backoff = 0.0f; + } + else { + backoff = atof_c(wptr[order + 1]); + raw_ngram->backoff = + logmath_log10_to_log_float(lmath, backoff); + } + } + raw_ngram->words = + (uint32 *) ckd_calloc(order, sizeof(*raw_ngram->words)); + for (word_out = raw_ngram->words + order - 1, i = 1; 
+ word_out >= raw_ngram->words; --word_out, i++) { + hash_table_lookup_int32(wid, wptr[i], (int32 *) word_out); + } + return 0; +} + +static int +ngrams_raw_read_section(ngram_raw_t ** raw_ngrams, lineiter_t ** li, + hash_table_t * wid, logmath_t * lmath, uint32 *count, + int order, int order_max) +{ + char expected_header[20]; + uint32 i, cur; + + sprintf(expected_header, "\\%d-grams:", order); + while (*li && strcmp((*li)->buf, expected_header) != 0) { + *li = lineiter_next(*li); + } + + if (*li == NULL) { + E_ERROR("Failed to find '%s', language model file truncated\n", expected_header); + return -1; + } + + *raw_ngrams = (ngram_raw_t *) ckd_calloc(*count, sizeof(ngram_raw_t)); + for (i = 0, cur = 0; i < *count && *li != NULL; i++) { + *li = lineiter_next(*li); + if (*li == NULL) { + E_ERROR("Unexpected end of ARPA file. Failed to read %d-gram\n", + order); + return -1; + } + if (ngrams_raw_read_line(*li, wid, lmath, order, order_max, + *raw_ngrams + cur) == 0) { + cur++; + } + } + *count = cur; + qsort(*raw_ngrams, *count, sizeof(ngram_raw_t), &ngram_ord_comparator); + return 0; +} + +ngram_raw_t ** +ngrams_raw_read_arpa(lineiter_t ** li, logmath_t * lmath, uint32 * counts, + int order, hash_table_t * wid) +{ + ngram_raw_t **raw_ngrams; + int order_it; + + raw_ngrams = + (ngram_raw_t **) ckd_calloc(order - 1, sizeof(*raw_ngrams)); + + for (order_it = 2; order_it <= order; order_it++) { + if (ngrams_raw_read_section(&raw_ngrams[order_it - 2], li, wid, lmath, + counts + order_it - 1, order_it, order) < 0) + break; + } + + /* Check if we found ARPA end-mark */ + if (*li == NULL) { + E_ERROR("ARPA file ends without end-mark\n"); + ngrams_raw_free(raw_ngrams, counts, order); + return NULL; + } else { + *li = lineiter_next(*li); + if (strcmp((*li)->buf, "\\end\\") != 0) { + E_WARN + ("Finished reading ARPA file. 
Expecting end mark but found '%s'\n", + (*li)->buf); + } + } + + return raw_ngrams; +} + +static void +read_dmp_weight_array(FILE * fp, logmath_t * lmath, uint8 do_swap, + int32 counts, ngram_raw_t * raw_ngrams, + int weight_idx) +{ + int32 i, k; + dmp_weight_t *tmp_weight_arr; + + fread(&k, sizeof(k), 1, fp); + if (do_swap) + SWAP_INT32(&k); + tmp_weight_arr = + (dmp_weight_t *) ckd_calloc(k, sizeof(*tmp_weight_arr)); + fread(tmp_weight_arr, sizeof(*tmp_weight_arr), k, fp); + for (i = 0; i < k; i++) { + if (do_swap) + SWAP_INT32(&tmp_weight_arr[i].l); + /* Convert values to log. */ + tmp_weight_arr[i].f = + logmath_log10_to_log_float(lmath, tmp_weight_arr[i].f); + } + /* replace indexes with real probs in raw bigrams */ + for (i = 0; i < counts; i++) { + if (weight_idx == 0) { + raw_ngrams[i].prob = + tmp_weight_arr[(int) raw_ngrams[i].prob].f; + } else { + raw_ngrams[i].backoff = + tmp_weight_arr[(int) raw_ngrams[i].backoff].f; + } + } + ckd_free(tmp_weight_arr); +} + +#define BIGRAM_SEGMENT_SIZE 9 + +ngram_raw_t ** +ngrams_raw_read_dmp(FILE * fp, logmath_t * lmath, uint32 * counts, + int order, uint32 * unigram_next, uint8 do_swap) +{ + uint32 j, ngram_idx; + uint16 *bigrams_next; + ngram_raw_t **raw_ngrams = + (ngram_raw_t **) ckd_calloc(order - 1, sizeof(*raw_ngrams)); + + /* read bigrams */ + raw_ngrams[0] = + (ngram_raw_t *) ckd_calloc((size_t) (counts[1] + 1), + sizeof(*raw_ngrams[0])); + bigrams_next = + (uint16 *) ckd_calloc((size_t) (counts[1] + 1), + sizeof(*bigrams_next)); + ngram_idx = 1; + for (j = 0; j <= counts[1]; j++) { + uint16 wid, prob_idx, bo_idx; + ngram_raw_t *raw_ngram = &raw_ngrams[0][j]; + + fread(&wid, sizeof(wid), 1, fp); + if (do_swap) + SWAP_INT16(&wid); + raw_ngram->order = 2; + while (ngram_idx < counts[0] && j == unigram_next[ngram_idx]) { + ngram_idx++; + } + + if (j != counts[1]) { + raw_ngram->words = + (uint32 *) ckd_calloc(2, sizeof(*raw_ngram->words)); + raw_ngram->words[0] = (uint32) wid; + raw_ngram->words[1] = (uint32) 
ngram_idx - 1; + } + + fread(&prob_idx, sizeof(prob_idx), 1, fp); + fread(&bo_idx, sizeof(bo_idx), 1, fp); + fread(&bigrams_next[j], sizeof(bigrams_next[j]), 1, fp); + if (do_swap) { + SWAP_INT16(&prob_idx); + SWAP_INT16(&bo_idx); + SWAP_INT16(&bigrams_next[j]); + } + + if (j != counts[1]) { + raw_ngram->prob = prob_idx + 0.5f; /* keep index in float. ugly but avoiding using extra memory */ + raw_ngram->backoff = bo_idx + 0.5f; + } + } + + if (ngram_idx < counts[0]) { + E_ERROR("Corrupted model, not enough unigrams %d %d\n", ngram_idx, counts[0]); + ckd_free(bigrams_next); + ngrams_raw_free(raw_ngrams, counts, order); + return NULL; + } + + /* read trigrams */ + if (order > 2) { + raw_ngrams[1] = + (ngram_raw_t *) ckd_calloc((size_t) counts[2], + sizeof(*raw_ngrams[1])); + for (j = 0; j < counts[2]; j++) { + uint16 wid, prob_idx; + ngram_raw_t *raw_ngram = &raw_ngrams[1][j]; + + fread(&wid, sizeof(wid), 1, fp); + fread(&prob_idx, sizeof(prob_idx), 1, fp); + if (do_swap) { + SWAP_INT16(&wid); + SWAP_INT16(&prob_idx); + } + + raw_ngram->order = 3; + raw_ngram->words = + (uint32 *) ckd_calloc(3, sizeof(*raw_ngram->words)); + raw_ngram->words[0] = (uint32) wid; + raw_ngram->prob = prob_idx + 0.5f; /* keep index in float. 
ugly but avoiding using extra memory */ + } + } + + /* read prob2 */ + read_dmp_weight_array(fp, lmath, do_swap, (int32) counts[1], + raw_ngrams[0], 0); + /* read bo2 */ + if (order > 2) { + int32 k; + int32 *tseg_base; + read_dmp_weight_array(fp, lmath, do_swap, (int32) counts[1], + raw_ngrams[0], 1); + /* read prob3 */ + read_dmp_weight_array(fp, lmath, do_swap, (int32) counts[2], + raw_ngrams[1], 0); + /* Read tseg_base size and tseg_base to fill trigram's first words */ + fread(&k, sizeof(k), 1, fp); + if (do_swap) + SWAP_INT32(&k); + tseg_base = (int32 *) ckd_calloc(k, sizeof(int32)); + fread(tseg_base, sizeof(int32), k, fp); + if (do_swap) { + for (j = 0; j < (uint32) k; j++) { + SWAP_INT32(&tseg_base[j]); + } + } + ngram_idx = 0; + for (j = 1; j <= counts[1]; j++) { + uint32 next_ngram_idx = + (uint32) (tseg_base[j >> BIGRAM_SEGMENT_SIZE] + + bigrams_next[j]); + while (ngram_idx < next_ngram_idx) { + raw_ngrams[1][ngram_idx].words[1] = + raw_ngrams[0][j - 1].words[0]; + raw_ngrams[1][ngram_idx].words[2] = + raw_ngrams[0][j - 1].words[1]; + ngram_idx++; + } + } + ckd_free(tseg_base); + + if (ngram_idx < counts[2]) { + E_ERROR("Corrupted model, some trigrams have no corresponding bigram\n"); + ckd_free(bigrams_next); + ngrams_raw_free(raw_ngrams, counts, order); + return NULL; + } + } + ckd_free(bigrams_next); + + /* sort raw ngrams for reverse trie */ + qsort(raw_ngrams[0], (size_t) counts[1], sizeof(*raw_ngrams[0]), + &ngram_ord_comparator); + if (order > 2) { + qsort(raw_ngrams[1], (size_t) counts[2], sizeof(*raw_ngrams[1]), + &ngram_ord_comparator); + } + return raw_ngrams; +} + +void +ngrams_raw_free(ngram_raw_t ** raw_ngrams, uint32 * counts, int order) +{ + uint32 num; + int order_it; + + for (order_it = 0; order_it < order - 1; order_it++) { + for (num = 0; num < counts[order_it + 1]; num++) { + ckd_free(raw_ngrams[order_it][num].words); + } + ckd_free(raw_ngrams[order_it]); + } + ckd_free(raw_ngrams); +} diff --git 
a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngrams_raw.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngrams_raw.h new file mode 100644 index 0000000000000000000000000000000000000000..725d09aa5a02dbabd54780fc41ec37e61d8f18e5 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/lm/ngrams_raw.h @@ -0,0 +1,94 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2015 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef __NGRAMS_RAW_H__ +#define __NGRAMS_RAW_H__ + +#include +#include +#include +#include +#include + +typedef struct ngram_raw_s { + uint32 *words; /* array of word indexes, length corresponds to ngram order */ + float32 prob; + float32 backoff; + uint32 order; +} ngram_raw_t; + +typedef union { + float32 f; + int32 l; +} dmp_weight_t; + +/** + * Raw ordered ngrams comparator + */ +int ngram_ord_comparator(const void *a_raw, const void *b_raw); + +/** + * Read ngrams of order > 1 from ARPA file + * @param li [in] sphinxbase file line iterator that point to bigram description in ARPA file + * @param wid [in] hashtable that maps string word representation to id + * @param lmath [in] log math used for log convertions + * @param counts [in] amount of ngrams for each order + * @param order [in] maximum order of ngrams + * @return raw ngrams of order bigger than 1 + */ +ngram_raw_t **ngrams_raw_read_arpa(lineiter_t ** li, logmath_t * lmath, + uint32 * counts, int order, + hash_table_t * wid); + +/** + * Reads ngrams of order > 1 from DMP file. + * @param fp [in] file to read from. 
Position in file corresponds to start of bigram description + * @param lmath [in] log math used for log convertions + * @param counts [in] amount of ngrams for each order + * @param order [in] maximum order of ngrams + * @param unigram_next [in] array of next word pointers for unigrams. Needed to define forst word of bigrams + * @param do_swap [in] wether to do swap of bits + * @return raw ngrams of order bigger than 1 + */ +ngram_raw_t **ngrams_raw_read_dmp(FILE * fp, logmath_t * lmath, + uint32 * counts, int order, + uint32 * unigram_next, uint8 do_swap); + +void ngrams_raw_free(ngram_raw_t ** raw_ngrams, uint32 * counts, + int order); + +#endif /* __LM_NGRAMS_RAW_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/mdef.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/mdef.c new file mode 100644 index 0000000000000000000000000000000000000000..c78b0e168bd11facb7d737807e632f1e4a24ecf4 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/mdef.c @@ -0,0 +1,766 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * mdef.c -- HMM model definition: base (CI) phones and triphones + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * + * + * 22-Nov-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Imported from s3.2, for supporting s3 format continuous + * acoustic models. + * + * 14-Oct-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Added mdef_sseq2sen_active(). + * + * 06-May-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * In mdef_phone_id(), added backing off to silence phone context from filler + * context if original triphone not found. 
+ * + * 30-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Added senone-sequence id (ssid) to phone_t and appropriate functions to + * maintain it. Instead, moved state sequence info to mdef_t. + * + * 13-Jul-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. + * Added mdef_phone_str(). + * + * 01-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. + * Allowed mdef_phone_id_nearest to return base phone id if either + * left or right context (or both) is undefined. + * + * 01-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. + * Created. + */ + + +/* + * Major assumptions: + * All phones have same #states, same topology. + * Every phone has exactly one non-emitting, final state--the last one. + * CI phones must appear first in model definition file. + */ + +/* System headers. */ +#include +#include +#include +#include + +/* SphinxBase headers. */ +#include +#include + +/* Local headers. */ +#include "mdef.h" + + +#define MODEL_DEF_VERSION "0.3" + +static void +ciphone_add(mdef_t * m, char *ci, int p) +{ + assert(p < m->n_ciphone); + + m->ciphone[p].name = (char *) ckd_salloc(ci); /* freed in mdef_free */ + if (hash_table_enter(m->ciphone_ht, m->ciphone[p].name, + (void *)(long)p) != (void *)(long)p) + E_FATAL("hash_table_enter(%s) failed; duplicate CIphone?\n", + m->ciphone[p].name); +} + + +static ph_lc_t * +find_ph_lc(ph_lc_t * lclist, int lc) +{ + ph_lc_t *lcptr; + + for (lcptr = lclist; lcptr && (lcptr->lc != lc); lcptr = lcptr->next); + return lcptr; +} + + +static ph_rc_t * +find_ph_rc(ph_rc_t * rclist, int rc) +{ + ph_rc_t *rcptr; + + for (rcptr = rclist; rcptr && (rcptr->rc != rc); rcptr = rcptr->next); + return rcptr; +} + + +static void +triphone_add(mdef_t * m, + int ci, int lc, int rc, word_posn_t wpos, + int p) +{ + ph_lc_t *lcptr; + ph_rc_t *rcptr; + + assert(p < m->n_phone); + + /* Fill in phone[p] information (state and tmat mappings added later) */ + m->phone[p].ci = ci; + 
m->phone[p].lc = lc; + m->phone[p].rc = rc; + m->phone[p].wpos = wpos; + + /* Create -> p mapping if not a CI phone */ + if (p >= m->n_ciphone) { + if ((lcptr = find_ph_lc(m->wpos_ci_lclist[wpos][(int) ci], lc)) + == NULL) { + lcptr = (ph_lc_t *) ckd_calloc(1, sizeof(ph_lc_t)); /* freed at mdef_free, I believe */ + lcptr->lc = lc; + lcptr->next = m->wpos_ci_lclist[wpos][(int) ci]; + m->wpos_ci_lclist[wpos][(int) ci] = lcptr; /* This is what needs to be freed */ + } + if ((rcptr = find_ph_rc(lcptr->rclist, rc)) != NULL) { + __BIGSTACKVARIABLE__ char buf[4096]; + + mdef_phone_str(m, rcptr->pid, buf); + E_FATAL("Duplicate triphone: %s\n", buf); + } + + rcptr = (ph_rc_t *) ckd_calloc(1, sizeof(ph_rc_t)); /* freed in mdef_free, I believe */ + rcptr->rc = rc; + rcptr->pid = p; + rcptr->next = lcptr->rclist; + lcptr->rclist = rcptr; + } +} + + +int +mdef_ciphone_id(mdef_t * m, char *ci) +{ + int32 id; + if (hash_table_lookup_int32(m->ciphone_ht, ci, &id) < 0) + return -1; + return id; +} + + +const char * +mdef_ciphone_str(mdef_t * m, int id) +{ + assert(m); + assert((id >= 0) && (id < m->n_ciphone)); + + return (m->ciphone[id].name); +} + + +int +mdef_phone_str(mdef_t * m, int pid, char *buf) +{ + char *wpos_name; + + assert(m); + assert((pid >= 0) && (pid < m->n_phone)); + wpos_name = WPOS_NAME; + + buf[0] = '\0'; + if (pid < m->n_ciphone) + sprintf(buf, "%s", mdef_ciphone_str(m, pid)); + else { + sprintf(buf, "%s %s %s %c", + mdef_ciphone_str(m, m->phone[pid].ci), + mdef_ciphone_str(m, m->phone[pid].lc), + mdef_ciphone_str(m, m->phone[pid].rc), + wpos_name[m->phone[pid].wpos]); + } + return 0; +} + + +int +mdef_phone_id(mdef_t * m, + int ci, int lc, int rc, word_posn_t wpos) +{ + ph_lc_t *lcptr; + ph_rc_t *rcptr; + int newl, newr; + + assert(m); + assert((ci >= 0) && (ci < m->n_ciphone)); + assert((lc >= 0) && (lc < m->n_ciphone)); + assert((rc >= 0) && (rc < m->n_ciphone)); + assert((wpos >= 0) && (wpos < N_WORD_POSN)); + + if (((lcptr = + 
find_ph_lc(m->wpos_ci_lclist[wpos][(int) ci], lc)) == NULL) + || ((rcptr = find_ph_rc(lcptr->rclist, rc)) == NULL)) { + /* Not found; backoff to silence context if non-silence filler context */ + if (m->sil < 0) + return -1; + + newl = m->ciphone[(int) lc].filler ? m->sil : lc; + newr = m->ciphone[(int) rc].filler ? m->sil : rc; + if ((newl == lc) && (newr == rc)) + return -1; + + return (mdef_phone_id(m, ci, newl, newr, wpos)); + } + + return (rcptr->pid); +} + +int +mdef_is_ciphone(mdef_t * m, int p) +{ + assert(m); + assert((p >= 0) && (p < m->n_phone)); + + return ((p < m->n_ciphone) ? 1 : 0); +} + +int +mdef_is_cisenone(mdef_t * m, int s) +{ + assert(m); + if (s >= m->n_sen) { + return 0; + } + assert(s >= 0); + return ((s == m->cd2cisen[s]) ? 1 : 0); +} + + +/* Parse tmat and state->senone mappings for phone p and fill in structure */ +static void +parse_tmat_senmap(mdef_t * m, char *line, long off, int p) +{ + int32 wlen, n, s; + char *lp; + __BIGSTACKVARIABLE__ char word[1024]; + + lp = line + off; + + /* Read transition matrix id */ + if ((sscanf(lp, "%d%n", &n, &wlen) != 1) || (n < 0)) + E_FATAL("Missing or bad transition matrix id: %s\n", line); + m->phone[p].tmat = n; + if (m->n_tmat <= n) + E_FATAL("tmat-id(%d) > #tmat in header(%d): %s\n", n, m->n_tmat, + line); + lp += wlen; + + /* Read senone mappings for each emitting state */ + for (n = 0; n < m->n_emit_state; n++) { + if ((sscanf(lp, "%d%n", &s, &wlen) != 1) || (s < 0)) + E_FATAL("Missing or bad state[%d]->senone mapping: %s\n", n, + line); + + if ((p < m->n_ciphone) && (m->n_ci_sen <= s)) + E_FATAL("CI-senone-id(%d) > #CI-senones(%d): %s\n", s, + m->n_ci_sen, line); + if (m->n_sen <= s) + E_FATAL("Senone-id(%d) > #senones(%d): %s\n", s, m->n_sen, + line); + + m->sseq[p][n] = s; + lp += wlen; + } + + /* Check for the last non-emitting state N */ + if ((sscanf(lp, "%s%n", word, &wlen) != 1) || (strcmp(word, "N") != 0)) + E_FATAL("Missing non-emitting state spec: %s\n", line); + lp += wlen; + + /* 
Check for end of line */ + if (sscanf(lp, "%s%n", word, &wlen) == 1) + E_FATAL("Non-empty beyond non-emitting final state: %s\n", line); +} + + +static void +parse_base_line(mdef_t * m, char *line, int p) +{ + int32 wlen, n; + __BIGSTACKVARIABLE__ char word[1024], *lp; + int ci; + + lp = line; + + /* Read base phone name */ + if (sscanf(lp, "%s%n", word, &wlen) != 1) + E_FATAL("Missing base phone name: %s\n", line); + lp += wlen; + + /* Make sure it's not a duplicate */ + ci = mdef_ciphone_id(m, word); + if (ci >= 0) + E_FATAL("Duplicate base phone: %s\n", line); + + /* Add ciphone to ciphone table with id p */ + ciphone_add(m, word, p); + ci = (int) p; + + /* Read and skip "-" for lc, rc, wpos */ + for (n = 0; n < 3; n++) { + if ((sscanf(lp, "%s%n", word, &wlen) != 1) + || (strcmp(word, "-") != 0)) + E_FATAL("Bad context info for base phone: %s\n", line); + lp += wlen; + } + + /* Read filler attribute, if present */ + if (sscanf(lp, "%s%n", word, &wlen) != 1) + E_FATAL("Missing filler attribute field: %s\n", line); + lp += wlen; + if (strcmp(word, "filler") == 0) + m->ciphone[(int) ci].filler = 1; + else if (strcmp(word, "n/a") == 0) + m->ciphone[(int) ci].filler = 0; + else + E_FATAL("Bad filler attribute field: %s\n", line); + + triphone_add(m, ci, -1, -1, WORD_POSN_UNDEFINED, p); + + /* Parse remainder of line: transition matrix and state->senone mappings */ + parse_tmat_senmap(m, line, lp - line, p); +} + + +static void +parse_tri_line(mdef_t * m, char *line, int p) +{ + int32 wlen; + __BIGSTACKVARIABLE__ char word[1024], *lp; + int ci, lc, rc; + word_posn_t wpos = WORD_POSN_BEGIN; + + lp = line; + + /* Read base phone name */ + if (sscanf(lp, "%s%n", word, &wlen) != 1) + E_FATAL("Missing base phone name: %s\n", line); + lp += wlen; + + ci = mdef_ciphone_id(m, word); + if (ci < 0) + E_FATAL("Unknown base phone: %s\n", line); + + /* Read lc */ + if (sscanf(lp, "%s%n", word, &wlen) != 1) + E_FATAL("Missing left context: %s\n", line); + lp += wlen; + lc = 
mdef_ciphone_id(m, word); + if (lc < 0) + E_FATAL("Unknown left context: %s\n", line); + + /* Read rc */ + if (sscanf(lp, "%s%n", word, &wlen) != 1) + E_FATAL("Missing right context: %s\n", line); + lp += wlen; + rc = mdef_ciphone_id(m, word); + if (rc < 0) + E_FATAL("Unknown right context: %s\n", line); + + /* Read tripone word-position within word */ + if ((sscanf(lp, "%s%n", word, &wlen) != 1) || (word[1] != '\0')) + E_FATAL("Missing or bad word-position spec: %s\n", line); + lp += wlen; + switch (word[0]) { + case 'b': + wpos = WORD_POSN_BEGIN; + break; + case 'e': + wpos = WORD_POSN_END; + break; + case 's': + wpos = WORD_POSN_SINGLE; + break; + case 'i': + wpos = WORD_POSN_INTERNAL; + break; + default: + E_FATAL("Bad word-position spec: %s\n", line); + } + + /* Read filler attribute, if present. Must match base phone attribute */ + if (sscanf(lp, "%s%n", word, &wlen) != 1) + E_FATAL("Missing filler attribute field: %s\n", line); + lp += wlen; + if (((strcmp(word, "filler") == 0) && (m->ciphone[(int) ci].filler)) || + ((strcmp(word, "n/a") == 0) && (!m->ciphone[(int) ci].filler))) { + /* Everything is fine */ + } + else + E_FATAL("Bad filler attribute field: %s\n", line); + + triphone_add(m, ci, lc, rc, wpos, p); + + /* Parse remainder of line: transition matrix and state->senone mappings */ + parse_tmat_senmap(m, line, lp - line, p); +} + + +static void +sseq_compress(mdef_t * m) +{ + hash_table_t *h; + uint16 **sseq; + int32 n_sseq; + int32 p, j, k; + glist_t g; + gnode_t *gn; + hash_entry_t *he; + + k = m->n_emit_state * sizeof(int16); + + h = hash_table_new(m->n_phone, HASH_CASE_YES); + n_sseq = 0; + + /* Identify unique senone-sequence IDs. BUG: tmat-id not being considered!! 
*/ + for (p = 0; p < m->n_phone; p++) { + /* Add senone sequence to hash table */ + if (n_sseq + == (j = hash_table_enter_bkey_int32(h, (char *)m->sseq[p], k, n_sseq))) + n_sseq++; + + m->phone[p].ssid = j; + } + + /* Generate compacted sseq table */ + sseq = ckd_calloc_2d(n_sseq, m->n_emit_state, sizeof(**sseq)); /* freed in mdef_free() */ + + g = hash_table_tolist(h, &j); + assert(j == n_sseq); + + for (gn = g; gn; gn = gnode_next(gn)) { + he = (hash_entry_t *) gnode_ptr(gn); + j = (int32)(long)hash_entry_val(he); + memcpy(sseq[j], hash_entry_key(he), k); + } + glist_free(g); + + /* Free the old, temporary senone sequence table, replace with compacted one */ + ckd_free_2d(m->sseq); + m->sseq = sseq; + m->n_sseq = n_sseq; + + hash_table_free(h); +} + + +static int32 +noncomment_line(char *line, int32 size, FILE * fp) +{ + while (fgets(line, size, fp) != NULL) { + if (line[0] != '#') + return 0; + } + return -1; +} + + +/* + * Initialize phones (ci and triphones) and state->senone mappings from .mdef file. 
+ */ +mdef_t * +mdef_init(char *mdeffile, int32 breport) +{ + FILE *fp; + int32 n_ci, n_tri, n_map, n; + __BIGSTACKVARIABLE__ char tag[1024], buf[1024]; + uint16 **senmap; + int p; + int32 s, ci, cd; + mdef_t *m; + + if (!mdeffile) + E_FATAL("No mdef-file\n"); + + if (breport) + E_INFO("Reading model definition: %s\n", mdeffile); + + m = (mdef_t *) ckd_calloc(1, sizeof(mdef_t)); /* freed in mdef_free */ + + if ((fp = fopen(mdeffile, "r")) == NULL) + E_FATAL_SYSTEM("Failed to open mdef file '%s' for reading", mdeffile); + + if (noncomment_line(buf, sizeof(buf), fp) < 0) + E_FATAL("Empty file: %s\n", mdeffile); + + if (strncmp(buf, "BMDF", 4) == 0 || strncmp(buf, "FDMB", 4) == 0) { + E_INFO + ("Found byte-order mark %.4s, assuming this is a binary mdef file\n", + buf); + fclose(fp); + ckd_free(m); + return NULL; + } + if (strncmp(buf, MODEL_DEF_VERSION, strlen(MODEL_DEF_VERSION)) != 0) + E_FATAL("Version error: Expecing %s, but read %s\n", + MODEL_DEF_VERSION, buf); + + /* Read #base phones, #triphones, #senone mappings defined in header */ + n_ci = -1; + n_tri = -1; + n_map = -1; + m->n_ci_sen = -1; + m->n_sen = -1; + m->n_tmat = -1; + do { + if (noncomment_line(buf, sizeof(buf), fp) < 0) + E_FATAL("Incomplete header\n"); + + if ((sscanf(buf, "%d %s", &n, tag) != 2) || (n < 0)) + E_FATAL("Error in header: %s\n", buf); + + if (strcmp(tag, "n_base") == 0) + n_ci = n; + else if (strcmp(tag, "n_tri") == 0) + n_tri = n; + else if (strcmp(tag, "n_state_map") == 0) + n_map = n; + else if (strcmp(tag, "n_tied_ci_state") == 0) + m->n_ci_sen = n; + else if (strcmp(tag, "n_tied_state") == 0) + m->n_sen = n; + else if (strcmp(tag, "n_tied_tmat") == 0) + m->n_tmat = n; + else + E_FATAL("Unknown header line: %s\n", buf); + } while ((n_ci < 0) || (n_tri < 0) || (n_map < 0) || + (m->n_ci_sen < 0) || (m->n_sen < 0) || (m->n_tmat < 0)); + + if ((n_ci == 0) || (m->n_ci_sen == 0) || (m->n_tmat == 0) + || (m->n_ci_sen > m->n_sen)) + E_FATAL("%s: Error in header\n", mdeffile); + + /* 
Check typesize limits */ + if (n_ci >= MAX_INT16) + E_FATAL("%s: #CI phones (%d) exceeds limit (%d)\n", mdeffile, n_ci, + MAX_INT16); + if (n_ci + n_tri >= MAX_INT32) /* Comparison is always false... */ + E_FATAL("%s: #Phones (%d) exceeds limit (%d)\n", mdeffile, + n_ci + n_tri, MAX_INT32); + if (m->n_sen >= MAX_INT16) + E_FATAL("%s: #senones (%d) exceeds limit (%d)\n", mdeffile, + m->n_sen, MAX_INT16); + if (m->n_tmat >= MAX_INT32) /* Comparison is always false... */ + E_FATAL("%s: #tmats (%d) exceeds limit (%d)\n", mdeffile, + m->n_tmat, MAX_INT32); + + m->n_emit_state = (n_map / (n_ci + n_tri)) - 1; + if ((m->n_emit_state + 1) * (n_ci + n_tri) != n_map) + E_FATAL + ("Header error: n_state_map not a multiple of n_ci*n_tri\n"); + + /* Initialize ciphone info */ + m->n_ciphone = n_ci; + m->ciphone_ht = hash_table_new(n_ci, HASH_CASE_YES); /* With case-insensitive string names *//* freed in mdef_free */ + m->ciphone = (ciphone_t *) ckd_calloc(n_ci, sizeof(ciphone_t)); /* freed in mdef_free */ + + /* Initialize phones info (ciphones + triphones) */ + m->n_phone = n_ci + n_tri; + m->phone = (phone_t *) ckd_calloc(m->n_phone, sizeof(phone_t)); /* freed in mdef_free */ + + /* Allocate space for state->senone map for each phone */ + senmap = ckd_calloc_2d(m->n_phone, m->n_emit_state, sizeof(**senmap)); /* freed in mdef_free */ + m->sseq = senmap; /* TEMPORARY; until it is compressed into just the unique ones */ + + /* Allocate initial space for -> pid mapping */ + m->wpos_ci_lclist = (ph_lc_t ***) ckd_calloc_2d(N_WORD_POSN, m->n_ciphone, sizeof(ph_lc_t *)); /* freed in mdef_free */ + + /* + * Read base phones and triphones. They'll simply be assigned a running sequence + * number as their "phone-id". If the phone-id < n_ci, it's a ciphone. 
+ */ + + /* Read base phones */ + for (p = 0; p < n_ci; p++) { + if (noncomment_line(buf, sizeof(buf), fp) < 0) + E_FATAL("Premature EOF reading CIphone %d\n", p); + parse_base_line(m, buf, p); + } + m->sil = mdef_ciphone_id(m, S3_SILENCE_CIPHONE); + + /* Read triphones, if any */ + for (; p < m->n_phone; p++) { + if (noncomment_line(buf, sizeof(buf), fp) < 0) + E_FATAL("Premature EOF reading phone %d\n", p); + parse_tri_line(m, buf, p); + } + + if (noncomment_line(buf, sizeof(buf), fp) >= 0) + E_ERROR("Non-empty file beyond expected #phones (%d)\n", + m->n_phone); + + /* Build CD senones to CI senones map */ + if (m->n_ciphone * m->n_emit_state != m->n_ci_sen) + E_FATAL + ("#CI-senones(%d) != #CI-phone(%d) x #emitting-states(%d)\n", + m->n_ci_sen, m->n_ciphone, m->n_emit_state); + m->cd2cisen = (int16 *) ckd_calloc(m->n_sen, sizeof(*m->cd2cisen)); /* freed in mdef_free */ + + m->sen2cimap = (int16 *) ckd_calloc(m->n_sen, sizeof(*m->sen2cimap)); /* freed in mdef_free */ + + for (s = 0; s < m->n_sen; s++) + m->sen2cimap[s] = -1; + for (s = 0; s < m->n_ci_sen; s++) { /* CI senones */ + m->cd2cisen[s] = s; + m->sen2cimap[s] = s / m->n_emit_state; + } + for (p = n_ci; p < m->n_phone; p++) { /* CD senones */ + for (s = 0; s < m->n_emit_state; s++) { + cd = m->sseq[p][s]; + ci = m->sseq[m->phone[p].ci][s]; + m->cd2cisen[cd] = ci; + m->sen2cimap[cd] = m->phone[p].ci; + } + } + + sseq_compress(m); + fclose(fp); + + return m; +} + +void +mdef_report(mdef_t * m) +{ + E_INFO_NOFN("Initialization of mdef_t, report:\n"); + E_INFO_NOFN + ("%d CI-phone, %d CD-phone, %d emitstate/phone, %d CI-sen, %d Sen, %d Sen-Seq\n", + m->n_ciphone, m->n_phone - m->n_ciphone, m->n_emit_state, + m->n_ci_sen, m->n_sen, m->n_sseq); + E_INFO_NOFN("\n"); + +} + +/* RAH 4.23.01, Need to step down the ->next list to see if there are + any more things to free + */ + + + +/* RAH 4.19.01, Attempt to free memory that was allocated within this module + I have not verified that all the memory has been 
freed. I've taken only a + reasonable effort for now. + RAH 4.24.01 - verified that all memory is released. + */ +void +mdef_free_recursive_lc(ph_lc_t * lc) +{ + if (lc == NULL) + return; + + if (lc->rclist) + mdef_free_recursive_rc(lc->rclist); + + if (lc->next) + mdef_free_recursive_lc(lc->next); + + ckd_free((void *) lc); +} + +void +mdef_free_recursive_rc(ph_rc_t * rc) +{ + if (rc == NULL) + return; + + if (rc->next) + mdef_free_recursive_rc(rc->next); + + ckd_free((void *) rc); +} + + +/* RAH, Free memory that was allocated in mdef_init + Rational purify shows that no leaks exist + */ + +void +mdef_free(mdef_t * m) +{ + int i, j; + + if (m) { + if (m->sen2cimap) + ckd_free((void *) m->sen2cimap); + if (m->cd2cisen) + ckd_free((void *) m->cd2cisen); + + /* RAH, go down the ->next list and delete all the pieces */ + for (i = 0; i < N_WORD_POSN; i++) + for (j = 0; j < m->n_ciphone; j++) + if (m->wpos_ci_lclist[i][j]) { + mdef_free_recursive_lc(m->wpos_ci_lclist[i][j]->next); + mdef_free_recursive_rc(m->wpos_ci_lclist[i][j]-> + rclist); + } + + for (i = 0; i < N_WORD_POSN; i++) + for (j = 0; j < m->n_ciphone; j++) + if (m->wpos_ci_lclist[i][j]) + ckd_free((void *) m->wpos_ci_lclist[i][j]); + + + if (m->wpos_ci_lclist) + ckd_free_2d((void *) m->wpos_ci_lclist); + if (m->sseq) + ckd_free_2d((void *) m->sseq); + /* Free phone context */ + if (m->phone) + ckd_free((void *) m->phone); + if (m->ciphone_ht) + hash_table_free(m->ciphone_ht); + + for (i = 0; i < m->n_ciphone; i++) { + if (m->ciphone[i].name) + ckd_free((void *) m->ciphone[i].name); + } + + + if (m->ciphone) + ckd_free((void *) m->ciphone); + + ckd_free((void *) m); + } +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/mdef.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/mdef.h new file mode 100644 index 0000000000000000000000000000000000000000..42d325cc4e34bfffff8ba734dce1015efd483600 --- /dev/null +++ 
b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/mdef.h @@ -0,0 +1,274 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ + +/* + * mdef.h -- HMM model definition: base (CI) phones and triphones + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + */ + + +#ifndef __MDEF_H__ +#define __MDEF_H__ + + +/* System headers. */ +#include + +/* SphinxBase headers. */ +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** \file mdef.h + * \brief Model definition + */ + +/** \enum word_posn_t + * \brief Union of different type of word position + */ + +typedef enum { + WORD_POSN_INTERNAL = 0, /**< Internal phone of word */ + WORD_POSN_BEGIN = 1, /**< Beginning phone of word */ + WORD_POSN_END = 2, /**< Ending phone of word */ + WORD_POSN_SINGLE = 3, /**< Single phone word (i.e. begin & end) */ + WORD_POSN_UNDEFINED = 4 /**< Undefined value, used for initial conditions, etc */ +} word_posn_t; +#define N_WORD_POSN 4 /**< total # of word positions (excluding undefined) */ +#define WPOS_NAME "ibesu" /**< Printable code for each word position above */ +#define S3_SILENCE_CIPHONE "SIL" /**< Hard-coded silence CI phone name */ + +/** + \struct ciphone_t + \brief CI phone information +*/ +typedef struct ciphone_s { + char *name; /**< The name of the CI phone */ + int32 filler; /**< Whether a filler phone; if so, can be substituted by + silence phone in left or right context position */ +} ciphone_t; + +/** + * \struct phone_t + * \brief Triphone information, including base phones as a subset. For the latter, lc, rc and wpos are non-existent. + */ +typedef struct phone_s { + int32 ssid; /**< State sequence (or senone sequence) ID, considering the + n_emit_state senone-ids are a unit. 
The senone sequences + themselves are in a separate table */ + int32 tmat; /**< Transition matrix id */ + int16 ci, lc, rc; /**< Base, left, right context ciphones */ + word_posn_t wpos; /**< Word position */ + +} phone_t; + +/** + * \struct ph_rc_t + * \brief Structures needed for mapping into pid. (See mdef_t.wpos_ci_lclist below.) (lc = left context; rc = right context.) + * NOTE: Both ph_rc_t and ph_lc_t FOR INTERNAL USE ONLY. + */ +typedef struct ph_rc_s { + int16 rc; /**< Specific rc for a parent */ + int32 pid; /**< Triphone id for above rc instance */ + struct ph_rc_s *next; /**< Next rc entry for same parent */ +} ph_rc_t; + +/** + * \struct ph_lc_t + * \brief Structures for storing the left context. + */ + +typedef struct ph_lc_s { + int16 lc; /**< Specific lc for a parent */ + ph_rc_t *rclist; /**< rc list for above lc instance */ + struct ph_lc_s *next; /**< Next lc entry for same parent */ +} ph_lc_t; + + +/** The main model definition structure */ +/** + \struct mdef_t + \brief structure for storing the model definition. +*/ +typedef struct mdef_s { + int32 n_ciphone; /**< number basephones actually present */ + int32 n_phone; /**< number basephones + number triphones actually present */ + int32 n_emit_state; /**< number emitting states per phone */ + int32 n_ci_sen; /**< number CI senones; these are the first */ + int32 n_sen; /**< number senones (CI+CD) */ + int32 n_tmat; /**< number transition matrices */ + + hash_table_t *ciphone_ht; /**< Hash table for mapping ciphone strings to ids */ + ciphone_t *ciphone; /**< CI-phone information for all ciphones */ + phone_t *phone; /**< Information for all ciphones and triphones */ + uint16 **sseq; /**< Unique state (or senone) sequences in this model, shared + among all phones/triphones */ + int32 n_sseq; /**< No. 
of unique senone sequences in this model */ + + int16 *cd2cisen; /**< Parent CI-senone id for each senone; the first + n_ci_sen are identity mappings; the CD-senones are + contiguous for each parent CI-phone */ + int16 *sen2cimap; /**< Parent CI-phone for each senone (CI or CD) */ + + int16 sil; /**< SILENCE_CIPHONE id */ + + ph_lc_t ***wpos_ci_lclist; /**< wpos_ci_lclist[wpos][ci] = list of lc for . + wpos_ci_lclist[wpos][ci][lc].rclist = list of rc for + . Only entries for the known triphones + are created to conserve space. + (NOTE: FOR INTERNAL USE ONLY.) */ +} mdef_t; + +/** Access macros; not meant for arbitrary use */ +#define mdef_is_fillerphone(m,p) ((m)->ciphone[p].filler) +#define mdef_n_ciphone(m) ((m)->n_ciphone) +#define mdef_n_phone(m) ((m)->n_phone) +#define mdef_n_sseq(m) ((m)->n_sseq) +#define mdef_n_emit_state(m) ((m)->n_emit_state) +#define mdef_n_sen(m) ((m)->n_sen) +#define mdef_n_tmat(m) ((m)->n_tmat) +#define mdef_pid2ssid(m,p) ((m)->phone[p].ssid) +#define mdef_pid2tmatid(m,p) ((m)->phone[p].tmat) +#define mdef_silphone(m) ((m)->sil) +#define mdef_sen2cimap(m) ((m)->sen2cimap) +#define mdef_sseq2sen(m,ss,pos) ((m)->sseq[ss][pos]) +#define mdef_pid2ci(m,p) ((m)->phone[p].ci) +#define mdef_cd2cisen(m) ((m)->cd2cisen) + +/** + * Initialize the phone structure from the given model definition file. + * It should be treated as a READ-ONLY structure. + * @return pointer to the phone structure created. + */ +mdef_t *mdef_init (char *mdeffile, /**< In: Model definition file */ + int breport /**< In: whether to report the progress or not */ + ); + + +/** + Get the ciphone id given a string name + @return ciphone id for the given ciphone string name +*/ +int mdef_ciphone_id(mdef_t *m, /**< In: Model structure being queried */ + char *ciphone /**< In: ciphone for which id wanted */ + ); + +/** + Get the phone string given the ci phone id. 
+ @return: READ-ONLY ciphone string name for the given ciphone id +*/ +const char *mdef_ciphone_str(mdef_t *m, /**< In: Model structure being queried */ + int ci /**< In: ciphone id for which name wanted */ + ); + +/** + Decide whether the phone is ci phone. + @return 1 if given triphone argument is a ciphone, 0 if not, -1 if error +*/ +int mdef_is_ciphone (mdef_t *m, /**< In: Model structure being queried */ + int p /**< In: triphone id being queried */ + ); + +/** + Decide whether the senone is a senone for a ci phone, or a ci senone + @return 1 if a given senone is a ci senone +*/ +int mdef_is_cisenone(mdef_t *m, /**< In: Model structure being queried */ + int s /**< In: senone id being queried */ + ); + +/** + Decide the phone id given the left, right and base phones. + @return: phone id for the given constituents if found, else BAD_S3PID +*/ +int mdef_phone_id (mdef_t *m, /**< In: Model structure being queried */ + int b, /**< In: base ciphone id */ + int l, /**< In: left context ciphone id */ + int r, /**< In: right context ciphone id */ + word_posn_t pos /**< In: Word position */ + ); + +/** + * Create a phone string for the given phone (base or triphone) id in the given buf. + * @return 0 if successful, -1 if error. + */ +int mdef_phone_str(mdef_t *m, /**< In: Model structure being queried */ + int pid, /**< In: phone id being queried */ + char *buf /**< Out: On return, buf has the string */ + ); + +/** + * Compare the underlying HMMs for two given phones (i.e., compare the two transition + * matrix IDs and the individual state(senone) IDs). + * @return 0 iff the HMMs are identical, -1 otherwise. 
+ */ +int mdef_hmm_cmp (mdef_t *m, /**< In: Model being queried */ + int p1, /**< In: One of the two triphones being compared */ + int p2 /**< In: One of the two triphones being compared */ + ); + +/** Report the model definition's parameters */ +void mdef_report(mdef_t *m /**< In: model definition structure */ + ); + +/** RAH, For freeing memory */ +void mdef_free_recursive_lc (ph_lc_t *lc /**< In: A list of left context */ + ); +void mdef_free_recursive_rc (ph_rc_t *rc /**< In: A list of right context */ + ); + +/** Free an mdef_t */ +void mdef_free (mdef_t *mdef /**< In : The model definition*/ + ); + + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_gauden.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_gauden.c new file mode 100644 index 0000000000000000000000000000000000000000..0961acd6264bfbbb9972633ca4543f901c05f7d6 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_gauden.c @@ -0,0 +1,571 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "ms_gauden.h" + +#define GAUDEN_PARAM_VERSION "1.0" + +#ifndef M_PI +#define M_PI 3.1415926535897932385e0 +#endif + +#define WORST_DIST (int32)(0x80000000) + +void +gauden_dump(const gauden_t * g) +{ + int32 c; + + for (c = 0; c < g->n_mgau; c++) + gauden_dump_ind(g, c); +} + + +void +gauden_dump_ind(const gauden_t * g, int senidx) +{ + int32 f, d, i; + + for (f = 0; f < g->n_feat; f++) { + E_INFO("Codebook %d, Feature %d (%dx%d):\n", + senidx, f, g->n_density, g->featlen[f]); + + for (d = 0; d < g->n_density; d++) { + printf("m[%3d]", d); + for (i = 0; i < g->featlen[f]; i++) + printf(" %7.4f", MFCC2FLOAT(g->mean[senidx][f][d][i])); + printf("\n"); + } + printf("\n"); + + for (d = 0; d < g->n_density; d++) { + printf("v[%3d]", d); + for (i = 0; i < g->featlen[f]; i++) + printf(" %d", (int)g->var[senidx][f][d][i]); + printf("\n"); + } + printf("\n"); + + for (d = 0; d < g->n_density; d++) + printf("d[%3d] %d\n", d, (int)g->det[senidx][f][d]); + } + fflush(stderr); +} + +/** + * Reads gaussian parameters from a file + * + * @param: out_param output parameter + * @ + * + * @returns: allocated 4-d array of gaussians + * + */ +static float **** +gauden_param_read(const char *file_name, + int32 * out_n_mgau, + int32 * out_n_feat, + int32 * out_n_density, + int32 ** out_veclen) +{ + char tmp; + FILE *fp; + int32 i, j, k, l, n, blk; + int32 n_mgau; + int32 n_feat; + int32 n_density; + int32 *veclen; + int32 byteswap, chksum_present; + float32 ****out; + float32 *buf; + char **argname, **argval; + uint32 chksum; + + E_INFO("Reading mixture gaussian parameter: %s\n", file_name); + + if ((fp = fopen(file_name, "rb")) == NULL) { + E_ERROR_SYSTEM("Failed to open file '%s' for reading", file_name); + return NULL; + } + + /* Read header, including argument-value info and 32-bit byteorder magic */ + if 
(bio_readhdr(fp, &argname, &argval, &byteswap) < 0) { + E_ERROR("Failed to read header from file '%s'\n", file_name); + fclose(fp); + return NULL; + } + + /* Parse argument-value list */ + chksum_present = 0; + for (i = 0; argname[i]; i++) { + if (strcmp(argname[i], "version") == 0) { + if (strcmp(argval[i], GAUDEN_PARAM_VERSION) != 0) + E_WARN("Version mismatch(%s): %s, expecting %s\n", + file_name, argval[i], GAUDEN_PARAM_VERSION); + } + else if (strcmp(argname[i], "chksum0") == 0) { + chksum_present = 1; /* Ignore the associated value */ + } + } + bio_hdrarg_free(argname, argval); + argname = argval = NULL; + + chksum = 0; + + /* #Codebooks */ + if (bio_fread(&n_mgau, sizeof(int32), 1, fp, byteswap, &chksum) != 1) { + E_ERROR("Failed to read number fo codebooks from %s\n", file_name); + fclose(fp); + return NULL; + } + *out_n_mgau = n_mgau; + + /* #Features/codebook */ + if (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) != 1) { + E_ERROR("Failed to read number of features from %s\n", file_name); + fclose(fp); + return NULL; + } + *out_n_feat = n_feat; + + /* #Gaussian densities/feature in each codebook */ + if (bio_fread(&n_density, sizeof(int32), 1, fp, byteswap, &chksum) != 1) { + E_ERROR("fread(%s) (#density/codebook) failed\n", file_name); + } + *out_n_density = n_density; + + /* #Dimensions in each feature stream */ + veclen = ckd_calloc(n_feat, sizeof(uint32)); + *out_veclen = veclen; + if (bio_fread(veclen, sizeof(int32), n_feat, fp, byteswap, &chksum) != + n_feat) { + E_ERROR("fread(%s) (feature-lengths) failed\n", file_name); + fclose(fp); + return NULL; + } + + /* blk = total vector length of all feature streams */ + for (i = 0, blk = 0; i < n_feat; i++) + blk += veclen[i]; + + /* #Floats to follow; for the ENTIRE SET of CODEBOOKS */ + if (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1) { + E_ERROR("Failed to read number of parameters from %s\n", file_name); + fclose(fp); + return NULL; + } + + if (n != n_mgau * n_density * 
blk) { + E_ERROR + ("Number of parameters in %s(%d) doesn't match dimensions: %d x %d x %d\n", + file_name, n, n_mgau, n_density, blk); + fclose(fp); + return NULL; + } + + /* Allocate memory for mixture gaussian densities if not already allocated */ + out = (float32 ****) ckd_calloc_3d(n_mgau, n_feat, n_density, + sizeof(float32 *)); + buf = (float32 *) ckd_calloc(n, sizeof(float32)); + for (i = 0, l = 0; i < n_mgau; i++) { + for (j = 0; j < n_feat; j++) { + for (k = 0; k < n_density; k++) { + out[i][j][k] = &buf[l]; + l += veclen[j]; + } + } + } + + /* Read mixture gaussian densities data */ + if (bio_fread(buf, sizeof(float32), n, fp, byteswap, &chksum) != n) { + E_ERROR("Failed to read density data from file '%s'\n", file_name); + fclose(fp); + ckd_free_3d(out); + return NULL; + } + + if (chksum_present) + bio_verify_chksum(fp, byteswap, chksum); + + if (fread(&tmp, 1, 1, fp) == 1) { + E_ERROR("More data than expected in %s\n", file_name); + fclose(fp); + ckd_free_3d(out); + return NULL; + } + + fclose(fp); + + E_INFO("%d codebook, %d feature, size: \n", n_mgau, n_feat); + for (i = 0; i < n_feat; i++) + E_INFO(" %dx%d\n", n_density, veclen[i]); + + return out; +} + +static void +gauden_param_free(mfcc_t **** p) +{ + ckd_free(p[0][0][0]); + ckd_free_3d(p); +} + +/* + * Some of the gaussian density computation can be carried out in advance: + * log(determinant) calculation, + * 1/(2*var) in the exponent, + * NOTE; The density computation is performed in log domain. 
+ */ +static int32 +gauden_dist_precompute(gauden_t * g, logmath_t *lmath, float32 varfloor) +{ + int32 i, m, f, d, flen; + mfcc_t *meanp; + mfcc_t *varp; + mfcc_t *detp; + int32 floored; + + floored = 0; + /* Allocate space for determinants */ + g->det = ckd_calloc_3d(g->n_mgau, g->n_feat, g->n_density, sizeof(***g->det)); + + for (m = 0; m < g->n_mgau; m++) { + for (f = 0; f < g->n_feat; f++) { + flen = g->featlen[f]; + + /* Determinants for all variance vectors in g->[m][f] */ + for (d = 0, detp = g->det[m][f]; d < g->n_density; d++, detp++) { + *detp = 0; + for (i = 0, varp = g->var[m][f][d], meanp = g->mean[m][f][d]; + i < flen; i++, varp++, meanp++) { + float32 *fvarp = (float32 *)varp; + +#ifdef FIXED_POINT + float32 *fmp = (float32 *)meanp; + *meanp = FLOAT2MFCC(*fmp); +#endif + if (*fvarp < varfloor) { + *fvarp = varfloor; + ++floored; + } + *detp += (mfcc_t)logmath_log(lmath, + 1.0 / sqrt(*fvarp * 2.0 * M_PI)); + /* Precompute this part of the exponential */ + *varp = (mfcc_t)logmath_ln_to_log(lmath, + (1.0 / (*fvarp * 2.0))); + } + } + } + } + + E_INFO("%d variance values floored\n", floored); + + return 0; +} + + +gauden_t * +gauden_init(char const *meanfile, char const *varfile, float32 varfloor, logmath_t *lmath) +{ + int32 i, m, f, d, *flen; + gauden_t *g; + + assert(meanfile != NULL); + assert(varfile != NULL); + assert(varfloor > 0.0); + + g = (gauden_t *) ckd_calloc(1, sizeof(gauden_t)); + g->lmath = logmath_retain(lmath); + + g->mean = (mfcc_t ****)gauden_param_read(meanfile, &g->n_mgau, &g->n_feat, &g->n_density, + &g->featlen); + if (g->mean == NULL) { + return NULL; + } + g->var = (mfcc_t ****)gauden_param_read(varfile, &m, &f, &d, &flen); + if (g->var == NULL) { + return NULL; + } + + /* Verify mean and variance parameter dimensions */ + if ((m != g->n_mgau) || (f != g->n_feat) || (d != g->n_density)) { + E_ERROR + ("Mixture-gaussians dimensions for means and variances differ\n"); + ckd_free(flen); + gauden_free(g); + return NULL; + } + for 
(i = 0; i < g->n_feat; i++) { + if (g->featlen[i] != flen[i]) { + E_ERROR("Feature lengths for means and variances differ\n"); + ckd_free(flen); + gauden_free(g); + return NULL; + } + } + + ckd_free(flen); + + gauden_dist_precompute(g, lmath, varfloor); + + return g; +} + +void +gauden_free(gauden_t * g) +{ + if (g == NULL) + return; + if (g->mean) + gauden_param_free(g->mean); + if (g->var) + gauden_param_free(g->var); + if (g->det) + ckd_free_3d(g->det); + if (g->featlen) + ckd_free(g->featlen); + if (g->lmath) + logmath_free(g->lmath); + ckd_free(g); +} + +/* See compute_dist below */ +static int32 +compute_dist_all(gauden_dist_t * out_dist, mfcc_t* obs, int32 featlen, + mfcc_t ** mean, mfcc_t ** var, mfcc_t * det, + int32 n_density) +{ + int32 i, d; + + for (d = 0; d < n_density; ++d) { + mfcc_t *m; + mfcc_t *v; + mfcc_t dval; + + m = mean[d]; + v = var[d]; + dval = det[d]; + + for (i = 0; i < featlen; i++) { + mfcc_t diff; +#ifdef FIXED_POINT + /* Have to check for underflows here. */ + mfcc_t pdval = dval; + diff = obs[i] - m[i]; + dval -= MFCCMUL(MFCCMUL(diff, diff), v[i]); + if (dval > pdval) { + dval = WORST_SCORE; + break; + } +#else + diff = obs[i] - m[i]; + /* The compiler really likes this to be a single + * expression, for whatever reason. */ + dval -= diff * diff * v[i]; +#endif + } + + out_dist[d].dist = dval; + out_dist[d].id = d; + } + + return 0; +} + + +/* + * Compute the top-N closest gaussians from the chosen set (mgau,feat) + * for the given input observation vector. 
+ */ +static int32 +compute_dist(gauden_dist_t * out_dist, int32 n_top, + mfcc_t * obs, int32 featlen, + mfcc_t ** mean, mfcc_t ** var, mfcc_t * det, + int32 n_density) +{ + int32 i, j, d; + gauden_dist_t *worst; + + /* Special case optimization when n_density <= n_top */ + if (n_top >= n_density) + return (compute_dist_all + (out_dist, obs, featlen, mean, var, det, n_density)); + + for (i = 0; i < n_top; i++) + out_dist[i].dist = WORST_DIST; + worst = &(out_dist[n_top - 1]); + + for (d = 0; d < n_density; d++) { + mfcc_t *m; + mfcc_t *v; + mfcc_t dval; + + m = mean[d]; + v = var[d]; + dval = det[d]; + + for (i = 0; (i < featlen) && (dval >= worst->dist); i++) { + mfcc_t diff; +#ifdef FIXED_POINT + /* Have to check for underflows here. */ + mfcc_t pdval = dval; + diff = obs[i] - m[i]; + dval -= MFCCMUL(MFCCMUL(diff, diff), v[i]); + if (dval > pdval) { + dval = WORST_SCORE; + break; + } +#else + diff = obs[i] - m[i]; + /* The compiler really likes this to be a single + * expression, for whatever reason. */ + dval -= diff * diff * v[i]; +#endif + } + + if ((i < featlen) || (dval < worst->dist)) /* Codeword d worse than worst */ + continue; + + /* Codeword d at least as good as worst so far; insert in the ordered list */ + for (i = 0; (i < n_top) && (dval < out_dist[i].dist); i++); + assert(i < n_top); + for (j = n_top - 1; j > i; --j) + out_dist[j] = out_dist[j - 1]; + out_dist[i].dist = dval; + out_dist[i].id = d; + } + + return 0; +} + + +/* + * Compute distances of the input observation from the top N codewords in the given + * codebook (g->{mean,var}[mgau]). The input observation, obs, includes vectors for + * all features in the codebook. 
+ */ +int32 +gauden_dist(gauden_t * g, + int mgau, int32 n_top, mfcc_t** obs, gauden_dist_t ** out_dist) +{ + int32 f; + + assert((n_top > 0) && (n_top <= g->n_density)); + + for (f = 0; f < g->n_feat; f++) { + compute_dist(out_dist[f], n_top, + obs[f], g->featlen[f], + g->mean[mgau][f], g->var[mgau][f], g->det[mgau][f], + g->n_density); + E_DEBUG("Top CW(%d,%d) = %d %d\n", mgau, f, out_dist[f][0].id, + (int)out_dist[f][0].dist >> SENSCR_SHIFT); + } + + return 0; +} + +int32 +gauden_mllr_transform(gauden_t *g, ps_mllr_t *mllr, cmd_ln_t *config) +{ + int32 i, m, f, d, *flen; + + /* Free data if already here */ + if (g->mean) + gauden_param_free(g->mean); + if (g->var) + gauden_param_free(g->var); + if (g->det) + ckd_free_3d(g->det); + if (g->featlen) + ckd_free(g->featlen); + g->det = NULL; + g->featlen = NULL; + + /* Reload means and variances (un-precomputed). */ + g->mean = (mfcc_t ****)gauden_param_read(cmd_ln_str_r(config, "_mean"), &g->n_mgau, &g->n_feat, &g->n_density, + &g->featlen); + g->var = (mfcc_t ****)gauden_param_read(cmd_ln_str_r(config, "_var"), &m, &f, &d, &flen); + + /* Verify mean and variance parameter dimensions */ + if ((m != g->n_mgau) || (f != g->n_feat) || (d != g->n_density)) + E_FATAL + ("Mixture-gaussians dimensions for means and variances differ\n"); + for (i = 0; i < g->n_feat; i++) + if (g->featlen[i] != flen[i]) + E_FATAL("Feature lengths for means and variances differ\n"); + ckd_free(flen); + + /* Transform codebook for each stream s */ + for (i = 0; i < g->n_mgau; ++i) { + for (f = 0; f < g->n_feat; ++f) { + float64 *temp; + temp = (float64 *) ckd_calloc(g->featlen[f], sizeof(float64)); + /* Transform each density d in selected codebook */ + for (d = 0; d < g->n_density; d++) { + int l; + for (l = 0; l < g->featlen[f]; l++) { + temp[l] = 0.0; + for (m = 0; m < g->featlen[f]; m++) { + /* FIXME: For now, only one class, hence the zeros below. 
*/ + temp[l] += mllr->A[f][0][l][m] * g->mean[i][f][d][m]; + } + temp[l] += mllr->b[f][0][l]; + } + + for (l = 0; l < g->featlen[f]; l++) { + g->mean[i][f][d][l] = (float32) temp[l]; + g->var[i][f][d][l] *= mllr->h[f][0][l]; + } + } + ckd_free(temp); + } + } + + /* Re-precompute (if we aren't adapting variances this isn't + * actually necessary...) */ + gauden_dist_precompute(g, g->lmath, cmd_ln_float32_r(config, "-varfloor")); + return 0; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_gauden.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_gauden.h new file mode 100644 index 0000000000000000000000000000000000000000..1176085c61849fbb68f1c47fc6c849e4eecd8962 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_gauden.h @@ -0,0 +1,153 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef _LIBFBS_GAUDEN_H_ +#define _LIBFBS_GAUDEN_H_ + +/** \file ms_gauden.h + * \brief (Sphinx 3.0 specific) Gaussian density module. + * + * Gaussian density distribution implementation. There are two major + * difference bettwen ms_gauden and cont_mgau. One is the fact that + * ms_gauden only take cares of the Gaussian computation part where + * cont_mgau actually take care of senone computation as well. The + * other is the fact that ms_gauden is a multi-stream implementation + * of GMM computation. + * + */ + +/* SphinxBase headers. */ +#include +#include +#include + +/* Local headers. */ +#include "vector.h" +#include "pocketsphinx_internal.h" +#include "hmm.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * \struct gauden_dist_t + * \brief Structure to store distance (density) values for a given input observation wrt density values in some given codebook. 
+ */
+typedef struct gauden_dist_s {
+    int32 id;           /**< Index of codeword (gaussian density) */
+    mfcc_t dist;        /**< Density value for input observation wrt above codeword;
+                           NOTE: result in logs3 domain, but var_t used for speed */
+
+} gauden_dist_t;
+
+/**
+ * \struct gauden_t
+ * \brief Multivariate gaussian mixture density parameters
+ */
+typedef struct gauden_s {
+    mfcc_t ****mean;    /**< mean[codebook][feature][codeword] vector */
+    mfcc_t ****var;     /**< like mean; diagonal covariance vector only */
+    mfcc_t ***det;      /**< log(determinant) for each variance vector;
+                           actually, log(sqrt(2*pi*det)) */
+    logmath_t *lmath;   /**< log math computation */
+    int32 n_mgau;       /**< Number of codebooks */
+    int32 n_feat;       /**< Number of feature streams in each codebook */
+    int32 n_density;    /**< Number of gaussian densities in each codebook-feature stream */
+    int32 *featlen;     /**< feature length for each feature */
+} gauden_t;
+
+
+/**
+ * Read mixture gaussian codebooks from the given files. Allocate memory space needed
+ * for them. Apply the specified variance floor value.
+ * Return value: ptr to the model created; NULL if error.
+ * (See Sphinx3 model file-format documentation.)
+ */
+gauden_t *
+gauden_init (char const *meanfile,/**< Input: File containing means of mixture gaussians */
+             char const *varfile,/**< Input: File containing variances of mixture gaussians */
+             float32 varfloor, /**< Input: Floor value to be applied to variances */
+             logmath_t *lmath /**< Input: log math computation */
+    );
+
+/** Release memory allocated by gauden_init. */
+void gauden_free(gauden_t *g); /**< In: The gauden_t to free */
+
+/** Transform Gaussians according to an MLLR matrix (or, eventually, more). */
+int32 gauden_mllr_transform(gauden_t *s, ps_mllr_t *mllr, cmd_ln_t *config);
+
+/**
+ * Compute gaussian density values for the given input observation vector wrt the
+ * specified mixture gaussian codebook (which may consist of several feature streams).
+ * Density values are left UNnormalized.
+ * @return 0 if successful, -1 otherwise.
+ */
+int32
+gauden_dist (gauden_t *g, /**< In: handle to entire ensemble of codebooks */
+             int mgau, /**< In: codebook for which density values to be evaluated
+                          (g->{mean,var}[mgau]) */
+             int n_top, /**< In: Number of top densities to be evaluated */
+             mfcc_t **obs, /**< In: Observation vector; obs[f] = vector for feature stream f */
+             gauden_dist_t **out_dist
+             /**< Out: n_top best codewords and density values,
+                in worsening order, for each feature stream.
+                out_dist[f][i] = i-th best density for feature f.
+                Caller must allocate memory for this output */
+    );
+
+/**
+   Dump the definition of Gaussian distribution.
+*/
+void gauden_dump (const gauden_t *g /**< In: Gaussian distribution g*/
+    );
+
+/**
+   Dump the definition of Gaussian distribution of a particular index to the standard output stream
+*/
+void gauden_dump_ind (const gauden_t *g, /**< In: Gaussian distribution g*/
+                      int senidx /**< In: The senone index of the Gaussian */
+    );
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* _LIBFBS_GAUDEN_H_ */
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_mgau.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_mgau.c
new file mode 100644
index 0000000000000000000000000000000000000000..27a8a1daea72e4260ba8e9fa31a5388eb7cb10f0
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_mgau.c
@@ -0,0 +1,283 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * ms_mgau.c -- Essentially a wrapper that wrap up gauden and + * senone. It supports multi-stream. + * + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1997 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * HISTORY + * $Log$ + * Revision 1.2 2006/02/22 16:56:01 arthchan2003 + * Merged from SPHINX3_5_2_RCI_IRII_BRANCH: Added ms_mgau.[ch] into the trunk. It is a wrapper of ms_gauden and ms_senone + * + * Revision 1.1.2.4 2005/09/25 18:55:19 arthchan2003 + * Added a flag to turn on and off precomputation. 
+ * + * Revision 1.1.2.3 2005/08/03 18:53:44 dhdfu + * Add memory deallocation functions. Also move all the initialization + * of ms_mgau_model_t into ms_mgau_init (duh!), which entails removing it + * from decode_anytopo and friends. + * + * Revision 1.1.2.2 2005/08/02 21:05:38 arthchan2003 + * 1, Added dist and mgau_active as intermediate variable for computation. 2, Added ms_cont_mgau_frame_eval, which is a multi stream version of GMM computation mainly s3.0 family of tools. 3, Fixed dox-doc. + * + * Revision 1.1.2.1 2005/07/20 19:37:09 arthchan2003 + * Added a multi-stream cont_mgau (ms_mgau) which is a wrapper of both gauden and senone. Add ms_mgau_init and model_set_mllr. This allow eliminating 600 lines of code in decode_anytopo/align/allphone. + * + * + * + */ + +/* Local headers. */ +#include "ms_mgau.h" + +static ps_mgaufuncs_t ms_mgau_funcs = { + "ms", + ms_cont_mgau_frame_eval, /* frame_eval */ + ms_mgau_mllr_transform, /* transform */ + ms_mgau_free /* free */ +}; + +ps_mgau_t * +ms_mgau_init(acmod_t *acmod, logmath_t *lmath, bin_mdef_t *mdef) +{ + /* Codebooks */ + ms_mgau_model_t *msg; + ps_mgau_t *mg; + gauden_t *g; + senone_t *s; + cmd_ln_t *config; + int i; + + config = acmod->config; + + msg = (ms_mgau_model_t *) ckd_calloc(1, sizeof(ms_mgau_model_t)); + msg->config = config; + msg->g = NULL; + msg->s = NULL; + + if ((g = msg->g = gauden_init(cmd_ln_str_r(config, "_mean"), + cmd_ln_str_r(config, "_var"), + cmd_ln_float32_r(config, "-varfloor"), + lmath)) == NULL) { + E_ERROR("Failed to read means and variances\n"); + goto error_out; + } + + /* Verify n_feat and veclen, against acmod. 
*/ + if (g->n_feat != feat_dimension1(acmod->fcb)) { + E_ERROR("Number of streams does not match: %d != %d\n", + g->n_feat, feat_dimension1(acmod->fcb)); + goto error_out; + } + for (i = 0; i < g->n_feat; ++i) { + if ((uint32)g->featlen[i] != feat_dimension2(acmod->fcb, i)) { + E_ERROR("Dimension of stream %d does not match: %d != %d\n", i, + g->featlen[i], feat_dimension2(acmod->fcb, i)); + goto error_out; + } + } + + s = msg->s = senone_init(msg->g, + cmd_ln_str_r(config, "_mixw"), + cmd_ln_str_r(config, "_senmgau"), + cmd_ln_float32_r(config, "-mixwfloor"), + lmath, mdef); + + s->aw = cmd_ln_int32_r(config, "-aw"); + + /* Verify senone parameters against gauden parameters */ + if (s->n_feat != (uint32)g->n_feat) + E_FATAL("#Feature mismatch: gauden= %d, senone= %d\n", g->n_feat, + s->n_feat); + if (s->n_cw != (uint32)g->n_density) + E_FATAL("#Densities mismatch: gauden= %d, senone= %d\n", + g->n_density, s->n_cw); + if (s->n_gauden > (uint32)g->n_mgau) + E_FATAL("Senones need more codebooks (%d) than present (%d)\n", + s->n_gauden, g->n_mgau); + if (s->n_gauden < (uint32)g->n_mgau) + E_ERROR("Senones use fewer codebooks (%d) than present (%d)\n", + s->n_gauden, g->n_mgau); + + msg->topn = cmd_ln_int32_r(config, "-topn"); + E_INFO("The value of topn: %d\n", msg->topn); + if (msg->topn == 0 || msg->topn > msg->g->n_density) { + E_WARN + ("-topn argument (%d) invalid or > #density codewords (%d); set to latter\n", + msg->topn, msg->g->n_density); + msg->topn = msg->g->n_density; + } + + msg->dist = (gauden_dist_t ***) + ckd_calloc_3d(g->n_mgau, g->n_feat, msg->topn, + sizeof(gauden_dist_t)); + msg->mgau_active = ckd_calloc(g->n_mgau, sizeof(int8)); + + mg = (ps_mgau_t *)msg; + mg->vt = &ms_mgau_funcs; + return mg; +error_out: + ms_mgau_free(ps_mgau_base(msg)); + return NULL; +} + +void +ms_mgau_free(ps_mgau_t * mg) +{ + ms_mgau_model_t *msg = (ms_mgau_model_t *)mg; + if (msg == NULL) + return; + + if (msg->g) + gauden_free(msg->g); + if (msg->s) + 
senone_free(msg->s); + if (msg->dist) + ckd_free_3d((void *) msg->dist); + if (msg->mgau_active) + ckd_free(msg->mgau_active); + + ckd_free(msg); +} + +int +ms_mgau_mllr_transform(ps_mgau_t *s, + ps_mllr_t *mllr) +{ + ms_mgau_model_t *msg = (ms_mgau_model_t *)s; + return gauden_mllr_transform(msg->g, mllr, msg->config); +} + +int32 +ms_cont_mgau_frame_eval(ps_mgau_t * mg, + int16 *senscr, + uint8 *senone_active, + int32 n_senone_active, + mfcc_t ** feat, + int32 frame, + int32 compallsen) +{ + ms_mgau_model_t *msg = (ms_mgau_model_t *)mg; + int32 gid; + int32 topn; + int32 best; + gauden_t *g; + senone_t *sen; + + (void)frame; + topn = ms_mgau_topn(msg); + g = ms_mgau_gauden(msg); + sen = ms_mgau_senone(msg); + + if (compallsen) { + int32 s; + + for (gid = 0; gid < g->n_mgau; gid++) + gauden_dist(g, gid, topn, feat, msg->dist[gid]); + + best = (int32) 0x7fffffff; + for (s = 0; (uint32)s < sen->n_sen; s++) { + senscr[s] = senone_eval(sen, s, msg->dist[sen->mgau[s]], topn); + if (best > senscr[s]) { + best = senscr[s]; + } + } + + /* Normalize senone scores */ + for (s = 0; (uint32)s < sen->n_sen; s++) { + int32 bs = senscr[s] - best; + if (bs > 32767) + bs = 32767; + if (bs < -32768) + bs = -32768; + senscr[s] = bs; + } + } + else { + int32 i, n; + /* Flag all active mixture-gaussian codebooks */ + for (gid = 0; gid < g->n_mgau; gid++) + msg->mgau_active[gid] = 0; + + n = 0; + for (i = 0; i < n_senone_active; i++) { + /* senone_active consists of deltas. 
*/ + int32 s = senone_active[i] + n; + msg->mgau_active[sen->mgau[s]] = 1; + n = s; + } + + /* Compute topn gaussian density values (for active codebooks) */ + for (gid = 0; gid < g->n_mgau; gid++) { + if (msg->mgau_active[gid]) + gauden_dist(g, gid, topn, feat, msg->dist[gid]); + } + + best = (int32) 0x7fffffff; + n = 0; + for (i = 0; i < n_senone_active; i++) { + int32 s = senone_active[i] + n; + senscr[s] = senone_eval(sen, s, msg->dist[sen->mgau[s]], topn); + if (best > senscr[s]) { + best = senscr[s]; + } + n = s; + } + + /* Normalize senone scores */ + n = 0; + for (i = 0; i < n_senone_active; i++) { + int32 s = senone_active[i] + n; + int32 bs = senscr[s] - best; + if (bs > 32767) + bs = 32767; + if (bs < -32768) + bs = -32768; + senscr[s] = bs; + n = s; + } + } + + return 0; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_mgau.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_mgau.h new file mode 100644 index 0000000000000000000000000000000000000000..69b3cef982c3f272c94f5e3b1164dbda55d5d6bd --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_mgau.h @@ -0,0 +1,153 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * ms_mgau.h -- Essentially a wrapper that wrap up gauden and + * senone. It supports multi-stream. + * + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1997 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * HISTORY + * $Log$ + * Revision 1.1 2006/04/05 20:27:30 dhdfu + * A Great Reorganzation of header files and executables + * + * Revision 1.3 2006/02/22 16:57:15 arthchan2003 + * Fixed minor dox-doc issue + * + * Revision 1.2 2006/02/22 16:56:01 arthchan2003 + * Merged from SPHINX3_5_2_RCI_IRII_BRANCH: Added ms_mgau.[ch] into the trunk. It is a wrapper of ms_gauden and ms_senone + * + * Revision 1.1.2.4 2005/09/25 18:55:19 arthchan2003 + * Added a flag to turn on and off precomputation. 
+ * + * Revision 1.1.2.3 2005/08/03 18:53:44 dhdfu + * Add memory deallocation functions. Also move all the initialization + * of ms_mgau_model_t into ms_mgau_init (duh!), which entails removing it + * from decode_anytopo and friends. + * + * Revision 1.1.2.2 2005/08/02 21:05:38 arthchan2003 + * 1, Added dist and mgau_active as intermediate variable for computation. 2, Added ms_cont_mgau_frame_eval, which is a multi stream version of GMM computation mainly s3.0 family of tools. 3, Fixed dox-doc. + * + * Revision 1.1.2.1 2005/07/20 19:37:09 arthchan2003 + * Added a multi-stream cont_mgau (ms_mgau) which is a wrapper of both gauden and senone. Add ms_mgau_init and model_set_mllr. This allow eliminating 600 lines of code in decode_anytopo/align/allphone. + * + * + * + */ + +/** \file ms_mgau.h + * + * \brief (Sphinx 3.0 specific) A module that wraps up the code of + * gauden and senone because they are closely related. + * + * At the time at Sphinx 3.1 to 3.2, Ravi has decided to rewrite only + * single-stream part of the code into cont_mgau.[ch]. This marks the + * beginning of historical problem of having two sets of Gaussian + * distribution computation routine, one for single-stream and one of + * multi-stream. + * + * In Sphinx 3.5, when we figure out that it is possible to allow both + * 3.0 family of tools and 3.x family of tools to coexist. This + * becomes one problem we found that very hard to reconcile. That is + * why we currently allow two versions of the code in the code + * base. This is likely to change in the future. + */ + + +#ifndef _LIBFBS_MS_CONT_MGAU_H_ +#define _LIBFBS_MS_CONT_MGAU_H_ + +/* SphinxBase headers. */ +#include +#include +#include + +/* Local headers. */ +#include "acmod.h" +#include "bin_mdef.h" +#include "ms_gauden.h" +#include "ms_senone.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** \struct ms_mgau_t + \brief Multi-stream mixture gaussian. 
It does not have to be continuous.
+*/
+
+typedef struct ms_mgau_model_s {
+    ps_mgau_t base; /**< Base object; ms_mgau_init() casts the model to ps_mgau_t* and sets its vt */
+    gauden_t* g; /**< The codebook */
+    senone_t* s; /**< The senone */
+    int topn; /**< Top-n gaussian will be computed */
+
+    /** Intermediates used in computation */
+    gauden_dist_t ***dist; /**< dist[codebook][feature][0..topn-1], allocated by ms_mgau_init() */
+    uint8 *mgau_active; /**< One flag per codebook; set for codebooks used by active senones */
+    cmd_ln_t *config; /**< Configuration copied from acmod->config */
+} ms_mgau_model_t;
+
+#define ms_mgau_gauden(msg) (msg->g) /* codebooks of a ms_mgau_model_t* */
+#define ms_mgau_senone(msg) (msg->s) /* senones of a ms_mgau_model_t* */
+#define ms_mgau_topn(msg) (msg->topn) /* top-n setting of a ms_mgau_model_t* */
+/* Create a model from acmod->config; returns NULL on failure. */
+ps_mgau_t* ms_mgau_init(acmod_t *acmod, logmath_t *lmath, bin_mdef_t *mdef);
+void ms_mgau_free(ps_mgau_t *g); /* Release a model created by ms_mgau_init(); NULL is ignored */
+int32 ms_cont_mgau_frame_eval(ps_mgau_t * msg,
+                              int16 *senscr, /* Out: best-relative senone scores */
+                              uint8 *senone_active, /* In: active senone ids, stored as deltas */
+                              int32 n_senone_active, /* In: number of entries in senone_active */
+                              mfcc_t ** feat, /* In: observation for this frame */
+                              int32 frame, /* unused by the implementation */
+                              int32 compallsen); /* In: non-zero = score all senones */
+int32 ms_mgau_mllr_transform(ps_mgau_t *s,
+                             ps_mllr_t *mllr); /* Apply an MLLR transform to the codebooks */
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* _LIBFBS_MS_CONT_MGAU_H_*/
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_senone.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_senone.c
new file mode 100644
index 0000000000000000000000000000000000000000..3bc9e29260341d4923434b7a32c69edc442d6754
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_senone.c
@@ -0,0 +1,404 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 1999-2004 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2.
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* System headers. */ +#include +#include +#include + +/* SphinxBase headers. */ +#include + +/* Local headers. 
*/ +#include "ms_senone.h" + +#define MIXW_PARAM_VERSION "1.0" +#define SPDEF_PARAM_VERSION "1.2" + +static int32 +senone_mgau_map_read(senone_t * s, char const *file_name) +{ + FILE *fp; + int32 byteswap, chksum_present, n_gauden_present; + uint32 chksum; + int32 i; + char eofchk; + char **argname, **argval; + void *ptr; + float32 v; + + E_INFO("Reading senone gauden-codebook map file: %s\n", file_name); + + if ((fp = fopen(file_name, "rb")) == NULL) + E_FATAL_SYSTEM("Failed to open map file '%s' for reading", file_name); + + /* Read header, including argument-value info and 32-bit byteorder magic */ + if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) + E_FATAL("Failed to read header from file '%s'\n", file_name); + + /* Parse argument-value list */ + chksum_present = 0; + n_gauden_present = 0; + for (i = 0; argname[i]; i++) { + if (strcmp(argname[i], "version") == 0) { + if (strcmp(argval[i], SPDEF_PARAM_VERSION) != 0) { + E_WARN("Version mismatch(%s): %s, expecting %s\n", + file_name, argval[i], SPDEF_PARAM_VERSION); + } + + /* HACK!! Convert version# to float32 and take appropriate action */ + if (sscanf(argval[i], "%f", &v) != 1) + E_FATAL("%s: Bad version no. string: %s\n", file_name, + argval[i]); + + n_gauden_present = (v > 1.1) ? 
1 : 0; + } + else if (strcmp(argname[i], "chksum0") == 0) { + chksum_present = 1; /* Ignore the associated value */ + } + } + bio_hdrarg_free(argname, argval); + argname = argval = NULL; + + chksum = 0; + + /* Read #gauden (if version matches) */ + if (n_gauden_present) { + E_INFO("Reading number of codebooks from %s\n", file_name); + if (bio_fread + (&(s->n_gauden), sizeof(int32), 1, fp, byteswap, &chksum) != 1) + E_FATAL("fread(%s) (#gauden) failed\n", file_name); + } + + /* Read 1d array data */ + if (bio_fread_1d(&ptr, sizeof(uint32), &(s->n_sen), fp, + byteswap, &chksum) < 0) { + E_FATAL("bio_fread_1d(%s) failed\n", file_name); + } + s->mgau = ptr; + E_INFO("Mapping %d senones to %d codebooks\n", s->n_sen, s->n_gauden); + + /* Infer n_gauden if not present in this version */ + if (!n_gauden_present) { + s->n_gauden = 1; + for (i = 0; (uint32)i < s->n_sen; i++) + if (s->mgau[i] >= s->n_gauden) + s->n_gauden = s->mgau[i] + 1; + } + + if (chksum_present) + bio_verify_chksum(fp, byteswap, chksum); + + if (fread(&eofchk, 1, 1, fp) == 1) + E_FATAL("More data than expected in %s: %d\n", file_name, eofchk); + + fclose(fp); + + E_INFO("Read %d->%d senone-codebook mappings\n", s->n_sen, + s->n_gauden); + + return 1; +} + + +static int32 +senone_mixw_read(senone_t * s, char const *file_name, logmath_t *lmath) +{ + char eofchk; + FILE *fp; + int32 byteswap, chksum_present; + uint32 chksum; + float32 *pdf; + int32 i, f, c, p, n_err; + char **argname, **argval; + + E_INFO("Reading senone mixture weights: %s\n", file_name); + + if ((fp = fopen(file_name, "rb")) == NULL) + E_FATAL_SYSTEM("Failed to open mixture weights file '%s' for reading", file_name); + + /* Read header, including argument-value info and 32-bit byteorder magic */ + if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) + E_FATAL("Failed to read header from file '%s'\n", file_name); + + /* Parse argument-value list */ + chksum_present = 0; + for (i = 0; argname[i]; i++) { + if (strcmp(argname[i], "version") 
== 0) { + if (strcmp(argval[i], MIXW_PARAM_VERSION) != 0) + E_WARN("Version mismatch(%s): %s, expecting %s\n", + file_name, argval[i], MIXW_PARAM_VERSION); + } + else if (strcmp(argname[i], "chksum0") == 0) { + chksum_present = 1; /* Ignore the associated value */ + } + } + bio_hdrarg_free(argname, argval); + argname = argval = NULL; + + chksum = 0; + + /* Read #senones, #features, #codewords, arraysize */ + if ((bio_fread(&(s->n_sen), sizeof(int32), 1, fp, byteswap, &chksum) != + 1) + || + (bio_fread(&(s->n_feat), sizeof(int32), 1, fp, byteswap, &chksum) + != 1) + || (bio_fread(&(s->n_cw), sizeof(int32), 1, fp, byteswap, &chksum) + != 1) + || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) { + E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name); + } + if ((uint32)i != s->n_sen * s->n_feat * s->n_cw) { + E_FATAL + ("%s: #float32s(%d) doesn't match dimensions: %d x %d x %d\n", + file_name, i, s->n_sen, s->n_feat, s->n_cw); + } + + /* + * Compute #LSB bits to be dropped to represent mixwfloor with 8 bits. + * All PDF values will be truncated (in the LSB positions) by these many bits. + */ + if ((s->mixwfloor <= 0.0) || (s->mixwfloor >= 1.0)) + E_FATAL("mixwfloor (%e) not in range (0, 1)\n", s->mixwfloor); + + /* Use a fixed shift for compatibility with everything else. */ + E_INFO("Truncating senone logs3(pdf) values by %d bits\n", SENSCR_SHIFT); + + /* + * Allocate memory for senone PDF data. Organize normally or transposed depending on + * s->n_gauden. 
+ */ + if (s->n_gauden > 1) { + E_INFO("Not transposing mixture weights in memory\n"); + s->pdf = + (senprob_t ***) ckd_calloc_3d(s->n_sen, s->n_feat, s->n_cw, + sizeof(senprob_t)); + } + else { + E_INFO("Transposing mixture weights in memory\n"); + s->pdf = + (senprob_t ***) ckd_calloc_3d(s->n_feat, s->n_cw, s->n_sen, + sizeof(senprob_t)); + } + + /* Temporary structure to read in floats */ + pdf = (float32 *) ckd_calloc(s->n_cw, sizeof(float32)); + + /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */ + n_err = 0; + for (i = 0; (uint32)i < s->n_sen; i++) { + for (f = 0; (uint32)f < s->n_feat; f++) { + if (bio_fread + ((void *) pdf, sizeof(float32), s->n_cw, fp, byteswap, + &chksum) + != (int32)s->n_cw) { + E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name); + } + + /* Normalize and floor */ + if (vector_sum_norm(pdf, s->n_cw) <= 0.0) + n_err++; + vector_floor(pdf, s->n_cw, s->mixwfloor); + vector_sum_norm(pdf, s->n_cw); + + /* Convert to logs3, truncate to 8 bits, and store in s->pdf */ + for (c = 0; (uint32)c < s->n_cw; c++) { + p = -(logmath_log(lmath, pdf[c])); + p += (1 << (SENSCR_SHIFT - 1)) - 1; /* Rounding before truncation */ + + if (s->n_gauden > 1) + s->pdf[i][f][c] = + (p < (255 << SENSCR_SHIFT)) ? (p >> SENSCR_SHIFT) : 255; + else + s->pdf[f][c][i] = + (p < (255 << SENSCR_SHIFT)) ? 
(p >> SENSCR_SHIFT) : 255; + } + } + } + if (n_err > 0) + E_WARN("Weight normalization failed for %d mixture weights components\n", n_err); + + ckd_free(pdf); + + if (chksum_present) + bio_verify_chksum(fp, byteswap, chksum); + + if (fread(&eofchk, 1, 1, fp) == 1) + E_FATAL("More data than expected in %s\n", file_name); + + fclose(fp); + + E_INFO + ("Read mixture weights for %d senones: %d features x %d codewords\n", + s->n_sen, s->n_feat, s->n_cw); + + return 1; +} + + +senone_t * +senone_init(gauden_t *g, char const *mixwfile, char const *sen2mgau_map_file, + float32 mixwfloor, logmath_t *lmath, bin_mdef_t *mdef) +{ + senone_t *s; + int32 n = 0, i; + + s = (senone_t *) ckd_calloc(1, sizeof(senone_t)); + s->lmath = logmath_init(logmath_get_base(lmath), SENSCR_SHIFT, TRUE); + s->mixwfloor = mixwfloor; + + s->n_gauden = g->n_mgau; + if (sen2mgau_map_file) { + if (!(strcmp(sen2mgau_map_file, ".semi.") == 0 + || strcmp(sen2mgau_map_file, ".ptm.") == 0 + || strcmp(sen2mgau_map_file, ".cont.") == 0)) { + senone_mgau_map_read(s, sen2mgau_map_file); + n = s->n_sen; + } + } + else { + if (s->n_gauden == 1) + sen2mgau_map_file = ".semi."; + else if (s->n_gauden == (uint32)bin_mdef_n_ciphone(mdef)) + sen2mgau_map_file = ".ptm."; + else + sen2mgau_map_file = ".cont."; + } + + senone_mixw_read(s, mixwfile, lmath); + + if (strcmp(sen2mgau_map_file, ".semi.") == 0) { + /* All-to-1 senones-codebook mapping */ + E_INFO("Mapping all senones to one codebook\n"); + s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau)); + } + else if (strcmp(sen2mgau_map_file, ".ptm.") == 0) { + /* All-to-ciphone-id senones-codebook mapping */ + E_INFO("Mapping senones to context-independent phone codebooks\n"); + s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau)); + for (i = 0; (uint32)i < s->n_sen; i++) + s->mgau[i] = bin_mdef_sen2cimap(mdef, i); + } + else if (strcmp(sen2mgau_map_file, ".cont.") == 0 + || strcmp(sen2mgau_map_file, ".s3cont.") == 0) { + /* 1-to-1 senone-codebook 
mapping */ + E_INFO("Mapping senones to individual codebooks\n"); + if (s->n_sen <= 1) + E_FATAL("#senone=%d; must be >1\n", s->n_sen); + + s->mgau = (uint32 *) ckd_calloc(s->n_sen, sizeof(*s->mgau)); + for (i = 0; (uint32)i < s->n_sen; i++) + s->mgau[i] = i; + /* Not sure why this is here, it probably does nothing. */ + s->n_gauden = s->n_sen; + } + else { + if (s->n_sen != (uint32)n) + E_FATAL("#senones inconsistent: %d in %s; %d in %s\n", + n, sen2mgau_map_file, s->n_sen, mixwfile); + } + + s->featscr = NULL; + return s; +} + +void +senone_free(senone_t * s) +{ + if (s == NULL) + return; + if (s->pdf) + ckd_free_3d((void *) s->pdf); + if (s->mgau) + ckd_free(s->mgau); + if (s->featscr) + ckd_free(s->featscr); + logmath_free(s->lmath); + ckd_free(s); +} + + +/* + * Compute senone score for one senone. + * NOTE: Remember that senone PDF tables contain SCALED, NEGATED logs3 values. + * NOTE: Remember also that PDF data may be transposed or not depending on s->n_gauden. + */ +int32 +senone_eval(senone_t * s, int id, gauden_dist_t ** dist, int32 n_top) +{ + int32 scr; /* total senone score */ + int32 fden; /* Gaussian density */ + int32 fscr; /* senone score for one feature */ + int32 fwscr; /* senone score for one feature, one codeword */ + int32 f, t; + gauden_dist_t *fdist; + + assert((id >= 0) && ((uint32)id < s->n_sen)); + assert((n_top > 0) && ((uint32)n_top <= s->n_cw)); + + scr = 0; + + for (f = 0; (uint32)f < s->n_feat; f++) { + fdist = dist[f]; + + fden = ((int32)fdist[0].dist + ((1<> SENSCR_SHIFT; + fscr = (s->n_gauden > 1) + ? (fden + -s->pdf[id][f][fdist[0].id]) /* untransposed */ + : (fden + -s->pdf[f][fdist[0].id][id]); /* transposed */ + /* Remaining of n_top codewords for feature f */ + for (t = 1; t < n_top; t++) { + fden = ((int32)fdist[t].dist + ((1<> SENSCR_SHIFT; + fwscr = (s->n_gauden > 1) ? 
+ (fden + -s->pdf[id][f][fdist[t].id]) : + (fden + -s->pdf[f][fdist[t].id][id]); + fscr = logmath_add(s->lmath, fscr, fwscr); + } + /* Senone scores are also scaled, negated logs3 values. Hence + * we have to negate the stuff we calculated above. */ + scr -= fscr; + } + /* Downscale scores. */ + scr /= s->aw; + + /* Avoid overflowing int16 */ + if (scr > 32767) + scr = 32767; + if (scr < -32768) + scr = -32768; + return scr; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_senone.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_senone.h new file mode 100644 index 0000000000000000000000000000000000000000..31faeaf15acb9110d076c06c9c226543eb91d89c --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ms_senone.h @@ -0,0 +1,134 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
+ * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
+ * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * ====================================================================
+ *
+ */
+/*
+ * ms_senone.h -- Mixture density weights associated with each tied state.
+ */
+
+#ifndef _MS_SENONE_H_
+#define _MS_SENONE_H_
+
+
+/* SphinxBase headers. NOTE(review): the four include operands below are missing in this patch text - restore them from upstream. */
+#include
+#include
+#include
+#include
+
+/* Local headers. */
+#include "ms_gauden.h"
+#include "bin_mdef.h"
+
+/** \file ms_senone.h
+ * \brief (Sphinx 3.0 specific) multiple streams senones. used with ms_gauden.h
+ * In Sphinx 3.0 family of tools, ms_senone is used to combine the Gaussian scores.
+ * Its existence is crucial in Sphinx 3.0 because 3.0 supports both SCHMM and CDHMM.
+ * There are optimization schemes (e.g. computing only the top-N Gaussians) that are
+ * more applicable to SCHMM than to CDHMM. This is wrapped in senone_eval_all.
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+#if 0
+}
+#endif
+
+typedef uint8 senprob_t; /**< Senone logs3-probs, truncated to 8 bits */
+
+/**
+ * \struct senone_t
+ * \brief 8-bit senone PDF structure.
+ *
+ * 8-bit senone PDF structure. Senone pdf values are normalized, floored, converted to
+ * logs3 domain, and finally truncated to 8 bits precision to conserve memory space.
+ */
+typedef struct senone_s {
+    senprob_t ***pdf; /**< gaussian density mixture weights, organized two possible
+                         ways depending on n_gauden:
+                         if (n_gauden > 1): pdf[sen][feat][codeword]. Not an
+                         efficient representation--memory access-wise--but
+                         evaluating the many codebooks will be more costly.
+                         if (n_gauden == 1): pdf[feat][codeword][sen]. Optimized
+                         for the shared-distribution semi-continuous case. */
+    logmath_t *lmath; /**< log math computation */
+    uint32 n_sen; /**< Number of senones in this set */
+    uint32 n_feat; /**< Number of feature streams */
+    uint32 n_cw; /**< Number of codewords per codebook,stream */
+    uint32 n_gauden; /**< Number of gaussian density codebooks referred to by senones */
+    float32 mixwfloor; /**< floor applied to each PDF entry */
+    uint32 *mgau; /**< senone-id -> mgau-id mapping for senones in this set */
+    int32 *featscr; /**< The feature score for every senone, will be initialized inside senone_eval_all */
+    int32 aw; /**< Inverse acoustic weight */
+} senone_t;
+
+
+/**
+ * Load a set of senones (mixing weights and mixture gaussian codebook mappings) from
+ * the given files. Normalize weights for each codebook, apply the given floor, convert
+ * PDF values to logs3 domain and quantize to 8-bits.
+ * @return pointer to senone structure created. Caller MUST NOT change its contents.
+ */
+senone_t *senone_init (gauden_t *g, /**< In: codebooks */
+                       char const *mixwfile, /**< In: mixing weights file */
+                       char const *mgau_mapfile,/**< In: file specifying mapping from each
+                                                   senone to mixture gaussian codebook, or one of the keywords
+                                                   .semi. / .ptm. / .cont.; if NULL a keyword is chosen from the number of codebooks */
+                       float32 mixwfloor, /**< In: Floor value for senone weights */
+                       logmath_t *lmath, /**< In: log math computation */
+                       bin_mdef_t *mdef /**< In: model definition */
+    );
+
+/** Release memory allocated by senone_init. */
+void senone_free(senone_t *s); /**< In: The senone_t to free */
+
+/**
+ * Evaluate the score for the given senone wrt the given top N gaussian codewords.
+ * @return senone score (in logs3 domain), divided by the inverse acoustic weight and clamped to the int16 range.
+ */
+int32 senone_eval (senone_t *s, int id, /**< In: senone for which score desired */
+                   gauden_dist_t **dist, /**< In: top N codewords and densities for
+                                            all features, to be combined into
+                                            senone score. IE, dist[f][i] = i-th
+                                            best for feature f */
+                   int n_top /**< In: Length of dist[f], for each f */
+    );
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* _MS_SENONE_H_ */
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search.c
new file mode 100644
index 0000000000000000000000000000000000000000..ad7ca3aa74b8741aee70145127c44fb7fb7f451d
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search.c
@@ -0,0 +1,1408 @@
+/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
+/* ====================================================================
+ * Copyright (c) 2008 Carnegie Mellon University. All rights
+ * reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * This work was supported in part by funding from the Defense Advanced
+ * Research Projects Agency and the National Science Foundation of the
+ * United States of America, and the CMU Sphinx Speech Consortium.
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file ngram_search.c N-Gram based multi-pass search ("FBS") + */ + +/* System headers. */ +#include +#include + +/* SphinxBase headers. */ +#include +#include +#include + +/* Local headers. 
*/ +#include "pocketsphinx_internal.h" +#include "ps_lattice_internal.h" +#include "ngram_search.h" +#include "ngram_search_fwdtree.h" +#include "ngram_search_fwdflat.h" + +static int ngram_search_start(ps_search_t *search); +static int ngram_search_step(ps_search_t *search, int frame_idx); +static int ngram_search_finish(ps_search_t *search); +static int ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p); +static char const *ngram_search_hyp(ps_search_t *search, int32 *out_score); +static int32 ngram_search_prob(ps_search_t *search); +static ps_seg_t *ngram_search_seg_iter(ps_search_t *search); + +static ps_searchfuncs_t ngram_funcs = { + /* start: */ ngram_search_start, + /* step: */ ngram_search_step, + /* finish: */ ngram_search_finish, + /* reinit: */ ngram_search_reinit, + /* free: */ ngram_search_free, + /* lattice: */ ngram_search_lattice, + /* hyp: */ ngram_search_hyp, + /* prob: */ ngram_search_prob, + /* seg_iter: */ ngram_search_seg_iter, +}; + +static ngram_model_t *default_lm; + +static void +ngram_search_update_widmap(ngram_search_t *ngs) +{ + char const **words; + int32 i, n_words; + + /* It's okay to include fillers since they won't be in the LM */ + n_words = ps_search_n_words(ngs); + words = (char const**)ckd_calloc(n_words, sizeof(*words)); + /* This will include alternates, again, that's okay since they aren't in the LM */ + for (i = 0; i < n_words; ++i) + words[i] = dict_wordstr(ps_search_dict(ngs), i); + ngram_model_set_map_words(ngs->lmset, words, n_words); + ckd_free(words); +} + +static void +ngram_search_calc_beams(ngram_search_t *ngs) +{ + cmd_ln_t *config; + acmod_t *acmod; + + config = ps_search_config(ngs); + acmod = ps_search_acmod(ngs); + + /* Log beam widths. 
*/ + ngs->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-beam"))>>SENSCR_SHIFT; + ngs->wbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-wbeam"))>>SENSCR_SHIFT; + ngs->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pbeam"))>>SENSCR_SHIFT; + ngs->lpbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lpbeam"))>>SENSCR_SHIFT; + ngs->lponlybeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-lponlybeam"))>>SENSCR_SHIFT; + ngs->fwdflatbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatbeam"))>>SENSCR_SHIFT; + ngs->fwdflatwbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-fwdflatwbeam"))>>SENSCR_SHIFT; + + /* Absolute pruning parameters. */ + ngs->maxwpf = cmd_ln_int32_r(config, "-maxwpf"); + ngs->maxhmmpf = cmd_ln_int32_r(config, "-maxhmmpf"); + + /* Various penalties which may or may not be useful. */ + ngs->wip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-wip")) >>SENSCR_SHIFT; + ngs->nwpen = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-nwpen")) >>SENSCR_SHIFT; + ngs->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pip")) >>SENSCR_SHIFT; + ngs->silpen = ngs->pip + + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-silprob"))>>SENSCR_SHIFT); + ngs->fillpen = ngs->pip + + (logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-fillprob"))>>SENSCR_SHIFT); + + /* Language weight ratios for fwdflat and bestpath search. */ + ngs->fwdflat_fwdtree_lw_ratio = + cmd_ln_float32_r(config, "-fwdflatlw") + / cmd_ln_float32_r(config, "-lw"); + ngs->bestpath_fwdtree_lw_ratio = + cmd_ln_float32_r(config, "-bestpathlw") + / cmd_ln_float32_r(config, "-lw"); + + /* Acoustic score scale for posterior probabilities. 
*/ + ngs->ascale = 1.0 / cmd_ln_float32_r(config, "-ascale"); +} + +ps_search_t * +ngram_search_init(const char *name, + ngram_model_t *lm, + cmd_ln_t *config, + acmod_t *acmod, + dict_t *dict, + dict2pid_t *d2p) +{ + ngram_search_t *ngs; + static char *lmname = "default"; + + /* Make the acmod's feature buffer growable if we are doing two-pass + * search. */ + acmod_set_grow(acmod, cmd_ln_boolean_r(config, "-fwdflat") && + cmd_ln_boolean_r(config, "-fwdtree")); + + ngs = ckd_calloc(1, sizeof(*ngs)); + ps_search_init(&ngs->base, &ngram_funcs, PS_SEARCH_TYPE_NGRAM, name, config, acmod, dict, d2p); + + ngs->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef), + acmod->tmat->tp, NULL, acmod->mdef->sseq); + if (ngs->hmmctx == NULL) { + ps_search_free(ps_search_base(ngs)); + return NULL; + } + ngs->chan_alloc = listelem_alloc_init(sizeof(chan_t)); + ngs->root_chan_alloc = listelem_alloc_init(sizeof(root_chan_t)); + ngs->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t)); + + /* Calculate various beam widths and such. */ + ngram_search_calc_beams(ngs); + + /* Allocate a billion different tables for stuff. */ + ngs->word_chan = ckd_calloc(dict_size(dict), + sizeof(*ngs->word_chan)); + ngs->word_lat_idx = ckd_calloc(dict_size(dict), + sizeof(*ngs->word_lat_idx)); + ngs->word_active = bitvec_alloc(dict_size(dict)); + ngs->last_ltrans = ckd_calloc(dict_size(dict), + sizeof(*ngs->last_ltrans)); + + /* FIXME: All these structures need to be made dynamic with + * garbage collection. */ + ngs->bp_table_size = cmd_ln_int32_r(config, "-latsize"); + ngs->bp_table = ckd_calloc(ngs->bp_table_size, + sizeof(*ngs->bp_table)); + /* FIXME: This thing is frickin' huge. 
*/ + ngs->bscore_stack_size = ngs->bp_table_size * 20; + ngs->bscore_stack = ckd_calloc(ngs->bscore_stack_size, + sizeof(*ngs->bscore_stack)); + ngs->n_frame_alloc = 256; + ngs->bp_table_idx = ckd_calloc(ngs->n_frame_alloc + 1, + sizeof(*ngs->bp_table_idx)); + ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */ + + /* Allocate active word list array */ + ngs->active_word_list = ckd_calloc_2d(2, dict_size(dict), + sizeof(**ngs->active_word_list)); + + ngs->lmset = ngram_model_set_init(config, &lm, &lmname, NULL, 1); + if (!ngs->lmset) + goto error_out; + + if (ngram_wid(ngs->lmset, S3_FINISH_WORD) == + ngram_unknown_wid(ngs->lmset)) + { + E_ERROR("Language model/set does not contain , " + "recognition will fail\n"); + goto error_out; + } + + /* Create word mappings. */ + ngram_search_update_widmap(ngs); + + /* Initialize fwdtree, fwdflat, bestpath modules if necessary. */ + if (cmd_ln_boolean_r(config, "-fwdtree")) { + ngram_fwdtree_init(ngs); + ngs->fwdtree = TRUE; + ngs->fwdtree_perf.name = "fwdtree"; + ptmr_init(&ngs->fwdtree_perf); + } + if (cmd_ln_boolean_r(config, "-fwdflat")) { + ngram_fwdflat_init(ngs); + ngs->fwdflat = TRUE; + ngs->fwdflat_perf.name = "fwdflat"; + ptmr_init(&ngs->fwdflat_perf); + } + if (cmd_ln_boolean_r(config, "-bestpath")) { + ngs->bestpath = TRUE; + ngs->bestpath_perf.name = "bestpath"; + ptmr_init(&ngs->bestpath_perf); + } + + return (ps_search_t *)ngs; + +error_out: + ngram_search_free((ps_search_t *)ngs); + return NULL; +} + +static int +ngram_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + int old_n_words; + int rv = 0; + + /* Update the number of words. */ + old_n_words = search->n_words; + if (old_n_words != dict_size(dict)) { + search->n_words = dict_size(dict); + /* Reallocate these temporary arrays. 
*/ + ckd_free(ngs->word_lat_idx); + ckd_free(ngs->word_active); + ckd_free(ngs->last_ltrans); + ckd_free_2d(ngs->active_word_list); + ngs->word_lat_idx = ckd_calloc(search->n_words, sizeof(*ngs->word_lat_idx)); + ngs->word_active = bitvec_alloc(search->n_words); + ngs->last_ltrans = ckd_calloc(search->n_words, sizeof(*ngs->last_ltrans)); + ngs->active_word_list + = ckd_calloc_2d(2, search->n_words, + sizeof(**ngs->active_word_list)); + } + + /* Free old dict2pid, dict */ + ps_search_base_reinit(search, dict, d2p); + + if (ngs->lmset == NULL) + return 0; + + /* Update beam widths. */ + ngram_search_calc_beams(ngs); + + /* Update word mappings. */ + ngram_search_update_widmap(ngs); + + /* Now rebuild lextrees. */ + if (ngs->fwdtree) { + if ((rv = ngram_fwdtree_reinit(ngs)) < 0) + return rv; + } + if (ngs->fwdflat) { + if ((rv = ngram_fwdflat_reinit(ngs)) < 0) + return rv; + } + + return rv; +} + +void +ngram_search_free(ps_search_t *search) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + + if (ngs->fwdtree) + ngram_fwdtree_deinit(ngs); + if (ngs->fwdflat) + ngram_fwdflat_deinit(ngs); + if (ngs->bestpath) { + double n_speech = (double)ngs->n_tot_frame + / cmd_ln_int32_r(ps_search_config(ngs), "-frate"); + + E_INFO("TOTAL bestpath %.2f CPU %.3f xRT\n", + ngs->bestpath_perf.t_tot_cpu, + ngs->bestpath_perf.t_tot_cpu / n_speech); + E_INFO("TOTAL bestpath %.2f wall %.3f xRT\n", + ngs->bestpath_perf.t_tot_elapsed, + ngs->bestpath_perf.t_tot_elapsed / n_speech); + } + + ps_search_base_free(search); + hmm_context_free(ngs->hmmctx); + listelem_alloc_free(ngs->chan_alloc); + listelem_alloc_free(ngs->root_chan_alloc); + listelem_alloc_free(ngs->latnode_alloc); + ngram_model_free(ngs->lmset); + + ckd_free(ngs->word_chan); + ckd_free(ngs->word_lat_idx); + bitvec_free(ngs->word_active); + ckd_free(ngs->bp_table); + ckd_free(ngs->bscore_stack); + if (ngs->bp_table_idx != NULL) + ckd_free(ngs->bp_table_idx - 1); + ckd_free_2d(ngs->active_word_list); + 
ckd_free(ngs->last_ltrans); + ckd_free(ngs); +} + +int +ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx) +{ + if (frame_idx >= ngs->n_frame_alloc) { + ngs->n_frame_alloc *= 2; + ngs->bp_table_idx = ckd_realloc(ngs->bp_table_idx - 1, + (ngs->n_frame_alloc + 1) + * sizeof(*ngs->bp_table_idx)); + if (ngs->frm_wordlist) { + ngs->frm_wordlist = ckd_realloc(ngs->frm_wordlist, + ngs->n_frame_alloc + * sizeof(*ngs->frm_wordlist)); + } + ++ngs->bp_table_idx; /* Make bptableidx[-1] valid */ + } + ngs->bp_table_idx[frame_idx] = ngs->bpidx; + return ngs->bpidx; +} + +static void +set_real_wid(ngram_search_t *ngs, int32 bp) +{ + bptbl_t *ent, *prev; + + assert(bp != NO_BP); + ent = ngs->bp_table + bp; + if (ent->bp == NO_BP) + prev = NULL; + else + prev = ngs->bp_table + ent->bp; + + /* Propagate lm state for fillers, rotate it for words. */ + if (dict_filler_word(ps_search_dict(ngs), ent->wid)) { + if (prev != NULL) { + ent->real_wid = prev->real_wid; + ent->prev_real_wid = prev->prev_real_wid; + } + else { + ent->real_wid = dict_basewid(ps_search_dict(ngs), + ent->wid); + ent->prev_real_wid = BAD_S3WID; + } + } + else { + ent->real_wid = dict_basewid(ps_search_dict(ngs), ent->wid); + if (prev != NULL) + ent->prev_real_wid = prev->real_wid; + else + ent->prev_real_wid = BAD_S3WID; + } +} + +#define NGRAM_HISTORY_LONG_WORD 2000 /* 20s */ + +void +ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, + int32 w, int32 score, int32 path, int32 rc) +{ + int32 bp; + + /* Look for an existing exit for this word in this frame. The + * only reason one would exist is from a different right context + * triphone, but of course that happens quite frequently. 
*/ + bp = ngs->word_lat_idx[w]; + if (bp != NO_BP) { + + if (frame_idx - ngs->bp_table[path].frame > NGRAM_HISTORY_LONG_WORD) { + E_WARN("Word '%s' survived for %d frames, potential overpruning\n", dict_wordstr(ps_search_dict(ngs), w), + frame_idx - ngs->bp_table[path].frame); + } + + /* Keep only the best scoring one, we will reconstruct the + * others from the right context scores - usually the history + * is not lost. */ + if (ngs->bp_table[bp].score WORSE_THAN score) { + assert(path != bp); /* Pathological. */ + if (ngs->bp_table[bp].bp != path) { + int32 bplh[2], newlh[2]; + /* But, sometimes, the history *is* lost. If we wanted to + * do exact language model scoring we'd have to preserve + * these alternate histories. */ + E_DEBUG("Updating path history %d => %d frame %d\n", + ngs->bp_table[bp].bp, path, frame_idx); + bplh[0] = ngs->bp_table[bp].bp == -1 + ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].prev_real_wid; + bplh[1] = ngs->bp_table[bp].bp == -1 + ? -1 : ngs->bp_table[ngs->bp_table[bp].bp].real_wid; + newlh[0] = path == -1 + ? -1 : ngs->bp_table[path].prev_real_wid; + newlh[1] = path == -1 + ? -1 : ngs->bp_table[path].real_wid; + /* Actually it's worth checking how often the actual + * language model state changes. */ + if (bplh[0] != newlh[0] || bplh[1] != newlh[1]) { + /* It's fairly rare that the actual language model + * state changes, but it does happen some + * times. */ + E_DEBUG("Updating language model state %s,%s => %s,%s frame %d\n", + dict_wordstr(ps_search_dict(ngs), bplh[0]), + dict_wordstr(ps_search_dict(ngs), bplh[1]), + dict_wordstr(ps_search_dict(ngs), newlh[0]), + dict_wordstr(ps_search_dict(ngs), newlh[1]), + frame_idx); + set_real_wid(ngs, bp); + } + ngs->bp_table[bp].bp = path; + } + ngs->bp_table[bp].score = score; + } + /* But do keep track of scores for all right contexts, since + * we need them to determine the starting path scores for any + * successors of this word exit. 
*/ + if (ngs->bp_table[bp].s_idx != -1) + ngs->bscore_stack[ngs->bp_table[bp].s_idx + rc] = score; + } + else { + int32 i, rcsize; + bptbl_t *be; + + /* This might happen if recognition fails. */ + if (ngs->bpidx == NO_BP) { + E_ERROR("No entries in backpointer table!"); + return; + } + + /* Expand the backpointer tables if necessary. */ + if (ngs->bpidx >= ngs->bp_table_size) { + ngs->bp_table_size *= 2; + ngs->bp_table = ckd_realloc(ngs->bp_table, + ngs->bp_table_size + * sizeof(*ngs->bp_table)); + E_INFO("Resized backpointer table to %d entries\n", ngs->bp_table_size); + } + if (ngs->bss_head >= ngs->bscore_stack_size + - bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef)) { + ngs->bscore_stack_size *= 2; + ngs->bscore_stack = ckd_realloc(ngs->bscore_stack, + ngs->bscore_stack_size + * sizeof(*ngs->bscore_stack)); + E_INFO("Resized score stack to %d entries\n", ngs->bscore_stack_size); + } + + ngs->word_lat_idx[w] = ngs->bpidx; + be = &(ngs->bp_table[ngs->bpidx]); + be->wid = w; + be->frame = frame_idx; + be->bp = path; + be->score = score; + be->s_idx = ngs->bss_head; + be->valid = TRUE; + assert(path != ngs->bpidx); + + /* DICT2PID */ + /* Get diphone ID for final phone and number of ssids corresponding to it. */ + be->last_phone = dict_last_phone(ps_search_dict(ngs),w); + if (dict_is_single_phone(ps_search_dict(ngs), w)) { + be->last2_phone = -1; + be->s_idx = -1; + rcsize = 0; + } + else { + be->last2_phone = dict_second_last_phone(ps_search_dict(ngs),w); + rcsize = dict2pid_rssid(ps_search_dict2pid(ngs), + be->last_phone, be->last2_phone)->n_ssid; + } + /* Allocate some space on the bscore_stack for all of these triphones. 
*/ + for (i = 0; i < rcsize; ++i) + ngs->bscore_stack[ngs->bss_head + i] = WORST_SCORE; + if (rcsize) + ngs->bscore_stack[ngs->bss_head + rc] = score; + set_real_wid(ngs, ngs->bpidx); + + ngs->bpidx++; + ngs->bss_head += rcsize; + } +} + +int +ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score) +{ + /* End of backpointers for this frame. */ + int end_bpidx; + int best_exit, bp; + int32 best_score; + + /* No hypothesis means no exit node! */ + if (ngs->n_frame == 0) + return NO_BP; + + if (frame_idx == -1 || frame_idx >= ngs->n_frame) + frame_idx = ngs->n_frame - 1; + end_bpidx = ngs->bp_table_idx[frame_idx]; + + best_score = WORST_SCORE; + best_exit = NO_BP; + + /* Scan back to find a frame with some backpointers in it. */ + while (frame_idx >= 0 && ngs->bp_table_idx[frame_idx] == end_bpidx) + --frame_idx; + /* This is NOT an error, it just means there is no hypothesis yet. */ + if (frame_idx < 0) + return NO_BP; + + /* Now find the entry for OR the best scoring entry. 
*/ + assert(end_bpidx < ngs->bp_table_size); + for (bp = ngs->bp_table_idx[frame_idx]; bp < end_bpidx; ++bp) { + if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs) + || ngs->bp_table[bp].score BETTER_THAN best_score) { + best_score = ngs->bp_table[bp].score; + best_exit = bp; + } + if (ngs->bp_table[bp].wid == ps_search_finish_wid(ngs)) + break; + } + + if (out_best_score) { + *out_best_score = best_score; + } + return best_exit; +} + +char const * +ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx) +{ + ps_search_t *base = ps_search_base(ngs); + char *c; + size_t len; + int bp; + + if (bpidx == NO_BP) + return NULL; + + bp = bpidx; + len = 0; + while (bp != NO_BP) { + bptbl_t *be = &ngs->bp_table[bp]; + bp = be->bp; + if (dict_real_word(ps_search_dict(ngs), be->wid)) + len += strlen(dict_basestr(ps_search_dict(ngs), be->wid)) + 1; + } + + ckd_free(base->hyp_str); + if (len == 0) { + base->hyp_str = NULL; + return base->hyp_str; + } + base->hyp_str = ckd_calloc(1, len); + + bp = bpidx; + c = base->hyp_str + len - 1; + while (bp != NO_BP) { + bptbl_t *be = &ngs->bp_table[bp]; + size_t len; + + bp = be->bp; + if (dict_real_word(ps_search_dict(ngs), be->wid)) { + len = strlen(dict_basestr(ps_search_dict(ngs), be->wid)); + c -= len; + memcpy(c, dict_basestr(ps_search_dict(ngs), be->wid), len); + if (c > base->hyp_str) { + --c; + *c = ' '; + } + } + } + + return base->hyp_str; +} + +void +ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w) +{ + chan_t *hmm, *thmm; + xwdssid_t *rssid; + int32 i, tmatid, ciphone; + + /* DICT2PID */ + /* Get pointer to array of triphones for final diphone. 
*/ + assert(!dict_is_single_phone(ps_search_dict(ngs), w)); + ciphone = dict_last_phone(ps_search_dict(ngs),w); + rssid = dict2pid_rssid(ps_search_dict2pid(ngs), + ciphone, + dict_second_last_phone(ps_search_dict(ngs),w)); + tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone); + hmm = ngs->word_chan[w]; + if ((hmm == NULL) || (hmm_nonmpx_ssid(&hmm->hmm) != rssid->ssid[0])) { + hmm = listelem_malloc(ngs->chan_alloc); + hmm->next = ngs->word_chan[w]; + ngs->word_chan[w] = hmm; + + hmm->info.rc_id = 0; + hmm->ciphone = ciphone; + hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[0], tmatid); + E_DEBUG("allocated rc_id 0 ssid %d ciphone %d lc %d word %s\n", + rssid->ssid[0], hmm->ciphone, + dict_second_last_phone(ps_search_dict(ngs),w), + dict_wordstr(ps_search_dict(ngs),w)); + } + for (i = 1; i < rssid->n_ssid; ++i) { + if ((hmm->next == NULL) || (hmm_nonmpx_ssid(&hmm->next->hmm) != rssid->ssid[i])) { + thmm = listelem_malloc(ngs->chan_alloc); + thmm->next = hmm->next; + hmm->next = thmm; + hmm = thmm; + + hmm->info.rc_id = i; + hmm->ciphone = ciphone; + hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, rssid->ssid[i], tmatid); + E_DEBUG("allocated rc_id %d ssid %d ciphone %d lc %d word %s\n", + i, rssid->ssid[i], hmm->ciphone, + dict_second_last_phone(ps_search_dict(ngs),w), + dict_wordstr(ps_search_dict(ngs),w)); + } + else + hmm = hmm->next; + } +} + +void +ngram_search_free_all_rc(ngram_search_t *ngs, int32 w) +{ + chan_t *hmm, *thmm; + + for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) { + thmm = hmm->next; + hmm_deinit(&hmm->hmm); + listelem_free(ngs->chan_alloc, hmm); + } + ngs->word_chan[w] = NULL; +} + +int32 +ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone) +{ + /* DICT2PID */ + /* Get the mapping from right context phone ID to index in the + * right context table and the bscore_stack. */ + if (pbe->last2_phone == -1) { + /* No right context for single phone predecessor words. 
*/ + return pbe->score; + } + else { + xwdssid_t *rssid; + /* Find the index for the last diphone of the previous word + + * the first phone of the current word. */ + rssid = dict2pid_rssid(ps_search_dict2pid(ngs), + pbe->last_phone, pbe->last2_phone); + /* This may be WORST_SCORE, which means that there was no exit + * with rcphone as right context. */ + return ngs->bscore_stack[pbe->s_idx + rssid->cimap[rcphone]]; + } +} + +/* + * Compute acoustic and LM scores for a BPTable entry (segment). + */ +void +ngram_compute_seg_score(ngram_search_t *ngs, bptbl_t *be, float32 lwf, + int32 *out_ascr, int32 *out_lscr) +{ + bptbl_t *pbe; + int32 start_score; + + /* Start of utterance. */ + if (be->bp == NO_BP) { + *out_ascr = be->score; + *out_lscr = 0; + return; + } + + /* Otherwise, calculate lscr and ascr. */ + pbe = ngs->bp_table + be->bp; + start_score = ngram_search_exit_score(ngs, pbe, + dict_first_phone(ps_search_dict(ngs),be->wid)); + assert(start_score BETTER_THAN WORST_SCORE); + + /* FIXME: These result in positive acoustic scores when filler + words have non-filler pronunciations. That whole business + is still pretty much broken but at least it doesn't + segfault. 
*/ + if (be->wid == ps_search_silence_wid(ngs)) { + *out_lscr = ngs->silpen; + } + else if (dict_filler_word(ps_search_dict(ngs), be->wid)) { + *out_lscr = ngs->fillpen; + } + else { + int32 n_used; + *out_lscr = ngram_tg_score(ngs->lmset, + be->real_wid, + pbe->real_wid, + pbe->prev_real_wid, + &n_used)>>SENSCR_SHIFT; + *out_lscr = *out_lscr * lwf; + } + *out_ascr = be->score - start_score - *out_lscr; +} + +static int +ngram_search_start(ps_search_t *search) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + + ngs->done = FALSE; + ngram_model_flush(ngs->lmset); + if (ngs->fwdtree) + ngram_fwdtree_start(ngs); + else if (ngs->fwdflat) + ngram_fwdflat_start(ngs); + else + return -1; + return 0; +} + +static int +ngram_search_step(ps_search_t *search, int frame_idx) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + + if (ngs->fwdtree) + return ngram_fwdtree_search(ngs, frame_idx); + else if (ngs->fwdflat) + return ngram_fwdflat_search(ngs, frame_idx); + else + return -1; +} + +void +dump_bptable(ngram_search_t *ngs) +{ + int i; + E_INFO("Backpointer table (%d entries):\n", ngs->bpidx); + for (i = 0; i < ngs->bpidx; ++i) { + bptbl_t *bpe = ngs->bp_table + i; + int j, rcsize; + + E_INFO_NOFN("%-5d %-10s start %-3d end %-3d score %-8d bp %-3d real_wid %-5d prev_real_wid %-5d", + i, dict_wordstr(ps_search_dict(ngs), bpe->wid), + (bpe->bp == -1 + ? 
0 : ngs->bp_table[bpe->bp].frame + 1), + bpe->frame, bpe->score, bpe->bp, + bpe->real_wid, bpe->prev_real_wid); + + if (bpe->last2_phone == -1) + rcsize = 0; + else + rcsize = dict2pid_rssid(ps_search_dict2pid(ngs), + bpe->last_phone, bpe->last2_phone)->n_ssid; + if (rcsize) { + E_INFOCONT("\tbss"); + for (j = 0; j < rcsize; ++j) + if (ngs->bscore_stack[bpe->s_idx + j] != WORST_SCORE) + E_INFOCONT(" %d", bpe->score - ngs->bscore_stack[bpe->s_idx + j]); + } + E_INFOCONT("\n"); + } +} + +static int +ngram_search_finish(ps_search_t *search) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + + ngs->n_tot_frame += ngs->n_frame; + if (ngs->fwdtree) { + ngram_fwdtree_finish(ngs); + /* dump_bptable(ngs); */ + + /* Now do fwdflat search in its entirety, if requested. */ + if (ngs->fwdflat) { + int i; + /* Rewind the acoustic model. */ + if (acmod_rewind(ps_search_acmod(ngs)) < 0) + return -1; + /* Now redo search. */ + ngram_fwdflat_start(ngs); + i = 0; + while (ps_search_acmod(ngs)->n_feat_frame > 0) { + int nfr; + if ((nfr = ngram_fwdflat_search(ngs, i)) < 0) + return nfr; + acmod_advance(ps_search_acmod(ngs)); + ++i; + } + ngram_fwdflat_finish(ngs); + /* And now, we should have a result... */ + /* dump_bptable(ngs); */ + } + } + else if (ngs->fwdflat) { + ngram_fwdflat_finish(ngs); + } + + /* Mark the current utterance as done. */ + ngs->done = TRUE; + return 0; +} + +static ps_latlink_t * +ngram_search_bestpath(ps_search_t *search, int32 *out_score, int backward) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + + (void)backward; + if (search->last_link == NULL) { + search->last_link = ps_lattice_bestpath(search->dag, ngs->lmset, + ngs->bestpath_fwdtree_lw_ratio, + ngs->ascale); + if (search->last_link == NULL) + return NULL; + /* Also calculate betas so we can fill in the posterior + * probability field in the segmentation. 
*/ + if (search->post == 0) + search->post = ps_lattice_posterior(search->dag, ngs->lmset, + ngs->ascale); + } + if (out_score) + *out_score = search->last_link->path_scr + search->dag->final_node_ascr; + return search->last_link; +} + +static char const * +ngram_search_hyp(ps_search_t *search, int32 *out_score) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + + /* Only do bestpath search if the utterance is complete. */ + if (ngs->bestpath && ngs->done) { + ps_lattice_t *dag; + ps_latlink_t *link; + char const *hyp; + double n_speech; + + ptmr_reset(&ngs->bestpath_perf); + ptmr_start(&ngs->bestpath_perf); + if ((dag = ngram_search_lattice(search)) == NULL) + return NULL; + if ((link = ngram_search_bestpath(search, out_score, FALSE)) == NULL) + return NULL; + hyp = ps_lattice_hyp(dag, link); + ptmr_stop(&ngs->bestpath_perf); + n_speech = (double)dag->n_frames + / cmd_ln_int32_r(ps_search_config(ngs), "-frate"); + E_INFO("bestpath %.2f CPU %.3f xRT\n", + ngs->bestpath_perf.t_cpu, + ngs->bestpath_perf.t_cpu / n_speech); + E_INFO("bestpath %.2f wall %.3f xRT\n", + ngs->bestpath_perf.t_elapsed, + ngs->bestpath_perf.t_elapsed / n_speech); + return hyp; + } + else { + int32 bpidx; + + /* fwdtree and fwdflat use same backpointer table. */ + bpidx = ngram_search_find_exit(ngs, -1, out_score); + if (bpidx != NO_BP) + return ngram_search_bp_hyp(ngs, bpidx); + } + + return NULL; +} + +static void +ngram_search_bp2itor(ps_seg_t *seg, int bp) +{ + ngram_search_t *ngs = (ngram_search_t *)seg->search; + bptbl_t *be, *pbe; + + be = &ngs->bp_table[bp]; + pbe = be->bp == -1 ? NULL : &ngs->bp_table[be->bp]; + seg->word = dict_wordstr(ps_search_dict(ngs), be->wid); + seg->ef = be->frame; + seg->sf = pbe ? pbe->frame + 1 : 0; + seg->prob = 0; /* Bogus value... */ + /* Compute acoustic and LM scores for this segment. */ + if (pbe == NULL) { + seg->ascr = be->score; + seg->lscr = 0; + seg->lback = 0; + } + else { + int32 start_score; + + /* Find ending path score of previous word. 
*/ + start_score = ngram_search_exit_score(ngs, pbe, + dict_first_phone(ps_search_dict(ngs), be->wid)); + assert(start_score BETTER_THAN WORST_SCORE); + if (be->wid == ps_search_silence_wid(ngs)) { + seg->lscr = ngs->silpen; + } + else if (dict_filler_word(ps_search_dict(ngs), be->wid)) { + seg->lscr = ngs->fillpen; + } + else { + seg->lscr = ngram_tg_score(ngs->lmset, + be->real_wid, + pbe->real_wid, + pbe->prev_real_wid, + &seg->lback)>>SENSCR_SHIFT; + seg->lscr = (int32)(seg->lscr * seg->lwf); + } + seg->ascr = be->score - start_score - seg->lscr; + } +} + +static void +ngram_bp_seg_free(ps_seg_t *seg) +{ + bptbl_seg_t *itor = (bptbl_seg_t *)seg; + + ckd_free(itor->bpidx); + ckd_free(itor); +} + +static ps_seg_t * +ngram_bp_seg_next(ps_seg_t *seg) +{ + bptbl_seg_t *itor = (bptbl_seg_t *)seg; + + if (++itor->cur == itor->n_bpidx) { + ngram_bp_seg_free(seg); + return NULL; + } + + ngram_search_bp2itor(seg, itor->bpidx[itor->cur]); + return seg; +} + +static ps_segfuncs_t ngram_bp_segfuncs = { + /* seg_next */ ngram_bp_seg_next, + /* seg_free */ ngram_bp_seg_free +}; + +static ps_seg_t * +ngram_search_bp_iter(ngram_search_t *ngs, int bpidx, float32 lwf) +{ + bptbl_seg_t *itor; + int bp, cur; + + /* Calling this an "iterator" is a bit of a misnomer since we have + * to get the entire backtrace in order to produce it. On the + * other hand, all we actually need is the bptbl IDs, and we can + * allocate a fixed-size array of them. 
*/ + itor = ckd_calloc(1, sizeof(*itor)); + itor->base.vt = &ngram_bp_segfuncs; + itor->base.search = ps_search_base(ngs); + itor->base.lwf = lwf; + itor->n_bpidx = 0; + bp = bpidx; + while (bp != NO_BP) { + bptbl_t *be = &ngs->bp_table[bp]; + bp = be->bp; + ++itor->n_bpidx; + } + if (itor->n_bpidx == 0) { + ckd_free(itor); + return NULL; + } + itor->bpidx = ckd_calloc(itor->n_bpidx, sizeof(*itor->bpidx)); + cur = itor->n_bpidx - 1; + bp = bpidx; + while (bp != NO_BP) { + bptbl_t *be = &ngs->bp_table[bp]; + itor->bpidx[cur] = bp; + bp = be->bp; + --cur; + } + + /* Fill in relevant fields for first element. */ + ngram_search_bp2itor((ps_seg_t *)itor, itor->bpidx[0]); + + return (ps_seg_t *)itor; +} + +static ps_seg_t * +ngram_search_seg_iter(ps_search_t *search) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + + /* Only do bestpath search if the utterance is done. */ + if (ngs->bestpath && ngs->done) { + ps_lattice_t *dag; + ps_latlink_t *link; + double n_speech; + ps_seg_t *itor; + + ptmr_reset(&ngs->bestpath_perf); + ptmr_start(&ngs->bestpath_perf); + if ((dag = ngram_search_lattice(search)) == NULL) + return NULL; + if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL) + return NULL; + itor = ps_lattice_seg_iter(dag, link, + ngs->bestpath_fwdtree_lw_ratio); + ptmr_stop(&ngs->bestpath_perf); + n_speech = (double)dag->n_frames + / cmd_ln_int32_r(ps_search_config(ngs), "-frate"); + E_INFO("bestpath %.2f CPU %.3f xRT\n", + ngs->bestpath_perf.t_cpu, + ngs->bestpath_perf.t_cpu / n_speech); + E_INFO("bestpath %.2f wall %.3f xRT\n", + ngs->bestpath_perf.t_elapsed, + ngs->bestpath_perf.t_elapsed / n_speech); + return itor; + } + else { + int32 bpidx; + + /* fwdtree and fwdflat use same backpointer table. */ + bpidx = ngram_search_find_exit(ngs, -1, NULL); + return ngram_search_bp_iter(ngs, bpidx, + /* but different language weights... */ + (ngs->done && ngs->fwdflat) + ? 
ngs->fwdflat_fwdtree_lw_ratio : 1.0); + } + + return NULL; +} + +static int32 +ngram_search_prob(ps_search_t *search) +{ + ngram_search_t *ngs = (ngram_search_t *)search; + + /* Only do bestpath search if the utterance is done. */ + if (ngs->bestpath && ngs->done) { + ps_lattice_t *dag; + ps_latlink_t *link; + + if ((dag = ngram_search_lattice(search)) == NULL) + return 0; + if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL) + return 0; + return search->post; + } + else { + /* FIXME: Give some kind of good estimate here, eventually. */ + return 0; + } +} + +static void +create_dag_nodes(ngram_search_t *ngs, ps_lattice_t *dag) +{ + bptbl_t *bp_ptr; + int32 i; + + for (i = 0, bp_ptr = ngs->bp_table; i < ngs->bpidx; ++i, ++bp_ptr) { + int32 sf, ef, wid; + ps_latnode_t *node; + + /* Skip invalid backpointers (these result from -maxwpf pruning) */ + if (!bp_ptr->valid) + continue; + + sf = (bp_ptr->bp < 0) ? 0 : ngs->bp_table[bp_ptr->bp].frame + 1; + ef = bp_ptr->frame; + wid = bp_ptr->wid; + + assert(ef < dag->n_frames); + /* Skip non-final entries. */ + if ((wid == ps_search_finish_wid(ngs)) && (ef < dag->n_frames - 1)) + continue; + + /* Skip if word not in LM */ + if ((!dict_filler_word(ps_search_dict(ngs), wid)) + && (!ngram_model_set_known_wid(ngs->lmset, + dict_basewid(ps_search_dict(ngs), wid)))) + continue; + + /* See if bptbl entry already in lattice */ + for (node = dag->nodes; node; node = node->next) { + if ((node->wid == wid) && (node->sf == sf)) + break; + } + + /* For the moment, store bptbl indices in node.{fef,lef} */ + if (node) + node->lef = i; + else { + /* New node; link to head of list */ + node = listelem_malloc(dag->latnode_alloc); + node->wid = wid; + node->sf = sf; /* This is a frame index. */ + node->fef = node->lef = i; /* These are backpointer indices (argh) */ + node->reachable = FALSE; + node->entries = NULL; + node->exits = NULL; + + /* NOTE: This creates the list of nodes in reverse + * topological order, i.e. 
a node always precedes its + * antecedents in this list. */ + node->next = dag->nodes; + dag->nodes = node; + ++dag->n_nodes; + } + } +} + +static ps_latnode_t * +find_start_node(ngram_search_t *ngs, ps_lattice_t *dag) +{ + ps_latnode_t *node; + + /* Find start node .0 */ + for (node = dag->nodes; node; node = node->next) { + if ((node->wid == ps_search_start_wid(ngs)) && (node->sf == 0)) + break; + } + if (!node) { + /* This is probably impossible. */ + E_ERROR("Couldn't find in first frame\n"); + return NULL; + } + return node; +} + +static ps_latnode_t * +find_end_node(ngram_search_t *ngs, ps_lattice_t *dag, float32 lwf) +{ + ps_latnode_t *node; + int32 ef, bestbp, bp, bestscore; + + /* Find final node .last_frame; nothing can follow this node */ + for (node = dag->nodes; node; node = node->next) { + int32 lef = ngs->bp_table[node->lef].frame; + if ((node->wid == ps_search_finish_wid(ngs)) + && (lef == dag->n_frames - 1)) + break; + } + if (node != NULL) + return node; + + /* It is quite likely that no exited in the last frame. So, + * find the node corresponding to the best exit. */ + /* Find the last frame containing a word exit. */ + for (ef = dag->n_frames - 1; + ef >= 0 && ngs->bp_table_idx[ef] == ngs->bpidx; + --ef); + if (ef < 0) { + E_ERROR("Empty backpointer table: can not build DAG.\n"); + return NULL; + } + + /* Find best word exit in that frame. */ + bestscore = WORST_SCORE; + bestbp = NO_BP; + for (bp = ngs->bp_table_idx[ef]; bp < ngs->bp_table_idx[ef + 1]; ++bp) { + int32 n_used, l_scr, wid, prev_wid; + wid = ngs->bp_table[bp].real_wid; + prev_wid = ngs->bp_table[bp].prev_real_wid; + /* Always prefer , of which there will only be one per frame. 
*/ + if (wid == ps_search_finish_wid(ngs)) { + bestbp = bp; + break; + } + l_scr = ngram_tg_score(ngs->lmset, ps_search_finish_wid(ngs), + wid, prev_wid, &n_used) >>SENSCR_SHIFT; + l_scr = l_scr * lwf; + if (ngs->bp_table[bp].score + l_scr BETTER_THAN bestscore) { + bestscore = ngs->bp_table[bp].score + l_scr; + bestbp = bp; + } + } + if (bestbp == NO_BP) { + E_ERROR("No word exits found in last frame (%d), assuming no recognition\n", ef); + return NULL; + } + E_INFO(" not found in last frame, using %s.%d instead\n", + dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid), ef); + + /* Now find the node that corresponds to it. */ + for (node = dag->nodes; node; node = node->next) { + if (node->lef == bestbp) + return node; + } + + /* FIXME: This seems to happen a lot! */ + E_ERROR("Failed to find DAG node corresponding to %s\n", + dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].wid)); + return NULL; +} + +/* + * Build lattice from bptable. + */ +ps_lattice_t * +ngram_search_lattice(ps_search_t *search) +{ + int32 i, score, ascr, lscr; + ps_latnode_t *node, *from, *to; + ngram_search_t *ngs; + ps_lattice_t *dag; + int min_endfr, nlink; + float lwf; + + ngs = (ngram_search_t *)search; + min_endfr = cmd_ln_int32_r(ps_search_config(search), "-min_endfr"); + + /* If the best score is WORST_SCORE or worse, there is no way to + * make a lattice. */ + if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE) + return NULL; + + /* Check to see if a lattice has previously been created over the + * same number of frames, and reuse it if so. */ + if (search->dag && search->dag->n_frames == ngs->n_frame) + return search->dag; + + /* Nope, create a new one. */ + ps_lattice_free(search->dag); + search->dag = NULL; + dag = ps_lattice_init_search(search, ngs->n_frame); + /* Compute these such that they agree with the fwdtree language weight. */ + lwf = ngs->fwdflat ? 
ngs->fwdflat_fwdtree_lw_ratio : 1.0; + create_dag_nodes(ngs, dag); + if ((dag->start = find_start_node(ngs, dag)) == NULL) + goto error_out; + if ((dag->end = find_end_node(ngs, dag, ngs->bestpath_fwdtree_lw_ratio)) == NULL) + goto error_out; + E_INFO("lattice start node %s.%d end node %s.%d\n", + dict_wordstr(search->dict, dag->start->wid), dag->start->sf, + dict_wordstr(search->dict, dag->end->wid), dag->end->sf); + + ngram_compute_seg_score(ngs, ngs->bp_table + dag->end->lef, lwf, + &dag->final_node_ascr, &lscr); + + /* + * At this point, dag->nodes is ordered such that nodes earlier in + * the list can follow (in time) those later in the list, but not + * vice versa (see above - also note that adjacency is purely + * determined by time which is why we can make this claim). Now + * create precedence links and simultanesously mark all nodes that + * can reach dag->end. (All nodes are reached from dag->start + * simply by definition - they were created that way). + * + * Note that this also means that any nodes before dag->end in the + * list can be discarded, meaning that dag->end will always be + * equal to dag->nodes (FIXME: except when loading from a file but + * we can fix that...) 
+ */ + i = 0; + while (dag->nodes && dag->nodes != dag->end) { + ps_latnode_t *next = dag->nodes->next; + listelem_free(dag->latnode_alloc, dag->nodes); + dag->nodes = next; + ++i; + } + E_INFO("Eliminated %d nodes before end node\n", i); + dag->end->reachable = TRUE; + nlink = 0; + for (to = dag->end; to; to = to->next) { + int fef, lef; + + /* Skip if not reachable; it will never be reachable from dag->end */ + if (!to->reachable) + continue; + + /* Prune nodes with too few endpoints - heuristic + borrowed from Sphinx3 */ + fef = ngs->bp_table[to->fef].frame; + lef = ngs->bp_table[to->lef].frame; + if (to != dag->end && lef - fef < min_endfr) { + to->reachable = FALSE; + continue; + } + + /* Find predecessors of to : from->fef+1 <= to->sf <= from->lef+1 */ + for (from = to->next; from; from = from->next) { + bptbl_t *from_bpe; + + fef = ngs->bp_table[from->fef].frame; + lef = ngs->bp_table[from->lef].frame; + + if ((to->sf <= fef) || (to->sf > lef + 1)) + continue; + if (lef - fef < min_endfr) { + assert(!from->reachable); + continue; + } + + /* Find bptable entry for "from" that exactly precedes "to" */ + i = from->fef; + from_bpe = ngs->bp_table + i; + for (; i <= from->lef; i++, from_bpe++) { + if (from_bpe->wid != from->wid) + continue; + if (from_bpe->frame >= to->sf - 1) + break; + } + + if ((i > from->lef) || (from_bpe->frame != to->sf - 1)) + continue; + + /* Find acoustic score from.sf->to.sf-1 with right context = to */ + /* This gives us from_bpe's best acoustic score. */ + ngram_compute_seg_score(ngs, from_bpe, lwf, + &ascr, &lscr); + /* Now find the exact path score for from->to, including + * the appropriate final triphone. In fact this might not + * exist. */ + score = ngram_search_exit_score(ngs, from_bpe, + dict_first_phone(ps_search_dict(ngs), to->wid)); + /* Does not exist. Can't create a link here. */ + if (score == WORST_SCORE) + continue; + /* Adjust the arc score to match the correct triphone. 
*/ + else + score = ascr + (score - from_bpe->score); + if (score BETTER_THAN 0) { + /* Scores must be negative, or Bad Things will happen. + In general, they are, except in corner cases + involving filler words. We don't want to throw any + links away so we'll keep these, but with some + arbitrarily improbable but recognizable score. */ + ps_lattice_link(dag, from, to, -424242, from_bpe->frame); + ++nlink; + from->reachable = TRUE; + } + else if (score BETTER_THAN WORST_SCORE) { + ps_lattice_link(dag, from, to, score, from_bpe->frame); + ++nlink; + from->reachable = TRUE; + } + } + } + + /* There must be at least one path between dag->start and dag->end */ + if (!dag->start->reachable) { + E_ERROR("End node of lattice isolated; unreachable\n"); + goto error_out; + } + + for (node = dag->nodes; node; node = node->next) { + /* Change node->{fef,lef} from bptbl indices to frames. */ + node->fef = ngs->bp_table[node->fef].frame; + node->lef = ngs->bp_table[node->lef].frame; + /* Find base wid for nodes. */ + node->basewid = dict_basewid(search->dict, node->wid); + } + + /* Link nodes with alternate pronunciations at the same timepoint. */ + for (node = dag->nodes; node; node = node->next) { + ps_latnode_t *alt; + /* Scan forward to find the next alternate, then stop. */ + for (alt = node->next; alt && alt->sf == node->sf; alt = alt->next) { + if (alt->basewid == node->basewid) { + alt->alt = node->alt; + node->alt = alt; + break; + } + } + } + E_INFO("Lattice has %d nodes, %d links\n", dag->n_nodes, nlink); + + /* Minor hack: If the final node is a filler word and not , + * then set its base word ID to , so that the language model + * scores won't be screwed up. 
*/ + if (dict_filler_word(ps_search_dict(ngs), dag->end->wid)) + dag->end->basewid = ps_search_finish_wid(ngs); + + /* Free nodes unreachable from dag->end and their links */ + ps_lattice_delete_unreachable(dag); + + /* Add silprob and fillprob to corresponding links */ + ps_lattice_penalize_fillers(dag, ngs->silpen, ngs->fillpen); + + search->dag = dag; + return dag; + +error_out: + ps_lattice_free(dag); + return NULL; +} + +void ngram_search_set_lm(ngram_model_t *lm) +{ + default_lm = ngram_model_retain(lm); +} + diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search.h new file mode 100644 index 0000000000000000000000000000000000000000..a575fa32fcb55455ddeb058f97398c5785743e43 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search.h @@ -0,0 +1,449 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file ngram_search.h N-Gram based multi-pass search ("FBS") + */ + +#ifndef __NGRAM_SEARCH_H__ +#define __NGRAM_SEARCH_H__ + +/* SphinxBase headers. */ +#include +#include +#include +#include +#include + +/* Local headers. */ +#include +#include "pocketsphinx_internal.h" +#include "hmm.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * Lexical tree node data type. + * + * Not the first HMM for words, which multiplex HMMs based on + * different left contexts. This structure is used both in the + * dynamic HMM tree structure and in the per-word last-phone right + * context fanout. + */ +typedef struct chan_s { + hmm_t hmm; /**< Basic HMM structure. 
This *must* be first in + the structure because chan_t and root_chan_t are + sometimes used interchangeably */ + struct chan_s *next; /**< first descendant of this channel; or, in the + case of the last phone of a word, the next + alternative right context channel */ + struct chan_s *alt; /**< sibling; i.e., next descendant of parent HMM */ + + int32 ciphone; /**< ciphone for this node */ + union { + int32 penult_phn_wid; /**< list of words whose last phone follows this one; + this field indicates the first of the list; the + rest must be built up in a separate array. Used + only within HMM tree. -1 if none */ + int32 rc_id; /**< right-context id for last phone of words */ + } info; +} chan_t; + +/** + * Lexical tree node data type for the first phone (root) of each dynamic HMM tree + * structure. + * + * Each state may have a different parent static HMM. Most fields are + * similar to those in chan_t. + */ +typedef struct root_chan_s { + hmm_t hmm; /**< Basic HMM structure. This *must* be first in + the structure because chan_t and root_chan_t are + sometimes used interchangeably. 
*/ + chan_t *next; /**< first descendant of this channel */ + + int32 penult_phn_wid; + int32 this_phn_wid; /**< list of words consisting of this single phone; + actually the first of the list, like penult_phn_wid; + -1 if none */ + int16 ciphone; /**< first ciphone of this node; all words rooted at this + node begin with this ciphone */ + int16 ci2phone; /**< second ciphone of this node; one root HMM for each + unique right context */ +} root_chan_t; + +/** + * Back pointer table (forward pass lattice; actually a tree) + */ +typedef struct bptbl_s { + frame_idx_t frame; /**< start or end frame */ + uint8 valid; /**< For absolute pruning */ + uint8 refcnt; /**< Reference count (number of successors) */ + int32 wid; /**< Word index */ + int32 bp; /**< Back Pointer */ + int32 score; /**< Score (best among all right contexts) */ + int32 s_idx; /**< Start of BScoreStack for various right contexts*/ + int32 real_wid; /**< wid of this or latest predecessor real word */ + int32 prev_real_wid; /**< wid of second-last real word */ + int16 last_phone; /**< last phone of this word */ + int16 last2_phone; /**< next-to-last phone of this word */ +} bptbl_t; + +/** + * Segmentation "iterator" for backpointer table results. + */ +typedef struct bptbl_seg_s { + ps_seg_t base; /**< Base structure. */ + int32 *bpidx; /**< Sequence of backpointer IDs. */ + int16 n_bpidx; /**< Number of backpointer IDs. */ + int16 cur; /**< Current position in bpidx. */ +} bptbl_seg_t; + +/* + * Candidates words for entering their last phones. Cleared and rebuilt in each + * frame. + * NOTE: candidates can only be multi-phone, real dictionary words. 
+ */ +typedef struct lastphn_cand_s { + int32 wid; + int32 score; + int32 bp; + int32 next; /* next candidate starting at the same frame */ +} lastphn_cand_t; + +/* + * Since the same instance of a word (i.e., a word/start-frame pair, judging + * by the sf field below) reaches its last + * phone several times, we can compute its best BP and LM transition score info + * just the first time and cache it for future occurrences. Structure for such + * a cache. + */ +typedef struct last_ltrans_s { + int32 sf; /* Start frame */ + int32 dscr; /* Delta-score upon entering last phone */ + int32 bp; /* Best BP */ +} last_ltrans_t; + +#define CAND_SF_ALLOCSIZE 32 +typedef struct cand_sf_s { + int32 bp_ef; + int32 cand; +} cand_sf_t; + +/* + * Structure for reorganizing the BP table entries in the current frame according + * to distinct right context ci-phones. Each entry contains the best BP entry for + * a given right context. Each successor word will pick up the correct entry based + * on its first ci-phone. + */ +typedef struct bestbp_rc_s { + int32 score; + int32 path; /* BP table index corresponding to this entry */ + int32 lc; /* right most ci-phone of above BP entry word */ +} bestbp_rc_t; + +/* Sentinel backpointer index: backpointer chains are followed until this value is reached. */ +#define NO_BP -1 + +/** + * Various statistics for profiling. + */ +typedef struct ngram_search_stats_s { + int32 n_phone_eval; + int32 n_root_chan_eval; + int32 n_nonroot_chan_eval; + int32 n_last_chan_eval; + int32 n_word_lastchan_eval; + int32 n_lastphn_cand_utt; + int32 n_fwdflat_chan; + int32 n_fwdflat_words; + int32 n_fwdflat_word_transition; + int32 n_senone_active_utt; +} ngram_search_stats_t; + + +/** + * N-Gram search module structure. + */ +struct ngram_search_s { + ps_search_t base; + ngram_model_t *lmset; /**< Set of language models. */ + hmm_context_t *hmmctx; /**< HMM context. */ + + /* Flags to quickly indicate which passes are enabled. */ + uint8 fwdtree; + uint8 fwdflat; + uint8 bestpath; + + /* State of processing. 
*/ + uint8 done; + + /* Allocators */ + listelem_alloc_t *chan_alloc; /**< For chan_t */ + listelem_alloc_t *root_chan_alloc; /**< For root_chan_t */ + listelem_alloc_t *latnode_alloc; /**< For latnode_t */ + + /** + * Search structure of HMM instances. + * + * The word triphone sequences (HMM instances) are transformed + * into tree structures, one tree per unique left triphone in the + * entire dictionary (actually diphone, since its left context + * varies dynamically during the search process). The entire set + * of trees of channels is allocated once and for all during + * initialization (since dynamic management of active CHANs is + * time consuming), with one exception: the last phones of words, + * that need multiple right context modelling, are not maintained + * in this static structure since there are too many of them and + * few are active at any time. Instead they are maintained as + * linked lists of CHANs, one list per word, and each CHAN in this + * set is allocated only on demand and freed if inactive. + */ + root_chan_t *root_chan; /**< Roots of search tree. */ + int32 n_root_chan_alloc; /**< Number of root_chan allocated */ + int32 n_root_chan; /**< Number of valid root_chan */ + int32 n_nonroot_chan; /**< Number of valid non-root channels */ + int32 max_nonroot_chan; /**< Maximum possible number of non-root channels */ + root_chan_t *rhmm_1ph; /**< Root HMMs for single-phone words */ + + /** + * Channels associated with a given word (only used for right + * contexts, single-phone words in fwdtree search, and word HMMs + * in fwdflat search). WARNING: For single-phone words and + * fwdflat search, this actually contains pointers to root_chan_t, + * which are allocated using root_chan_alloc. This is a + * suboptimal state of affairs. + */ + chan_t **word_chan; + bitvec_t *word_active; /**< array of active flags for all words. 
*/ + + /** + * Each node in the HMM tree structure may point to a set of words + * whose last phone would follow that node in the tree structure + * (but is not included in the tree structure for reasons + * explained above). The channel node points to one word in this + * set of words. The remaining words are linked through + * homophone_set[]. + * + * Single-phone words are not represented in the HMM tree; they + * are kept in word_chan. + * + * Specifically, homophone_set[w] = wid of next word in the same + * set as w. + */ + int32 *homophone_set; + int32 *single_phone_wid; /**< list of single-phone word ids */ + int32 n_1ph_words; /**< Number single phone words in dict (total) */ + int32 n_1ph_LMwords; /**< Number single phone dict words also in LM; + these come first in single_phone_wid */ + /** + * Array of active channels for current and next frame. + * + * In any frame, only some HMM tree nodes are active. + * active_chan_list[f mod 2] = list of nonroot channels in the HMM + * tree active in frame f. + */ + chan_t ***active_chan_list; + int32 n_active_chan[2]; /**< Number entries in active_chan_list */ + /** + * Array of active multi-phone words for current and next frame. + * + * Similarly to active_chan_list, active_word_list[f mod 2] = list + * of word ids for which active channels exist in word_chan in + * frame f. + * + * Statically allocated single-phone words are always active and + * should not appear in this list. + */ + int32 **active_word_list; + int32 n_active_word[2]; /**< Number entries in active_word_list */ + + /* + * FIXME: Document all of these bits. 
+ */ + lastphn_cand_t *lastphn_cand; + int32 n_lastphn_cand; + last_ltrans_t *last_ltrans; /* one per word */ + int32 cand_sf_alloc; + cand_sf_t *cand_sf; + bestbp_rc_t *bestbp_rc; + + bptbl_t *bp_table; /* Forward pass lattice */ + int32 bpidx; /* First free BPTable entry */ + int32 bp_table_size; + int32 *bscore_stack; /* Score stack for all possible right contexts */ + int32 bss_head; /* First free BScoreStack entry */ + int32 bscore_stack_size; + + int32 n_frame_alloc; /**< Number of frames allocated in bp_table_idx and friends. */ + int32 n_frame; /**< Number of frames actually present. */ + int32 *bp_table_idx; /* First BPTable entry for each frame */ + int32 *word_lat_idx; /* BPTable index for any word in current frame; + cleared before each frame */ + + /* + * Flat lexicon (2nd pass) search stuff. + */ + ps_latnode_t **frm_wordlist; /**< List of active words in each frame. */ + int32 *fwdflat_wordlist; /**< List of active word IDs for utterance. */ + bitvec_t *expand_word_flag; /**< One flag per word; ngram_fwdflat_expand_all sets it for every word it puts in expand_word_list. */ + int32 *expand_word_list; /**< Word IDs to expand; ngram_fwdflat_expand_all terminates the list with -1. */ + int32 n_expand_words; /**< Number of entries in expand_word_list. */ + int32 min_ef_width; /**< Read from -fwdflatefwid; build_fwdflat_wordlist drops words whose (last - first) end frame is below this. */ + int32 max_sf_win; /**< Read from -fwdflatsfwin. */ + float32 fwdflat_fwdtree_lw_ratio; + + int32 best_score; /**< Best Viterbi path score. */ + int32 last_phone_best_score; /**< Best Viterbi path score for last phone. */ + int32 renormalized; + + /* + * DAG (3rd pass) search stuff. + */ + float32 bestpath_fwdtree_lw_ratio; + float32 ascale; /**< Acoustic score scale for posterior probabilities. */ + + ngram_search_stats_t st; /**< Various statistics for profiling. */ + ptmr_t fwdtree_perf; + ptmr_t fwdflat_perf; /**< Timer reset and started in ngram_fwdflat_start. 
*/ + ptmr_t bestpath_perf; /**< Timer around the bestpath search in ngram_search_seg_iter. */ + int32 n_tot_frame; /**< Frame count used by ngram_fwdflat_deinit for its TOTAL xRT report. */ + + /* A collection of beam widths. */ + int32 beam; + int32 dynamic_beam; + int32 pbeam; + int32 wbeam; + int32 lpbeam; + int32 lponlybeam; + int32 fwdflatbeam; + int32 fwdflatwbeam; + int32 fillpen; /**< Passed to ps_lattice_penalize_fillers by ngram_search_lattice. */ + int32 silpen; /**< Passed to ps_lattice_penalize_fillers by ngram_search_lattice. */ + int32 wip; + int32 nwpen; + int32 pip; + int32 maxwpf; + int32 maxhmmpf; +}; +typedef struct ngram_search_s ngram_search_t; + +/** + * Initialize the N-Gram search module. 
+ */ +ps_search_t *ngram_search_init(const char *name, + ngram_model_t *lm, + cmd_ln_t *config, + acmod_t *acmod, + dict_t *dict, + dict2pid_t *d2p); + +/** + * Finalize the N-Gram search module. + */ +void ngram_search_free(ps_search_t *ngs); + +/** + * Record the current frame's index in the backpointer table. + * + * @return the current backpointer index. + */ +int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx); + +/** + * Enter a word in the backpointer table. + */ +void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w, + int32 score, int32 path, int32 rc); + +/** + * Allocate last phone channels for all possible right contexts for word w. + */ +void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w); + +/** + * Free last phone channels for all possible right contexts for word w + * (presumably the counterpart of ngram_search_alloc_all_rc; the + * implementation is not part of this header - confirm there). + */ +void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w); + +/** + * Find the best word exit for the current frame in the backpointer table. + * + * @return the backpointer index of the best word exit. + */ +POCKETSPHINX_EXPORT +int ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score); + +/** + * Backtrace from a given backpointer index to obtain a word hypothesis. + * + * @return a read-only string with the best hypothesis. + */ +POCKETSPHINX_EXPORT +char const *ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx); + +/** + * Compute language and acoustic scores for backpointer table entries. + */ +void ngram_compute_seg_scores(ngram_search_t *ngs, float32 lwf); + +/** + * Construct a word lattice from the current hypothesis. + * + * @return the lattice, which is also stored in search->dag (an existing + * lattice covering the same number of frames is returned as-is), + * or NULL if no lattice could be built. 
+ */ +POCKETSPHINX_EXPORT +ps_lattice_t *ngram_search_lattice(ps_search_t *search); + +/** + * Get the exit score for a backpointer entry with a given right context. + * + * @return the exit score; ngram_search_lattice() treats WORST_SCORE as + * meaning that no exit exists for that right context. + */ +int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone); + +/** + * Sets the global language model. 
+ * + * Sets the language model to use if nothing was passed in configuration + */ +void ngram_search_set_lm(ngram_model_t *lm); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __NGRAM_SEARCH_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search_fwdflat.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search_fwdflat.c new file mode 100644 index 0000000000000000000000000000000000000000..c393f04991733e8642f0064581fdb75d7fa4ff48 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search_fwdflat.c @@ -0,0 +1,965 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file ngram_search_fwdflat.c Flat lexicon search. + */ + +/* System headers. */ +#include +#include + +/* SphinxBase headers. */ +#include +#include +#include + +/* Local headers. */ +#include "ngram_search.h" +#include "ps_lattice_internal.h" + +/* Turn this on to dump channels for debugging */ +#define __CHAN_DUMP__ 0 +#if __CHAN_DUMP__ +#define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr) +#else +#define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm) +#endif + +static void +ngram_fwdflat_expand_all(ngram_search_t *ngs) +{ + int n_words, i; + + /* For all "real words" (not fillers or /) in the dictionary, + * + * 1) Add the ones which are in the LM to the fwdflat wordlist + * 2) And to the expansion list (since we are expanding all) + */ + ngs->n_expand_words = 0; + n_words = ps_search_n_words(ngs); + bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs)); + for (i = 0; i < n_words; ++i) { + if (!ngram_model_set_known_wid(ngs->lmset, + dict_basewid(ps_search_dict(ngs),i))) + continue; + ngs->fwdflat_wordlist[ngs->n_expand_words] = i; + ngs->expand_word_list[ngs->n_expand_words] = i; + bitvec_set(ngs->expand_word_flag, i); + ngs->n_expand_words++; + } + E_INFO("Utterance vocabulary contains %d words\n", ngs->n_expand_words); + ngs->expand_word_list[ngs->n_expand_words] = -1; + 
ngs->fwdflat_wordlist[ngs->n_expand_words] = -1; +} + +static void +ngram_fwdflat_allocate_1ph(ngram_search_t *ngs) +{ + dict_t *dict = ps_search_dict(ngs); + int n_words = ps_search_n_words(ngs); + int i, w; + + /* Allocate single-phone words, since they won't have + * been allocated for us by fwdtree initialization. */ + ngs->n_1ph_words = 0; + for (w = 0; w < n_words; w++) { + if (dict_is_single_phone(dict, w)) + ++ngs->n_1ph_words; + } + ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words, + sizeof(*ngs->single_phone_wid)); + ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph)); + i = 0; + for (w = 0; w < n_words; w++) { + if (!dict_is_single_phone(dict, w)) + continue; + + /* DICT2PID location */ + ngs->rhmm_1ph[i].ciphone = dict_first_phone(dict, w); + ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef); + hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE, + /* ssid */ bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, + ngs->rhmm_1ph[i].ciphone), + /* tmatid */ bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, + ngs->rhmm_1ph[i].ciphone)); + ngs->rhmm_1ph[i].next = NULL; + ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]); + ngs->single_phone_wid[i] = w; + i++; + } +} + +static void +ngram_fwdflat_free_1ph(ngram_search_t *ngs) +{ + int i, w; + int n_words = ps_search_n_words(ngs); + + for (i = w = 0; w < n_words; ++w) { + if (!dict_is_single_phone(ps_search_dict(ngs), w)) + continue; + hmm_deinit(&ngs->rhmm_1ph[i].hmm); + ++i; + } + ckd_free(ngs->rhmm_1ph); + ngs->rhmm_1ph = NULL; + ckd_free(ngs->single_phone_wid); +} + +void +ngram_fwdflat_init(ngram_search_t *ngs) +{ + int n_words; + + n_words = ps_search_n_words(ngs); + ngs->fwdflat_wordlist = ckd_calloc(n_words + 1, sizeof(*ngs->fwdflat_wordlist)); + ngs->expand_word_flag = bitvec_alloc(n_words); + ngs->expand_word_list = ckd_calloc(n_words + 1, sizeof(*ngs->expand_word_list)); + ngs->frm_wordlist = ckd_calloc(ngs->n_frame_alloc, sizeof(*ngs->frm_wordlist)); + 
ngs->min_ef_width = cmd_ln_int32_r(ps_search_config(ngs), "-fwdflatefwid"); + ngs->max_sf_win = cmd_ln_int32_r(ps_search_config(ngs), "-fwdflatsfwin"); + E_INFO("fwdflat: min_ef_width = %d, max_sf_win = %d\n", + ngs->min_ef_width, ngs->max_sf_win); + + /* No tree-search; pre-build the expansion list, including all LM words. */ + if (!ngs->fwdtree) { + /* Build full expansion list from LM words. */ + ngram_fwdflat_expand_all(ngs); + /* Allocate single phone words. */ + ngram_fwdflat_allocate_1ph(ngs); + } +} + +void +ngram_fwdflat_deinit(ngram_search_t *ngs) +{ + double n_speech = (double)ngs->n_tot_frame + / cmd_ln_int32_r(ps_search_config(ngs), "-frate"); + + E_INFO("TOTAL fwdflat %.2f CPU %.3f xRT\n", + ngs->fwdflat_perf.t_tot_cpu, + ngs->fwdflat_perf.t_tot_cpu / n_speech); + E_INFO("TOTAL fwdflat %.2f wall %.3f xRT\n", + ngs->fwdflat_perf.t_tot_elapsed, + ngs->fwdflat_perf.t_tot_elapsed / n_speech); + + /* Free single-phone words if we allocated them. */ + if (!ngs->fwdtree) { + ngram_fwdflat_free_1ph(ngs); + } + ckd_free(ngs->fwdflat_wordlist); + bitvec_free(ngs->expand_word_flag); + ckd_free(ngs->expand_word_list); + ckd_free(ngs->frm_wordlist); +} + +int +ngram_fwdflat_reinit(ngram_search_t *ngs) +{ + /* Reallocate things that depend on the number of words. */ + int n_words; + + ckd_free(ngs->fwdflat_wordlist); + ckd_free(ngs->expand_word_list); + bitvec_free(ngs->expand_word_flag); + n_words = ps_search_n_words(ngs); + ngs->fwdflat_wordlist = ckd_calloc(n_words + 1, sizeof(*ngs->fwdflat_wordlist)); + ngs->expand_word_flag = bitvec_alloc(n_words); + ngs->expand_word_list = ckd_calloc(n_words + 1, sizeof(*ngs->expand_word_list)); + + /* No tree-search; take care of the expansion list and single phone words. */ + if (!ngs->fwdtree) { + /* Free single-phone words. */ + ngram_fwdflat_free_1ph(ngs); + /* Reallocate word_chan. 
*/ + ckd_free(ngs->word_chan); + ngs->word_chan = ckd_calloc(dict_size(ps_search_dict(ngs)), + sizeof(*ngs->word_chan)); + /* Rebuild full expansion list from LM words. */ + ngram_fwdflat_expand_all(ngs); + /* Allocate single phone words. */ + ngram_fwdflat_allocate_1ph(ngs); + } + /* Otherwise there is nothing to do since the wordlist is + * generated anew every utterance. */ + return 0; +} + +/** + * Find all active words in backpointer table and sort by frame. + */ +static void +build_fwdflat_wordlist(ngram_search_t *ngs) +{ + int32 i, f, sf, ef, wid, nwd; + bptbl_t *bp; + ps_latnode_t *node, *prevnode, *nextnode; + + /* No tree-search, use statically allocated wordlist. */ + if (!ngs->fwdtree) + return; + + memset(ngs->frm_wordlist, 0, ngs->n_frame_alloc * sizeof(*ngs->frm_wordlist)); + + /* Scan the backpointer table for all active words and record + * their exit frames. */ + for (i = 0, bp = ngs->bp_table; i < ngs->bpidx; i++, bp++) { + sf = (bp->bp < 0) ? 0 : ngs->bp_table[bp->bp].frame + 1; + ef = bp->frame; + wid = bp->wid; + + /* Anything that can be transitioned to in the LM can go in + * the word list. */ + if (!ngram_model_set_known_wid(ngs->lmset, + dict_basewid(ps_search_dict(ngs), wid))) + continue; + + /* Look for it in the wordlist. */ + for (node = ngs->frm_wordlist[sf]; node && (node->wid != wid); + node = node->next); + + /* Update last end frame. 
*/ + if (node) + node->lef = ef; + else { + /* New node; link to head of list */ + node = listelem_malloc(ngs->latnode_alloc); + node->wid = wid; + node->fef = node->lef = ef; + + node->next = ngs->frm_wordlist[sf]; + ngs->frm_wordlist[sf] = node; + } + } + + /* Eliminate "unlikely" words, for which there are too few end points */ + for (f = 0; f < ngs->n_frame; f++) { + prevnode = NULL; + for (node = ngs->frm_wordlist[f]; node; node = nextnode) { + nextnode = node->next; + /* Word has too few endpoints */ + if ((node->lef - node->fef < ngs->min_ef_width) || + /* Word is and doesn't actually end in last frame */ + ((node->wid == ps_search_finish_wid(ngs)) && (node->lef < ngs->n_frame - 1))) { + if (!prevnode) + ngs->frm_wordlist[f] = nextnode; + else + prevnode->next = nextnode; + listelem_free(ngs->latnode_alloc, node); + } + else + prevnode = node; + } + } + + /* Form overall wordlist for 2nd pass */ + nwd = 0; + bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs)); + for (f = 0; f < ngs->n_frame; f++) { + for (node = ngs->frm_wordlist[f]; node; node = node->next) { + if (!bitvec_is_set(ngs->word_active, node->wid)) { + bitvec_set(ngs->word_active, node->wid); + ngs->fwdflat_wordlist[nwd++] = node->wid; + } + } + } + ngs->fwdflat_wordlist[nwd] = -1; + E_INFO("Utterance vocabulary contains %d words\n", nwd); +} + +/** + * Build HMM network for one utterance of fwdflat search. + */ +static void +build_fwdflat_chan(ngram_search_t *ngs) +{ + int32 i, wid, p; + root_chan_t *rhmm; + chan_t *hmm, *prevhmm; + dict_t *dict; + dict2pid_t *d2p; + + dict = ps_search_dict(ngs); + d2p = ps_search_dict2pid(ngs); + + /* Build word HMMs for each word in the lattice. 
*/ + for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) { + wid = ngs->fwdflat_wordlist[i]; + + /* Single-phone words are permanently allocated */ + if (dict_is_single_phone(dict, wid)) + continue; + + assert(ngs->word_chan[wid] == NULL); + + /* Multiplex root HMM for first phone (one root per word, flat + * lexicon). diphone is irrelevant here, for the time being, + * at least. */ + rhmm = listelem_malloc(ngs->root_chan_alloc); + rhmm->ci2phone = dict_second_phone(dict, wid); + rhmm->ciphone = dict_first_phone(dict, wid); + rhmm->next = NULL; + hmm_init(ngs->hmmctx, &rhmm->hmm, TRUE, + bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, rhmm->ciphone), + bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, rhmm->ciphone)); + + /* HMMs for word-internal phones */ + prevhmm = NULL; + for (p = 1; p < dict_pronlen(dict, wid) - 1; p++) { + hmm = listelem_malloc(ngs->chan_alloc); + hmm->ciphone = dict_pron(dict, wid, p); + hmm->info.rc_id = (p == dict_pronlen(dict, wid) - 1) ? 0 : -1; + hmm->next = NULL; + hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, + dict2pid_internal(d2p,wid,p), + bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, hmm->ciphone)); + + if (prevhmm) + prevhmm->next = hmm; + else + rhmm->next = hmm; + + prevhmm = hmm; + } + + /* Right-context phones */ + ngram_search_alloc_all_rc(ngs, wid); + + /* Link in just allocated right-context phones */ + if (prevhmm) + prevhmm->next = ngs->word_chan[wid]; + else + rhmm->next = ngs->word_chan[wid]; + ngs->word_chan[wid] = (chan_t *) rhmm; + } + +} + +void +ngram_fwdflat_start(ngram_search_t *ngs) +{ + root_chan_t *rhmm; + int i; + + ptmr_reset(&ngs->fwdflat_perf); + ptmr_start(&ngs->fwdflat_perf); + build_fwdflat_wordlist(ngs); + build_fwdflat_chan(ngs); + + ngs->bpidx = 0; + ngs->bss_head = 0; + + for (i = 0; i < ps_search_n_words(ngs); i++) + ngs->word_lat_idx[i] = NO_BP; + + /* Reset the permanently allocated single-phone words, since they + * may have junk left over in them from previous searches. 
*/ + for (i = 0; i < ngs->n_1ph_words; i++) { + int32 w = ngs->single_phone_wid[i]; + rhmm = (root_chan_t *) ngs->word_chan[w]; + hmm_clear(&rhmm->hmm); + } + + /* Start search with ; word_chan[] is permanently allocated */ + rhmm = (root_chan_t *) ngs->word_chan[ps_search_start_wid(ngs)]; + hmm_enter(&rhmm->hmm, 0, NO_BP, 0); + ngs->active_word_list[0][0] = ps_search_start_wid(ngs); + ngs->n_active_word[0] = 1; + + ngs->best_score = 0; + ngs->renormalized = FALSE; + + for (i = 0; i < ps_search_n_words(ngs); i++) + ngs->last_ltrans[i].sf = -1; + + if (!ngs->fwdtree) + ngs->n_frame = 0; + + ngs->st.n_fwdflat_chan = 0; + ngs->st.n_fwdflat_words = 0; + ngs->st.n_fwdflat_word_transition = 0; + ngs->st.n_senone_active_utt = 0; +} + +static void +compute_fwdflat_sen_active(ngram_search_t *ngs, int frame_idx) +{ + int32 i, nw, w; + int32 *awl; + root_chan_t *rhmm; + chan_t *hmm; + + acmod_clear_active(ps_search_acmod(ngs)); + + nw = ngs->n_active_word[frame_idx & 0x1]; + awl = ngs->active_word_list[frame_idx & 0x1]; + + for (i = 0; i < nw; i++) { + w = *(awl++); + rhmm = (root_chan_t *)ngs->word_chan[w]; + if (hmm_frame(&rhmm->hmm) == frame_idx) { + acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm); + } + + for (hmm = rhmm->next; hmm; hmm = hmm->next) { + if (hmm_frame(&hmm->hmm) == frame_idx) { + acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm); + } + } + } +} + +static void +fwdflat_eval_chan(ngram_search_t *ngs, int frame_idx) +{ + int32 i, w, nw, bestscore; + int32 *awl; + root_chan_t *rhmm; + chan_t *hmm; + + nw = ngs->n_active_word[frame_idx & 0x1]; + awl = ngs->active_word_list[frame_idx & 0x1]; + bestscore = WORST_SCORE; + + ngs->st.n_fwdflat_words += nw; + + /* Scan all active words. 
*/ + for (i = 0; i < nw; i++) { + w = *(awl++); + rhmm = (root_chan_t *) ngs->word_chan[w]; + if (hmm_frame(&rhmm->hmm) == frame_idx) { + int32 score = chan_v_eval(rhmm); + if ((score BETTER_THAN bestscore) && (w != ps_search_finish_wid(ngs))) + bestscore = score; + ngs->st.n_fwdflat_chan++; + } + + for (hmm = rhmm->next; hmm; hmm = hmm->next) { + if (hmm_frame(&hmm->hmm) == frame_idx) { + int32 score = chan_v_eval(hmm); + if (score BETTER_THAN bestscore) + bestscore = score; + ngs->st.n_fwdflat_chan++; + } + } + } + + ngs->best_score = bestscore; +} + +static void +fwdflat_prune_chan(ngram_search_t *ngs, int frame_idx) +{ + int32 i, nw, cf, nf, w, pip, newscore, thresh, wordthresh; + int32 *awl; + root_chan_t *rhmm; + chan_t *hmm, *nexthmm; + + cf = frame_idx; + nf = cf + 1; + nw = ngs->n_active_word[cf & 0x1]; + awl = ngs->active_word_list[cf & 0x1]; + bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs)); + + thresh = ngs->best_score + ngs->fwdflatbeam; + wordthresh = ngs->best_score + ngs->fwdflatwbeam; + pip = ngs->pip; + E_DEBUG("frame %d thresh %d wordthresh %d\n", frame_idx, thresh, wordthresh); + + /* Scan all active words. 
*/ + for (i = 0; i < nw; i++) { + w = *(awl++); + rhmm = (root_chan_t *) ngs->word_chan[w]; + /* Propagate active root channels */ + if (hmm_frame(&rhmm->hmm) == cf + && hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) { + hmm_frame(&rhmm->hmm) = nf; + bitvec_set(ngs->word_active, w); + + /* Transitions out of root channel */ + newscore = hmm_out_score(&rhmm->hmm); + if (rhmm->next) { + assert(!dict_is_single_phone(ps_search_dict(ngs), w)); + + newscore += pip; + if (newscore BETTER_THAN thresh) { + hmm = rhmm->next; + /* Enter all right context phones */ + if (hmm->info.rc_id >= 0) { + for (; hmm; hmm = hmm->next) { + if ((hmm_frame(&hmm->hmm) < cf) + || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) { + hmm_enter(&hmm->hmm, newscore, + hmm_out_history(&rhmm->hmm), nf); + } + } + } + /* Just a normal word internal phone */ + else { + if ((hmm_frame(&hmm->hmm) < cf) + || (newscore BETTER_THAN hmm_in_score(&hmm->hmm))) { + hmm_enter(&hmm->hmm, newscore, + hmm_out_history(&rhmm->hmm), nf); + } + } + } + } + else { + assert(dict_is_single_phone(ps_search_dict(ngs), w)); + + /* Word exit for single-phone words (where did their + * whmms come from?) (either from + * ngram_search_fwdtree, or from + * ngram_fwdflat_allocate_1ph(), that's where) */ + if (newscore BETTER_THAN wordthresh) { + ngram_search_save_bp(ngs, cf, w, newscore, + hmm_out_history(&rhmm->hmm), 0); + } + } + } + + /* Transitions out of non-root channels. */ + for (hmm = rhmm->next; hmm; hmm = hmm->next) { + if (hmm_frame(&hmm->hmm) >= cf) { + /* Propagate forward HMMs inside the beam. */ + if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) { + hmm_frame(&hmm->hmm) = nf; + bitvec_set(ngs->word_active, w); + + newscore = hmm_out_score(&hmm->hmm); + /* Word-internal phones */ + if (hmm->info.rc_id < 0) { + newscore += pip; + if (newscore BETTER_THAN thresh) { + nexthmm = hmm->next; + /* Enter all right-context phones. 
*/ + if (nexthmm->info.rc_id >= 0) { + for (; nexthmm; nexthmm = nexthmm->next) { + if ((hmm_frame(&nexthmm->hmm) < cf) + || (newscore BETTER_THAN + hmm_in_score(&nexthmm->hmm))) { + hmm_enter(&nexthmm->hmm, + newscore, + hmm_out_history(&hmm->hmm), + nf); + } + } + } + /* Enter single word-internal phone. */ + else { + if ((hmm_frame(&nexthmm->hmm) < cf) + || (newscore BETTER_THAN + hmm_in_score(&nexthmm->hmm))) { + hmm_enter(&nexthmm->hmm, newscore, + hmm_out_history(&hmm->hmm), nf); + } + } + } + } + /* Right-context phones - apply word beam and exit. */ + else { + if (newscore BETTER_THAN wordthresh) { + ngram_search_save_bp(ngs, cf, w, newscore, + hmm_out_history(&hmm->hmm), + hmm->info.rc_id); + } + } + } + /* Zero out inactive HMMs. */ + else if (hmm_frame(&hmm->hmm) != nf) { + hmm_clear_scores(&hmm->hmm); + } + } + } + } +} + +static void +get_expand_wordlist(ngram_search_t *ngs, int32 frm, int32 win) +{ + int32 f, sf, ef; + ps_latnode_t *node; + + if (!ngs->fwdtree) { + ngs->st.n_fwdflat_word_transition += ngs->n_expand_words; + return; + } + + sf = frm - win; + if (sf < 0) + sf = 0; + ef = frm + win; + if (ef > ngs->n_frame) + ef = ngs->n_frame; + + bitvec_clear_all(ngs->expand_word_flag, ps_search_n_words(ngs)); + ngs->n_expand_words = 0; + + for (f = sf; f < ef; f++) { + for (node = ngs->frm_wordlist[f]; node; node = node->next) { + if (!bitvec_is_set(ngs->expand_word_flag, node->wid)) { + ngs->expand_word_list[ngs->n_expand_words++] = node->wid; + bitvec_set(ngs->expand_word_flag, node->wid); + } + } + } + ngs->expand_word_list[ngs->n_expand_words] = -1; + ngs->st.n_fwdflat_word_transition += ngs->n_expand_words; +} + +static void +fwdflat_word_transition(ngram_search_t *ngs, int frame_idx) +{ + int32 cf, nf, b, thresh, pip, i, nw, w, newscore; + int32 best_silrc_score = 0, best_silrc_bp = 0; /* FIXME: good defaults? 
*/ + bptbl_t *bp; + int32 *rcss; + root_chan_t *rhmm; + int32 *awl; + float32 lwf; + dict_t *dict = ps_search_dict(ngs); + dict2pid_t *d2p = ps_search_dict2pid(ngs); + + cf = frame_idx; + nf = cf + 1; + thresh = ngs->best_score + ngs->fwdflatbeam; + pip = ngs->pip; + best_silrc_score = WORST_SCORE; + lwf = ngs->fwdflat_fwdtree_lw_ratio; + + /* Search for all words starting within a window of this frame. + * These are the successors for words exiting now. */ + get_expand_wordlist(ngs, cf, ngs->max_sf_win); + + /* Scan words exited in current frame */ + for (b = ngs->bp_table_idx[cf]; b < ngs->bpidx; b++) { + xwdssid_t *rssid; + int32 silscore; + + bp = ngs->bp_table + b; + ngs->word_lat_idx[bp->wid] = NO_BP; + + if (bp->wid == ps_search_finish_wid(ngs)) + continue; + + /* DICT2PID location */ + /* Get the mapping from right context phone ID to index in the + * right context table and the bscore_stack. */ + rcss = ngs->bscore_stack + bp->s_idx; + if (bp->last2_phone == -1) + rssid = NULL; + else + rssid = dict2pid_rssid(d2p, bp->last_phone, bp->last2_phone); + + /* Transition to all successor words. */ + for (i = 0; ngs->expand_word_list[i] >= 0; i++) { + int32 n_used; + + w = ngs->expand_word_list[i]; + + /* Get the exit score we recorded in save_bwd_ptr(), or + * something approximating it. */ + if (rssid) + newscore = rcss[rssid->cimap[dict_first_phone(dict, w)]]; + else + newscore = bp->score; + if (newscore == WORST_SCORE) + continue; + /* FIXME: Floating point... */ + newscore += lwf + * (ngram_tg_score(ngs->lmset, + dict_basewid(dict, w), + bp->real_wid, + bp->prev_real_wid, + &n_used) >> SENSCR_SHIFT); + newscore += pip; + + /* Enter the next word */ + if (newscore BETTER_THAN thresh) { + rhmm = (root_chan_t *) ngs->word_chan[w]; + if ((hmm_frame(&rhmm->hmm) < cf) + || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) { + hmm_enter(&rhmm->hmm, newscore, b, nf); + /* DICT2PID: This is where mpx ssids get introduced. 
*/ + /* Look up the ssid to use when entering this mpx triphone. */ + hmm_mpx_ssid(&rhmm->hmm, 0) = + dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone, + dict_last_phone(dict, bp->wid)); + assert(IS_S3SSID(hmm_mpx_ssid(&rhmm->hmm, 0))); + E_DEBUG("ssid %d(%d,%d) = %d\n", + rhmm->ciphone, dict_last_phone(dict, bp->wid), rhmm->ci2phone, + hmm_mpx_ssid(&rhmm->hmm, 0)); + bitvec_set(ngs->word_active, w); + } + } + } + + /* Get the best exit into silence. */ + if (rssid) + silscore = rcss[rssid->cimap[ps_search_acmod(ngs)->mdef->sil]]; + else + silscore = bp->score; + if (silscore BETTER_THAN best_silrc_score) { + best_silrc_score = silscore; + best_silrc_bp = b; + } + } + + /* Transition to */ + newscore = best_silrc_score + ngs->silpen + pip; + if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) { + w = ps_search_silence_wid(ngs); + rhmm = (root_chan_t *) ngs->word_chan[w]; + if ((hmm_frame(&rhmm->hmm) < cf) + || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) { + hmm_enter(&rhmm->hmm, newscore, + best_silrc_bp, nf); + bitvec_set(ngs->word_active, w); + } + } + /* Transition to noise words */ + newscore = best_silrc_score + ngs->fillpen + pip; + if ((newscore BETTER_THAN thresh) && (newscore BETTER_THAN WORST_SCORE)) { + for (w = dict_filler_start(dict); w <= dict_filler_end(dict); w++) { + if (w == ps_search_silence_wid(ngs)) + continue; + + rhmm = (root_chan_t *) ngs->word_chan[w]; + /* Noise words that aren't a single phone will have NULL here. */ + if (rhmm == NULL) + continue; + if ((hmm_frame(&rhmm->hmm) < cf) + || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) { + hmm_enter(&rhmm->hmm, newscore, + best_silrc_bp, nf); + bitvec_set(ngs->word_active, w); + } + } + } + + /* Reset initial channels of words that have become inactive even after word trans. 
*/ + nw = ngs->n_active_word[cf & 0x1]; + awl = ngs->active_word_list[cf & 0x1]; + for (i = 0; i < nw; i++) { + w = *(awl++); + rhmm = (root_chan_t *) ngs->word_chan[w]; + if (hmm_frame(&rhmm->hmm) == cf) { + hmm_clear_scores(&rhmm->hmm); + } + } +} + +static void +fwdflat_renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm) +{ + root_chan_t *rhmm; + chan_t *hmm; + int32 i, nw, cf, w, *awl; + + cf = frame_idx; + + /* Renormalize individual word channels */ + nw = ngs->n_active_word[cf & 0x1]; + awl = ngs->active_word_list[cf & 0x1]; + for (i = 0; i < nw; i++) { + w = *(awl++); + rhmm = (root_chan_t *) ngs->word_chan[w]; + if (hmm_frame(&rhmm->hmm) == cf) { + hmm_normalize(&rhmm->hmm, norm); + } + for (hmm = rhmm->next; hmm; hmm = hmm->next) { + if (hmm_frame(&hmm->hmm) == cf) { + hmm_normalize(&hmm->hmm, norm); + } + } + } + + ngs->renormalized = TRUE; +} + +int +ngram_fwdflat_search(ngram_search_t *ngs, int frame_idx) +{ + int16 const *senscr; + int32 nf, i, j; + int32 *nawl; + + /* Activate our HMMs for the current frame if need be. */ + if (!ps_search_acmod(ngs)->compallsen) + compute_fwdflat_sen_active(ngs, frame_idx); + + /* Compute GMM scores for the current frame. */ + senscr = acmod_score(ps_search_acmod(ngs), &frame_idx); + ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active; + + /* Mark backpointer table for current frame. */ + ngram_search_mark_bptable(ngs, frame_idx); + + /* If the best score is equal to or worse than WORST_SCORE, + * recognition has failed, don't bother to keep trying. 
*/ + if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE) + return 0; + /* Renormalize if necessary */ + if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) { + E_INFO("Renormalizing Scores at frame %d, best score %d\n", + frame_idx, ngs->best_score); + fwdflat_renormalize_scores(ngs, frame_idx, ngs->best_score); + } + + ngs->best_score = WORST_SCORE; + hmm_context_set_senscore(ngs->hmmctx, senscr); + + /* Evaluate HMMs */ + fwdflat_eval_chan(ngs, frame_idx); + /* Prune HMMs and do phone transitions. */ + fwdflat_prune_chan(ngs, frame_idx); + /* Do word transitions. */ + fwdflat_word_transition(ngs, frame_idx); + + /* Create next active word list, skip fillers */ + nf = frame_idx + 1; + nawl = ngs->active_word_list[nf & 0x1]; + for (i = 0, j = 0; ngs->fwdflat_wordlist[i] >= 0; i++) { + int32 wid = ngs->fwdflat_wordlist[i]; + if (bitvec_is_set(ngs->word_active, wid) && wid < ps_search_start_wid(ngs)) { + *(nawl++) = wid; + j++; + } + } + /* Add fillers */ + for (i = ps_search_start_wid(ngs); i < ps_search_n_words(ngs); i++) { + if (bitvec_is_set(ngs->word_active, i)) { + *(nawl++) = i; + j++; + } + } + if (!ngs->fwdtree) + ++ngs->n_frame; + ngs->n_active_word[nf & 0x1] = j; + + /* Return the number of frames processed. */ + return 1; +} + +/** + * Destroy wordlist from the current utterance. + */ +static void +destroy_fwdflat_wordlist(ngram_search_t *ngs) +{ + ps_latnode_t *node, *tnode; + int32 f; + + if (!ngs->fwdtree) + return; + + for (f = 0; f < ngs->n_frame; f++) { + for (node = ngs->frm_wordlist[f]; node; node = tnode) { + tnode = node->next; + listelem_free(ngs->latnode_alloc, node); + } + } +} + +/** + * Free HMM network for one utterance of fwdflat search. 
+ */ +static void +destroy_fwdflat_chan(ngram_search_t *ngs) +{ + int32 i, wid; + + for (i = 0; ngs->fwdflat_wordlist[i] >= 0; i++) { + root_chan_t *rhmm; + chan_t *thmm; + wid = ngs->fwdflat_wordlist[i]; + if (dict_is_single_phone(ps_search_dict(ngs),wid)) + continue; + assert(ngs->word_chan[wid] != NULL); + + /* The first HMM in ngs->word_chan[wid] was allocated with + * ngs->root_chan_alloc, but this will attempt to free it + * using ngs->chan_alloc, which will not work. Therefore we + * free it manually and move the list forward before handing + * it off. */ + rhmm = (root_chan_t *)ngs->word_chan[wid]; + thmm = rhmm->next; + listelem_free(ngs->root_chan_alloc, rhmm); + ngs->word_chan[wid] = thmm; + ngram_search_free_all_rc(ngs, wid); + } +} + +void +ngram_fwdflat_finish(ngram_search_t *ngs) +{ + int32 cf; + + destroy_fwdflat_chan(ngs); + destroy_fwdflat_wordlist(ngs); + bitvec_clear_all(ngs->word_active, ps_search_n_words(ngs)); + + /* This is the number of frames processed. */ + cf = ps_search_acmod(ngs)->output_frame; + /* Add a mark in the backpointer table for one past the final frame. */ + ngram_search_mark_bptable(ngs, cf); + + ptmr_stop(&ngs->fwdflat_perf); + /* Print out some statistics. 
*/ + if (cf > 0) { + double n_speech = (double)(cf + 1) + / cmd_ln_int32_r(ps_search_config(ngs), "-frate"); + E_INFO("%8d words recognized (%d/fr)\n", + ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1)); + E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt, + (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1)); + E_INFO("%8d channels searched (%d/fr)\n", + ngs->st.n_fwdflat_chan, ngs->st.n_fwdflat_chan / (cf + 1)); + E_INFO("%8d words searched (%d/fr)\n", + ngs->st.n_fwdflat_words, ngs->st.n_fwdflat_words / (cf + 1)); + E_INFO("%8d word transitions (%d/fr)\n", + ngs->st.n_fwdflat_word_transition, + ngs->st.n_fwdflat_word_transition / (cf + 1)); + E_INFO("fwdflat %.2f CPU %.3f xRT\n", + ngs->fwdflat_perf.t_cpu, + ngs->fwdflat_perf.t_cpu / n_speech); + E_INFO("fwdflat %.2f wall %.3f xRT\n", + ngs->fwdflat_perf.t_elapsed, + ngs->fwdflat_perf.t_elapsed / n_speech); + } +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search_fwdflat.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search_fwdflat.h new file mode 100644 index 0000000000000000000000000000000000000000..b17d73c3be616ea3e24b58f9b7d5d0699d774471 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search_fwdflat.h @@ -0,0 +1,91 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file ngram_search_fwdflat.h Flat lexicon based Viterbi search. + */ + +#ifndef __NGRAM_SEARCH_FWDFLAT_H__ +#define __NGRAM_SEARCH_FWDFLAT_H__ + +/* SphinxBase headers. */ + +/* Local headers. */ +#include "ngram_search.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * Initialize N-Gram search for fwdflat decoding. + */ +void ngram_fwdflat_init(ngram_search_t *ngs); + +/** + * Release memory associated with fwdflat decoding. + */ +void ngram_fwdflat_deinit(ngram_search_t *ngs); + +/** + * Rebuild search structures for updated language models. 
+ */ +int ngram_fwdflat_reinit(ngram_search_t *ngs); + +/** + * Start fwdflat decoding for an utterance. + */ +void ngram_fwdflat_start(ngram_search_t *ngs); + +/** + * Search one frame forward in an utterance. + */ +int ngram_fwdflat_search(ngram_search_t *ngs, int frame_idx); + +/** + * Finish fwdflat decoding for an utterance. + */ +void ngram_fwdflat_finish(ngram_search_t *ngs); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __NGRAM_SEARCH_FWDFLAT_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search_fwdtree.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search_fwdtree.c new file mode 100644 index 0000000000000000000000000000000000000000..a460b07265a056fdc6e168fa17413d16a710ad5a --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search_fwdtree.c @@ -0,0 +1,1570 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file ngram_search_fwdtree.c Lexicon tree search. + */ + +/* System headers. */ +#include +#include + +/* SphinxBase headers. */ +#include +#include +#include + +/* Local headers. */ +#include "ngram_search_fwdtree.h" +#include "phone_loop_search.h" + +/* Turn this on to dump channels for debugging */ +#define __CHAN_DUMP__ 0 +#if __CHAN_DUMP__ +#define chan_v_eval(chan) hmm_dump_vit_eval(&(chan)->hmm, stderr) +#else +#define chan_v_eval(chan) hmm_vit_eval(&(chan)->hmm) +#endif + +/* + * Allocate that part of the search channel tree structure that is independent of the + * LM in use. + */ +static void +init_search_tree(ngram_search_t *ngs) +{ + int32 w, ndiph, i, n_words, n_ci; + dict_t *dict = ps_search_dict(ngs); + bitvec_t *dimap; + + E_INFO("Initializing search tree\n"); + + n_words = ps_search_n_words(ngs); + ngs->homophone_set = ckd_calloc(n_words, sizeof(*ngs->homophone_set)); + + /* Find #single phone words, and #unique first diphones (#root channels) in dict. 
*/ + ndiph = 0; + ngs->n_1ph_words = 0; + n_ci = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); + /* Allocate a bitvector with flags for each possible diphone. */ + dimap = bitvec_alloc(n_ci * n_ci); + for (w = 0; w < n_words; w++) { + if (!dict_real_word(dict, w)) + continue; + if (dict_is_single_phone(dict, w)) + ++ngs->n_1ph_words; + else { + int ph0, ph1; + ph0 = dict_first_phone(dict, w); + ph1 = dict_second_phone(dict, w); + /* Increment ndiph the first time we see a diphone. */ + if (bitvec_is_clear(dimap, ph0 * n_ci + ph1)) { + bitvec_set(dimap, ph0 * n_ci + ph1); + ++ndiph; + } + } + } + E_INFO("%d unique initial diphones\n", ndiph); + bitvec_free(dimap); + + /* Add remaining dict words (, , , noise words) to single-phone words */ + ngs->n_1ph_words += dict_num_fillers(dict) + 2; + ngs->n_root_chan_alloc = ndiph + 1; + /* Verify that these are all *actually* single-phone words, + * otherwise really bad things will happen to us. */ + for (w = 0; w < n_words; ++w) { + if (dict_real_word(dict, w)) + continue; + if (!dict_is_single_phone(dict, w)) { + E_WARN("Filler word %d = %s has more than one phone, ignoring it.\n", + w, dict_wordstr(dict, w)); + --ngs->n_1ph_words; + } + } + + /* Allocate and initialize root channels */ + ngs->root_chan = + ckd_calloc(ngs->n_root_chan_alloc, sizeof(*ngs->root_chan)); + for (i = 0; i < ngs->n_root_chan_alloc; i++) { + hmm_init(ngs->hmmctx, &ngs->root_chan[i].hmm, TRUE, -1, -1); + ngs->root_chan[i].penult_phn_wid = -1; + ngs->root_chan[i].next = NULL; + } + + /* Permanently allocate and initialize channels for single-phone + * words (1/word). */ + ngs->rhmm_1ph = ckd_calloc(ngs->n_1ph_words, sizeof(*ngs->rhmm_1ph)); + i = 0; + for (w = 0; w < n_words; w++) { + if (!dict_is_single_phone(dict, w)) + continue; + /* Use SIL as right context for these. 
*/ + ngs->rhmm_1ph[i].ci2phone = bin_mdef_silphone(ps_search_acmod(ngs)->mdef); + ngs->rhmm_1ph[i].ciphone = dict_first_phone(dict, w); + hmm_init(ngs->hmmctx, &ngs->rhmm_1ph[i].hmm, TRUE, + bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ngs->rhmm_1ph[i].ciphone), + bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ngs->rhmm_1ph[i].ciphone)); + ngs->rhmm_1ph[i].next = NULL; + + ngs->word_chan[w] = (chan_t *) &(ngs->rhmm_1ph[i]); + i++; + } + + ngs->single_phone_wid = ckd_calloc(ngs->n_1ph_words, + sizeof(*ngs->single_phone_wid)); +} + +/* + * One-time initialization of internal channels in HMM tree. + */ +static void +init_nonroot_chan(ngram_search_t *ngs, chan_t * hmm, int32 ph, int32 ci, int32 tmatid) +{ + hmm->next = NULL; + hmm->alt = NULL; + hmm->info.penult_phn_wid = -1; + hmm->ciphone = ci; + hmm_init(ngs->hmmctx, &hmm->hmm, FALSE, ph, tmatid); +} + +/* + * Allocate and initialize search channel-tree structure. + * At this point, all the root-channels have been allocated and partly initialized + * (as per init_search_tree()), and channels for all the single-phone words have been + * allocated and initialized. None of the interior channels of search-trees have + * been allocated. + * This routine may be called on every utterance, after reinit_search_tree() clears + * the search tree created for the previous utterance. Meant for reconfiguring the + * search tree to suit the currently active LM. 
+ */ +static void +create_search_channels(ngram_search_t *ngs) +{ + chan_t *hmm; + root_chan_t *rhmm; + int32 w, i, j, p, ph, tmatid; + int32 n_words; + dict_t *dict = ps_search_dict(ngs); + dict2pid_t *d2p = ps_search_dict2pid(ngs); + + n_words = ps_search_n_words(ngs); + + E_INFO("Creating search channels\n"); + + for (w = 0; w < n_words; w++) + ngs->homophone_set[w] = -1; + + ngs->n_1ph_LMwords = 0; + ngs->n_root_chan = 0; + ngs->n_nonroot_chan = 0; + + for (w = 0; w < n_words; w++) { + int ciphone, ci2phone; + + /* Ignore dictionary words not in LM */ + if (!ngram_model_set_known_wid(ngs->lmset, dict_basewid(dict, w))) + continue; + + /* Handle single-phone words individually; not in channel tree */ + if (dict_is_single_phone(dict, w)) { + E_DEBUG("single_phone_wid[%d] = %s\n", + ngs->n_1ph_LMwords, dict_wordstr(dict, w)); + ngs->single_phone_wid[ngs->n_1ph_LMwords++] = w; + continue; + } + + /* Find a root channel matching the initial diphone, or + * allocate one if not found. */ + ciphone = dict_first_phone(dict, w); + ci2phone = dict_second_phone(dict, w); + for (i = 0; i < ngs->n_root_chan; ++i) { + if (ngs->root_chan[i].ciphone == ciphone + && ngs->root_chan[i].ci2phone == ci2phone) + break; + } + if (i == ngs->n_root_chan) { + rhmm = &(ngs->root_chan[ngs->n_root_chan]); + rhmm->hmm.tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone); + /* Begin with CI phone? Not sure this makes a difference... */ + hmm_mpx_ssid(&rhmm->hmm, 0) = + bin_mdef_pid2ssid(ps_search_acmod(ngs)->mdef, ciphone); + rhmm->ciphone = ciphone; + rhmm->ci2phone = ci2phone; + ngs->n_root_chan++; + } + else + rhmm = &(ngs->root_chan[i]); + + E_DEBUG("word %s rhmm %d\n", dict_wordstr(dict, w), rhmm - ngs->root_chan); + /* Now, rhmm = root channel for w. 
Go on to remaining phones */ + if (dict_pronlen(dict, w) == 2) { + /* Next phone is the last; not kept in tree; add w to penult_phn_wid set */ + if ((j = rhmm->penult_phn_wid) < 0) + rhmm->penult_phn_wid = w; + else { + for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]); + ngs->homophone_set[j] = w; + } + } + else { + /* Add remaining phones, except the last, to tree */ + ph = dict2pid_internal(d2p, w, 1); + tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, dict_pron(dict, w, 1)); + hmm = rhmm->next; + if (hmm == NULL) { + rhmm->next = hmm = listelem_malloc(ngs->chan_alloc); + init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, 1), tmatid); + ngs->n_nonroot_chan++; + } + else { + chan_t *prev_hmm = NULL; + + for (; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph); hmm = hmm->alt) + prev_hmm = hmm; + if (!hmm) { /* thanks, rkm! */ + prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc); + init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, 1), tmatid); + ngs->n_nonroot_chan++; + } + } + E_DEBUG("phone %s = %d\n", + bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef, + dict_second_phone(dict, w)), ph); + for (p = 2; p < dict_pronlen(dict, w) - 1; p++) { + ph = dict2pid_internal(d2p, w, p); + tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, dict_pron(dict, w, p)); + if (!hmm->next) { + hmm->next = listelem_malloc(ngs->chan_alloc); + hmm = hmm->next; + init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, p), tmatid); + ngs->n_nonroot_chan++; + } + else { + chan_t *prev_hmm = NULL; + + for (hmm = hmm->next; hmm && (hmm_nonmpx_ssid(&hmm->hmm) != ph); + hmm = hmm->alt) + prev_hmm = hmm; + if (!hmm) { /* thanks, rkm! 
*/ + prev_hmm->alt = hmm = listelem_malloc(ngs->chan_alloc); + init_nonroot_chan(ngs, hmm, ph, dict_pron(dict, w, p), tmatid); + ngs->n_nonroot_chan++; + } + } + E_DEBUG("phone %s = %d\n", + bin_mdef_ciphone_str(ps_search_acmod(ngs)->mdef, + dict_pron(dict, w, p)), ph); + } + + /* All but last phone of w in tree; add w to hmm->info.penult_phn_wid set */ + if ((j = hmm->info.penult_phn_wid) < 0) + hmm->info.penult_phn_wid = w; + else { + for (; ngs->homophone_set[j] >= 0; j = ngs->homophone_set[j]); + ngs->homophone_set[j] = w; + } + } + } + + ngs->n_1ph_words = ngs->n_1ph_LMwords; + + /* Add filler words to the array of 1ph words. */ + for (w = 0; w < n_words; ++w) { + /* Skip anything that doesn't actually have a single phone. */ + if (!dict_is_single_phone(dict, w)) + continue; + /* Also skip "real words" and things that are in the LM. */ + if (dict_real_word(dict, w)) + continue; + if (ngram_model_set_known_wid(ngs->lmset, dict_basewid(dict, w))) + continue; + E_DEBUG("single_phone_wid[%d] = %s\n", + ngs->n_1ph_words, dict_wordstr(dict, w)); + ngs->single_phone_wid[ngs->n_1ph_words++] = w; + } + + if (ngs->n_nonroot_chan >= ngs->max_nonroot_chan) { + /* Give some room for channels for new words added dynamically at run time */ + ngs->max_nonroot_chan = ngs->n_nonroot_chan + 128; + E_INFO("Max nonroot chan increased to %d\n", ngs->max_nonroot_chan); + + /* Free old active channel list array if any and allocate new one */ + if (ngs->active_chan_list) + ckd_free_2d(ngs->active_chan_list); + ngs->active_chan_list = ckd_calloc_2d(2, ngs->max_nonroot_chan, + sizeof(**ngs->active_chan_list)); + } + + E_INFO("Created %d root, %d non-root channels, %d single-phone words\n", + ngs->n_root_chan, ngs->n_nonroot_chan, ngs->n_1ph_words); + + if (ngs->n_root_chan + ngs->n_1ph_words == 0) + E_ERROR("No word from the language model has pronunciation in the dictionary\n"); +} + +static void +reinit_search_subtree(ngram_search_t *ngs, chan_t * hmm) +{ + chan_t *child, *sibling; + 
+ /* First free all children under hmm */ + for (child = hmm->next; child; child = sibling) { + sibling = child->alt; + reinit_search_subtree(ngs, child); + } + + /* Now free hmm */ + hmm_deinit(&hmm->hmm); + listelem_free(ngs->chan_alloc, hmm); +} + +/* + * Delete search tree by freeing all interior channels within search tree and + * restoring root channel state to the init state (i.e., just after init_search_tree()). + */ +static void +reinit_search_tree(ngram_search_t *ngs) +{ + int32 i; + chan_t *hmm, *sibling; + + for (i = 0; i < ngs->n_root_chan; i++) { + hmm = ngs->root_chan[i].next; + + while (hmm) { + sibling = hmm->alt; + reinit_search_subtree(ngs, hmm); + hmm = sibling; + } + + ngs->root_chan[i].penult_phn_wid = -1; + ngs->root_chan[i].next = NULL; + } + ngs->n_nonroot_chan = 0; +} + +void +ngram_fwdtree_init(ngram_search_t *ngs) +{ + /* Allocate bestbp_rc, lastphn_cand, last_ltrans */ + ngs->bestbp_rc = ckd_calloc(bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef), + sizeof(*ngs->bestbp_rc)); + ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs), + sizeof(*ngs->lastphn_cand)); + init_search_tree(ngs); + create_search_channels(ngs); +} + +static void +deinit_search_tree(ngram_search_t *ngs) +{ + int i, w, n_words; + + n_words = ps_search_n_words(ngs); + for (i = 0; i < ngs->n_root_chan_alloc; i++) { + hmm_deinit(&ngs->root_chan[i].hmm); + } + if (ngs->rhmm_1ph) { + for (i = w = 0; w < n_words; ++w) { + if (!dict_is_single_phone(ps_search_dict(ngs), w)) + continue; + hmm_deinit(&ngs->rhmm_1ph[i].hmm); + ++i; + } + ckd_free(ngs->rhmm_1ph); + ngs->rhmm_1ph = NULL; + } + ngs->n_root_chan = 0; + ngs->n_root_chan_alloc = 0; + ckd_free(ngs->root_chan); + ngs->root_chan = NULL; + ckd_free(ngs->single_phone_wid); + ngs->single_phone_wid = NULL; + ckd_free(ngs->homophone_set); + ngs->homophone_set = NULL; +} + +void +ngram_fwdtree_deinit(ngram_search_t *ngs) +{ + double n_speech = (double)ngs->n_tot_frame + / cmd_ln_int32_r(ps_search_config(ngs), "-frate"); + + 
E_INFO("TOTAL fwdtree %.2f CPU %.3f xRT\n", + ngs->fwdtree_perf.t_tot_cpu, + ngs->fwdtree_perf.t_tot_cpu / n_speech); + E_INFO("TOTAL fwdtree %.2f wall %.3f xRT\n", + ngs->fwdtree_perf.t_tot_elapsed, + ngs->fwdtree_perf.t_tot_elapsed / n_speech); + + /* Reset non-root channels. */ + reinit_search_tree(ngs); + /* Free the search tree. */ + deinit_search_tree(ngs); + /* Free other stuff. */ + ngs->max_nonroot_chan = 0; + ckd_free_2d(ngs->active_chan_list); + ngs->active_chan_list = NULL; + ckd_free(ngs->cand_sf); + ngs->cand_sf = NULL; + ckd_free(ngs->bestbp_rc); + ngs->bestbp_rc = NULL; + ckd_free(ngs->lastphn_cand); + ngs->lastphn_cand = NULL; +} + +int +ngram_fwdtree_reinit(ngram_search_t *ngs) +{ + /* Reset non-root channels. */ + reinit_search_tree(ngs); + /* Free the search tree. */ + deinit_search_tree(ngs); + /* Reallocate things that depend on the number of words. */ + ckd_free(ngs->lastphn_cand); + ngs->lastphn_cand = ckd_calloc(ps_search_n_words(ngs), + sizeof(*ngs->lastphn_cand)); + ckd_free(ngs->word_chan); + ngs->word_chan = ckd_calloc(ps_search_n_words(ngs), + sizeof(*ngs->word_chan)); + /* Rebuild the search tree. */ + init_search_tree(ngs); + create_search_channels(ngs); + return 0; +} + +void +ngram_fwdtree_start(ngram_search_t *ngs) +{ + ps_search_t *base = (ps_search_t *)ngs; + int32 i, w, n_words; + root_chan_t *rhmm; + + n_words = ps_search_n_words(ngs); + + /* Reset utterance statistics. */ + memset(&ngs->st, 0, sizeof(ngs->st)); + ptmr_reset(&ngs->fwdtree_perf); + ptmr_start(&ngs->fwdtree_perf); + + /* Reset backpointer table. */ + ngs->bpidx = 0; + ngs->bss_head = 0; + + /* Reset word lattice. */ + for (i = 0; i < n_words; ++i) + ngs->word_lat_idx[i] = NO_BP; + + /* Reset active HMM and word lists. */ + ngs->n_active_chan[0] = ngs->n_active_chan[1] = 0; + ngs->n_active_word[0] = ngs->n_active_word[1] = 0; + + /* Reset scores. */ + ngs->best_score = 0; + ngs->renormalized = 0; + + /* Reset other stuff. 
*/ + for (i = 0; i < n_words; i++) + ngs->last_ltrans[i].sf = -1; + ngs->n_frame = 0; + + /* Clear the hypothesis string. */ + ckd_free(base->hyp_str); + base->hyp_str = NULL; + + /* Reset the permanently allocated single-phone words, since they + * may have junk left over in them from FWDFLAT. */ + for (i = 0; i < ngs->n_1ph_words; i++) { + w = ngs->single_phone_wid[i]; + rhmm = (root_chan_t *) ngs->word_chan[w]; + hmm_clear(&rhmm->hmm); + } + + /* Start search with ; word_chan[] is permanently allocated */ + rhmm = (root_chan_t *) ngs->word_chan[dict_startwid(ps_search_dict(ngs))]; + hmm_clear(&rhmm->hmm); + hmm_enter(&rhmm->hmm, 0, NO_BP, 0); +} + +/* + * Mark the active senones for all senones belonging to channels that are active in the + * current frame. + */ +static void +compute_sen_active(ngram_search_t *ngs, int frame_idx) +{ + root_chan_t *rhmm; + chan_t *hmm, **acl; + int32 i, w, *awl; + + acmod_clear_active(ps_search_acmod(ngs)); + + /* Flag active senones for root channels */ + for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) { + if (hmm_frame(&rhmm->hmm) == frame_idx) + acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm); + } + + /* Flag active senones for nonroot channels in HMM tree */ + i = ngs->n_active_chan[frame_idx & 0x1]; + acl = ngs->active_chan_list[frame_idx & 0x1]; + for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) { + acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm); + } + + /* Flag active senones for individual word channels */ + i = ngs->n_active_word[frame_idx & 0x1]; + awl = ngs->active_word_list[frame_idx & 0x1]; + for (w = *(awl++); i > 0; --i, w = *(awl++)) { + for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) { + acmod_activate_hmm(ps_search_acmod(ngs), &hmm->hmm); + } + } + for (i = 0; i < ngs->n_1ph_words; i++) { + w = ngs->single_phone_wid[i]; + rhmm = (root_chan_t *) ngs->word_chan[w]; + + if (hmm_frame(&rhmm->hmm) == frame_idx) + acmod_activate_hmm(ps_search_acmod(ngs), &rhmm->hmm); + } +} + 
+static void +renormalize_scores(ngram_search_t *ngs, int frame_idx, int32 norm) +{ + root_chan_t *rhmm; + chan_t *hmm, **acl; + int32 i, w, *awl; + + /* Renormalize root channels */ + for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) { + if (hmm_frame(&rhmm->hmm) == frame_idx) { + hmm_normalize(&rhmm->hmm, norm); + } + } + + /* Renormalize nonroot channels in HMM tree */ + i = ngs->n_active_chan[frame_idx & 0x1]; + acl = ngs->active_chan_list[frame_idx & 0x1]; + for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) { + hmm_normalize(&hmm->hmm, norm); + } + + /* Renormalize individual word channels */ + i = ngs->n_active_word[frame_idx & 0x1]; + awl = ngs->active_word_list[frame_idx & 0x1]; + for (w = *(awl++); i > 0; --i, w = *(awl++)) { + for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) { + hmm_normalize(&hmm->hmm, norm); + } + } + for (i = 0; i < ngs->n_1ph_words; i++) { + w = ngs->single_phone_wid[i]; + rhmm = (root_chan_t *) ngs->word_chan[w]; + if (hmm_frame(&rhmm->hmm) == frame_idx) { + hmm_normalize(&rhmm->hmm, norm); + } + } + + ngs->renormalized = TRUE; +} + +static int32 +eval_root_chan(ngram_search_t *ngs, int frame_idx) +{ + root_chan_t *rhmm; + int32 i, bestscore; + + bestscore = WORST_SCORE; + for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) { + if (hmm_frame(&rhmm->hmm) == frame_idx) { + int32 score = chan_v_eval(rhmm); + if (score BETTER_THAN bestscore) + bestscore = score; + ++ngs->st.n_root_chan_eval; + } + } + return (bestscore); +} + +static int32 +eval_nonroot_chan(ngram_search_t *ngs, int frame_idx) +{ + chan_t *hmm, **acl; + int32 i, bestscore; + + i = ngs->n_active_chan[frame_idx & 0x1]; + acl = ngs->active_chan_list[frame_idx & 0x1]; + bestscore = WORST_SCORE; + ngs->st.n_nonroot_chan_eval += i; + + for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) { + int32 score = chan_v_eval(hmm); + assert(hmm_frame(&hmm->hmm) == frame_idx); + if (score BETTER_THAN bestscore) + bestscore = score; + } + + return 
bestscore; +} + +static int32 +eval_word_chan(ngram_search_t *ngs, int frame_idx) +{ + root_chan_t *rhmm; + chan_t *hmm; + int32 i, w, bestscore, *awl, j, k; + + k = 0; + bestscore = WORST_SCORE; + awl = ngs->active_word_list[frame_idx & 0x1]; + + i = ngs->n_active_word[frame_idx & 0x1]; + for (w = *(awl++); i > 0; --i, w = *(awl++)) { + assert(bitvec_is_set(ngs->word_active, w)); + bitvec_clear(ngs->word_active, w); + assert(ngs->word_chan[w] != NULL); + + for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) { + int32 score; + + assert(hmm_frame(&hmm->hmm) == frame_idx); + score = chan_v_eval(hmm); + /*printf("eval word chan %d score %d\n", w, score); */ + + if (score BETTER_THAN bestscore) + bestscore = score; + + k++; + } + } + + /* Similarly for statically allocated single-phone words */ + j = 0; + for (i = 0; i < ngs->n_1ph_words; i++) { + int32 score; + + w = ngs->single_phone_wid[i]; + rhmm = (root_chan_t *) ngs->word_chan[w]; + if (hmm_frame(&rhmm->hmm) < frame_idx) + continue; + + score = chan_v_eval(rhmm); + /* printf("eval 1ph word chan %d score %d\n", w, score); */ + if (score BETTER_THAN bestscore && w != ps_search_finish_wid(ngs)) + bestscore = score; + + j++; + } + + ngs->st.n_last_chan_eval += k + j; + ngs->st.n_nonroot_chan_eval += k + j; + ngs->st.n_word_lastchan_eval += + ngs->n_active_word[frame_idx & 0x1] + j; + + return bestscore; +} + +static int32 +evaluate_channels(ngram_search_t *ngs, int16 const *senone_scores, int frame_idx) +{ + int32 bs; + + hmm_context_set_senscore(ngs->hmmctx, senone_scores); + ngs->best_score = eval_root_chan(ngs, frame_idx); + if ((bs = eval_nonroot_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score) + ngs->best_score = bs; + if ((bs = eval_word_chan(ngs, frame_idx)) BETTER_THAN ngs->best_score) + ngs->best_score = bs; + ngs->last_phone_best_score = bs; + + return ngs->best_score; +} + +/* + * Prune currently active root channels for next frame. Also, perform exit + * transitions out of them and activate successors. 
+ * score[] of pruned root chan set to WORST_SCORE elsewhere. + */ +static void +prune_root_chan(ngram_search_t *ngs, int frame_idx) +{ + root_chan_t *rhmm; + chan_t *hmm; + int32 i, nf, w; + int32 thresh, newphone_thresh, lastphn_thresh, newphone_score; + chan_t **nacl; /* next active list */ + lastphn_cand_t *candp; + phone_loop_search_t *pls; + + nf = frame_idx + 1; + thresh = ngs->best_score + ngs->dynamic_beam; + newphone_thresh = ngs->best_score + ngs->pbeam; + lastphn_thresh = ngs->best_score + ngs->lpbeam; + nacl = ngs->active_chan_list[nf & 0x1]; + pls = (phone_loop_search_t *)ps_search_lookahead(ngs); + + for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) { + E_DEBUG("Root channel %d frame %d score %d thresh %d\n", + i, hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm), thresh); + /* First check if this channel was active in current frame */ + if (hmm_frame(&rhmm->hmm) < frame_idx) + continue; + + if (hmm_bestscore(&rhmm->hmm) BETTER_THAN thresh) { + hmm_frame(&rhmm->hmm) = nf; /* rhmm will be active in next frame */ + E_DEBUG("Preserving root channel %d score %d\n", i, hmm_bestscore(&rhmm->hmm)); + /* transitions out of this root channel */ + /* transition to all next-level channels in the HMM tree */ + newphone_score = hmm_out_score(&rhmm->hmm) + ngs->pip; + if (pls != NULL || newphone_score BETTER_THAN newphone_thresh) { + for (hmm = rhmm->next; hmm; hmm = hmm->alt) { + int32 pl_newphone_score = newphone_score + + phone_loop_search_score(pls, hmm->ciphone); + if (pl_newphone_score BETTER_THAN newphone_thresh) { + if ((hmm_frame(&hmm->hmm) < frame_idx) + || (newphone_score BETTER_THAN hmm_in_score(&hmm->hmm))) { + hmm_enter(&hmm->hmm, newphone_score, + hmm_out_history(&rhmm->hmm), nf); + *(nacl++) = hmm; + } + } + } + } + + /* + * Transition to last phone of all words for which this is the + * penultimate phone (the last phones may need multiple right contexts). + * Remember to remove the temporary newword_penalty. 
+ */ + if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) { + for (w = rhmm->penult_phn_wid; w >= 0; + w = ngs->homophone_set[w]) { + int32 pl_newphone_score = newphone_score + + phone_loop_search_score + (pls, dict_last_phone(ps_search_dict(ngs),w)); + E_DEBUG("word %s newphone_score %d\n", dict_wordstr(ps_search_dict(ngs), w), newphone_score); + if (pl_newphone_score BETTER_THAN lastphn_thresh) { + candp = ngs->lastphn_cand + ngs->n_lastphn_cand; + ngs->n_lastphn_cand++; + candp->wid = w; + candp->score = + newphone_score - ngs->nwpen; + candp->bp = hmm_out_history(&rhmm->hmm); + } + } + } + } + } + ngs->n_active_chan[nf & 0x1] = (int)(nacl - ngs->active_chan_list[nf & 0x1]); +} + +/* + * Prune currently active nonroot channels in HMM tree for next frame. Also, perform + * exit transitions out of such channels and activate successors. + */ +static void +prune_nonroot_chan(ngram_search_t *ngs, int frame_idx) +{ + chan_t *hmm, *nexthmm; + int32 nf, w, i; + int32 thresh, newphone_thresh, lastphn_thresh, newphone_score; + chan_t **acl, **nacl; /* active list, next active list */ + lastphn_cand_t *candp; + phone_loop_search_t *pls; + + nf = frame_idx + 1; + + thresh = ngs->best_score + ngs->dynamic_beam; + newphone_thresh = ngs->best_score + ngs->pbeam; + lastphn_thresh = ngs->best_score + ngs->lpbeam; + pls = (phone_loop_search_t *)ps_search_lookahead(ngs); + + acl = ngs->active_chan_list[frame_idx & 0x1]; /* currently active HMMs in tree */ + nacl = ngs->active_chan_list[nf & 0x1] + ngs->n_active_chan[nf & 0x1]; + + for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++); i > 0; + --i, hmm = *(acl++)) { + assert(hmm_frame(&hmm->hmm) >= frame_idx); + + if (hmm_bestscore(&hmm->hmm) BETTER_THAN thresh) { + /* retain this channel in next frame */ + if (hmm_frame(&hmm->hmm) != nf) { + hmm_frame(&hmm->hmm) = nf; + *(nacl++) = hmm; + } + + /* transition to all next-level channel in the HMM tree */ + newphone_score = hmm_out_score(&hmm->hmm) + ngs->pip; + if 
(pls != NULL || newphone_score BETTER_THAN newphone_thresh) { + for (nexthmm = hmm->next; nexthmm; nexthmm = nexthmm->alt) { + int32 pl_newphone_score = newphone_score + + phone_loop_search_score(pls, nexthmm->ciphone); + if ((pl_newphone_score BETTER_THAN newphone_thresh) + && ((hmm_frame(&nexthmm->hmm) < frame_idx) + || (newphone_score + BETTER_THAN hmm_in_score(&nexthmm->hmm)))) { + if (hmm_frame(&nexthmm->hmm) != nf) { + /* Keep this HMM on the active list */ + *(nacl++) = nexthmm; + } + hmm_enter(&nexthmm->hmm, newphone_score, + hmm_out_history(&hmm->hmm), nf); + } + } + } + + /* + * Transition to last phone of all words for which this is the + * penultimate phone (the last phones may need multiple right contexts). + * Remember to remove the temporary newword_penalty. + */ + if (pls != NULL || newphone_score BETTER_THAN lastphn_thresh) { + for (w = hmm->info.penult_phn_wid; w >= 0; + w = ngs->homophone_set[w]) { + int32 pl_newphone_score = newphone_score + + phone_loop_search_score + (pls, dict_last_phone(ps_search_dict(ngs),w)); + if (pl_newphone_score BETTER_THAN lastphn_thresh) { + candp = ngs->lastphn_cand + ngs->n_lastphn_cand; + ngs->n_lastphn_cand++; + candp->wid = w; + candp->score = + newphone_score - ngs->nwpen; + candp->bp = hmm_out_history(&hmm->hmm); + } + } + } + } + else if (hmm_frame(&hmm->hmm) != nf) { + hmm_clear(&hmm->hmm); + } + } + ngs->n_active_chan[nf & 0x1] = (int)(nacl - ngs->active_chan_list[nf & 0x1]); +} + +/* + * Execute the transition into the last phone for all candidates words emerging from + * the HMM tree. Attach LM scores to such transitions. + * (Executed after pruning root and non-root, but before pruning word-chan.) 
+ */ +static void +last_phone_transition(ngram_search_t *ngs, int frame_idx) +{ + int32 i, j, k, nf, bp, bpend, w; + lastphn_cand_t *candp; + int32 *nawl; + int32 thresh; + int32 bestscore, dscr; + chan_t *hmm; + bptbl_t *bpe; + int32 n_cand_sf = 0; + + nf = frame_idx + 1; + nawl = ngs->active_word_list[nf & 0x1]; + ngs->st.n_lastphn_cand_utt += ngs->n_lastphn_cand; + + /* For each candidate word (entering its last phone) */ + /* If best LM score and bp for candidate known use it, else sort cands by startfrm */ + for (i = 0, candp = ngs->lastphn_cand; i < ngs->n_lastphn_cand; i++, candp++) { + int32 start_score; + + /* This can happen if recognition fails. */ + if (candp->bp == -1) + continue; + /* Backpointer entry for it. */ + bpe = &(ngs->bp_table[candp->bp]); + + /* Subtract starting score for candidate, leave it with only word score */ + start_score = ngram_search_exit_score + (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid)); + assert(start_score BETTER_THAN WORST_SCORE); + candp->score -= start_score; + + /* + * If this candidate not occurred in an earlier frame, prepare for finding + * best transition score into last phone; sort by start frame. + */ + /* i.e. if we don't have an entry in last_ltrans for this + * , then create one */ + if (ngs->last_ltrans[candp->wid].sf != bpe->frame + 1) { + /* Look for an entry in cand_sf matching the backpointer + * for this candidate. */ + for (j = 0; j < n_cand_sf; j++) { + if (ngs->cand_sf[j].bp_ef == bpe->frame) + break; + } + /* Oh, we found one, so chain onto it. */ + if (j < n_cand_sf) + candp->next = ngs->cand_sf[j].cand; + else { + /* Nope, let's make a new one, allocating cand_sf if necessary. 
*/ + if (n_cand_sf >= ngs->cand_sf_alloc) { + if (ngs->cand_sf_alloc == 0) { + ngs->cand_sf = + ckd_calloc(CAND_SF_ALLOCSIZE, + sizeof(*ngs->cand_sf)); + ngs->cand_sf_alloc = CAND_SF_ALLOCSIZE; + } + else { + ngs->cand_sf_alloc += CAND_SF_ALLOCSIZE; + ngs->cand_sf = ckd_realloc(ngs->cand_sf, + ngs->cand_sf_alloc + * sizeof(*ngs->cand_sf)); + E_INFO("cand_sf[] increased to %d entries\n", + ngs->cand_sf_alloc); + } + } + + /* Use the newly created cand_sf. */ + j = n_cand_sf++; + candp->next = -1; /* End of the chain. */ + ngs->cand_sf[j].bp_ef = bpe->frame; + } + /* Update it to point to this candidate. */ + ngs->cand_sf[j].cand = i; + + ngs->last_ltrans[candp->wid].dscr = WORST_SCORE; + ngs->last_ltrans[candp->wid].sf = bpe->frame + 1; + } + } + + /* Compute best LM score and bp for new cands entered in the sorted lists above */ + for (i = 0; i < n_cand_sf; i++) { + /* For the i-th unique end frame... */ + bp = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef]; + bpend = ngs->bp_table_idx[ngs->cand_sf[i].bp_ef + 1]; + for (bpe = &(ngs->bp_table[bp]); bp < bpend; bp++, bpe++) { + if (!bpe->valid) + continue; + /* For each candidate at the start frame find bp->cand transition-score */ + for (j = ngs->cand_sf[i].cand; j >= 0; j = candp->next) { + int32 n_used; + candp = &(ngs->lastphn_cand[j]); + dscr = + ngram_search_exit_score + (ngs, bpe, dict_first_phone(ps_search_dict(ngs), candp->wid)); + if (dscr BETTER_THAN WORST_SCORE) { + assert(!dict_filler_word(ps_search_dict(ngs), candp->wid)); + dscr += ngram_tg_score(ngs->lmset, + dict_basewid(ps_search_dict(ngs), candp->wid), + bpe->real_wid, + bpe->prev_real_wid, + &n_used)>>SENSCR_SHIFT; + } + + if (dscr BETTER_THAN ngs->last_ltrans[candp->wid].dscr) { + ngs->last_ltrans[candp->wid].dscr = dscr; + ngs->last_ltrans[candp->wid].bp = bp; + } + } + } + } + + /* Update best transitions for all candidates; also update best lastphone score */ + bestscore = ngs->last_phone_best_score; + for (i = 0, candp = ngs->lastphn_cand; i < 
ngs->n_lastphn_cand; i++, candp++) { + candp->score += ngs->last_ltrans[candp->wid].dscr; + candp->bp = ngs->last_ltrans[candp->wid].bp; + + if (candp->score BETTER_THAN bestscore) + bestscore = candp->score; + } + ngs->last_phone_best_score = bestscore; + + /* At this pt, we know the best entry score (with LM component) for all candidates */ + thresh = bestscore + ngs->lponlybeam; + for (i = ngs->n_lastphn_cand, candp = ngs->lastphn_cand; i > 0; --i, candp++) { + if (candp->score BETTER_THAN thresh) { + w = candp->wid; + + ngram_search_alloc_all_rc(ngs, w); + + k = 0; + for (hmm = ngs->word_chan[w]; hmm; hmm = hmm->next) { + if ((hmm_frame(&hmm->hmm) < frame_idx) + || (candp->score BETTER_THAN hmm_in_score(&hmm->hmm))) { + assert(hmm_frame(&hmm->hmm) != nf); + hmm_enter(&hmm->hmm, + candp->score, candp->bp, nf); + k++; + } + } + if (k > 0) { + assert(bitvec_is_clear(ngs->word_active, w)); + assert(!dict_is_single_phone(ps_search_dict(ngs), w)); + *(nawl++) = w; + bitvec_set(ngs->word_active, w); + } + } + } + ngs->n_active_word[nf & 0x1] = (int)(nawl - ngs->active_word_list[nf & 0x1]); +} + +/* + * Prune currently active word channels for next frame. Also, perform exit + * transitions out of such channels and active successors. 
+ */ +static void +prune_word_chan(ngram_search_t *ngs, int frame_idx) +{ + root_chan_t *rhmm; + chan_t *hmm, *thmm; + chan_t **phmmp; /* previous HMM-pointer */ + int32 nf, w, i, k; + int32 newword_thresh, lastphn_thresh; + int32 *awl, *nawl; + + nf = frame_idx + 1; + newword_thresh = ngs->last_phone_best_score + ngs->wbeam; + lastphn_thresh = ngs->last_phone_best_score + ngs->lponlybeam; + + awl = ngs->active_word_list[frame_idx & 0x1]; + nawl = ngs->active_word_list[nf & 0x1] + ngs->n_active_word[nf & 0x1]; + + /* Dynamically allocated last channels of multi-phone words */ + for (i = ngs->n_active_word[frame_idx & 0x1], w = *(awl++); i > 0; + --i, w = *(awl++)) { + k = 0; + phmmp = &(ngs->word_chan[w]); + for (hmm = ngs->word_chan[w]; hmm; hmm = thmm) { + assert(hmm_frame(&hmm->hmm) >= frame_idx); + + thmm = hmm->next; + if (hmm_bestscore(&hmm->hmm) BETTER_THAN lastphn_thresh) { + /* retain this channel in next frame */ + hmm_frame(&hmm->hmm) = nf; + k++; + phmmp = &(hmm->next); + + /* Could if ((! skip_alt_frm) || (frame_idx & 0x1)) the following */ + if (hmm_out_score(&hmm->hmm) BETTER_THAN newword_thresh) { + /* can exit channel and recognize word */ + ngram_search_save_bp(ngs, frame_idx, w, + hmm_out_score(&hmm->hmm), + hmm_out_history(&hmm->hmm), + hmm->info.rc_id); + } + } + else if (hmm_frame(&hmm->hmm) == nf) { + phmmp = &(hmm->next); + } + else { + hmm_deinit(&hmm->hmm); + listelem_free(ngs->chan_alloc, hmm); + *phmmp = thmm; + } + } + if ((k > 0) && (bitvec_is_clear(ngs->word_active, w))) { + assert(!dict_is_single_phone(ps_search_dict(ngs), w)); + *(nawl++) = w; + bitvec_set(ngs->word_active, w); + } + } + ngs->n_active_word[nf & 0x1] = (int)(nawl - ngs->active_word_list[nf & 0x1]); + + /* + * Prune permanently allocated single-phone channels. + * NOTES: score[] of pruned channels set to WORST_SCORE elsewhere. 
+ */ + for (i = 0; i < ngs->n_1ph_words; i++) { + w = ngs->single_phone_wid[i]; + rhmm = (root_chan_t *) ngs->word_chan[w]; + E_DEBUG("Single phone word %s frame %d score %d thresh %d outscore %d nwthresh %d\n", + dict_wordstr(ps_search_dict(ngs),w), + hmm_frame(&rhmm->hmm), hmm_bestscore(&rhmm->hmm), + lastphn_thresh, hmm_out_score(&rhmm->hmm), newword_thresh); + if (hmm_frame(&rhmm->hmm) < frame_idx) + continue; + if (hmm_bestscore(&rhmm->hmm) BETTER_THAN lastphn_thresh) { + hmm_frame(&rhmm->hmm) = nf; + + /* Could if ((! skip_alt_frm) || (frame_idx & 0x1)) the following */ + if (hmm_out_score(&rhmm->hmm) BETTER_THAN newword_thresh) { + E_DEBUG("Exiting single phone word %s with %d > %d, %d\n", + dict_wordstr(ps_search_dict(ngs),w), + hmm_out_score(&rhmm->hmm), + lastphn_thresh, newword_thresh); + ngram_search_save_bp(ngs, frame_idx, w, + hmm_out_score(&rhmm->hmm), + hmm_out_history(&rhmm->hmm), 0); + } + } + } +} + +static void +prune_channels(ngram_search_t *ngs, int frame_idx) +{ + /* Clear last phone candidate list. */ + ngs->n_lastphn_cand = 0; + /* Set the dynamic beam based on maxhmmpf here. */ + ngs->dynamic_beam = ngs->beam; + if (ngs->maxhmmpf != -1 + && ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval > ngs->maxhmmpf) { + /* Build a histogram to approximately prune them. */ + int32 bins[256], bw, nhmms, i; + root_chan_t *rhmm; + chan_t **acl, *hmm; + + /* Bins go from zero (best score) to edge of beam. */ + bw = -ngs->beam / 256; + memset(bins, 0, sizeof(bins)); + /* For each active root channel. */ + for (i = 0, rhmm = ngs->root_chan; i < ngs->n_root_chan; i++, rhmm++) { + int32 b; + + /* Put it in a bin according to its bestscore. */ + b = (ngs->best_score - hmm_bestscore(&rhmm->hmm)) / bw; + if (b >= 256) + b = 255; + ++bins[b]; + } + /* For each active non-root channel. 
*/ + acl = ngs->active_chan_list[frame_idx & 0x1]; /* currently active HMMs in tree */ + for (i = ngs->n_active_chan[frame_idx & 0x1], hmm = *(acl++); + i > 0; --i, hmm = *(acl++)) { + int32 b; + + /* Put it in a bin according to its bestscore. */ + b = (ngs->best_score - hmm_bestscore(&hmm->hmm)) / bw; + if (b >= 256) + b = 255; + ++bins[b]; + } + /* Walk down the bins to find the new beam. */ + for (i = nhmms = 0; i < 256; ++i) { + nhmms += bins[i]; + if (nhmms > ngs->maxhmmpf) + break; + } + ngs->dynamic_beam = -(i * bw); + } + + prune_root_chan(ngs, frame_idx); + prune_nonroot_chan(ngs, frame_idx); + last_phone_transition(ngs, frame_idx); + prune_word_chan(ngs, frame_idx); +} + +/* + * Limit the number of word exits in each frame to maxwpf. And also limit the number of filler + * words to 1. + */ +static void +bptable_maxwpf(ngram_search_t *ngs, int frame_idx) +{ + int32 bp, n; + int32 bestscr, worstscr; + bptbl_t *bpe, *bestbpe, *worstbpe; + + /* Don't prune if no pruing. */ + if (ngs->maxwpf == -1 || ngs->maxwpf == ps_search_n_words(ngs)) + return; + + /* Allow only one filler word exit (the best) per frame */ + bestscr = (int32) 0x80000000; + bestbpe = NULL; + n = 0; + for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) { + bpe = &(ngs->bp_table[bp]); + if (dict_filler_word(ps_search_dict(ngs), bpe->wid)) { + if (bpe->score BETTER_THAN bestscr) { + bestscr = bpe->score; + bestbpe = bpe; + } + bpe->valid = FALSE; + n++; /* No. of filler words */ + } + } + /* Restore bestbpe to valid state */ + if (bestbpe != NULL) { + bestbpe->valid = TRUE; + --n; + } + + /* Allow up to maxwpf best entries to survive; mark the remaining with valid = 0 */ + n = (ngs->bpidx + - ngs->bp_table_idx[frame_idx]) - n; /* No. 
of entries after limiting fillers */ + for (; n > ngs->maxwpf; --n) { + /* Find worst BPTable entry */ + worstscr = (int32) 0x7fffffff; + worstbpe = NULL; + for (bp = ngs->bp_table_idx[frame_idx]; (bp < ngs->bpidx); bp++) { + bpe = &(ngs->bp_table[bp]); + if (bpe->valid && (bpe->score WORSE_THAN worstscr)) { + worstscr = bpe->score; + worstbpe = bpe; + } + } + /* FIXME: Don't panic! */ + if (worstbpe == NULL) + E_FATAL("PANIC: No worst BPtable entry remaining\n"); + worstbpe->valid = FALSE; + } +} + +static void +word_transition(ngram_search_t *ngs, int frame_idx) +{ + int32 i, k, bp, w, nf; + int32 rc; + int32 thresh, newscore, pl_newscore; + bptbl_t *bpe; + root_chan_t *rhmm; + struct bestbp_rc_s *bestbp_rc_ptr; + phone_loop_search_t *pls; + dict_t *dict = ps_search_dict(ngs); + dict2pid_t *d2p = ps_search_dict2pid(ngs); + + /* + * Transition to start of new word instances (HMM tree roots); but only if words + * other than finished here. + * But, first, find the best starting score for each possible right context phone. + */ + for (i = bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef) - 1; i >= 0; --i) + ngs->bestbp_rc[i].score = WORST_SCORE; + k = 0; + pls = (phone_loop_search_t *)ps_search_lookahead(ngs); + /* Ugh, this is complicated. Scan all word exits for this frame + * (they have already been created by prune_word_chan()). */ + for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) { + bpe = &(ngs->bp_table[bp]); + ngs->word_lat_idx[bpe->wid] = NO_BP; + + if (bpe->wid == ps_search_finish_wid(ngs)) + continue; + k++; + + /* DICT2PID */ + /* Array of HMM scores corresponding to all the possible right + * context expansions of the final phone. It's likely that a + * lot of these are going to be missing, actually. */ + if (bpe->last2_phone == -1) { /* implies s_idx == -1 */ + /* No right context expansion. 
*/ + for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) { + if (bpe->score BETTER_THAN ngs->bestbp_rc[rc].score) { + E_DEBUG("bestbp_rc[0] = %d lc %d\n", + bpe->score, bpe->last_phone); + ngs->bestbp_rc[rc].score = bpe->score; + ngs->bestbp_rc[rc].path = bp; + ngs->bestbp_rc[rc].lc = bpe->last_phone; + } + } + } + else { + xwdssid_t *rssid = dict2pid_rssid(d2p, bpe->last_phone, bpe->last2_phone); + int32 *rcss = &(ngs->bscore_stack[bpe->s_idx]); + for (rc = 0; rc < bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef); ++rc) { + if (rcss[rssid->cimap[rc]] BETTER_THAN ngs->bestbp_rc[rc].score) { + E_DEBUG("bestbp_rc[%d] = %d lc %d\n", + rc, rcss[rssid->cimap[rc]], bpe->last_phone); + ngs->bestbp_rc[rc].score = rcss[rssid->cimap[rc]]; + ngs->bestbp_rc[rc].path = bp; + ngs->bestbp_rc[rc].lc = bpe->last_phone; + } + } + } + } + if (k == 0) + return; + + nf = frame_idx + 1; + thresh = ngs->best_score + ngs->dynamic_beam; + /* + * Hypothesize successors to words finished in this frame. + * Main dictionary, multi-phone words transition to HMM-trees roots. + */ + for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) { + bestbp_rc_ptr = &(ngs->bestbp_rc[rhmm->ciphone]); + + newscore = bestbp_rc_ptr->score + ngs->nwpen + ngs->pip; + pl_newscore = newscore + + phone_loop_search_score(pls, rhmm->ciphone); + if (pl_newscore BETTER_THAN thresh) { + if ((hmm_frame(&rhmm->hmm) < frame_idx) + || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) { + hmm_enter(&rhmm->hmm, newscore, + bestbp_rc_ptr->path, nf); + /* DICT2PID: Another place where mpx ssids are entered. */ + /* Look up the ssid to use when entering this mpx triphone. */ + hmm_mpx_ssid(&rhmm->hmm, 0) = + dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone, bestbp_rc_ptr->lc); + assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID); + } + } + } + + /* + * Single phone words; no right context for these. Cannot use bestbp_rc as + * LM scores have to be included. 
First find best transition to these words. + */ + for (i = 0; i < ngs->n_1ph_LMwords; i++) { + w = ngs->single_phone_wid[i]; + ngs->last_ltrans[w].dscr = (int32) 0x80000000; + } + for (bp = ngs->bp_table_idx[frame_idx]; bp < ngs->bpidx; bp++) { + bpe = &(ngs->bp_table[bp]); + if (!bpe->valid) + continue; + + for (i = 0; i < ngs->n_1ph_LMwords; i++) { + int32 n_used; + w = ngs->single_phone_wid[i]; + newscore = ngram_search_exit_score + (ngs, bpe, dict_first_phone(dict, w)); + E_DEBUG("initial newscore for %s: %d\n", + dict_wordstr(dict, w), newscore); + if (newscore != WORST_SCORE) + newscore += ngram_tg_score(ngs->lmset, + dict_basewid(dict, w), + bpe->real_wid, + bpe->prev_real_wid, + &n_used)>>SENSCR_SHIFT; + + /* FIXME: Not sure how WORST_SCORE could be better, but it + * apparently happens. */ + if (newscore BETTER_THAN ngs->last_ltrans[w].dscr) { + ngs->last_ltrans[w].dscr = newscore; + ngs->last_ltrans[w].bp = bp; + } + } + } + + /* Now transition to in-LM single phone words */ + for (i = 0; i < ngs->n_1ph_LMwords; i++) { + w = ngs->single_phone_wid[i]; + /* Never transition into the start word (for one thing, it is + a non-event in the language model.) */ + if (w == dict_startwid(ps_search_dict(ngs))) + continue; + rhmm = (root_chan_t *) ngs->word_chan[w]; + newscore = ngs->last_ltrans[w].dscr + ngs->pip; + pl_newscore = newscore + phone_loop_search_score(pls, rhmm->ciphone); + if (pl_newscore BETTER_THAN thresh) { + bpe = ngs->bp_table + ngs->last_ltrans[w].bp; + if ((hmm_frame(&rhmm->hmm) < frame_idx) + || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) { + hmm_enter(&rhmm->hmm, + newscore, ngs->last_ltrans[w].bp, nf); + /* DICT2PID: another place where mpx ssids are entered. */ + /* Look up the ssid to use when entering this mpx triphone. 
*/ + hmm_mpx_ssid(&rhmm->hmm, 0) = + dict2pid_ldiph_lc(d2p, rhmm->ciphone, rhmm->ci2phone, + dict_last_phone(dict, bpe->wid)); + assert(hmm_mpx_ssid(&rhmm->hmm, 0) != BAD_SSID); + } + } + } + + /* Remaining words: silence, noise words. No mpx for these! */ + w = ps_search_silence_wid(ngs); + rhmm = (root_chan_t *) ngs->word_chan[w]; + bestbp_rc_ptr = &(ngs->bestbp_rc[ps_search_acmod(ngs)->mdef->sil]); + newscore = bestbp_rc_ptr->score + ngs->silpen + ngs->pip; + pl_newscore = newscore + + phone_loop_search_score(pls, rhmm->ciphone); + if (pl_newscore BETTER_THAN thresh) { + if ((hmm_frame(&rhmm->hmm) < frame_idx) + || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) { + hmm_enter(&rhmm->hmm, + newscore, bestbp_rc_ptr->path, nf); + } + } + for (w = dict_filler_start(dict); w <= dict_filler_end(dict); w++) { + if (w == ps_search_silence_wid(ngs)) + continue; + /* Never transition into the start word (for one thing, it is + a non-event in the language model.) */ + if (w == dict_startwid(ps_search_dict(ngs))) + continue; + rhmm = (root_chan_t *) ngs->word_chan[w]; + /* If this was not actually a single-phone word, rhmm will be NULL.
*/ + if (rhmm == NULL) + continue; + newscore = bestbp_rc_ptr->score + ngs->fillpen + ngs->pip; + pl_newscore = newscore + + phone_loop_search_score(pls, rhmm->ciphone); + if (pl_newscore BETTER_THAN thresh) { + if ((hmm_frame(&rhmm->hmm) < frame_idx) + || (newscore BETTER_THAN hmm_in_score(&rhmm->hmm))) { + hmm_enter(&rhmm->hmm, + newscore, bestbp_rc_ptr->path, nf); + } + } + } +} + +static void +deactivate_channels(ngram_search_t *ngs, int frame_idx) +{ + root_chan_t *rhmm; + int i; + + /* Clear score[] of pruned root channels */ + for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) { + if (hmm_frame(&rhmm->hmm) == frame_idx) { + hmm_clear(&rhmm->hmm); + } + } + /* Clear score[] of pruned single-phone channels */ + for (i = 0; i < ngs->n_1ph_words; i++) { + int32 w = ngs->single_phone_wid[i]; + rhmm = (root_chan_t *) ngs->word_chan[w]; + if (hmm_frame(&rhmm->hmm) == frame_idx) { + hmm_clear(&rhmm->hmm); + } + } +} + +int +ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx) +{ + int16 const *senscr; + + /* Activate our HMMs for the current frame if need be. */ + if (!ps_search_acmod(ngs)->compallsen) + compute_sen_active(ngs, frame_idx); + + /* Compute GMM scores for the current frame. */ + if ((senscr = acmod_score(ps_search_acmod(ngs), &frame_idx)) == NULL) + return 0; + ngs->st.n_senone_active_utt += ps_search_acmod(ngs)->n_senone_active; + + /* Mark backpointer table for current frame. */ + ngram_search_mark_bptable(ngs, frame_idx); + + /* If the best score is equal to or worse than WORST_SCORE, + * recognition has failed, don't bother to keep trying. 
*/ + if (ngs->best_score == WORST_SCORE || ngs->best_score WORSE_THAN WORST_SCORE) + return 0; + /* Renormalize if necessary */ + if (ngs->best_score + (2 * ngs->beam) WORSE_THAN WORST_SCORE) { + E_INFO("Renormalizing Scores at frame %d, best score %d\n", + frame_idx, ngs->best_score); + renormalize_scores(ngs, frame_idx, ngs->best_score); + } + + /* Evaluate HMMs */ + evaluate_channels(ngs, senscr, frame_idx); + /* Prune HMMs and do phone transitions. */ + prune_channels(ngs, frame_idx); + /* Do absolute pruning on word exits. */ + bptable_maxwpf(ngs, frame_idx); + /* Do word transitions. */ + word_transition(ngs, frame_idx); + /* Deactivate pruned HMMs. */ + deactivate_channels(ngs, frame_idx); + + ++ngs->n_frame; + /* Return the number of frames processed. */ + return 1; +} + +void +ngram_fwdtree_finish(ngram_search_t *ngs) +{ + int32 i, w, cf, *awl; + root_chan_t *rhmm; + chan_t *hmm, **acl; + + /* This is the number of frames processed. */ + cf = ps_search_acmod(ngs)->output_frame; + /* Add a mark in the backpointer table for one past the final frame. */ + ngram_search_mark_bptable(ngs, cf); + + /* Deactivate channels lined up for the next frame */ + /* First, root channels of HMM tree */ + for (i = ngs->n_root_chan, rhmm = ngs->root_chan; i > 0; --i, rhmm++) { + hmm_clear(&rhmm->hmm); + } + + /* nonroot channels of HMM tree */ + i = ngs->n_active_chan[cf & 0x1]; + acl = ngs->active_chan_list[cf & 0x1]; + for (hmm = *(acl++); i > 0; --i, hmm = *(acl++)) { + hmm_clear(&hmm->hmm); + } + + /* word channels */ + i = ngs->n_active_word[cf & 0x1]; + awl = ngs->active_word_list[cf & 0x1]; + for (w = *(awl++); i > 0; --i, w = *(awl++)) { + /* Don't accidentally free single-phone words! 
*/ + if (dict_is_single_phone(ps_search_dict(ngs), w)) + continue; + bitvec_clear(ngs->word_active, w); + if (ngs->word_chan[w] == NULL) + continue; + ngram_search_free_all_rc(ngs, w); + } + + /* + * The previous search code did a postprocessing of the + * backpointer table here, but we will postpone this until it is + * absolutely necessary, i.e. when generating a word graph. + * Likewise we don't actually have to decide what the exit word is + * until somebody requests a backtrace. + */ + + ptmr_stop(&ngs->fwdtree_perf); + /* Print out some statistics. */ + if (cf > 0) { + double n_speech = (double)(cf + 1) + / cmd_ln_int32_r(ps_search_config(ngs), "-frate"); + E_INFO("%8d words recognized (%d/fr)\n", + ngs->bpidx, (ngs->bpidx + (cf >> 1)) / (cf + 1)); + E_INFO("%8d senones evaluated (%d/fr)\n", ngs->st.n_senone_active_utt, + (ngs->st.n_senone_active_utt + (cf >> 1)) / (cf + 1)); + E_INFO("%8d channels searched (%d/fr), %d 1st, %d last\n", + ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval, + (ngs->st.n_root_chan_eval + ngs->st.n_nonroot_chan_eval) / (cf + 1), + ngs->st.n_root_chan_eval, ngs->st.n_last_chan_eval); + E_INFO("%8d words for which last channels evaluated (%d/fr)\n", + ngs->st.n_word_lastchan_eval, + ngs->st.n_word_lastchan_eval / (cf + 1)); + E_INFO("%8d candidate words for entering last phone (%d/fr)\n", + ngs->st.n_lastphn_cand_utt, ngs->st.n_lastphn_cand_utt / (cf + 1)); + E_INFO("fwdtree %.2f CPU %.3f xRT\n", + ngs->fwdtree_perf.t_cpu, + ngs->fwdtree_perf.t_cpu / n_speech); + E_INFO("fwdtree %.2f wall %.3f xRT\n", + ngs->fwdtree_perf.t_elapsed, + ngs->fwdtree_perf.t_elapsed / n_speech); + } + /* dump_bptable(ngs); */ +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search_fwdtree.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search_fwdtree.h new file mode 100644 index 0000000000000000000000000000000000000000..5d28b4782e2bede77ace130d475af1022d2efd02 
--- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ngram_search_fwdtree.h @@ -0,0 +1,97 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ + +/** + * @file ngram_search_fwdtree.h Lexicon tree based Viterbi search. + */ + +#ifndef __NGRAM_SEARCH_FWDTREE_H__ +#define __NGRAM_SEARCH_FWDTREE_H__ + +/* SphinxBase headers. */ + +/* Local headers. */ +#include +#include "ngram_search.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * Initialize N-Gram search for fwdtree decoding. + */ +void ngram_fwdtree_init(ngram_search_t *ngs); + +/** + * Release memory associated with fwdtree decoding. + */ +void ngram_fwdtree_deinit(ngram_search_t *ngs); + +/** + * Rebuild search structures for updated language models. + */ +int ngram_fwdtree_reinit(ngram_search_t *ngs); + +/** + * Start fwdtree decoding for an utterance. + */ +POCKETSPHINX_EXPORT +void ngram_fwdtree_start(ngram_search_t *ngs); + +/** + * Search one frame forward in an utterance. + * + * @return Number of frames searched (either 0 or 1). + */ +POCKETSPHINX_EXPORT +int ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx); + +/** + * Finish fwdtree decoding for an utterance. + */ +POCKETSPHINX_EXPORT +void ngram_fwdtree_finish(ngram_search_t *ngs); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __NGRAM_SEARCH_FWDTREE_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/phone_loop_search.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/phone_loop_search.c new file mode 100644 index 0000000000000000000000000000000000000000..0bca980480a9b6f1261733f0278ddade77fbfb84 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/phone_loop_search.c @@ -0,0 +1,373 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file phone_loop_search.c Fast and rough context-independent phoneme loop search.
+ */ + +#include + +#include "phone_loop_search.h" + +static int phone_loop_search_start(ps_search_t *search); +static int phone_loop_search_step(ps_search_t *search, int frame_idx); +static int phone_loop_search_finish(ps_search_t *search); +static int phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p); +static void phone_loop_search_free(ps_search_t *search); +static char const *phone_loop_search_hyp(ps_search_t *search, int32 *out_score); +static int32 phone_loop_search_prob(ps_search_t *search); +static ps_seg_t *phone_loop_search_seg_iter(ps_search_t *search); + +static ps_searchfuncs_t phone_loop_search_funcs = { + /* start: */ phone_loop_search_start, + /* step: */ phone_loop_search_step, + /* finish: */ phone_loop_search_finish, + /* reinit: */ phone_loop_search_reinit, + /* free: */ phone_loop_search_free, + /* lattice: */ NULL, + /* hyp: */ phone_loop_search_hyp, + /* prob: */ phone_loop_search_prob, + /* seg_iter: */ phone_loop_search_seg_iter, +}; + +static int +phone_loop_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p) +{ + phone_loop_search_t *pls = (phone_loop_search_t *)search; + cmd_ln_t *config = ps_search_config(search); + acmod_t *acmod = ps_search_acmod(search); + int i; + + /* Free old dict2pid, dict, if necessary. */ + ps_search_base_reinit(search, dict, d2p); + + /* Initialize HMM context. 
*/ + if (pls->hmmctx) + hmm_context_free(pls->hmmctx); + pls->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef), + acmod->tmat->tp, NULL, acmod->mdef->sseq); + if (pls->hmmctx == NULL) + return -1; + + /* Initialize penalty storage */ + pls->n_phones = bin_mdef_n_ciphone(acmod->mdef); + pls->window = cmd_ln_int32_r(config, "-pl_window"); + if (pls->penalties) + ckd_free(pls->penalties); + pls->penalties = (int32 *)ckd_calloc(pls->n_phones, sizeof(*pls->penalties)); + if (pls->pen_buf) + ckd_free_2d(pls->pen_buf); + pls->pen_buf = (int32 **)ckd_calloc_2d(pls->window, pls->n_phones, sizeof(**pls->pen_buf)); + + /* Initialize phone HMMs. */ + if (pls->hmms) { + for (i = 0; i < pls->n_phones; ++i) + hmm_deinit((hmm_t *)&pls->hmms[i]); + ckd_free(pls->hmms); + } + pls->hmms = (hmm_t *)ckd_calloc(pls->n_phones, sizeof(*pls->hmms)); + for (i = 0; i < pls->n_phones; ++i) { + hmm_init(pls->hmmctx, (hmm_t *)&pls->hmms[i], + FALSE, + bin_mdef_pid2ssid(acmod->mdef, i), + bin_mdef_pid2tmatid(acmod->mdef, i)); + } + pls->penalty_weight = cmd_ln_float64_r(config, "-pl_weight"); + pls->beam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_beam")) >> SENSCR_SHIFT; + pls->pbeam = logmath_log(acmod->lmath, cmd_ln_float64_r(config, "-pl_pbeam")) >> SENSCR_SHIFT; + pls->pip = logmath_log(acmod->lmath, cmd_ln_float32_r(config, "-pl_pip")) >> SENSCR_SHIFT; + E_INFO("State beam %d Phone exit beam %d Insertion penalty %d\n", + pls->beam, pls->pbeam, pls->pip); + + return 0; +} + +ps_search_t * +phone_loop_search_init(cmd_ln_t *config, + acmod_t *acmod, + dict_t *dict) +{ + phone_loop_search_t *pls; + + /* Allocate and initialize. 
*/ + pls = (phone_loop_search_t *)ckd_calloc(1, sizeof(*pls)); + ps_search_init(ps_search_base(pls), &phone_loop_search_funcs, + PS_SEARCH_TYPE_PHONE_LOOP, PS_DEFAULT_PL_SEARCH, + config, acmod, dict, NULL); + phone_loop_search_reinit(ps_search_base(pls), ps_search_dict(pls), + ps_search_dict2pid(pls)); + + return ps_search_base(pls); +} + +static void +phone_loop_search_free_renorm(phone_loop_search_t *pls) +{ + gnode_t *gn; + for (gn = pls->renorm; gn; gn = gnode_next(gn)) + ckd_free(gnode_ptr(gn)); + glist_free(pls->renorm); + pls->renorm = NULL; +} + +static void +phone_loop_search_free(ps_search_t *search) +{ + phone_loop_search_t *pls = (phone_loop_search_t *)search; + int i; + + ps_search_base_free(search); + for (i = 0; i < pls->n_phones; ++i) + hmm_deinit((hmm_t *)&pls->hmms[i]); + phone_loop_search_free_renorm(pls); + ckd_free_2d(pls->pen_buf); + ckd_free(pls->hmms); + ckd_free(pls->penalties); + hmm_context_free(pls->hmmctx); + ckd_free(pls); +} + +static int +phone_loop_search_start(ps_search_t *search) +{ + phone_loop_search_t *pls = (phone_loop_search_t *)search; + int i; + + /* Reset and enter all phone HMMs. 
*/ + for (i = 0; i < pls->n_phones; ++i) { + hmm_t *hmm = (hmm_t *)&pls->hmms[i]; + hmm_clear(hmm); + hmm_enter(hmm, 0, -1, 0); + } + memset(pls->penalties, 0, pls->n_phones * sizeof(*pls->penalties)); + for (i = 0; i < pls->window; i++) + memset(pls->pen_buf[i], 0, pls->n_phones * sizeof(*pls->pen_buf[i])); + phone_loop_search_free_renorm(pls); + pls->best_score = 0; + pls->pen_buf_ptr = 0; + + return 0; +} + +static void +renormalize_hmms(phone_loop_search_t *pls, int frame_idx, int32 norm) +{ + phone_loop_renorm_t *rn = (phone_loop_renorm_t *)ckd_calloc(1, sizeof(*rn)); + int i; + + pls->renorm = glist_add_ptr(pls->renorm, rn); + rn->frame_idx = frame_idx; + rn->norm = norm; + + for (i = 0; i < pls->n_phones; ++i) { + hmm_normalize((hmm_t *)&pls->hmms[i], norm); + } +} + +static void +evaluate_hmms(phone_loop_search_t *pls, int16 const *senscr, int frame_idx) +{ + int32 bs = WORST_SCORE; + int i; + + hmm_context_set_senscore(pls->hmmctx, senscr); + + for (i = 0; i < pls->n_phones; ++i) { + hmm_t *hmm = (hmm_t *)&pls->hmms[i]; + int32 score; + + if (hmm_frame(hmm) < frame_idx) + continue; + score = hmm_vit_eval(hmm); + if (score BETTER_THAN bs) { + bs = score; + } + } + pls->best_score = bs; +} + +static void +store_scores(phone_loop_search_t *pls, int frame_idx) +{ + int i, j, itr; + + (void)frame_idx; + for (i = 0; i < pls->n_phones; ++i) { + hmm_t *hmm = (hmm_t *)&pls->hmms[i]; + pls->pen_buf[pls->pen_buf_ptr][i] = (hmm_bestscore(hmm) - pls->best_score) * pls->penalty_weight; + } + pls->pen_buf_ptr++; + pls->pen_buf_ptr = pls->pen_buf_ptr % pls->window; + + /* update penalties */ + for (i = 0; i < pls->n_phones; ++i) { + pls->penalties[i] = WORST_SCORE; + for (j = 0, itr = pls->pen_buf_ptr + 1; j < pls->window; j++, itr++) { + itr = itr % pls->window; + if (pls->pen_buf[itr][i] > pls->penalties[i]) + pls->penalties[i] = pls->pen_buf[itr][i]; + } + } +} + +static void +prune_hmms(phone_loop_search_t *pls, int frame_idx) +{ + int32 thresh = pls->best_score + 
pls->beam; + int nf = frame_idx + 1; + int i; + + /* Check all phones to see if they remain active in the next frame. */ + for (i = 0; i < pls->n_phones; ++i) { + hmm_t *hmm = (hmm_t *)&pls->hmms[i]; + + if (hmm_frame(hmm) < frame_idx) + continue; + /* Retain if score better than threshold. */ + if (hmm_bestscore(hmm) BETTER_THAN thresh) { + hmm_frame(hmm) = nf; + } + else + hmm_clear_scores(hmm); + } +} + +static void +phone_transition(phone_loop_search_t *pls, int frame_idx) +{ + int32 thresh = pls->best_score + pls->pbeam; + int nf = frame_idx + 1; + int i; + + /* Now transition out of phones whose last states are inside the + * phone transition beam. */ + for (i = 0; i < pls->n_phones; ++i) { + hmm_t *hmm = (hmm_t *)&pls->hmms[i]; + int32 newphone_score; + int j; + + if (hmm_frame(hmm) != nf) + continue; + + newphone_score = hmm_out_score(hmm) + pls->pip; + if (newphone_score BETTER_THAN thresh) { + /* Transition into all phones using the usual Viterbi rule. */ + for (j = 0; j < pls->n_phones; ++j) { + hmm_t *nhmm = (hmm_t *)&pls->hmms[j]; + + if (hmm_frame(nhmm) < frame_idx + || newphone_score BETTER_THAN hmm_in_score(nhmm)) { + hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf); + } + } + } + } +} + +static int +phone_loop_search_step(ps_search_t *search, int frame_idx) +{ + phone_loop_search_t *pls = (phone_loop_search_t *)search; + acmod_t *acmod = ps_search_acmod(search); + int16 const *senscr; + int i; + + /* All CI senones are active all the time. */ + if (!ps_search_acmod(pls)->compallsen) { + acmod_clear_active(ps_search_acmod(pls)); + for (i = 0; i < pls->n_phones; ++i) + acmod_activate_hmm(acmod, (hmm_t *)&pls->hmms[i]); + } + + /* Calculate senone scores for current frame. */ + senscr = acmod_score(acmod, &frame_idx); + + /* Renormalize, if necessary. 
*/ + if (pls->best_score + (2 * pls->beam) WORSE_THAN WORST_SCORE) { + E_INFO("Renormalizing Scores at frame %d, best score %d\n", + frame_idx, pls->best_score); + renormalize_hmms(pls, frame_idx, pls->best_score); + } + + /* Evaluate phone HMMs for current frame. */ + evaluate_hmms(pls, senscr, frame_idx); + + /* Store hmm scores for senone penalty calculation */ + store_scores(pls, frame_idx); + + /* Prune phone HMMs. */ + prune_hmms(pls, frame_idx); + + /* Do phone transitions. */ + phone_transition(pls, frame_idx); + + return 0; +} + +static int +phone_loop_search_finish(ps_search_t *search) +{ + /* Actually nothing to do here really. */ + (void)search; + return 0; +} + +static char const * +phone_loop_search_hyp(ps_search_t *search, int32 *out_score) +{ + (void)search; + (void)out_score; + E_WARN("Hypotheses are not returned from phone loop search"); + return NULL; +} + +static int32 +phone_loop_search_prob(ps_search_t *search) +{ + (void)search; + /* FIXME: Actually... they ought to be. */ + E_WARN("Posterior probabilities are not returned from phone loop search"); + return 0; +} + +static ps_seg_t * +phone_loop_search_seg_iter(ps_search_t *search) +{ + (void)search; + E_WARN("Hypotheses are not returned from phone loop search"); + return NULL; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/phone_loop_search.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/phone_loop_search.h new file mode 100644 index 0000000000000000000000000000000000000000..4de525329d521fd37d546ff3eaefa580dc5f3018 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/phone_loop_search.h @@ -0,0 +1,113 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file phone_loop_search.h Fast and rough context-independent + * phoneme loop search. + * + * This exists for the purposes of phoneme lookahead, and thus it + * actually does not do phoneme recognition (it wouldn't be very + * accurate anyway). + */ + +#ifndef __PHONE_LOOP_SEARCH_H__ +#define __PHONE_LOOP_SEARCH_H__ + +/* SphinxBase headers. 
*/ +#include +#include +#include +#include + +/* Local headers. */ +#include "pocketsphinx_internal.h" +#include "hmm.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * Renormalization event. + */ +struct phone_loop_renorm_s { + int frame_idx; /**< Frame of renormalization. */ + int32 norm; /**< Normalization constant. */ +}; +typedef struct phone_loop_renorm_s phone_loop_renorm_t; + +/** + * Phone loop search structure. + */ +struct phone_loop_search_s { + ps_search_t base; /**< Base search structure. */ + hmm_t *hmms; /**< Basic HMM structures for CI phones. */ + hmm_context_t *hmmctx; /**< HMM context structure. */ + int16 frame; /**< Current frame being searched. */ + int16 n_phones; /**< Size of phone array. */ + int32 **pen_buf; /**< Penalty buffer */ + int16 pen_buf_ptr; /**< Pointer for frame to fill in penalty buffer */ + int32 *penalties; /**< Penalties for CI phones in current frame */ + float64 penalty_weight; /**< Weighting factor for penalties */ + + int32 best_score; /**< Best Viterbi score in current frame. */ + int32 beam; /**< HMM pruning beam width. */ + int32 pbeam; /**< Phone exit pruning beam width. */ + int32 pip; /**< Phone insertion penalty ("language score"). */ + int window; /**< Window size for phoneme lookahead */ + glist_t renorm; /**< List of renormalizations. */ +}; +typedef struct phone_loop_search_s phone_loop_search_t; + +ps_search_t *phone_loop_search_init(cmd_ln_t *config, + acmod_t *acmod, + dict_t *dict); + +/** + * Return lookahead heuristic score for a specific phone. + */ +#define phone_loop_search_score(pls,ci) \ + ((pls == NULL) ? 
0 : (pls->penalties[ci])) + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __PHONE_LOOP_SEARCH_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/pocketsphinx.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/pocketsphinx.c new file mode 100644 index 0000000000000000000000000000000000000000..06ae5c8b2c96d73ef578d08d24454b169d8e10b6 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/pocketsphinx.c @@ -0,0 +1,1528 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* System headers. */ +#include +#include + +#ifdef HAVE_UNISTD_H +#include +#endif + +/* SphinxBase headers. */ +#include +#include +#include +#include +#include +#include + +/* Local headers. */ +#include "cmdln_macro.h" +#include "pocketsphinx.h" +#include "pocketsphinx_internal.h" +#include "ps_lattice_internal.h" +#include "phone_loop_search.h" +#include "kws_search.h" +#include "fsg_search_internal.h" +#include "ngram_search.h" +#include "ngram_search_fwdtree.h" +#include "ngram_search_fwdflat.h" +#include "allphone_search.h" +#include "state_align_search.h" +#include "fe/fe_internal.h" + +static const arg_t ps_args_def[] = { + POCKETSPHINX_OPTIONS, + CMDLN_EMPTY_OPTION +}; + +/* I'm not sure what the portable way to do this is. 
*/ +static int +file_exists(const char *path) +{ + FILE *tmp; + + tmp = fopen(path, "rb"); + if (tmp) fclose(tmp); + return (tmp != NULL); +} + +#ifdef MODELDIR +static int +hmmdir_exists(const char *path) +{ + FILE *tmp; + char *mdef = string_join(path, "/mdef", NULL); + + tmp = fopen(mdef, "rb"); + if (tmp) fclose(tmp); + ckd_free(mdef); + return (tmp != NULL); +} +#endif + +static void +ps_expand_file_config(ps_decoder_t *ps, const char *arg, const char *extra_arg, + const char *hmmdir, const char *file) +{ + const char *val; + if ((val = cmd_ln_str_r(ps->config, arg)) != NULL) { + cmd_ln_set_str_extra_r(ps->config, extra_arg, val); + } else if (hmmdir == NULL) { + cmd_ln_set_str_extra_r(ps->config, extra_arg, NULL); + } else { + char *tmp = string_join(hmmdir, "/", file, NULL); + if (file_exists(tmp)) + cmd_ln_set_str_extra_r(ps->config, extra_arg, tmp); + else + cmd_ln_set_str_extra_r(ps->config, extra_arg, NULL); + ckd_free(tmp); + } +} + +/* Feature and front-end parameters that may be in feat.params */ +static const arg_t feat_defn[] = { + waveform_to_cepstral_command_line_macro(), + cepstral_to_feature_command_line_macro(), + CMDLN_EMPTY_OPTION +}; + +static void +ps_expand_model_config(ps_decoder_t *ps) +{ + char const *hmmdir, *featparams; + + /* Disable memory mapping on Blackfin (FIXME: should be uClinux in general). 
*/ +#ifdef __ADSPBLACKFIN__ + E_INFO("Will not use mmap() on uClinux/Blackfin."); + cmd_ln_set_boolean_r(ps->config, "-mmap", FALSE); +#endif + + /* Get acoustic model filenames and add them to the command-line */ + hmmdir = cmd_ln_str_r(ps->config, "-hmm"); + ps_expand_file_config(ps, "-mdef", "_mdef", hmmdir, "mdef"); + ps_expand_file_config(ps, "-mean", "_mean", hmmdir, "means"); + ps_expand_file_config(ps, "-var", "_var", hmmdir, "variances"); + ps_expand_file_config(ps, "-tmat", "_tmat", hmmdir, "transition_matrices"); + ps_expand_file_config(ps, "-mixw", "_mixw", hmmdir, "mixture_weights"); + ps_expand_file_config(ps, "-sendump", "_sendump", hmmdir, "sendump"); + ps_expand_file_config(ps, "-fdict", "_fdict", hmmdir, "noisedict"); + ps_expand_file_config(ps, "-lda", "_lda", hmmdir, "feature_transform"); + ps_expand_file_config(ps, "-featparams", "_featparams", hmmdir, "feat.params"); + ps_expand_file_config(ps, "-senmgau", "_senmgau", hmmdir, "senmgau"); + + /* Look for feat.params in acoustic model dir. */ + if ((featparams = cmd_ln_str_r(ps->config, "_featparams"))) { + if (NULL != + cmd_ln_parse_file_r(ps->config, feat_defn, featparams, FALSE)) + E_INFO("Parsed model-specific feature parameters from %s\n", + featparams); + } +} + +static void +ps_free_searches(ps_decoder_t *ps) +{ + if (ps->searches) { + hash_iter_t *search_it; + for (search_it = hash_table_iter(ps->searches); search_it; + search_it = hash_table_iter_next(search_it)) { + ps_search_free(hash_entry_val(search_it->ent)); + } + hash_table_free(ps->searches); + } + + ps->searches = NULL; + ps->search = NULL; +} + +static ps_search_t * +ps_find_search(ps_decoder_t *ps, char const *name) +{ + void *search = NULL; + hash_table_lookup(ps->searches, name, &search); + + return (ps_search_t *) search; +} + +/* Set default acoustic and language models if they are not defined in configuration. 
*/ +void +ps_default_search_args(cmd_ln_t *config) +{ +#ifdef MODELDIR + const char *hmmdir = cmd_ln_str_r(config, "-hmm"); + const char *lmfile = cmd_ln_str_r(config, "-lm"); + const char *dictfile = cmd_ln_str_r(config, "-dict"); + + if (hmmdir == NULL && hmmdir_exists(MODELDIR "/en-us/en-us")) { + hmmdir = MODELDIR "/en-us/en-us"; + cmd_ln_set_str_r(config, "-hmm", hmmdir); + } + + if (lmfile == NULL && !cmd_ln_str_r(config, "-fsg") + && !cmd_ln_str_r(config, "-jsgf") + && !cmd_ln_str_r(config, "-lmctl") + && !cmd_ln_str_r(config, "-kws") + && !cmd_ln_str_r(config, "-keyphrase") + && !cmd_ln_str_r(config, "-alignctl") + && file_exists(MODELDIR "/en-us/en-us.lm.bin")) { + lmfile = MODELDIR "/en-us/en-us.lm.bin"; + cmd_ln_set_str_r(config, "-lm", lmfile); + } + + if (dictfile == NULL && file_exists(MODELDIR "/en-us/cmudict-en-us.dict")) { + dictfile = MODELDIR "/en-us/cmudict-en-us.dict"; + cmd_ln_set_str_r(config, "-dict", dictfile); + } +#else + (void)config; +#endif +} + +int +ps_reinit_feat(ps_decoder_t *ps, cmd_ln_t *config) +{ + if (config && config != ps->config) { + cmd_ln_free_r(ps->config); + ps->config = cmd_ln_retain(config); + } + return acmod_reinit_feat(ps->acmod, NULL, NULL); +} + +int +ps_reinit(ps_decoder_t *ps, cmd_ln_t *config) +{ + const char *path; + const char *keyphrase; + int32 lw; + + if (config && config != ps->config) { + cmd_ln_free_r(ps->config); + ps->config = cmd_ln_retain(config); + } + + /* Set up logging. We need to do this earlier because we want to dump + * the information to the configured log, not to the stderr. 
*/ + if (config) { + const char *logfn, *loglevel; + logfn = cmd_ln_str_r(ps->config, "-logfn"); + if (logfn) { + if (err_set_logfile(logfn) < 0) { + E_ERROR("Cannot redirect log output\n"); + return -1; + } + } + loglevel = cmd_ln_str_r(ps->config, "-loglevel"); + if (loglevel) { + if (err_set_loglevel_str(loglevel) == NULL) { + E_ERROR("Invalid log level: %s\n", loglevel); + return -1; + } + } + } + + ps->mfclogdir = cmd_ln_str_r(ps->config, "-mfclogdir"); + ps->rawlogdir = cmd_ln_str_r(ps->config, "-rawlogdir"); + ps->senlogdir = cmd_ln_str_r(ps->config, "-senlogdir"); + + /* Fill in some default arguments. */ + ps_expand_model_config(ps); + + /* Print out the config for logging. */ + cmd_ln_log_values_r(ps->config, ps_args()); + + /* Free old searches (do this before other reinit) */ + ps_free_searches(ps); + ps->searches = hash_table_new(3, HASH_CASE_YES); + + /* Free old acmod. */ + acmod_free(ps->acmod); + ps->acmod = NULL; + + /* Free old dictionary (must be done after the two things above) */ + dict_free(ps->dict); + ps->dict = NULL; + + /* Free d2p */ + dict2pid_free(ps->d2p); + ps->d2p = NULL; + + /* Logmath computation (used in acmod and search) */ + if (ps->lmath == NULL + || (logmath_get_base(ps->lmath) != + (float64)cmd_ln_float32_r(ps->config, "-logbase"))) { + if (ps->lmath) + logmath_free(ps->lmath); + ps->lmath = logmath_init + ((float64)cmd_ln_float32_r(ps->config, "-logbase"), 0, TRUE); + } + + /* Acoustic model (this is basically everything that + * uttproc.c, senscr.c, and others used to do) */ + if ((ps->acmod = acmod_init(ps->config, ps->lmath, NULL, NULL)) == NULL) + return -1; + + + + if (cmd_ln_int32_r(ps->config, "-pl_window") > 0) { + /* Initialize an auxiliary phone loop search, which will run in + * "parallel" with FSG or N-Gram search. 
*/ + if ((ps->phone_loop = + phone_loop_search_init(ps->config, ps->acmod, ps->dict)) == NULL) + return -1; + hash_table_enter(ps->searches, + ps_search_name(ps->phone_loop), + ps->phone_loop); + } + + /* Dictionary and triphone mappings (depends on acmod). */ + /* FIXME: pass config, change arguments, implement LTS, etc. */ + if ((ps->dict = dict_init(ps->config, ps->acmod->mdef)) == NULL) + return -1; + if ((ps->d2p = dict2pid_build(ps->acmod->mdef, ps->dict)) == NULL) + return -1; + + lw = cmd_ln_float32_r(ps->config, "-lw"); + + /* Determine whether we are starting out in FSG or N-Gram search mode. + * If neither is used skip search initialization. */ + + /* Load KWS if one was specified in config */ + if ((keyphrase = cmd_ln_str_r(ps->config, "-keyphrase"))) { + if (ps_set_keyphrase(ps, PS_DEFAULT_SEARCH, keyphrase)) + return -1; + ps_set_search(ps, PS_DEFAULT_SEARCH); + } + + if ((path = cmd_ln_str_r(ps->config, "-kws"))) { + if (ps_set_kws(ps, PS_DEFAULT_SEARCH, path)) + return -1; + ps_set_search(ps, PS_DEFAULT_SEARCH); + } + + /* Load an FSG if one was specified in config */ + if ((path = cmd_ln_str_r(ps->config, "-fsg"))) { + fsg_model_t *fsg = fsg_model_readfile(path, ps->lmath, lw); + if (!fsg) + return -1; + if (ps_set_fsg(ps, PS_DEFAULT_SEARCH, fsg)) { + fsg_model_free(fsg); + return -1; + } + fsg_model_free(fsg); + ps_set_search(ps, PS_DEFAULT_SEARCH); + } + + /* Or load a JSGF grammar */ + if ((path = cmd_ln_str_r(ps->config, "-jsgf"))) { + if (ps_set_jsgf_file(ps, PS_DEFAULT_SEARCH, path) + || ps_set_search(ps, PS_DEFAULT_SEARCH)) + return -1; + } + + if ((path = cmd_ln_str_r(ps->config, "-allphone"))) { + if (ps_set_allphone_file(ps, PS_DEFAULT_SEARCH, path) + || ps_set_search(ps, PS_DEFAULT_SEARCH)) + return -1; + } + + if ((path = cmd_ln_str_r(ps->config, "-lm")) && + !cmd_ln_str_r(ps->config, "-allphone")) { + if (ps_set_lm_file(ps, PS_DEFAULT_SEARCH, path) + || ps_set_search(ps, PS_DEFAULT_SEARCH)) + return -1; + } + + if ((path = 
cmd_ln_str_r(ps->config, "-lmctl"))) { + const char *name; + ngram_model_t *lmset; + ngram_model_set_iter_t *lmset_it; + + if (!(lmset = ngram_model_set_read(ps->config, path, ps->lmath))) { + E_ERROR("Failed to read language model control file: %s\n", path); + return -1; + } + + for(lmset_it = ngram_model_set_iter(lmset); + lmset_it; lmset_it = ngram_model_set_iter_next(lmset_it)) { + ngram_model_t *lm = ngram_model_set_iter_model(lmset_it, &name); + E_INFO("adding search %s\n", name); + if (ps_set_lm(ps, name, lm)) { + ngram_model_set_iter_free(lmset_it); + ngram_model_free(lmset); + return -1; + } + } + ngram_model_free(lmset); + + name = cmd_ln_str_r(ps->config, "-lmname"); + if (name) + ps_set_search(ps, name); + else { + E_ERROR("No default LM name (-lmname) for `-lmctl'\n"); + return -1; + } + } + + /* Initialize performance timer. */ + ps->perf.name = "decode"; + ptmr_init(&ps->perf); + + return 0; +} + +ps_decoder_t * +ps_init(cmd_ln_t *config) +{ + ps_decoder_t *ps; + + ps = ckd_calloc(1, sizeof(*ps)); + ps->refcount = 1; + if (config) { + if (ps_reinit(ps, config) < 0) { + ps_free(ps); + return NULL; + } + } + return ps; +} + +arg_t const * +ps_args(void) +{ + return ps_args_def; +} + +ps_decoder_t * +ps_retain(ps_decoder_t *ps) +{ + ++ps->refcount; + return ps; +} + +int +ps_free(ps_decoder_t *ps) +{ + if (ps == NULL) + return 0; + if (--ps->refcount > 0) + return ps->refcount; + ps_free_searches(ps); + dict_free(ps->dict); + dict2pid_free(ps->d2p); + acmod_free(ps->acmod); + logmath_free(ps->lmath); + cmd_ln_free_r(ps->config); + ckd_free(ps); + return 0; +} + +cmd_ln_t * +ps_get_config(ps_decoder_t *ps) +{ + return ps->config; +} + +logmath_t * +ps_get_logmath(ps_decoder_t *ps) +{ + return ps->lmath; +} + +fe_t * +ps_get_fe(ps_decoder_t *ps) +{ + return ps->acmod->fe; +} + +feat_t * +ps_get_feat(ps_decoder_t *ps) +{ + return ps->acmod->fcb; +} + +ps_mllr_t * +ps_update_mllr(ps_decoder_t *ps, ps_mllr_t *mllr) +{ + return acmod_update_mllr(ps->acmod, 
mllr);
+}
+
+/**
+ * Make the search registered under `name` the active one.
+ *
+ * Refused while an utterance is in progress.  The phone-loop lookahead
+ * window is enabled only for N-Gram searches.
+ *
+ * @return 0 on success, -1 if decoding is in progress or no search has
+ *         that name.
+ */
+int
+ps_set_search(ps_decoder_t *ps, const char *name)
+{
+    ps_search_t *search;
+
+    if (ps->acmod->state != ACMOD_ENDED && ps->acmod->state != ACMOD_IDLE) {
+        E_ERROR("Cannot change search while decoding, end utterance first\n");
+        return -1;
+    }
+
+    if (!(search = ps_find_search(ps, name))) {
+        return -1;
+    }
+
+    ps->search = search;
+    /* Set pl window depending on the search */
+    if (!strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search))) {
+        ps->pl_window = cmd_ln_int32_r(ps->config, "-pl_window");
+    } else {
+        ps->pl_window = 0;
+    }
+
+    return 0;
+}
+
+/**
+ * Return the name under which the active search is registered.
+ *
+ * @return the hash key of the entry whose value is ps->search, or NULL
+ *         when no entry matches.
+ */
+const char*
+ps_get_search(ps_decoder_t *ps)
+{
+    hash_iter_t *search_it;
+    const char* name = NULL;
+    for (search_it = hash_table_iter(ps->searches); search_it;
+         search_it = hash_table_iter_next(search_it)) {
+        if (hash_entry_val(search_it->ent) == ps->search) {
+            name = hash_entry_key(search_it->ent);
+            /* Leaving the loop early skips the final iter_next() call, so
+             * release the iterator explicitly (same pattern as the other
+             * early exits in this file). */
+            hash_table_iter_free(search_it);
+            break;
+        }
+    }
+    return name;
+}
+
+/**
+ * Remove the search registered under `name` and free it.
+ *
+ * If it was the active search, the decoder is left with no active search.
+ *
+ * @return 0 on success, -1 if no search has that name.
+ */
+int
+ps_unset_search(ps_decoder_t *ps, const char *name)
+{
+    ps_search_t *search = hash_table_delete(ps->searches, name);
+    if (!search)
+        return -1;
+    if (ps->search == search)
+        ps->search = NULL;
+    ps_search_free(search);
+    return 0;
+}
+
+/* The ps_search_iter_* functions are thin casts over the hash-table
+ * iterator that walks ps->searches. */
+ps_search_iter_t *
+ps_search_iter(ps_decoder_t *ps)
+{
+    return (ps_search_iter_t *)hash_table_iter(ps->searches);
+}
+
+ps_search_iter_t *
+ps_search_iter_next(ps_search_iter_t *itor)
+{
+    return (ps_search_iter_t *)hash_table_iter_next((hash_iter_t *)itor);
+}
+
+const char*
+ps_search_iter_val(ps_search_iter_t *itor)
+{
+    return (const char*)(((hash_iter_t *)itor)->ent->key);
+}
+
+void
+ps_search_iter_free(ps_search_iter_t *itor)
+{
+    hash_table_iter_free((hash_iter_t *)itor);
+}
+
+ngram_model_t *
+ps_get_lm(ps_decoder_t *ps, const char *name)
+{
+    ps_search_t *search = ps_find_search(ps, name);
+    if (search && strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search)))
+        return NULL;
+    return search ?
((ngram_search_t *) search)->lmset : NULL; +} + +fsg_model_t * +ps_get_fsg(ps_decoder_t *ps, const char *name) +{ + ps_search_t *search = ps_find_search(ps, name); + if (search && strcmp(PS_SEARCH_TYPE_FSG, ps_search_type(search))) + return NULL; + return search ? ((fsg_search_t *) search)->fsg : NULL; +} + +const char* +ps_get_kws(ps_decoder_t *ps, const char* name) +{ + ps_search_t *search = ps_find_search(ps, name); + if (search && strcmp(PS_SEARCH_TYPE_KWS, ps_search_type(search))) + return NULL; + return search ? kws_search_get_keyphrases(search) : NULL; +} + +static int +set_search_internal(ps_decoder_t *ps, ps_search_t *search) +{ + ps_search_t *old_search; + + if (!search) + return -1; + + search->pls = ps->phone_loop; + old_search = (ps_search_t *) hash_table_replace(ps->searches, ps_search_name(search), search); + if (old_search != search) + ps_search_free(old_search); + + return 0; +} + +int +ps_set_lm(ps_decoder_t *ps, const char *name, ngram_model_t *lm) +{ + ps_search_t *search; + search = ngram_search_init(name, lm, ps->config, ps->acmod, ps->dict, ps->d2p); + return set_search_internal(ps, search); +} + +int +ps_set_lm_file(ps_decoder_t *ps, const char *name, const char *path) +{ + ngram_model_t *lm; + int result; + + lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath); + if (!lm) + return -1; + + result = ps_set_lm(ps, name, lm); + ngram_model_free(lm); + return result; +} + +int +ps_set_allphone(ps_decoder_t *ps, const char *name, ngram_model_t *lm) +{ + ps_search_t *search; + search = allphone_search_init(name, lm, ps->config, ps->acmod, ps->dict, ps->d2p); + return set_search_internal(ps, search); +} + +int +ps_set_allphone_file(ps_decoder_t *ps, const char *name, const char *path) +{ + ngram_model_t *lm; + int result; + + lm = NULL; + if (path) + lm = ngram_model_read(ps->config, path, NGRAM_AUTO, ps->lmath); + result = ps_set_allphone(ps, name, lm); + if (lm) + ngram_model_free(lm); + return result; +} + +int 
+ps_set_align(ps_decoder_t *ps, const char *name, const char *text) +{ + ps_search_t *search; + ps_alignment_t *alignment; + char *textbuf = ckd_salloc(text); + char *ptr, *word, delimfound; + int n; + + textbuf = string_trim(textbuf, STRING_BOTH); + alignment = ps_alignment_init(ps->d2p); + ps_alignment_add_word(alignment, dict_wordid(ps->dict, ""), 0); + for (ptr = textbuf; + (n = nextword(ptr, " \t\n\r", &word, &delimfound)) >= 0; + ptr = word + n, *ptr = delimfound) { + int wid; + if ((wid = dict_wordid(ps->dict, word)) == BAD_S3WID) { + E_ERROR("Unknown word %s\n", word); + ckd_free(textbuf); + ps_alignment_free(alignment); + return -1; + } + ps_alignment_add_word(alignment, wid, 0); + } + ps_alignment_add_word(alignment, dict_wordid(ps->dict, ""), 0); + ps_alignment_populate(alignment); + search = state_align_search_init(name, ps->config, ps->acmod, alignment); + ps_alignment_free(alignment); + ckd_free(textbuf); + return set_search_internal(ps, search); +} + +int +ps_set_kws(ps_decoder_t *ps, const char *name, const char *keyfile) +{ + ps_search_t *search; + search = kws_search_init(name, NULL, keyfile, ps->config, ps->acmod, ps->dict, ps->d2p); + return set_search_internal(ps, search); +} + +int +ps_set_keyphrase(ps_decoder_t *ps, const char *name, const char *keyphrase) +{ + ps_search_t *search; + search = kws_search_init(name, keyphrase, NULL, ps->config, ps->acmod, ps->dict, ps->d2p); + return set_search_internal(ps, search); +} + +int +ps_set_fsg(ps_decoder_t *ps, const char *name, fsg_model_t *fsg) +{ + ps_search_t *search; + search = fsg_search_init(name, fsg, ps->config, ps->acmod, ps->dict, ps->d2p); + return set_search_internal(ps, search); +} + +int +ps_set_jsgf_file(ps_decoder_t *ps, const char *name, const char *path) +{ + fsg_model_t *fsg; + jsgf_rule_t *rule; + char const *toprule; + jsgf_t *jsgf = jsgf_parse_file(path, NULL); + float lw; + int result; + + if (!jsgf) + return -1; + + rule = NULL; + /* Take the -toprule if specified. 
*/ + if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) { + rule = jsgf_get_rule(jsgf, toprule); + if (rule == NULL) { + E_ERROR("Start rule %s not found\n", toprule); + jsgf_grammar_free(jsgf); + return -1; + } + } else { + rule = jsgf_get_public_rule(jsgf); + if (rule == NULL) { + E_ERROR("No public rules found in %s\n", path); + jsgf_grammar_free(jsgf); + return -1; + } + } + + lw = cmd_ln_float32_r(ps->config, "-lw"); + fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw); + result = ps_set_fsg(ps, name, fsg); + fsg_model_free(fsg); + jsgf_grammar_free(jsgf); + return result; +} + +int +ps_set_jsgf_string(ps_decoder_t *ps, const char *name, const char *jsgf_string) +{ + fsg_model_t *fsg; + jsgf_rule_t *rule; + char const *toprule; + jsgf_t *jsgf = jsgf_parse_string(jsgf_string, NULL); + float lw; + int result; + + if (!jsgf) + return -1; + + rule = NULL; + /* Take the -toprule if specified. */ + if ((toprule = cmd_ln_str_r(ps->config, "-toprule"))) { + rule = jsgf_get_rule(jsgf, toprule); + if (rule == NULL) { + E_ERROR("Start rule %s not found\n", toprule); + jsgf_grammar_free(jsgf); + return -1; + } + } else { + rule = jsgf_get_public_rule(jsgf); + if (rule == NULL) { + E_ERROR("No public rules found in input string\n"); + jsgf_grammar_free(jsgf); + return -1; + } + } + + lw = cmd_ln_float32_r(ps->config, "-lw"); + fsg = jsgf_build_fsg(jsgf, rule, ps->lmath, lw); + result = ps_set_fsg(ps, name, fsg); + fsg_model_free(fsg); + jsgf_grammar_free(jsgf); + return result; +} + + +int +ps_load_dict(ps_decoder_t *ps, char const *dictfile, + char const *fdictfile, char const *format) +{ + dict2pid_t *d2p; + dict_t *dict; + hash_iter_t *search_it; + cmd_ln_t *newconfig; + + (void)format; + /* Create a new scratch config to load this dict (so existing one + * won't be affected if it fails) */ + newconfig = cmd_ln_init(NULL, ps_args(), TRUE, NULL); + cmd_ln_set_boolean_r(newconfig, "-dictcase", + cmd_ln_boolean_r(ps->config, "-dictcase")); + cmd_ln_set_str_r(newconfig, 
"-dict", dictfile); + if (fdictfile) + cmd_ln_set_str_extra_r(newconfig, "_fdict", fdictfile); + else + cmd_ln_set_str_extra_r(newconfig, "_fdict", + cmd_ln_str_r(ps->config, "_fdict")); + + /* Try to load it. */ + if ((dict = dict_init(newconfig, ps->acmod->mdef)) == NULL) { + cmd_ln_free_r(newconfig); + return -1; + } + + /* Reinit the dict2pid. */ + if ((d2p = dict2pid_build(ps->acmod->mdef, dict)) == NULL) { + cmd_ln_free_r(newconfig); + return -1; + } + + /* Success! Update the existing config to reflect new dicts and + * drop everything into place. */ + cmd_ln_free_r(newconfig); + dict_free(ps->dict); + ps->dict = dict; + dict2pid_free(ps->d2p); + ps->d2p = d2p; + + /* And tell all searches to reconfigure themselves. */ + for (search_it = hash_table_iter(ps->searches); search_it; + search_it = hash_table_iter_next(search_it)) { + if (ps_search_reinit(hash_entry_val(search_it->ent), dict, d2p) < 0) { + hash_table_iter_free(search_it); + return -1; + } + } + + return 0; +} + +int +ps_save_dict(ps_decoder_t *ps, char const *dictfile, + char const *format) +{ + return dict_write(ps->dict, dictfile, format); +} + +int +ps_add_word(ps_decoder_t *ps, + char const *word, + char const *phones, + int update) +{ + int32 wid; + s3cipid_t *pron; + hash_iter_t *search_it; + char **phonestr, *tmp; + int np, i, rv; + + /* Parse phones into an array of phone IDs. */ + tmp = ckd_salloc(phones); + np = str2words(tmp, NULL, 0); + phonestr = ckd_calloc(np, sizeof(*phonestr)); + str2words(tmp, phonestr, np); + pron = ckd_calloc(np, sizeof(*pron)); + for (i = 0; i < np; ++i) { + pron[i] = bin_mdef_ciphone_id(ps->acmod->mdef, phonestr[i]); + if (pron[i] == -1) { + E_ERROR("Unknown phone %s in phone string %s\n", + phonestr[i], tmp); + ckd_free(phonestr); + ckd_free(tmp); + ckd_free(pron); + return -1; + } + } + /* No longer needed. */ + ckd_free(phonestr); + ckd_free(tmp); + + /* Add it to the dictionary. 
*/ + if ((wid = dict_add_word(ps->dict, word, pron, np)) == -1) { + ckd_free(pron); + return -1; + } + /* No longer needed. */ + ckd_free(pron); + + /* Now we also have to add it to dict2pid. */ + dict2pid_add_word(ps->d2p, wid); + + /* TODO: we definitely need to refactor this */ + for (search_it = hash_table_iter(ps->searches); search_it; + search_it = hash_table_iter_next(search_it)) { + ps_search_t *search = hash_entry_val(search_it->ent); + if (!strcmp(PS_SEARCH_TYPE_NGRAM, ps_search_type(search))) { + ngram_model_t *lmset = ((ngram_search_t *) search)->lmset; + if (ngram_model_add_word(lmset, word, 1.0) == NGRAM_INVALID_WID) { + hash_table_iter_free(search_it); + return -1; + } + } + + if (update) { + if ((rv = ps_search_reinit(search, ps->dict, ps->d2p)) < 0) { + hash_table_iter_free(search_it); + return rv; + } + } + } + + /* Rebuild the widmap and search tree if requested. */ + return wid; +} + +char * +ps_lookup_word(ps_decoder_t *ps, const char *word) +{ + s3wid_t wid; + int32 phlen, j; + char *phones; + dict_t *dict = ps->dict; + + wid = dict_wordid(dict, word); + if (wid == BAD_S3WID) + return NULL; + + for (phlen = j = 0; j < dict_pronlen(dict, wid); ++j) + phlen += strlen(dict_ciphone_str(dict, wid, j)) + 1; + phones = ckd_calloc(1, phlen); + for (j = 0; j < dict_pronlen(dict, wid); ++j) { + strcat(phones, dict_ciphone_str(dict, wid, j)); + if (j != dict_pronlen(dict, wid) - 1) + strcat(phones, " "); + } + return phones; +} + +long +ps_decode_raw(ps_decoder_t *ps, FILE *rawfh, + long maxsamps) +{ + int16 *data; + long total, pos, endpos; + + ps_start_utt(ps); + + /* If this file is seekable or maxsamps is specified, then decode + * the whole thing at once. 
*/ + if (maxsamps != -1) { + data = ckd_calloc(maxsamps, sizeof(*data)); + total = fread(data, sizeof(*data), maxsamps, rawfh); + ps_process_raw(ps, data, total, FALSE, TRUE); + ckd_free(data); + } else if ((pos = ftell(rawfh)) >= 0) { + fseek(rawfh, 0, SEEK_END); + endpos = ftell(rawfh); + fseek(rawfh, pos, SEEK_SET); + maxsamps = endpos - pos; + + data = ckd_calloc(maxsamps, sizeof(*data)); + total = fread(data, sizeof(*data), maxsamps, rawfh); + ps_process_raw(ps, data, total, FALSE, TRUE); + ckd_free(data); + } else { + /* Otherwise decode it in a stream. */ + total = 0; + while (!feof(rawfh)) { + int16 data[256]; + size_t nread; + + nread = fread(data, sizeof(*data), sizeof(data)/sizeof(*data), rawfh); + ps_process_raw(ps, data, nread, FALSE, FALSE); + total += nread; + } + } + ps_end_utt(ps); + return total; +} + +int +ps_start_stream(ps_decoder_t *ps) +{ + if (ps->acmod == NULL) + return -1; + if (ps->acmod->fe == NULL) + return -1; + if (ps->acmod->fe->noise_stats == NULL) + return -1; + fe_reset_noisestats(ps->acmod->fe->noise_stats); + return 0; +} + +int +ps_get_in_speech(ps_decoder_t *ps) +{ + return (ps->acmod->state == ACMOD_STARTED || ps->acmod->state == ACMOD_PROCESSING); +} + +int +ps_start_utt(ps_decoder_t *ps) +{ + int rv; + char uttid[16]; + + if (ps->acmod->state == ACMOD_STARTED || ps->acmod->state == ACMOD_PROCESSING) { + E_ERROR("Utterance already started\n"); + return -1; + } + + if (ps->search == NULL) { + E_ERROR("No search module is selected, did you forget to " + "specify a language model or grammar?\n"); + return -1; + } + + ptmr_reset(&ps->perf); + ptmr_start(&ps->perf); + + sprintf(uttid, "%09u", ps->uttno); + ++ps->uttno; + + /* Remove any residual word lattice and hypothesis. 
*/ + ps_lattice_free(ps->search->dag); + ps->search->dag = NULL; + ps->search->last_link = NULL; + ps->search->post = 0; + ckd_free(ps->search->hyp_str); + ps->search->hyp_str = NULL; + if ((rv = acmod_start_utt(ps->acmod)) < 0) + return rv; + + /* Start logging features and audio if requested. */ + if (ps->mfclogdir) { + char *logfn = string_join(ps->mfclogdir, "/", + uttid, ".mfc", NULL); + FILE *mfcfh; + E_INFO("Writing MFCC file: %s\n", logfn); + if ((mfcfh = fopen(logfn, "wb")) == NULL) { + E_ERROR_SYSTEM("Failed to open MFCC file %s", logfn); + ckd_free(logfn); + return -1; + } + ckd_free(logfn); + acmod_set_mfcfh(ps->acmod, mfcfh); + } + if (ps->rawlogdir) { + char *logfn = string_join(ps->rawlogdir, "/", + uttid, ".raw", NULL); + FILE *rawfh; + E_INFO("Writing raw audio file: %s\n", logfn); + if ((rawfh = fopen(logfn, "wb")) == NULL) { + E_ERROR_SYSTEM("Failed to open raw audio file %s", logfn); + ckd_free(logfn); + return -1; + } + ckd_free(logfn); + acmod_set_rawfh(ps->acmod, rawfh); + } + if (ps->senlogdir) { + char *logfn = string_join(ps->senlogdir, "/", + uttid, ".sen", NULL); + FILE *senfh; + E_INFO("Writing senone score file: %s\n", logfn); + if ((senfh = fopen(logfn, "wb")) == NULL) { + E_ERROR_SYSTEM("Failed to open senone score file %s", logfn); + ckd_free(logfn); + return -1; + } + ckd_free(logfn); + acmod_set_senfh(ps->acmod, senfh); + } + + /* Start auxiliary phone loop search. 
*/ + if (ps->phone_loop) + ps_search_start(ps->phone_loop); + + return ps_search_start(ps->search); +} + +static int +ps_search_forward(ps_decoder_t *ps) +{ + int nfr; + + if (ps->search == NULL) { + E_ERROR("No search module is selected, did you forget to " + "specify a language model or grammar?\n"); + return -1; + } + nfr = 0; + while (ps->acmod->n_feat_frame > 0) { + int k; + if (ps->pl_window > 0) + if ((k = ps_search_step(ps->phone_loop, ps->acmod->output_frame)) < 0) + return k; + if (ps->acmod->output_frame >= ps->pl_window) + if ((k = ps_search_step(ps->search, + ps->acmod->output_frame - ps->pl_window)) < 0) + return k; + acmod_advance(ps->acmod); + ++ps->n_frame; + ++nfr; + } + return nfr; +} + +int +ps_decode_senscr(ps_decoder_t *ps, FILE *senfh) +{ + int nfr, n_searchfr; + + ps_start_utt(ps); + n_searchfr = 0; + acmod_set_insenfh(ps->acmod, senfh); + while ((nfr = acmod_read_scores(ps->acmod)) > 0) { + if ((nfr = ps_search_forward(ps)) < 0) { + ps_end_utt(ps); + return nfr; + } + n_searchfr += nfr; + } + ps_end_utt(ps); + acmod_set_insenfh(ps->acmod, NULL); + + return n_searchfr; +} + +int +ps_process_raw(ps_decoder_t *ps, + int16 const *data, + size_t n_samples, + int no_search, + int full_utt) +{ + int n_searchfr = 0; + + if (ps->acmod->state == ACMOD_IDLE) { + E_ERROR("Failed to process data, utterance is not started. Use start_utt to start it\n"); + return 0; + } + + if (no_search) + acmod_set_grow(ps->acmod, TRUE); + + while (n_samples) { + int nfr; + + /* Process some data into features. 
*/ + if ((nfr = acmod_process_raw(ps->acmod, &data, + &n_samples, full_utt)) < 0) + return nfr; + + /* Score and search as much data as possible */ + if (no_search) + continue; + if ((nfr = ps_search_forward(ps)) < 0) + return nfr; + n_searchfr += nfr; + } + + return n_searchfr; +} + +int +ps_process_cep(ps_decoder_t *ps, + mfcc_t **data, + int32 n_frames, + int no_search, + int full_utt) +{ + int n_searchfr = 0; + + if (no_search) + acmod_set_grow(ps->acmod, TRUE); + + while (n_frames) { + int nfr; + + /* Process some data into features. */ + if ((nfr = acmod_process_cep(ps->acmod, &data, + &n_frames, full_utt)) < 0) + return nfr; + + /* Score and search as much data as possible */ + if (no_search) + continue; + if ((nfr = ps_search_forward(ps)) < 0) + return nfr; + n_searchfr += nfr; + } + + return n_searchfr; +} + +int +ps_end_utt(ps_decoder_t *ps) +{ + int rv, i; + + if (ps->search == NULL) { + E_ERROR("No search module is selected, did you forget to " + "specify a language model or grammar?\n"); + return -1; + } + if (ps->acmod->state == ACMOD_ENDED || ps->acmod->state == ACMOD_IDLE) { + E_ERROR("Utterance is not started\n"); + return -1; + } + acmod_end_utt(ps->acmod); + + /* Search any remaining frames. */ + if ((rv = ps_search_forward(ps)) < 0) { + ptmr_stop(&ps->perf); + return rv; + } + /* Finish phone loop search. */ + if (ps->phone_loop) { + if ((rv = ps_search_finish(ps->phone_loop)) < 0) { + ptmr_stop(&ps->perf); + return rv; + } + } + /* Search any frames remaining in the lookahead window. */ + if (ps->acmod->output_frame >= ps->pl_window) { + for (i = ps->acmod->output_frame - ps->pl_window; + i < ps->acmod->output_frame; ++i) + ps_search_step(ps->search, i); + } + /* Finish main search. */ + if ((rv = ps_search_finish(ps->search)) < 0) { + ptmr_stop(&ps->perf); + return rv; + } + ptmr_stop(&ps->perf); + + /* Log a backtrace if requested. 
*/ + if (cmd_ln_boolean_r(ps->config, "-backtrace")) { + const char* hyp; + ps_seg_t *seg; + int32 score; + + hyp = ps_get_hyp(ps, &score); + + if (hyp != NULL) { + E_INFO("%s (%d)\n", hyp, score); + E_INFO_NOFN("%-20s %-5s %-5s %-5s %-10s %-10s %-3s\n", + "word", "start", "end", "pprob", "ascr", "lscr", "lback"); + for (seg = ps_seg_iter(ps); seg; + seg = ps_seg_next(seg)) { + char const *word; + int sf, ef; + int32 post, lscr, ascr, lback; + + word = ps_seg_word(seg); + ps_seg_frames(seg, &sf, &ef); + post = ps_seg_prob(seg, &ascr, &lscr, &lback); + E_INFO_NOFN("%-20s %-5d %-5d %-1.3f %-10d %-10d %-3d\n", + word, sf, ef, logmath_exp(ps_get_logmath(ps), post), + ascr, lscr, lback); + } + } + } + return rv; +} + +char const * +ps_get_hyp(ps_decoder_t *ps, int32 *out_best_score) +{ + char const *hyp; + + if (ps->search == NULL) { + E_ERROR("No search module is selected, did you forget to " + "specify a language model or grammar?\n"); + return NULL; + } + ptmr_start(&ps->perf); + hyp = ps_search_hyp(ps->search, out_best_score); + ptmr_stop(&ps->perf); + return hyp; +} + +int32 +ps_get_prob(ps_decoder_t *ps) +{ + int32 prob; + + if (ps->search == NULL) { + E_ERROR("No search module is selected, did you forget to " + "specify a language model or grammar?\n"); + return -1; + } + ptmr_start(&ps->perf); + prob = ps_search_prob(ps->search); + ptmr_stop(&ps->perf); + return prob; +} + +ps_seg_t * +ps_seg_iter(ps_decoder_t *ps) +{ + ps_seg_t *itor; + + if (ps->search == NULL) { + E_ERROR("No search module is selected, did you forget to " + "specify a language model or grammar?\n"); + return NULL; + } + ptmr_start(&ps->perf); + itor = ps_search_seg_iter(ps->search); + ptmr_stop(&ps->perf); + return itor; +} + +ps_seg_t * +ps_seg_next(ps_seg_t *seg) +{ + return ps_search_seg_next(seg); +} + +char const * +ps_seg_word(ps_seg_t *seg) +{ + return seg->word; +} + +void +ps_seg_frames(ps_seg_t *seg, int *out_sf, int *out_ef) +{ + if (out_sf) *out_sf = seg->sf; + if (out_ef) *out_ef 
= seg->ef; +} + +int32 +ps_seg_prob(ps_seg_t *seg, int32 *out_ascr, int32 *out_lscr, int32 *out_lback) +{ + if (out_ascr) *out_ascr = seg->ascr; + if (out_lscr) *out_lscr = seg->lscr; + if (out_lback) *out_lback = seg->lback; + return seg->prob; +} + +void +ps_seg_free(ps_seg_t *seg) +{ + ps_search_seg_free(seg); +} + +ps_lattice_t * +ps_get_lattice(ps_decoder_t *ps) +{ + if (ps->search == NULL) { + E_ERROR("No search module is selected, did you forget to " + "specify a language model or grammar?\n"); + return NULL; + } + return ps_search_lattice(ps->search); +} + +ps_nbest_t * +ps_nbest(ps_decoder_t *ps) +{ + ps_lattice_t *dag; + ngram_model_t *lmset; + ps_astar_t *nbest; + float32 lwf; + + if (ps->search == NULL) { + E_ERROR("No search module is selected, did you forget to " + "specify a language model or grammar?\n"); + return NULL; + } + if ((dag = ps_get_lattice(ps)) == NULL) + return NULL; + + /* FIXME: This is all quite specific to N-Gram search. Either we + * should make N-best a method for each search module or it needs + * to be abstracted to work for N-Gram and FSG. 
*/ + if (0 != strcmp(ps_search_type(ps->search), PS_SEARCH_TYPE_NGRAM)) { + lmset = NULL; + lwf = 1.0f; + } else { + lmset = ((ngram_search_t *)ps->search)->lmset; + lwf = ((ngram_search_t *)ps->search)->bestpath_fwdtree_lw_ratio; + } + + nbest = ps_astar_start(dag, lmset, lwf, 0, -1, -1, -1); + + nbest = ps_nbest_next(nbest); + + return (ps_nbest_t *)nbest; +} + +void +ps_nbest_free(ps_nbest_t *nbest) +{ + ps_astar_finish(nbest); +} + +ps_nbest_t * +ps_nbest_next(ps_nbest_t *nbest) +{ + ps_latpath_t *next; + + next = ps_astar_next(nbest); + if (next == NULL) { + ps_nbest_free(nbest); + return NULL; + } + return nbest; +} + +char const * +ps_nbest_hyp(ps_nbest_t *nbest, int32 *out_score) +{ + assert(nbest != NULL); + + if (nbest->top == NULL) + return NULL; + if (out_score) *out_score = nbest->top->score; + return ps_astar_hyp(nbest, nbest->top); +} + +ps_seg_t * +ps_nbest_seg(ps_nbest_t *nbest) +{ + if (nbest->top == NULL) + return NULL; + + return ps_astar_seg_iter(nbest, nbest->top, 1.0); +} + +int +ps_get_n_frames(ps_decoder_t *ps) +{ + return ps->acmod->output_frame + 1; +} + +void +ps_get_utt_time(ps_decoder_t *ps, double *out_nspeech, + double *out_ncpu, double *out_nwall) +{ + int32 frate; + + frate = cmd_ln_int32_r(ps->config, "-frate"); + *out_nspeech = (double)ps->acmod->output_frame / frate; + *out_ncpu = ps->perf.t_cpu; + *out_nwall = ps->perf.t_elapsed; +} + +void +ps_get_all_time(ps_decoder_t *ps, double *out_nspeech, + double *out_ncpu, double *out_nwall) +{ + int32 frate; + + frate = cmd_ln_int32_r(ps->config, "-frate"); + *out_nspeech = (double)ps->n_frame / frate; + *out_ncpu = ps->perf.t_tot_cpu; + *out_nwall = ps->perf.t_tot_elapsed; +} + +void +ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, + const char *type, + const char *name, + cmd_ln_t *config, acmod_t *acmod, dict_t *dict, + dict2pid_t *d2p) +{ + search->vt = vt; + search->name = ckd_salloc(name); + search->type = ckd_salloc(type); + + search->config = config; + search->acmod 
= acmod; + if (d2p) + search->d2p = dict2pid_retain(d2p); + else + search->d2p = NULL; + if (dict) { + search->dict = dict_retain(dict); + search->start_wid = dict_startwid(dict); + search->finish_wid = dict_finishwid(dict); + search->silence_wid = dict_silwid(dict); + search->n_words = dict_size(dict); + } + else { + search->dict = NULL; + search->start_wid = search->finish_wid = search->silence_wid = -1; + search->n_words = 0; + } +} + +void +ps_search_base_free(ps_search_t *search) +{ + /* FIXME: We will have refcounting on acmod, config, etc, at which + * point we will free them here too. */ + ckd_free(search->name); + ckd_free(search->type); + dict_free(search->dict); + dict2pid_free(search->d2p); + ckd_free(search->hyp_str); + ps_lattice_free(search->dag); +} + +void +ps_search_base_reinit(ps_search_t *search, dict_t *dict, + dict2pid_t *d2p) +{ + dict_free(search->dict); + dict2pid_free(search->d2p); + /* FIXME: _retain() should just return NULL if passed NULL. */ + if (dict) { + search->dict = dict_retain(dict); + search->start_wid = dict_startwid(dict); + search->finish_wid = dict_finishwid(dict); + search->silence_wid = dict_silwid(dict); + search->n_words = dict_size(dict); + } + else { + search->dict = NULL; + search->start_wid = search->finish_wid = search->silence_wid = -1; + search->n_words = 0; + } + if (d2p) + search->d2p = dict2pid_retain(d2p); + else + search->d2p = NULL; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/pocketsphinx_internal.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/pocketsphinx_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..7fc0e007fb1da88d0ca5d65399d12114adb1ca65 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/pocketsphinx_internal.h @@ -0,0 +1,245 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* 
==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file pocketsphinx_internal.h Internal implementation of + * PocketSphinx decoder. 
+ * @author David Huggins-Daines + */ + +#ifndef __POCKETSPHINX_INTERNAL_H__ +#define __POCKETSPHINX_INTERNAL_H__ + +/* SphinxBase headers. */ +#include +#include +#include +#include +#include +#include + +/* Local headers. */ +#include "pocketsphinx.h" +#include "acmod.h" +#include "dict.h" +#include "dict2pid.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * Search algorithm structure. + */ +typedef struct ps_search_s ps_search_t; + + +/* Search names*/ +#define PS_DEFAULT_SEARCH "_default" +#define PS_DEFAULT_PL_SEARCH "_default_pl" + +/* Search types */ +#define PS_SEARCH_TYPE_KWS "kws" +#define PS_SEARCH_TYPE_FSG "fsg" +#define PS_SEARCH_TYPE_NGRAM "ngram" +#define PS_SEARCH_TYPE_ALLPHONE "allphone" +#define PS_SEARCH_TYPE_STATE_ALIGN "state_align" +#define PS_SEARCH_TYPE_PHONE_LOOP "phone_loop" + +/** + * V-table for search algorithm. + */ +typedef struct ps_searchfuncs_s { + int (*start)(ps_search_t *search); + int (*step)(ps_search_t *search, int frame_idx); + int (*finish)(ps_search_t *search); + int (*reinit)(ps_search_t *search, dict_t *dict, dict2pid_t *d2p); + void (*free)(ps_search_t *search); + + ps_lattice_t *(*lattice)(ps_search_t *search); + char const *(*hyp)(ps_search_t *search, int32 *out_score); + int32 (*prob)(ps_search_t *search); + ps_seg_t *(*seg_iter)(ps_search_t *search); +} ps_searchfuncs_t; + +/** + * Base structure for search module. + */ +struct ps_search_s { + ps_searchfuncs_t *vt; /**< V-table of search methods. */ + + char *type; + char *name; + + ps_search_t *pls; /**< Phoneme loop for lookahead. */ + cmd_ln_t *config; /**< Configuration. */ + acmod_t *acmod; /**< Acoustic model. */ + dict_t *dict; /**< Pronunciation dictionary. */ + dict2pid_t *d2p; /**< Dictionary to senone mappings. */ + char *hyp_str; /**< Current hypothesis string. */ + ps_lattice_t *dag; /**< Current hypothesis word graph. */ + ps_latlink_t *last_link; /**< Final link in best path. 
*/ + int32 post; /**< Utterance posterior probability. */ + int32 n_words; /**< Number of words known to search (may + be less than in the dictionary) */ + + /* Magical word IDs that must exist in the dictionary: */ + int32 start_wid; /**< Start word ID. */ + int32 silence_wid; /**< Silence word ID. */ + int32 finish_wid; /**< Finish word ID. */ +}; + +#define ps_search_base(s) ((ps_search_t *)s) +#define ps_search_config(s) ps_search_base(s)->config +#define ps_search_acmod(s) ps_search_base(s)->acmod +#define ps_search_dict(s) ps_search_base(s)->dict +#define ps_search_dict2pid(s) ps_search_base(s)->d2p +#define ps_search_dag(s) ps_search_base(s)->dag +#define ps_search_last_link(s) ps_search_base(s)->last_link +#define ps_search_post(s) ps_search_base(s)->post +#define ps_search_lookahead(s) ps_search_base(s)->pls +#define ps_search_n_words(s) ps_search_base(s)->n_words + +#define ps_search_type(s) ps_search_base(s)->type +#define ps_search_name(s) ps_search_base(s)->name +#define ps_search_start(s) (*(ps_search_base(s)->vt->start))(s) +#define ps_search_step(s,i) (*(ps_search_base(s)->vt->step))(s,i) +#define ps_search_finish(s) (*(ps_search_base(s)->vt->finish))(s) +#define ps_search_reinit(s,d,d2p) (*(ps_search_base(s)->vt->reinit))(s,d,d2p) +#define ps_search_free(s) (*(ps_search_base(s)->vt->free))(s) +#define ps_search_lattice(s) (*(ps_search_base(s)->vt->lattice))(s) +#define ps_search_hyp(s,sc) (*(ps_search_base(s)->vt->hyp))(s,sc) +#define ps_search_prob(s) (*(ps_search_base(s)->vt->prob))(s) +#define ps_search_seg_iter(s) (*(ps_search_base(s)->vt->seg_iter))(s) + +/* For convenience... */ +#define ps_search_silence_wid(s) ps_search_base(s)->silence_wid +#define ps_search_start_wid(s) ps_search_base(s)->start_wid +#define ps_search_finish_wid(s) ps_search_base(s)->finish_wid + +/** + * Initialize base structure. 
+ */ +void ps_search_init(ps_search_t *search, ps_searchfuncs_t *vt, + const char *type, const char *name, + cmd_ln_t *config, acmod_t *acmod, dict_t *dict, + dict2pid_t *d2p); + + +/** + * Free search + */ +void ps_search_base_free(ps_search_t *search); + +/** + * Re-initialize base structure with new dictionary. + */ +void ps_search_base_reinit(ps_search_t *search, dict_t *dict, + dict2pid_t *d2p); + +/** + * V-table for hypothesis segmentation iterators. + */ +typedef struct ps_segfuncs_s { + ps_seg_t *(*seg_next)(ps_seg_t *seg); + void (*seg_free)(ps_seg_t *seg); +} ps_segfuncs_t; + +/** + * Base structure for hypothesis segmentation iterator. + */ +struct ps_seg_s { + ps_segfuncs_t *vt; /**< V-table of seg methods */ + ps_search_t *search; /**< Search object from whence this came */ + char const *word; /**< Word string (pointer into dictionary hash) */ + frame_idx_t sf; /**< Start frame. */ + frame_idx_t ef; /**< End frame. */ + int32 ascr; /**< Acoustic score. */ + int32 lscr; /**< Language model score. */ + int32 prob; /**< Log posterior probability. */ + /* This doesn't need to be 32 bits, so once the scores above are + * reduced to 16 bits (or less!), this will be too. */ + int32 lback; /**< Language model backoff. */ + /* Not sure if this should be here at all. */ + float32 lwf; /**< Language weight factor (for second-pass searches) */ +}; + +/* Dispatch through the segment iterator's v-table. The argument is + * expanded twice, so it must not have side effects. The macro parameter + * is named and parenthesized consistently so that these work for any + * argument expression, not only for a caller variable called "seg". */ +#define ps_search_seg_next(seg) (*((seg)->vt->seg_next))(seg) +#define ps_search_seg_free(seg) (*((seg)->vt->seg_free))(seg) + + +/** + * Decoder object. + */ +struct ps_decoder_s { + /* Model parameters and such. */ + cmd_ln_t *config; /**< Configuration. */ + int refcount; /**< Reference count. */ + + /* Basic units of computation. */ + acmod_t *acmod; /**< Acoustic model. */ + dict_t *dict; /**< Pronunciation dictionary. */ + dict2pid_t *d2p; /**< Dictionary to senone mapping. */ + logmath_t *lmath; /**< Log math computation. */ + + /* Search modules. */ + hash_table_t *searches; /**< Set of search modules.
*/ + /* TODO: Convert this to a stack of searches each with their own + * lookahead value. */ + ps_search_t *search; /**< Currently active search module. */ + ps_search_t *phone_loop; /**< Phone loop search for lookahead. */ + int pl_window; /**< Window size for phoneme lookahead. */ + + /* Utterance-processing related stuff. */ + uint32 uttno; /**< Utterance counter. */ + ptmr_t perf; /**< Performance counter for all of decoding. */ + uint32 n_frame; /**< Total number of frames processed. */ + char const *mfclogdir; /**< Log directory for MFCC files. */ + char const *rawlogdir; /**< Log directory for audio files. */ + char const *senlogdir; /**< Log directory for senone score files. */ +}; + + +struct ps_search_iter_s { + hash_iter_t itor; +}; + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __POCKETSPHINX_INTERNAL_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ps_alignment.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ps_alignment.c new file mode 100644 index 0000000000000000000000000000000000000000..511155264b94c6fa1736023643fdbb5780616b8d --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ps_alignment.c @@ -0,0 +1,508 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2010 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file ps_alignment.c Multi-level alignment structure + */ + +/* System headers. */ + +/* SphinxBase headers. */ +#include + +/* Local headers. 
*/ +#include "ps_alignment.h" + +/* + * Allocate a zero-initialized alignment that retains d2p and starts with + * a reference count of 1. + */ +ps_alignment_t * +ps_alignment_init(dict2pid_t *d2p) +{ + ps_alignment_t *al = ckd_calloc(1, sizeof(*al)); + al->d2p = dict2pid_retain(d2p); + al->refcount = 1; + return al; +} + +/* + * Add a reference to an alignment and return it. + */ +ps_alignment_t * +ps_alignment_retain(ps_alignment_t *al) +{ + /* Tolerate NULL, as ps_alignment_free() does. */ + if (al == NULL) + return NULL; + ++al->refcount; + return al; +} + +/* + * Drop a reference. Returns the remaining reference count while the + * alignment is still referenced; otherwise frees the dict2pid reference, + * the three entry vectors and the alignment itself and returns 0. + * A NULL alignment is accepted and yields 0. + */ +int +ps_alignment_free(ps_alignment_t *al) +{ + if (al == NULL) + return 0; + if (--al->refcount > 0) + return al->refcount; + dict2pid_free(al->d2p); + ckd_free(al->word.seq); + ckd_free(al->sseq.seq); + ckd_free(al->state.seq); + ckd_free(al); + return 0; +} + +/* Number of spare slots added each time a vector is reallocated. */ +#define VECTOR_GROW 10 +/* + * Make room for one more item in a vector of item_size-byte elements. + * Increments *n and returns the (possibly reallocated) storage; *n_alloc + * is updated when a reallocation happens. Returns NULL, leaving *n and + * *n_alloc untouched, if the new capacity would exceed 0xffff (counts + * are stored as uint16). + */ +static void * +vector_grow_one(void *ptr, uint16 *n_alloc, uint16 *n, size_t item_size) +{ + int newsize = *n + 1; + if (newsize < *n_alloc) { + *n += 1; + return ptr; + } + newsize += VECTOR_GROW; + if (newsize > 0xffff) + return NULL; + ptr = ckd_realloc(ptr, newsize * item_size); + *n += 1; + *n_alloc = newsize; + return ptr; +} + +/* + * Append one entry slot to an alignment vector and return a pointer to + * it, or NULL if the vector cannot grow. The slot is not initialized. + */ +static ps_alignment_entry_t * +ps_alignment_vector_grow_one(ps_alignment_vector_t *vec) +{ + void *ptr; + ptr = vector_grow_one(vec->seq, &vec->n_alloc, + &vec->n_ent, sizeof(*vec->seq)); + if (ptr == NULL) + return NULL; + vec->seq = ptr; + return vec->seq + vec->n_ent - 1; +} + +/* + * Forget all entries of a vector while keeping its storage. + */ +static void +ps_alignment_vector_empty(ps_alignment_vector_t *vec) +{ + vec->n_ent = 0; +} + +/* + * Append a word entry. Its start is the end (start + duration) of the + * previous word, or 0 for the first word; it has no parent and no child + * yet. Returns the new number of words, or 0 if the entry could not be + * added. + */ +int +ps_alignment_add_word(ps_alignment_t *al, + int32 wid, int duration) +{ + ps_alignment_entry_t *ent; + + if ((ent = ps_alignment_vector_grow_one(&al->word)) == NULL) + return 0; + ent->id.wid = wid; + if (al->word.n_ent > 1) + ent->start = ent[-1].start + ent[-1].duration; + else + ent->start = 0; + ent->duration = duration; + ent->score = 0; + ent->parent = PS_ALIGNMENT_NONE; + ent->child = PS_ALIGNMENT_NONE; + + return al->word.n_ent; +} + +int +ps_alignment_populate(ps_alignment_t *al) +{ + dict2pid_t *d2p; + dict_t *dict; + bin_mdef_t *mdef; + int i, lc; + + /* Clear phone and state sequences.
*/ + ps_alignment_vector_empty(&al->sseq); + ps_alignment_vector_empty(&al->state); + + /* For each word, expand to phones/senone sequences. */ + d2p = al->d2p; + dict = d2p->dict; + mdef = d2p->mdef; + lc = bin_mdef_silphone(mdef); + for (i = 0; i < al->word.n_ent; ++i) { + ps_alignment_entry_t *went = al->word.seq + i; + ps_alignment_entry_t *sent; + int wid = went->id.wid; + int len = dict_pronlen(dict, wid); + int j, rc; + + if (i < al->word.n_ent - 1) + rc = dict_first_phone(dict, al->word.seq[i+1].id.wid); + else + rc = bin_mdef_silphone(mdef); + + /* First phone. */ + if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) { + E_ERROR("Failed to add phone entry!\n"); + return -1; + } + sent->id.pid.cipid = dict_first_phone(dict, wid); + sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid); + sent->start = went->start; + sent->duration = went->duration; + sent->score = 0; + sent->parent = i; + went->child = (uint16)(sent - al->sseq.seq); + if (len == 1) + sent->id.pid.ssid + = dict2pid_lrdiph_rc(d2p, sent->id.pid.cipid, lc, rc); + else + sent->id.pid.ssid + = dict2pid_ldiph_lc(d2p, sent->id.pid.cipid, + dict_second_phone(dict, wid), lc); + assert(sent->id.pid.ssid != BAD_SSID); + + /* Internal phones. */ + for (j = 1; j < len - 1; ++j) { + if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) { + E_ERROR("Failed to add phone entry!\n"); + return -1; + } + sent->id.pid.cipid = dict_pron(dict, wid, j); + sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid); + sent->id.pid.ssid = dict2pid_internal(d2p, wid, j); + assert(sent->id.pid.ssid != BAD_SSID); + sent->start = went->start; + sent->duration = went->duration; + sent->score = 0; + sent->parent = i; + } + + /* Last phone. 
*/ + if (j < len) { + xwdssid_t *rssid; + assert(j == len - 1); + if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) { + E_ERROR("Failed to add phone entry!\n"); + return -1; + } + sent->id.pid.cipid = dict_last_phone(dict, wid); + sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid); + rssid = dict2pid_rssid(d2p, sent->id.pid.cipid, + dict_second_last_phone(dict, wid)); + sent->id.pid.ssid = rssid->ssid[rssid->cimap[rc]]; + assert(sent->id.pid.ssid != BAD_SSID); + sent->start = went->start; + sent->duration = went->duration; + sent->score = 0; + sent->parent = i; + } + /* Update lc. Could just use sent->id.pid.cipid here but that + * seems needlessly obscure. */ + lc = dict_last_phone(dict, wid); + } + + /* For each senone sequence, expand to senones. (we could do this + * nested above but this makes it more clear and easier to + * refactor) */ + for (i = 0; i < al->sseq.n_ent; ++i) { + ps_alignment_entry_t *pent = al->sseq.seq + i; + ps_alignment_entry_t *sent; + int j; + + for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) { + if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) { + E_ERROR("Failed to add state entry!\n"); + return -1; + } + sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j); + assert(sent->id.senid != BAD_SENID); + sent->start = pent->start; + sent->duration = pent->duration; + sent->score = 0; + sent->parent = i; + if (j == 0) + pent->child = (uint16)(sent - al->state.seq); + } + } + + return 0; +} + +/* FIXME: Somewhat the same as the above function, needs refactoring */ +int +ps_alignment_populate_ci(ps_alignment_t *al) +{ + dict2pid_t *d2p; + dict_t *dict; + bin_mdef_t *mdef; + int i; + + /* Clear phone and state sequences. */ + ps_alignment_vector_empty(&al->sseq); + ps_alignment_vector_empty(&al->state); + + /* For each word, expand to phones/senone sequences. 
*/ + d2p = al->d2p; + dict = d2p->dict; + mdef = d2p->mdef; + for (i = 0; i < al->word.n_ent; ++i) { + ps_alignment_entry_t *went = al->word.seq + i; + ps_alignment_entry_t *sent; + int wid = went->id.wid; + int len = dict_pronlen(dict, wid); + int j; + + for (j = 0; j < len; ++j) { + if ((sent = ps_alignment_vector_grow_one(&al->sseq)) == NULL) { + E_ERROR("Failed to add phone entry!\n"); + return -1; + } + sent->id.pid.cipid = dict_pron(dict, wid, j); + sent->id.pid.tmatid = bin_mdef_pid2tmatid(mdef, sent->id.pid.cipid); + sent->id.pid.ssid = bin_mdef_pid2ssid(mdef, sent->id.pid.cipid); + assert(sent->id.pid.ssid != BAD_SSID); + sent->start = went->start; + sent->duration = went->duration; + sent->score = 0; + sent->parent = i; + /* New slots are uninitialized; the real child index is + * filled in by the state expansion below. */ + sent->child = PS_ALIGNMENT_NONE; + /* Point the word at its first phone, as + * ps_alignment_populate() does, so that + * ps_alignment_iter_down() works from the word level. */ + if (j == 0) + went->child = (uint16)(sent - al->sseq.seq); + } + } + + /* For each senone sequence, expand to senones. (we could do this + * nested above but this makes it more clear and easier to + * refactor) */ + for (i = 0; i < al->sseq.n_ent; ++i) { + ps_alignment_entry_t *pent = al->sseq.seq + i; + ps_alignment_entry_t *sent; + int j; + + for (j = 0; j < bin_mdef_n_emit_state(mdef); ++j) { + if ((sent = ps_alignment_vector_grow_one(&al->state)) == NULL) { + E_ERROR("Failed to add state entry!\n"); + return -1; + } + sent->id.senid = bin_mdef_sseq2sen(mdef, pent->id.pid.ssid, j); + assert(sent->id.senid != BAD_SENID); + sent->start = pent->start; + sent->duration = pent->duration; + sent->score = 0; + sent->parent = i; + /* States are the lowest level and never have children. */ + sent->child = PS_ALIGNMENT_NONE; + if (j == 0) + pent->child = (uint16)(sent - al->state.seq); + } + } + + return 0; +} + +int +ps_alignment_propagate(ps_alignment_t *al) +{ + ps_alignment_entry_t *last_ent = NULL; + int i; + + /* Propagate duration up from states to phones.
*/ + for (i = 0; i < al->state.n_ent; ++i) { + ps_alignment_entry_t *sent = al->state.seq + i; + ps_alignment_entry_t *pent = al->sseq.seq + sent->parent; + /* First state of a new phone: restart the phone's totals. */ + if (pent != last_ent) { + pent->start = sent->start; + pent->duration = 0; + pent->score = 0; + } + pent->duration += sent->duration; + pent->score += sent->score; + last_ent = pent; + } + + /* Propagate duration up from phones to words. */ + last_ent = NULL; + for (i = 0; i < al->sseq.n_ent; ++i) { + ps_alignment_entry_t *pent = al->sseq.seq + i; + ps_alignment_entry_t *went = al->word.seq + pent->parent; + /* First phone of a new word: restart the word's totals. */ + if (went != last_ent) { + went->start = pent->start; + went->duration = 0; + went->score = 0; + } + went->duration += pent->duration; + went->score += pent->score; + last_ent = went; + } + + return 0; +} + +/* Number of entries at the word level. */ +int +ps_alignment_n_words(ps_alignment_t *al) +{ + return (int)al->word.n_ent; +} + +/* Number of entries at the phone (senone sequence) level. */ +int +ps_alignment_n_phones(ps_alignment_t *al) +{ + return (int)al->sseq.n_ent; +} + +/* Number of entries at the state level. */ +int +ps_alignment_n_states(ps_alignment_t *al) +{ + return (int)al->state.n_ent; +} + +/* + * Allocate an iterator positioned at the first word, or return NULL if + * there are no words. + */ +ps_alignment_iter_t * +ps_alignment_words(ps_alignment_t *al) +{ + ps_alignment_iter_t *itor; + + if (al->word.n_ent == 0) + return NULL; + itor = ckd_calloc(1, sizeof(*itor)); + itor->al = al; + itor->vec = &al->word; + itor->pos = 0; + return itor; +} + +/* + * Allocate an iterator positioned at the first phone, or return NULL if + * there are no phones. + */ +ps_alignment_iter_t * +ps_alignment_phones(ps_alignment_t *al) +{ + ps_alignment_iter_t *itor; + + if (al->sseq.n_ent == 0) + return NULL; + itor = ckd_calloc(1, sizeof(*itor)); + itor->al = al; + itor->vec = &al->sseq; + itor->pos = 0; + return itor; +} + +/* + * Allocate an iterator positioned at the first state, or return NULL if + * there are no states. + */ +ps_alignment_iter_t * +ps_alignment_states(ps_alignment_t *al) +{ + ps_alignment_iter_t *itor; + + if (al->state.n_ent == 0) + return NULL; + itor = ckd_calloc(1, sizeof(*itor)); + itor->al = al; + itor->vec = &al->state; + itor->pos = 0; + return itor; +} + +/* + * Return a pointer to the entry the iterator currently points at. The + * pointer refers into the alignment's own vector; no bounds check is + * done here. + */ +ps_alignment_entry_t * +ps_alignment_iter_get(ps_alignment_iter_t *itor) +{ + return itor->vec->seq + itor->pos; +} + +/* Free an iterator (the alignment itself is untouched). Returns 0. */ +int +ps_alignment_iter_free(ps_alignment_iter_t *itor) +{ +
ckd_free(itor); + return 0; +} + +/* + * Move an iterator to an absolute position. If pos is outside the + * vector the iterator is freed and NULL is returned; a NULL iterator + * is passed through as NULL. + */ +ps_alignment_iter_t * +ps_alignment_iter_goto(ps_alignment_iter_t *itor, int pos) +{ + if (itor == NULL) + return NULL; + /* Reject negative positions as well as positions past the end, so + * that ps_alignment_iter_get() never indexes outside the vector + * (ps_alignment_iter_prev() already treats pos < 0 as the end). */ + if (pos < 0 || pos >= itor->vec->n_ent) { + ps_alignment_iter_free(itor); + return NULL; + } + itor->pos = pos; + return itor; +} + +/* + * Advance to the next entry; frees the iterator and returns NULL when + * the end of the vector is passed. + */ +ps_alignment_iter_t * +ps_alignment_iter_next(ps_alignment_iter_t *itor) +{ + if (itor == NULL) + return NULL; + if (++itor->pos >= itor->vec->n_ent) { + ps_alignment_iter_free(itor); + return NULL; + } + return itor; +} + +/* + * Step back to the previous entry; frees the iterator and returns NULL + * when the start of the vector is passed. + */ +ps_alignment_iter_t * +ps_alignment_iter_prev(ps_alignment_iter_t *itor) +{ + if (itor == NULL) + return NULL; + if (--itor->pos < 0) { + ps_alignment_iter_free(itor); + return NULL; + } + return itor; +} + +/* + * Allocate a new iterator on the parent of the current entry, one level + * up (state -> phone -> word). Returns NULL for a NULL iterator, at the + * word level, or when the entry has no parent. The given iterator is + * left untouched. + */ +ps_alignment_iter_t * +ps_alignment_iter_up(ps_alignment_iter_t *itor) +{ + ps_alignment_iter_t *itor2; + if (itor == NULL) + return NULL; + if (itor->vec == &itor->al->word) + return NULL; + if (itor->vec->seq[itor->pos].parent == PS_ALIGNMENT_NONE) + return NULL; + itor2 = ckd_calloc(1, sizeof(*itor2)); + itor2->al = itor->al; + itor2->pos = itor->vec->seq[itor->pos].parent; + if (itor->vec == &itor->al->sseq) + itor2->vec = &itor->al->word; + else + itor2->vec = &itor->al->sseq; + return itor2; +} + +/* + * Allocate a new iterator on the first child of the current entry, one + * level down (word -> phone -> state). Returns NULL for a NULL + * iterator, at the state level, or when the entry has no child. The + * given iterator is left untouched. + */ +ps_alignment_iter_t * +ps_alignment_iter_down(ps_alignment_iter_t *itor) +{ + ps_alignment_iter_t *itor2; + if (itor == NULL) + return NULL; + if (itor->vec == &itor->al->state) + return NULL; + if (itor->vec->seq[itor->pos].child == PS_ALIGNMENT_NONE) + return NULL; + itor2 = ckd_calloc(1, sizeof(*itor2)); + itor2->al = itor->al; + itor2->pos = itor->vec->seq[itor->pos].child; + if (itor->vec == &itor->al->word) + itor2->vec = &itor->al->sseq; + else + itor2->vec = &itor->al->state; + return itor2; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ps_alignment.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ps_alignment.h new file mode 100644 index
0000000000000000000000000000000000000000..8b471efa794e8e4f1c06c7d55e93770244d0d9a1 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ps_alignment.h @@ -0,0 +1,240 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2010 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ + +/** + * @file ps_alignment.h Multi-level alignment structure + */ + +#ifndef __PS_ALIGNMENT_H__ +#define __PS_ALIGNMENT_H__ + +/* System headers. */ + +/* SphinxBase headers. */ +#include +#include + +/* Local headers. */ +#include "dict2pid.h" +#include "hmm.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * Value stored in an entry's parent or child field when it has no such link. + */ +#define PS_ALIGNMENT_NONE ((uint16)0xffff) + +/** + * One entry (a word, a phone or a state) at one level of an alignment. + */ +struct ps_alignment_entry_s { + union { + int32 wid; /**< Word ID (word-level entries). */ + struct { + uint16 ssid; /**< Senone sequence ID. */ + uint16 cipid; /**< Phone ID taken from the dictionary pronunciation. */ + uint16 tmatid; /**< Presumably the transition matrix ID - TODO confirm. */ + } pid; /**< Identifiers for phone-level entries. */ + uint16 senid; /**< Senone ID (state-level entries). */ + } id; + int16 start; /**< Start position (presumably in frames - TODO confirm). */ + int16 duration; /**< Duration, in the same units as start. */ + int32 score; /**< Score of this entry. */ + uint16 parent; /**< Index of the parent entry one level up, or PS_ALIGNMENT_NONE. */ + uint16 child; /**< Index of the first child entry one level down, or PS_ALIGNMENT_NONE. */ +}; +typedef struct ps_alignment_entry_s ps_alignment_entry_t; + +/** + * Growable array of alignment entries. + */ +struct ps_alignment_vector_s { + ps_alignment_entry_t *seq; /**< Entry storage. */ + uint16 n_ent, n_alloc; /**< Entries in use / entries allocated. */ +}; +typedef struct ps_alignment_vector_s ps_alignment_vector_t; + +/** + * Alignment at three levels: words, phones (senone sequences) and states. + */ +struct ps_alignment_s { + int refcount; /**< Reference count. */ + dict2pid_t *d2p; /**< Retained dict2pid mapping used to expand words. */ + ps_alignment_vector_t word; /**< Word-level entries. */ + ps_alignment_vector_t sseq; /**< Phone-level (senone sequence) entries. */ + ps_alignment_vector_t state; /**< State-level entries. */ +}; +typedef struct ps_alignment_s ps_alignment_t; + +/** + * Iterator over one level of an alignment. + */ +struct ps_alignment_iter_s { + ps_alignment_t *al; /**< Alignment being iterated over. */ + ps_alignment_vector_t *vec; /**< Level (vector inside al) being iterated over. */ + int pos; /**< Current index into vec. */ +}; +typedef struct ps_alignment_iter_s ps_alignment_iter_t; + +/** + * Create a new, empty alignment. + */ +POCKETSPHINX_EXPORT +ps_alignment_t *ps_alignment_init(dict2pid_t *d2p); + +/** + * Retain an alignment + */ +POCKETSPHINX_EXPORT +ps_alignment_t *ps_alignment_retain(ps_alignment_t *al); + +/** + * Release an alignment + */ +POCKETSPHINX_EXPORT +int ps_alignment_free(ps_alignment_t *al); + +/** + * Append a word. + */ +POCKETSPHINX_EXPORT +int ps_alignment_add_word(ps_alignment_t *al, + int32 wid, int duration); + +/** + * Populate lower layers using available word information. + */ +POCKETSPHINX_EXPORT +int ps_alignment_populate(ps_alignment_t *al); + +/** + * Populate lower layers using context-independent phones.
+ */ +POCKETSPHINX_EXPORT +int ps_alignment_populate_ci(ps_alignment_t *al); + +/** + * Propagate timing information up from state sequence. + */ +POCKETSPHINX_EXPORT +int ps_alignment_propagate(ps_alignment_t *al); + +/** + * Number of words. + */ +POCKETSPHINX_EXPORT +int ps_alignment_n_words(ps_alignment_t *al); + +/** + * Number of phones. + */ +POCKETSPHINX_EXPORT +int ps_alignment_n_phones(ps_alignment_t *al); + +/** + * Number of states. + */ +POCKETSPHINX_EXPORT +int ps_alignment_n_states(ps_alignment_t *al); + +/** + * Iterate over the alignment starting at the first word. + */ +POCKETSPHINX_EXPORT +ps_alignment_iter_t *ps_alignment_words(ps_alignment_t *al); + +/** + * Iterate over the alignment starting at the first phone. + */ +POCKETSPHINX_EXPORT +ps_alignment_iter_t *ps_alignment_phones(ps_alignment_t *al); + +/** + * Iterate over the alignment starting at the first state. + */ +POCKETSPHINX_EXPORT +ps_alignment_iter_t *ps_alignment_states(ps_alignment_t *al); + +/** + * Get the alignment entry pointed to by an iterator. + * + * The iterator retains ownership of this so don't try to free it. + */ +POCKETSPHINX_EXPORT +ps_alignment_entry_t *ps_alignment_iter_get(ps_alignment_iter_t *itor); + +/** + * Move alignment iterator to given index. + */ +POCKETSPHINX_EXPORT +ps_alignment_iter_t *ps_alignment_iter_goto(ps_alignment_iter_t *itor, int pos); + +/** + * Move an alignment iterator forward. + * + * If the end of the alignment is reached, this will free the iterator + * and return NULL. + */ +POCKETSPHINX_EXPORT +ps_alignment_iter_t *ps_alignment_iter_next(ps_alignment_iter_t *itor); + +/** + * Move an alignment iterator back. + * + * If the start of the alignment is reached, this will free the iterator + * and return NULL. + */ +POCKETSPHINX_EXPORT +ps_alignment_iter_t *ps_alignment_iter_prev(ps_alignment_iter_t *itor); + +/** + * Get a new iterator starting at the parent of the current node. + * + * If there is no parent node, NULL is returned. 
+ */ +POCKETSPHINX_EXPORT +ps_alignment_iter_t *ps_alignment_iter_up(ps_alignment_iter_t *itor); +/** + * Get a new iterator starting at the first child of the current node. + * + * If there is no child node, NULL is returned. + */ +POCKETSPHINX_EXPORT +ps_alignment_iter_t *ps_alignment_iter_down(ps_alignment_iter_t *itor); + +/** + * Release an iterator before completing all iterations. + */ +POCKETSPHINX_EXPORT +int ps_alignment_iter_free(ps_alignment_iter_t *itor); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __PS_ALIGNMENT_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ps_lattice.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ps_lattice.c new file mode 100644 index 0000000000000000000000000000000000000000..4b6c1cce8b4a416573d875c46e261f068ea3f1d7 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ps_lattice.c @@ -0,0 +1,1937 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file ps_lattice.c Word graph search. + */ + +/* System headers. */ +#include +#include +#include + +/* SphinxBase headers. */ +#include +#include +#include +#include +#include + +/* Local headers. */ +#include "pocketsphinx_internal.h" +#include "ps_lattice_internal.h" +#include "ngram_search.h" +#include "dict.h" + +/* + * Create a directed link between "from" and "to" nodes, but if a link already exists, + * choose one with the best ascr. 
+ */ +void +ps_lattice_link(ps_lattice_t *dag, ps_latnode_t *from, ps_latnode_t *to, + int32 score, int32 ef) +{ + latlink_list_t *fwdlink; + + /* Look for an existing link between "from" and "to" nodes */ + for (fwdlink = from->exits; fwdlink; fwdlink = fwdlink->next) + if (fwdlink->link->to == to) + break; + + if (fwdlink == NULL) { + latlink_list_t *revlink; + ps_latlink_t *link; + + /* No link between the two nodes; create a new one */ + link = listelem_malloc(dag->latlink_alloc); + fwdlink = listelem_malloc(dag->latlink_list_alloc); + revlink = listelem_malloc(dag->latlink_list_alloc); + + link->from = from; + link->to = to; + link->ascr = score; + link->ef = ef; + link->best_prev = NULL; + + fwdlink->link = revlink->link = link; + fwdlink->next = from->exits; + from->exits = fwdlink; + revlink->next = to->entries; + to->entries = revlink; + } + else { + /* Link already exists; just retain the best ascr */ + if (score BETTER_THAN fwdlink->link->ascr) { + fwdlink->link->ascr = score; + fwdlink->link->ef = ef; + } + } +} + +void +ps_lattice_penalize_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen) +{ + ps_latnode_t *node; + + for (node = dag->nodes; node; node = node->next) { + latlink_list_t *linklist; + if (node != dag->start && node != dag->end && dict_filler_word(dag->dict, node->basewid)) { + for (linklist = node->entries; linklist; linklist = linklist->next) + linklist->link->ascr += (node->basewid == dag->silence) ? 
silpen : fillpen; + } + } +} + +/* + * Free a node together with the list cells of its exit and entry lists. + * The link objects themselves are not freed here: each one is marked by + * clearing the end that pointed at this node (from for exits, to for + * entries) so that remove_dangling_links() can find and free it from the + * node at the other end. + */ +static void +delete_node(ps_lattice_t *dag, ps_latnode_t *node) +{ + latlink_list_t *x, *next_x; + + for (x = node->exits; x; x = next_x) { + next_x = x->next; + x->link->from = NULL; + listelem_free(dag->latlink_list_alloc, x); + } + for (x = node->entries; x; x = next_x) { + next_x = x->next; + x->link->to = NULL; + listelem_free(dag->latlink_list_alloc, x); + } + listelem_free(dag->latnode_alloc, node); +} + + +/* + * Unlink and free every exit whose destination has been cleared and + * every entry whose source has been cleared (see delete_node()), freeing + * both the link and its list cell. + */ +static void +remove_dangling_links(ps_lattice_t *dag, ps_latnode_t *node) +{ + latlink_list_t *x, *prev_x, *next_x; + + /* Exits: drop links whose destination node was deleted. */ + prev_x = NULL; + for (x = node->exits; x; x = next_x) { + next_x = x->next; + if (x->link->to == NULL) { + if (prev_x) + prev_x->next = next_x; + else + node->exits = next_x; + listelem_free(dag->latlink_alloc, x->link); + listelem_free(dag->latlink_list_alloc, x); + } + else + prev_x = x; + } + /* Entries: drop links whose source node was deleted. */ + prev_x = NULL; + for (x = node->entries; x; x = next_x) { + next_x = x->next; + if (x->link->from == NULL) { + if (prev_x) + prev_x->next = next_x; + else + node->entries = next_x; + listelem_free(dag->latlink_alloc, x->link); + listelem_free(dag->latlink_list_alloc, x); + } + else + prev_x = x; + } +} + +/* + * Delete every node whose reachable flag is clear, then renumber the + * remaining nodes and remove the links left dangling by the deletions. + */ +void +ps_lattice_delete_unreachable(ps_lattice_t *dag) +{ + ps_latnode_t *node, *prev_node, *next_node; + int i; + + /* Remove unreachable nodes from the list of nodes. */ + prev_node = NULL; + for (node = dag->nodes; node; node = next_node) { + next_node = node->next; + if (!node->reachable) { + if (prev_node) + prev_node->next = next_node; + else + dag->nodes = next_node; + /* Delete this node and NULLify links to it. */ + delete_node(dag, node); + } + else + prev_node = node; + } + + /* Remove all links to and from unreachable nodes. */ + i = 0; + for (node = dag->nodes; node; node = node->next) { + /* Assign sequence numbers. */ + node->id = i++; + + /* We should obviously not encounter unreachable nodes here! */ + assert(node->reachable); + + /* Remove all links that go nowhere.
*/ + remove_dangling_links(dag, node); + } +} + +int32 +ps_lattice_write(ps_lattice_t *dag, char const *filename) +{ + FILE *fp; + int32 i; + ps_latnode_t *d, *initial, *final; + + initial = dag->start; + final = dag->end; + + E_INFO("Writing lattice file: %s\n", filename); + if ((fp = fopen(filename, "w")) == NULL) { + E_ERROR_SYSTEM("Failed to open lattice file '%s' for writing", filename); + return -1; + } + + /* Stupid Sphinx-III lattice code expects 'getcwd:' here */ + fprintf(fp, "# getcwd: /this/is/bogus\n"); + fprintf(fp, "# -logbase %e\n", logmath_get_base(dag->lmath)); + fprintf(fp, "#\n"); + + fprintf(fp, "Frames %d\n", dag->n_frames); + fprintf(fp, "#\n"); + + for (i = 0, d = dag->nodes; d; d = d->next, i++); + fprintf(fp, + "Nodes %d (NODEID WORD STARTFRAME FIRST-ENDFRAME LAST-ENDFRAME)\n", + i); + for (i = 0, d = dag->nodes; d; d = d->next, i++) { + d->id = i; + fprintf(fp, "%d %s %d %d %d ; %d\n", + i, dict_wordstr(dag->dict, d->wid), + d->sf, d->fef, d->lef, d->node_id); + } + fprintf(fp, "#\n"); + + fprintf(fp, "Initial %d\nFinal %d\n", initial->id, final->id); + fprintf(fp, "#\n"); + + /* Don't bother with this, it's not used by anything. 
*/ + fprintf(fp, "BestSegAscr %d (NODEID ENDFRAME ASCORE)\n", + 0 /* #BPTable entries */ ); + fprintf(fp, "#\n"); + + fprintf(fp, "Edges (FROM-NODEID TO-NODEID ASCORE)\n"); + for (d = dag->nodes; d; d = d->next) { + latlink_list_t *l; + for (l = d->exits; l; l = l->next) { + if (l->link->ascr WORSE_THAN WORST_SCORE || l->link->ascr BETTER_THAN 0) + continue; + fprintf(fp, "%d %d %d\n", + d->id, l->link->to->id, l->link->ascr << SENSCR_SHIFT); + } + } + fprintf(fp, "End\n"); + fclose(fp); + + return 0; +} + +int32 +ps_lattice_write_htk(ps_lattice_t *dag, char const *filename) +{ + FILE *fp; + ps_latnode_t *d, *initial, *final; + int32 j, n_links, n_nodes; + + initial = dag->start; + final = dag->end; + + E_INFO("Writing lattice file: %s\n", filename); + if ((fp = fopen(filename, "w")) == NULL) { + E_ERROR_SYSTEM("Failed to open lattice file '%s' for writing", filename); + return -1; + } + + for (n_links = n_nodes = 0, d = dag->nodes; d; d = d->next) { + latlink_list_t *l; + if (!d->reachable) + continue; + d->id = n_nodes; + for (l = d->exits; l; l = l->next) { + if (l->link->to == NULL || !l->link->to->reachable) + continue; + if (l->link->ascr WORSE_THAN WORST_SCORE || l->link->ascr BETTER_THAN 0) + continue; + ++n_links; + } + ++n_nodes; + } + + fprintf(fp, "# Lattice generated by PocketSphinx\n"); + fprintf(fp, "#\n# Header\n#\n"); + fprintf(fp, "VERSION=1.0\n"); + fprintf(fp, "start=%d\n", initial->id); + fprintf(fp, "end=%d\n", final->id); + fprintf(fp, "#\n"); + + fprintf(fp, "N=%d\tL=%d\n", n_nodes, n_links); + fprintf(fp, "#\n# Node definitions\n#\n"); + for (d = dag->nodes; d; d = d->next) { + char const *word = dict_wordstr(dag->dict, d->wid); + char const *c = strrchr(word, '('); + int altpron = 1; + if (!d->reachable) + continue; + if (c) + altpron = atoi(c + 1); + word = dict_basestr(dag->dict, d->wid); + if (d->wid == dict_startwid(dag->dict)) + word = "!SENT_START"; + else if (d->wid == dict_finishwid(dag->dict)) + word = "!SENT_END"; + else if 
(dict_filler_word(dag->dict, d->wid)) + word = "!NULL"; + fprintf(fp, "I=%d\tt=%.2f\tW=%s\tv=%d\n", + d->id, (double)d->sf / dag->frate, + word, altpron); + } + fprintf(fp, "#\n# Link definitions\n#\n"); + for (j = 0, d = dag->nodes; d; d = d->next) { + latlink_list_t *l; + if (!d->reachable) + continue; + for (l = d->exits; l; l = l->next) { + if (l->link->to == NULL || !l->link->to->reachable) + continue; + if (l->link->ascr WORSE_THAN WORST_SCORE || l->link->ascr BETTER_THAN 0) + continue; + fprintf(fp, "J=%d\tS=%d\tE=%d\ta=%f\tp=%g\n", j++, + d->id, l->link->to->id, + logmath_log_to_ln(dag->lmath, l->link->ascr << SENSCR_SHIFT), + logmath_exp(dag->lmath, l->link->alpha + l->link->beta - dag->norm)); + } + } + fclose(fp); + + return 0; +} + +/* Read parameter from a lattice file*/ +static int +dag_param_read(lineiter_t *li, char *param) +{ + int32 n; + + while ((li = lineiter_next(li)) != NULL) { + char *c; + + /* Ignore comments. */ + if (li->buf[0] == '#') + continue; + + /* Find the first space. */ + c = strchr(li->buf, ' '); + if (c == NULL) continue; + + /* Check that the first field equals param and that there's a number after it. */ + if (strncmp(li->buf, param, strlen(param)) == 0 + && sscanf(c + 1, "%d", &n) == 1) + return n; + } + return -1; +} + +/* Mark every node that has a path to the argument dagnode as "reachable". 
*/ +static void +dag_mark_reachable(ps_latnode_t * d) +{ + latlink_list_t *l; + + d->reachable = 1; + for (l = d->entries; l; l = l->next) + if (l->link->from && !l->link->from->reachable) + dag_mark_reachable(l->link->from); +} + +ps_lattice_t * +ps_lattice_read(ps_decoder_t *ps, + char const *file) +{ + FILE *fp; + int32 ispipe; + lineiter_t *line; + float64 lb; + float32 logratio; + ps_latnode_t **darray; + ps_lattice_t *dag; + int i, k, n_nodes; + int32 pip, silpen, fillpen; + ps_latnode_t **pnodes; + + dag = ckd_calloc(1, sizeof(*dag)); + + if (ps) { + dag->search = ps->search; + dag->dict = dict_retain(ps->dict); + dag->lmath = logmath_retain(ps->lmath); + dag->frate = cmd_ln_int32_r(dag->search->config, "-frate"); + } + else { + dag->dict = dict_init(NULL, NULL); + dag->lmath = logmath_init(1.0001, 0, FALSE); + dag->frate = 100; + } + dag->silence = dict_silwid(dag->dict); + dag->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t)); + dag->latlink_alloc = listelem_alloc_init(sizeof(ps_latlink_t)); + dag->latlink_list_alloc = listelem_alloc_init(sizeof(latlink_list_t)); + dag->refcount = 1; + + darray = NULL; + + E_INFO("Reading DAG file: %s\n", file); + if ((fp = fopen_compchk(file, &ispipe)) == NULL) { + E_ERROR_SYSTEM("Failed to open DAG file '%s' for reading", file); + return NULL; + } + line = lineiter_start(fp); + + /* Read and verify logbase (ONE BIG HACK!!) 
*/ + if (line == NULL) { + E_ERROR("Premature EOF(%s)\n", file); + goto load_error; + } + if (strncmp(line->buf, "# getcwd: ", 10) != 0) { + E_ERROR("%s does not begin with '# getcwd: '\n%s", file, line->buf); + goto load_error; + } + if ((line = lineiter_next(line)) == NULL) { + E_ERROR("Premature EOF(%s)\n", file); + goto load_error; + } + if ((strncmp(line->buf, "# -logbase ", 11) != 0) + || (sscanf(line->buf + 11, "%lf", &lb) != 1)) { + E_WARN("%s: Cannot find -logbase in header\n", file); + lb = 1.0001; + } + logratio = 1.0f; + if (dag->lmath == NULL) + dag->lmath = logmath_init(lb, 0, TRUE); + else { + float32 pb = logmath_get_base(dag->lmath); + if (fabs(lb - pb) >= 0.0001) { + E_WARN("Inconsistent logbases: %f vs %f: will compensate\n", lb, pb); + logratio = (float32)(log(lb) / log(pb)); + E_INFO("Lattice log ratio: %f\n", logratio); + } + } + /* Read Frames parameter */ + dag->n_frames = dag_param_read(line, "Frames"); + if (dag->n_frames <= 0) { + E_ERROR("Frames parameter missing or invalid\n"); + goto load_error; + } + /* Read Nodes parameter */ + n_nodes = dag_param_read(line, "Nodes"); + if (n_nodes <= 0) { + E_ERROR("Nodes parameter missing or invalid\n"); + goto load_error; + } + + /* Read nodes */ + darray = ckd_calloc(n_nodes, sizeof(*darray)); + pnodes = &dag->nodes; + for (i = 0; i < n_nodes; i++) { + int32 w; + int seqid, sf, fef, lef; + char wd[256]; + ps_latnode_t *node; + + if ((line = lineiter_next(line)) == NULL) { + E_ERROR("Premature EOF while loading Nodes(%s)\n", file); + goto load_error; + } + + if ((k = + sscanf(line->buf, "%d %255s %d %d %d", &seqid, wd, &sf, &fef, + &lef)) != 5) { + E_ERROR("Cannot parse line: %s, value of count %d\n", line->buf, k); + goto load_error; + } + + w = dict_wordid(dag->dict, wd); + if (w < 0) { + if (dag->search == NULL) { + char *ww = ckd_salloc(wd); + if (dict_word2basestr(ww) != -1) { + if (dict_wordid(dag->dict, ww) == BAD_S3WID) + dict_add_word(dag->dict, ww, NULL, 0); + } + ckd_free(ww); + w = 
dict_add_word(dag->dict, wd, NULL, 0); + } + if (w < 0) { + E_ERROR("Unknown word in line: %s\n", line->buf); + goto load_error; + } + } + + if (seqid != i) { + E_ERROR("Seqno error: %s\n", line->buf); + goto load_error; + } + + *pnodes = listelem_malloc(dag->latnode_alloc); + node = *pnodes; + darray[i] = node; + node->wid = w; + node->basewid = dict_basewid(dag->dict, w); + node->id = seqid; + node->sf = sf; + node->fef = fef; + node->lef = lef; + node->reachable = 0; + node->exits = node->entries = NULL; + node->next = NULL; + pnodes = &node->next; + } + + /* Read initial node ID */ + k = dag_param_read(line, "Initial"); + if ((k < 0) || (k >= n_nodes)) { + E_ERROR("Initial node parameter missing or invalid\n"); + goto load_error; + } + dag->start = darray[k]; + + /* Read final node ID */ + k = dag_param_read(line, "Final"); + if ((k < 0) || (k >= n_nodes)) { + E_ERROR("Final node parameter missing or invalid\n"); + goto load_error; + } + dag->end = darray[k]; + + /* Read bestsegscore entries and ignore them. */ + if ((k = dag_param_read(line, "BestSegAscr")) < 0) { + E_ERROR("BestSegAscr parameter missing\n"); + goto load_error; + } + for (i = 0; i < k; i++) { + if ((line = lineiter_next(line)) == NULL) { + E_ERROR("Premature EOF while (%s) ignoring BestSegAscr\n", + line); + goto load_error; + } + } + + /* Read in edges. 
*/ + while ((line = lineiter_next(line)) != NULL) { + if (line->buf[0] == '#') + continue; + if (0 == strncmp(line->buf, "Edges", 5)) + break; + } + if (line == NULL) { + E_ERROR("Edges missing\n"); + goto load_error; + } + while ((line = lineiter_next(line)) != NULL) { + int from, to, ascr; + ps_latnode_t *pd, *d; + + if (sscanf(line->buf, "%d %d %d", &from, &to, &ascr) != 3) + break; + if (ascr WORSE_THAN WORST_SCORE) + continue; + pd = darray[from]; + d = darray[to]; + if (logratio != 1.0f) + ascr = (int32)(ascr * logratio); + ps_lattice_link(dag, pd, d, ascr, d->sf - 1); + } + if (strcmp(line->buf, "End\n") != 0) { + E_ERROR("Terminating 'End' missing\n"); + goto load_error; + } + lineiter_free(line); + fclose_comp(fp, ispipe); + ckd_free(darray); + + /* Minor hack: If the final node is a filler word and not , + * then set its base word ID to , so that the language model + * scores won't be screwed up. */ + if (dict_filler_word(dag->dict, dag->end->wid)) + dag->end->basewid = dag->search + ? ps_search_finish_wid(dag->search) + : dict_wordid(dag->dict, S3_FINISH_WORD); + + /* Mark reachable from dag->end */ + dag_mark_reachable(dag->end); + + /* Free nodes unreachable from dag->end and their links */ + ps_lattice_delete_unreachable(dag); + + if (ps) { + /* Build links around silence and filler words, since they do + * not exist in the language model. FIXME: This is + * potentially buggy, as we already do this before outputting + * lattices. 
*/ + pip = logmath_log(dag->lmath, cmd_ln_float32_r(ps->config, "-pip")); + silpen = pip + logmath_log(dag->lmath, + cmd_ln_float32_r(ps->config, "-silprob")); + fillpen = pip + logmath_log(dag->lmath, + cmd_ln_float32_r(ps->config, "-fillprob")); + ps_lattice_penalize_fillers(dag, silpen, fillpen); + } + + return dag; + + load_error: + E_ERROR("Failed to load %s\n", file); + lineiter_free(line); + fclose_comp(fp, ispipe); + ckd_free(darray); + return NULL; +} + +int +ps_lattice_n_frames(ps_lattice_t *dag) +{ + return dag->n_frames; +} + +ps_lattice_t * +ps_lattice_init_search(ps_search_t *search, int n_frame) +{ + ps_lattice_t *dag; + + dag = ckd_calloc(1, sizeof(*dag)); + dag->search = search; + dag->dict = dict_retain(search->dict); + dag->lmath = logmath_retain(search->acmod->lmath); + dag->frate = cmd_ln_int32_r(dag->search->config, "-frate"); + dag->silence = dict_silwid(dag->dict); + dag->n_frames = n_frame; + dag->latnode_alloc = listelem_alloc_init(sizeof(ps_latnode_t)); + dag->latlink_alloc = listelem_alloc_init(sizeof(ps_latlink_t)); + dag->latlink_list_alloc = listelem_alloc_init(sizeof(latlink_list_t)); + dag->refcount = 1; + return dag; +} + +ps_lattice_t * +ps_lattice_retain(ps_lattice_t *dag) +{ + ++dag->refcount; + return dag; +} + +int +ps_lattice_free(ps_lattice_t *dag) +{ + if (dag == NULL) + return 0; + if (--dag->refcount > 0) + return dag->refcount; + logmath_free(dag->lmath); + dict_free(dag->dict); + listelem_alloc_free(dag->latnode_alloc); + listelem_alloc_free(dag->latlink_alloc); + listelem_alloc_free(dag->latlink_list_alloc); + ckd_free(dag->hyp_str); + ckd_free(dag); + return 0; +} + +logmath_t * +ps_lattice_get_logmath(ps_lattice_t *dag) +{ + return dag->lmath; +} + +ps_latnode_iter_t * +ps_latnode_iter(ps_lattice_t *dag) +{ + return dag->nodes; +} + +ps_latnode_iter_t * +ps_latnode_iter_next(ps_latnode_iter_t *itor) +{ + return itor->next; +} + +void +ps_latnode_iter_free(ps_latnode_iter_t *itor) +{ + /* Do absolutely nothing. 
*/ + (void)itor; +} + +ps_latnode_t * +ps_latnode_iter_node(ps_latnode_iter_t *itor) +{ + return itor; +} + +int +ps_latnode_times(ps_latnode_t *node, int16 *out_fef, int16 *out_lef) +{ + if (out_fef) *out_fef = (int16)node->fef; + if (out_lef) *out_lef = (int16)node->lef; + return node->sf; +} + +char const * +ps_latnode_word(ps_lattice_t *dag, ps_latnode_t *node) +{ + return dict_wordstr(dag->dict, node->wid); +} + +char const * +ps_latnode_baseword(ps_lattice_t *dag, ps_latnode_t *node) +{ + return dict_wordstr(dag->dict, node->basewid); +} + +int32 +ps_latnode_prob(ps_lattice_t *dag, ps_latnode_t *node, + ps_latlink_t **out_link) +{ + latlink_list_t *links; + int32 bestpost = logmath_get_zero(dag->lmath); + + for (links = node->exits; links; links = links->next) { + int32 post = links->link->alpha + links->link->beta - dag->norm; + if (post > bestpost) { + if (out_link) *out_link = links->link; + bestpost = post; + } + } + return bestpost; +} + +ps_latlink_iter_t * +ps_latnode_exits(ps_latnode_t *node) +{ + return node->exits; +} + +ps_latlink_iter_t * +ps_latnode_entries(ps_latnode_t *node) +{ + return node->entries; +} + +ps_latlink_iter_t * +ps_latlink_iter_next(ps_latlink_iter_t *itor) +{ + return itor->next; +} + +void +ps_latlink_iter_free(ps_latlink_iter_t *itor) +{ + /* Do absolutely nothing. 
*/ + (void)itor; +} + +ps_latlink_t * +ps_latlink_iter_link(ps_latlink_iter_t *itor) +{ + return itor->link; +} + +int +ps_latlink_times(ps_latlink_t *link, int16 *out_sf) +{ + if (out_sf) { + if (link->from) { + *out_sf = link->from->sf; + } + else { + *out_sf = 0; + } + } + return link->ef; +} + +ps_latnode_t * +ps_latlink_nodes(ps_latlink_t *link, ps_latnode_t **out_src) +{ + if (out_src) *out_src = link->from; + return link->to; +} + +char const * +ps_latlink_word(ps_lattice_t *dag, ps_latlink_t *link) +{ + if (link->from == NULL) + return NULL; + return dict_wordstr(dag->dict, link->from->wid); +} + +char const * +ps_latlink_baseword(ps_lattice_t *dag, ps_latlink_t *link) +{ + if (link->from == NULL) + return NULL; + return dict_wordstr(dag->dict, link->from->basewid); +} + +ps_latlink_t * +ps_latlink_pred(ps_latlink_t *link) +{ + return link->best_prev; +} + +int32 +ps_latlink_prob(ps_lattice_t *dag, ps_latlink_t *link, int32 *out_ascr) +{ + int32 post = link->alpha + link->beta - dag->norm; + if (out_ascr) *out_ascr = link->ascr << SENSCR_SHIFT; + return post; +} + +char const * +ps_lattice_hyp(ps_lattice_t *dag, ps_latlink_t *link) +{ + ps_latlink_t *l; + size_t len; + char *c; + + /* Backtrace once to get hypothesis length. */ + len = 0; + /* FIXME: There may not be a search, but actually there should be a dict. */ + if (dict_real_word(dag->dict, link->to->basewid)) { + char *wstr = dict_wordstr(dag->dict, link->to->basewid); + if (wstr != NULL) + len += strlen(wstr) + 1; + } + for (l = link; l; l = l->best_prev) { + if (dict_real_word(dag->dict, l->from->basewid)) { + char *wstr = dict_wordstr(dag->dict, l->from->basewid); + if (wstr != NULL) + len += strlen(wstr) + 1; + } + } + + /* Backtrace again to construct hypothesis string. 
*/ + ckd_free(dag->hyp_str); + dag->hyp_str = ckd_calloc(1, len+1); /* extra one incase the hyp is empty */ + c = dag->hyp_str + len - 1; + if (dict_real_word(dag->dict, link->to->basewid)) { + char *wstr = dict_wordstr(dag->dict, link->to->basewid); + if (wstr != NULL) { + len = strlen(wstr); + c -= len; + memcpy(c, wstr, len); + if (c > dag->hyp_str) { + --c; + *c = ' '; + } + } + } + for (l = link; l; l = l->best_prev) { + if (dict_real_word(dag->dict, l->from->basewid)) { + char *wstr = dict_wordstr(dag->dict, l->from->basewid); + if (wstr != NULL) { + len = strlen(wstr); + c -= len; + memcpy(c, wstr, len); + if (c > dag->hyp_str) { + --c; + *c = ' '; + } + } + } + } + + return dag->hyp_str; +} + +static void +ps_lattice_compute_lscr(ps_seg_t *seg, ps_latlink_t *link, int to) +{ + ngram_model_t *lmset; + + /* Language model score is included in the link score for FSG + * search. FIXME: Of course, this is sort of a hack :( */ + if (0 != strcmp(ps_search_type(seg->search), PS_SEARCH_TYPE_NGRAM)) { + seg->lback = 1; /* Unigram... */ + seg->lscr = 0; + return; + } + + lmset = ((ngram_search_t *)seg->search)->lmset; + + if (link->best_prev == NULL) { + if (to) /* Sentence has only two words. */ + seg->lscr = ngram_bg_score(lmset, link->to->basewid, + link->from->basewid, &seg->lback) + >> SENSCR_SHIFT; + else {/* This is the start symbol, its lscr is always 0. */ + seg->lscr = 0; + seg->lback = 1; + } + } + else { + /* Find the two predecessor words. 
*/ + if (to) { + seg->lscr = ngram_tg_score(lmset, link->to->basewid, + link->from->basewid, + link->best_prev->from->basewid, + &seg->lback) >> SENSCR_SHIFT; + } + else { + if (link->best_prev->best_prev) + seg->lscr = ngram_tg_score(lmset, link->from->basewid, + link->best_prev->from->basewid, + link->best_prev->best_prev->from->basewid, + &seg->lback) >> SENSCR_SHIFT; + else + seg->lscr = ngram_bg_score(lmset, link->from->basewid, + link->best_prev->from->basewid, + &seg->lback) >> SENSCR_SHIFT; + } + } +} + +static void +ps_lattice_link2itor(ps_seg_t *seg, ps_latlink_t *link, int to) +{ + dag_seg_t *itor = (dag_seg_t *)seg; + ps_latnode_t *node; + + if (to) { + node = link->to; + seg->ef = node->lef; + seg->prob = 0; /* norm + beta - norm */ + } + else { + latlink_list_t *x; + ps_latnode_t *n; + logmath_t *lmath = ps_search_acmod(seg->search)->lmath; + + node = link->from; + seg->ef = link->ef; + seg->prob = link->alpha + link->beta - itor->norm; + /* Sum over all exits for this word and any alternate + pronunciations at the same frame. */ + for (n = node; n; n = n->alt) { + for (x = n->exits; x; x = x->next) { + if (x->link == link) + continue; + seg->prob = logmath_add(lmath, seg->prob, + x->link->alpha + x->link->beta - itor->norm); + } + } + } + seg->word = dict_wordstr(ps_search_dict(seg->search), node->wid); + seg->sf = node->sf; + seg->ascr = link->ascr << SENSCR_SHIFT; + /* Compute language model score from best predecessors. */ + ps_lattice_compute_lscr(seg, link, to); +} + +static void +ps_lattice_seg_free(ps_seg_t *seg) +{ + dag_seg_t *itor = (dag_seg_t *)seg; + + ckd_free(itor->links); + ckd_free(itor); +} + +static ps_seg_t * +ps_lattice_seg_next(ps_seg_t *seg) +{ + dag_seg_t *itor = (dag_seg_t *)seg; + + ++itor->cur; + if (itor->cur == itor->n_links + 1) { + ps_lattice_seg_free(seg); + return NULL; + } + else if (itor->cur == itor->n_links) { + /* Re-use the last link but with the "to" node. 
*/ + ps_lattice_link2itor(seg, itor->links[itor->cur - 1], TRUE); + } + else { + ps_lattice_link2itor(seg, itor->links[itor->cur], FALSE); + } + + return seg; +} + +static ps_segfuncs_t ps_lattice_segfuncs = { + /* seg_next */ ps_lattice_seg_next, + /* seg_free */ ps_lattice_seg_free +}; + +ps_seg_t * +ps_lattice_seg_iter(ps_lattice_t *dag, ps_latlink_t *link, float32 lwf) +{ + dag_seg_t *itor; + ps_latlink_t *l; + int cur; + + /* Calling this an "iterator" is a bit of a misnomer since we have + * to get the entire backtrace in order to produce it. + */ + itor = ckd_calloc(1, sizeof(*itor)); + itor->base.vt = &ps_lattice_segfuncs; + itor->base.search = dag->search; + itor->base.lwf = lwf; + itor->n_links = 0; + itor->norm = dag->norm; + + for (l = link; l; l = l->best_prev) { + ++itor->n_links; + } + if (itor->n_links == 0) { + ckd_free(itor); + return NULL; + } + + itor->links = ckd_calloc(itor->n_links, sizeof(*itor->links)); + cur = itor->n_links - 1; + for (l = link; l; l = l->best_prev) { + itor->links[cur] = l; + --cur; + } + + ps_lattice_link2itor((ps_seg_t *)itor, itor->links[0], FALSE); + return (ps_seg_t *)itor; +} + +latlink_list_t * +latlink_list_new(ps_lattice_t *dag, ps_latlink_t *link, latlink_list_t *next) +{ + latlink_list_t *ll; + + ll = listelem_malloc(dag->latlink_list_alloc); + ll->link = link; + ll->next = next; + + return ll; +} + +void +ps_lattice_pushq(ps_lattice_t *dag, ps_latlink_t *link) +{ + if (dag->q_head == NULL) + dag->q_head = dag->q_tail = latlink_list_new(dag, link, NULL); + else { + dag->q_tail->next = latlink_list_new(dag, link, NULL); + dag->q_tail = dag->q_tail->next; + } + +} + +ps_latlink_t * +ps_lattice_popq(ps_lattice_t *dag) +{ + latlink_list_t *x; + ps_latlink_t *link; + + if (dag->q_head == NULL) + return NULL; + link = dag->q_head->link; + x = dag->q_head->next; + listelem_free(dag->latlink_list_alloc, dag->q_head); + dag->q_head = x; + if (dag->q_head == NULL) + dag->q_tail = NULL; + return link; +} + +void 
+ps_lattice_delq(ps_lattice_t *dag) +{ + while (ps_lattice_popq(dag)) { + /* Do nothing. */ + } +} + +ps_latlink_t * +ps_lattice_traverse_edges(ps_lattice_t *dag, ps_latnode_t *start, ps_latnode_t *end) +{ + ps_latnode_t *node; + latlink_list_t *x; + + /* Cancel any unfinished traversal. */ + ps_lattice_delq(dag); + + /* Initialize node fanin counts and path scores. */ + for (node = dag->nodes; node; node = node->next) + node->info.fanin = 0; + for (node = dag->nodes; node; node = node->next) { + for (x = node->exits; x; x = x->next) + (x->link->to->info.fanin)++; + } + + /* Initialize agenda with all exits from start. */ + if (start == NULL) start = dag->start; + for (x = start->exits; x; x = x->next) + ps_lattice_pushq(dag, x->link); + + /* Pull the first edge off the queue. */ + return ps_lattice_traverse_next(dag, end); +} + +ps_latlink_t * +ps_lattice_traverse_next(ps_lattice_t *dag, ps_latnode_t *end) +{ + ps_latlink_t *next; + + next = ps_lattice_popq(dag); + if (next == NULL) + return NULL; + + /* Decrease fanin count for destination node and expand outgoing + * edges if all incoming edges have been seen. */ + --next->to->info.fanin; + if (next->to->info.fanin == 0) { + latlink_list_t *x; + + if (end == NULL) end = dag->end; + if (next->to == end) { + /* If we have traversed all links entering the end node, + * clear the queue, causing future calls to this function + * to return NULL. */ + ps_lattice_delq(dag); + return next; + } + + /* Extend all outgoing edges. */ + for (x = next->to->exits; x; x = x->next) + ps_lattice_pushq(dag, x->link); + } + return next; +} + +ps_latlink_t * +ps_lattice_reverse_edges(ps_lattice_t *dag, ps_latnode_t *start, ps_latnode_t *end) +{ + ps_latnode_t *node; + latlink_list_t *x; + + /* Cancel any unfinished traversal. */ + ps_lattice_delq(dag); + + /* Initialize node fanout counts and path scores. 
*/ + for (node = dag->nodes; node; node = node->next) { + node->info.fanin = 0; + for (x = node->exits; x; x = x->next) + ++node->info.fanin; + } + + /* Initialize agenda with all entries from end. */ + if (end == NULL) end = dag->end; + for (x = end->entries; x; x = x->next) + ps_lattice_pushq(dag, x->link); + + /* Pull the first edge off the queue. */ + return ps_lattice_reverse_next(dag, start); +} + +ps_latlink_t * +ps_lattice_reverse_next(ps_lattice_t *dag, ps_latnode_t *start) +{ + ps_latlink_t *next; + + next = ps_lattice_popq(dag); + if (next == NULL) + return NULL; + + /* Decrease fanout count for source node and expand incoming + * edges if all incoming edges have been seen. */ + --next->from->info.fanin; + if (next->from->info.fanin == 0) { + latlink_list_t *x; + + if (start == NULL) start = dag->start; + if (next->from == start) { + /* If we have traversed all links entering the start node, + * clear the queue, causing future calls to this function + * to return NULL. */ + ps_lattice_delq(dag); + return next; + } + + /* Extend all outgoing edges. */ + for (x = next->from->entries; x; x = x->next) + ps_lattice_pushq(dag, x->link); + } + return next; +} + +/* + * Find the best score from dag->start to end point of any link and + * use it to update links further down the path. This is like + * single-source shortest path search, except that it is done over + * edges rather than nodes, which allows us to do exact trigram scoring. + * + * Helpfully enough, we get half of the posterior probability + * calculation for free that way too. (interesting research topic: is + * there a reliable Viterbi analogue to word-level Forward-Backward + * like there is for state-level? Or, is it just lattice density?) 
+ */ +ps_latlink_t * +ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset, + float32 lwf, float32 ascale) +{ + ps_search_t *search; + ps_latnode_t *node; + ps_latlink_t *link; + ps_latlink_t *bestend; + latlink_list_t *x; + logmath_t *lmath; + int32 bestescr; + + search = dag->search; + lmath = dag->lmath; + + /* Initialize path scores for all links exiting dag->start, and + * set all other scores to the minimum. Also initialize alphas to + * log-zero. */ + for (node = dag->nodes; node; node = node->next) { + for (x = node->exits; x; x = x->next) { + x->link->path_scr = MAX_NEG_INT32; + x->link->alpha = logmath_get_zero(lmath); + } + } + for (x = dag->start->exits; x; x = x->next) { + int32 n_used; + int16 to_is_fil; + + to_is_fil = dict_filler_word(ps_search_dict(search), x->link->to->basewid) && x->link->to != dag->end; + + /* Best path points to dag->start, obviously. */ + x->link->path_scr = x->link->ascr; + if (lmset && !to_is_fil) + x->link->path_scr += (ngram_bg_score(lmset, x->link->to->basewid, + ps_search_start_wid(search), &n_used) >> SENSCR_SHIFT) * lwf; + x->link->best_prev = NULL; + /* No predecessors for start links. */ + x->link->alpha = 0; + } + + /* Traverse the edges in the graph, updating path scores. */ + for (link = ps_lattice_traverse_edges(dag, NULL, NULL); + link; link = ps_lattice_traverse_next(dag, NULL)) { + int32 bprob, n_used; + int32 w3_wid, w2_wid; + int16 w3_is_fil, w2_is_fil; + ps_latlink_t *prev_link; + + /* Sanity check, we should not be traversing edges that + * weren't previously updated, otherwise nasty overflows will result. 
*/ + assert(link->path_scr != MAX_NEG_INT32); + + /* Find word predecessor if from-word is filler */ + w3_wid = link->from->basewid; + w2_wid = link->to->basewid; + w3_is_fil = dict_filler_word(ps_search_dict(search), link->from->basewid) && link->from != dag->start; + w2_is_fil = dict_filler_word(ps_search_dict(search), w2_wid) && link->to != dag->end; + prev_link = link; + + if (w3_is_fil) { + while (prev_link->best_prev != NULL) { + prev_link = prev_link->best_prev; + w3_wid = prev_link->from->basewid; + if (!dict_filler_word(ps_search_dict(search), w3_wid) || prev_link->from == dag->start) { + w3_is_fil = FALSE; + break; + } + } + } + + /* Calculate common bigram probability for all alphas. */ + if (lmset && !w3_is_fil && !w2_is_fil) + bprob = ngram_ng_prob(lmset, w2_wid, &w3_wid, 1, &n_used); + else + bprob = 0; + /* Add in this link's acoustic score, which was a constant + factor in previous computations (if any). */ + link->alpha += (link->ascr << SENSCR_SHIFT) * ascale; + + if (w2_is_fil) { + w2_is_fil = w3_is_fil; + w3_is_fil = TRUE; + w2_wid = w3_wid; + while (prev_link->best_prev != NULL) { + prev_link = prev_link->best_prev; + w3_wid = prev_link->from->basewid; + if (!dict_filler_word(ps_search_dict(search), w3_wid) || prev_link->from == dag->start) { + w3_is_fil = FALSE; + break; + } + } + } + + /* Update scores for all paths exiting link->to. */ + for (x = link->to->exits; x; x = x->next) { + int32 score; + int32 w1_wid; + int16 w1_is_fil; + + w1_wid = x->link->to->basewid; + w1_is_fil = dict_filler_word(ps_search_dict(search), w1_wid) && x->link->to != dag->end; + + /* Update alpha with sum of previous alphas. */ + x->link->alpha = logmath_add(lmath, x->link->alpha, link->alpha + bprob); + + /* Update link score with maximum link score. 
*/ + score = link->path_scr + x->link->ascr; + /* Calculate language score for bestpath if possible */ + if (lmset && !w1_is_fil && !w2_is_fil) { + if (w3_is_fil) + /* partial context available */ + score += (ngram_bg_score(lmset, w1_wid, w2_wid, &n_used) >> SENSCR_SHIFT) * lwf; + else + /* full context available */ + score += (ngram_tg_score(lmset, w1_wid, w2_wid, w3_wid, &n_used) >> SENSCR_SHIFT) * lwf; + } + + if (score BETTER_THAN x->link->path_scr) { + x->link->path_scr = score; + x->link->best_prev = link; + } + } + } + + /* Find best link entering final node, and calculate normalizer + * for posterior probabilities. */ + bestend = NULL; + bestescr = MAX_NEG_INT32; + + /* Normalizer is the alpha for the imaginary link exiting the + final node. */ + dag->norm = logmath_get_zero(lmath); + for (x = dag->end->entries; x; x = x->next) { + int32 bprob, n_used; + int32 from_wid; + int16 from_is_fil; + + from_wid = x->link->from->basewid; + from_is_fil = dict_filler_word(ps_search_dict(search), from_wid) && x->link->from != dag->start; + if (from_is_fil) { + ps_latlink_t *prev_link = x->link; + while (prev_link->best_prev != NULL) { + prev_link = prev_link->best_prev; + from_wid = prev_link->from->basewid; + if (!dict_filler_word(ps_search_dict(search), from_wid) || prev_link->from == dag->start) { + from_is_fil = FALSE; + break; + } + } + } + + if (lmset && !from_is_fil) + bprob = ngram_ng_prob(lmset, + x->link->to->basewid, + &from_wid, 1, &n_used); + else + bprob = 0; + dag->norm = logmath_add(lmath, dag->norm, x->link->alpha + bprob); + if (x->link->path_scr BETTER_THAN bestescr) { + bestescr = x->link->path_scr; + bestend = x->link; + } + } + /* FIXME: floating point... 
*/ + dag->norm += (int32)(dag->final_node_ascr << SENSCR_SHIFT) * ascale; + + E_INFO("Bestpath score: %d\n", bestescr); + E_INFO("Normalizer P(O) = alpha(%s:%d:%d) = %d\n", + dict_wordstr(dag->search->dict, dag->end->wid), + dag->end->sf, dag->end->lef, + dag->norm); + return bestend; +} + +static int32 +ps_lattice_joint(ps_lattice_t *dag, ps_latlink_t *link, float32 ascale) +{ + ngram_model_t *lmset; + int32 jprob; + + /* Sort of a hack... */ + if (dag->search && 0 == strcmp(ps_search_type(dag->search), PS_SEARCH_TYPE_NGRAM)) + lmset = ((ngram_search_t *)dag->search)->lmset; + else + lmset = NULL; + + jprob = (dag->final_node_ascr << SENSCR_SHIFT) * ascale; + while (link) { + if (lmset) { + int lback; + int32 from_wid, to_wid; + int16 from_is_fil, to_is_fil; + + from_wid = link->from->basewid; + to_wid = link->to->basewid; + from_is_fil = dict_filler_word(dag->dict, from_wid) && link->from != dag->start; + to_is_fil = dict_filler_word(dag->dict, to_wid) && link->to != dag->end; + + /* Find word predecessor if from-word is filler */ + if (!to_is_fil && from_is_fil) { + ps_latlink_t *prev_link = link; + while (prev_link->best_prev != NULL) { + prev_link = prev_link->best_prev; + from_wid = prev_link->from->basewid; + if (!dict_filler_word(dag->dict, from_wid) || prev_link->from == dag->start) { + from_is_fil = FALSE; + break; + } + } + } + + /* Compute unscaled language model probability. Note that + this is actually not the language model probability + that corresponds to this link, but that is okay, + because we are just taking the sum over all links in + the best path. */ + if (!from_is_fil && !to_is_fil) + jprob += ngram_ng_prob(lmset, to_wid, + &from_wid, 1, &lback); + } + /* If there is no language model, we assume that the language + model probability (such as it is) has been included in the + link score. 
*/ + jprob += (link->ascr << SENSCR_SHIFT) * ascale; + link = link->best_prev; + } + + E_INFO("Joint P(O,S) = %d P(S|O) = %d\n", jprob, jprob - dag->norm); + return jprob; +} + +int32 +ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset, + float32 ascale) +{ + logmath_t *lmath; + ps_latnode_t *node; + ps_latlink_t *link; + latlink_list_t *x; + ps_latlink_t *bestend; + int32 bestescr; + + lmath = dag->lmath; + + /* Reset all betas to zero. */ + for (node = dag->nodes; node; node = node->next) { + for (x = node->exits; x; x = x->next) { + x->link->beta = logmath_get_zero(lmath); + } + } + + bestend = NULL; + bestescr = MAX_NEG_INT32; + /* Accumulate backward probabilities for all links. */ + for (link = ps_lattice_reverse_edges(dag, NULL, NULL); + link; link = ps_lattice_reverse_next(dag, NULL)) { + int32 bprob, n_used; + int32 from_wid, to_wid; + int16 from_is_fil, to_is_fil; + + from_wid = link->from->basewid; + to_wid = link->to->basewid; + from_is_fil = dict_filler_word(dag->dict, from_wid) && link->from != dag->start; + to_is_fil = dict_filler_word(dag->dict, to_wid) && link->to != dag->end; + + /* Find word predecessor if from-word is filler */ + if (!to_is_fil && from_is_fil) { + ps_latlink_t *prev_link = link; + while (prev_link->best_prev != NULL) { + prev_link = prev_link->best_prev; + from_wid = prev_link->from->basewid; + if (!dict_filler_word(dag->dict, from_wid) || prev_link->from == dag->start) { + from_is_fil = FALSE; + break; + } + } + } + + /* Calculate LM probability. */ + if (lmset && !from_is_fil && !to_is_fil) + bprob = ngram_ng_prob(lmset, to_wid, &from_wid, 1, &n_used); + else + bprob = 0; + + if (link->to == dag->end) { + /* Track the best path - we will backtrace in order to + calculate the unscaled joint probability for sentence + posterior. 
*/ + if (link->path_scr BETTER_THAN bestescr) { + bestescr = link->path_scr; + bestend = link; + } + /* Imaginary exit link from final node has beta = 1.0 */ + link->beta = bprob + (dag->final_node_ascr << SENSCR_SHIFT) * ascale; + } + else { + /* Update beta from all outgoing betas. */ + for (x = link->to->exits; x; x = x->next) { + link->beta = logmath_add(lmath, link->beta, + x->link->beta + bprob + + (x->link->ascr << SENSCR_SHIFT) * ascale); + } + } + } + + /* Return P(S|O) = P(O,S)/P(O) */ + return ps_lattice_joint(dag, bestend, ascale) - dag->norm; +} + +int32 +ps_lattice_posterior_prune(ps_lattice_t *dag, int32 beam) +{ + ps_latlink_t *link; + int npruned = 0; + + for (link = ps_lattice_traverse_edges(dag, dag->start, dag->end); + link; link = ps_lattice_traverse_next(dag, dag->end)) { + link->from->reachable = FALSE; + if (link->alpha + link->beta - dag->norm < beam) { + latlink_list_t *x, *tmp, *next; + tmp = NULL; + for (x = link->from->exits; x; x = next) { + next = x->next; + if (x->link == link) { + listelem_free(dag->latlink_list_alloc, x); + } + else { + x->next = tmp; + tmp = x; + } + } + link->from->exits = tmp; + tmp = NULL; + for (x = link->to->entries; x; x = next) { + next = x->next; + if (x->link == link) { + listelem_free(dag->latlink_list_alloc, x); + } + else { + x->next = tmp; + tmp = x; + } + } + link->to->entries = tmp; + listelem_free(dag->latlink_alloc, link); + ++npruned; + } + } + dag_mark_reachable(dag->end); + ps_lattice_delete_unreachable(dag); + return npruned; +} + + +/* Parameters to prune n-best alternatives search */ +#define MAX_PATHS 500 /* Max allowed active paths at any time */ +#define MAX_HYP_TRIES 10000 + +/* + * For each node in any path between from and end of utt, find the + * best score from "from".sf to end of utt. (NOTE: Uses bigram probs; + * this is an estimate of the best score from "from".) 
(NOTE #2: yes, + * this is the "heuristic score" used in A* search) + */ +static int32 +best_rem_score(ps_astar_t *nbest, ps_latnode_t * from) +{ + latlink_list_t *x; + int32 bestscore, score; + + if (from->info.rem_score <= 0) + return (from->info.rem_score); + + /* Best score from "from" to end of utt not known; compute from successors */ + bestscore = WORST_SCORE; + for (x = from->exits; x; x = x->next) { + int32 n_used; + + score = best_rem_score(nbest, x->link->to); + score += x->link->ascr; + if (nbest->lmset) + score += (ngram_bg_score(nbest->lmset, x->link->to->basewid, + from->basewid, &n_used) >> SENSCR_SHIFT) + * nbest->lwf; + if (score BETTER_THAN bestscore) + bestscore = score; + } + from->info.rem_score = bestscore; + + return bestscore; +} + +/* + * Insert newpath in sorted (by path score) list of paths. But if newpath is + * too far down the list, drop it (FIXME: necessary?) + * total_score = path score (newpath) + rem_score to end of utt. + */ +static void +path_insert(ps_astar_t *nbest, ps_latpath_t *newpath, int32 total_score) +{ + ps_latpath_t *prev, *p; + int32 i; + + prev = NULL; + for (i = 0, p = nbest->path_list; (i < MAX_PATHS) && p; p = p->next, i++) { + if ((p->score + p->node->info.rem_score) < total_score) + break; + prev = p; + } + + /* newpath should be inserted between prev and p */ + if (i < MAX_PATHS) { + /* Insert new partial hyp */ + newpath->next = p; + if (!prev) + nbest->path_list = newpath; + else + prev->next = newpath; + if (!p) + nbest->path_tail = newpath; + + nbest->n_path++; + nbest->n_hyp_insert++; + nbest->insert_depth += i; + } + else { + /* newpath score too low; reject it and also prune paths beyond MAX_PATHS */ + nbest->path_tail = prev; + prev->next = NULL; + nbest->n_path = MAX_PATHS; + listelem_free(nbest->latpath_alloc, newpath); + + nbest->n_hyp_reject++; + for (; p; p = newpath) { + newpath = p->next; + listelem_free(nbest->latpath_alloc, p); + nbest->n_hyp_reject++; + } + } +} + +/* Find all possible 
extensions to given partial path */ +static void +path_extend(ps_astar_t *nbest, ps_latpath_t * path) +{ + latlink_list_t *x; + ps_latpath_t *newpath; + int32 total_score, tail_score; + + /* Consider all successors of path->node */ + for (x = path->node->exits; x; x = x->next) { + int32 n_used; + + /* Skip successor if no path from it reaches the final node */ + if (x->link->to->info.rem_score <= WORST_SCORE) + continue; + + /* Create path extension and compute exact score for this extension */ + newpath = listelem_malloc(nbest->latpath_alloc); + newpath->node = x->link->to; + newpath->parent = path; + newpath->score = path->score + x->link->ascr; + if (nbest->lmset) { + if (path->parent) { + newpath->score += nbest->lwf + * (ngram_tg_score(nbest->lmset, newpath->node->basewid, + path->node->basewid, + path->parent->node->basewid, &n_used) + >> SENSCR_SHIFT); + } + else + newpath->score += nbest->lwf + * (ngram_bg_score(nbest->lmset, newpath->node->basewid, + path->node->basewid, &n_used) + >> SENSCR_SHIFT); + } + + /* Insert new partial path hypothesis into sorted path_list */ + nbest->n_hyp_tried++; + total_score = newpath->score + newpath->node->info.rem_score; + + /* First see if hyp would be worse than the worst */ + if (nbest->n_path >= MAX_PATHS) { + tail_score = + nbest->path_tail->score + + nbest->path_tail->node->info.rem_score; + if (total_score < tail_score) { + listelem_free(nbest->latpath_alloc, newpath); + nbest->n_hyp_reject++; + continue; + } + } + + path_insert(nbest, newpath, total_score); + } +} + +ps_astar_t * +ps_astar_start(ps_lattice_t *dag, + ngram_model_t *lmset, + float32 lwf, + int sf, int ef, + int w1, int w2) +{ + ps_astar_t *nbest; + ps_latnode_t *node; + + nbest = ckd_calloc(1, sizeof(*nbest)); + nbest->dag = dag; + nbest->lmset = lmset; + nbest->lwf = lwf; + nbest->sf = sf; + if (ef < 0) + nbest->ef = dag->n_frames + 1; + else + nbest->ef = ef; + nbest->w1 = w1; + nbest->w2 = w2; + nbest->latpath_alloc = 
listelem_alloc_init(sizeof(ps_latpath_t)); + + /* Initialize rem_score (A* heuristic) to default values */ + for (node = dag->nodes; node; node = node->next) { + if (node == dag->end) + node->info.rem_score = 0; + else if (node->exits == NULL) + node->info.rem_score = WORST_SCORE; + else + node->info.rem_score = 1; /* +ve => unknown value */ + } + + /* Create initial partial hypotheses list consisting of nodes starting at sf */ + nbest->path_list = nbest->path_tail = NULL; + for (node = dag->nodes; node; node = node->next) { + if (node->sf == sf) { + ps_latpath_t *path; + int32 n_used; + + best_rem_score(nbest, node); + path = listelem_malloc(nbest->latpath_alloc); + path->node = node; + path->parent = NULL; + if (nbest->lmset) + path->score = nbest->lwf * + ((w1 < 0) + ? ngram_bg_score(nbest->lmset, node->basewid, w2, &n_used) + : ngram_tg_score(nbest->lmset, node->basewid, w2, w1, &n_used)); + else + path->score = 0; + path->score >>= SENSCR_SHIFT; + path_insert(nbest, path, path->score + node->info.rem_score); + } + } + + return nbest; +} + +ps_latpath_t * +ps_astar_next(ps_astar_t *nbest) +{ + ps_lattice_t *dag; + + dag = nbest->dag; + + /* Pop the top (best) partial hypothesis */ + while ((nbest->top = nbest->path_list) != NULL) { + nbest->path_list = nbest->path_list->next; + if (nbest->top == nbest->path_tail) + nbest->path_tail = NULL; + nbest->n_path--; + + /* Complete hypothesis? */ + if ((nbest->top->node->sf >= nbest->ef) + || ((nbest->top->node == dag->end) && + (nbest->ef > dag->end->sf))) { + /* FIXME: Verify that it is non-empty. Also we may want + * to verify that it is actually distinct from other + * paths, since often this is not the case*/ + return nbest->top; + } + else { + if (nbest->top->node->fef < nbest->ef) + path_extend(nbest, nbest->top); + } + } + + /* Did not find any more paths to extend. 
*/ + return NULL; +} + +char const * +ps_astar_hyp(ps_astar_t *nbest, ps_latpath_t *path) +{ + ps_search_t *search; + ps_latpath_t *p; + size_t len; + char *c; + char *hyp; + + search = nbest->dag->search; + + /* Backtrace once to get hypothesis length. */ + len = 0; + for (p = path; p; p = p->parent) { + if (dict_real_word(ps_search_dict(search), p->node->basewid)) { + char *wstr = dict_wordstr(ps_search_dict(search), p->node->basewid); + if (wstr != NULL) + len += strlen(wstr) + 1; + } + } + + if (len == 0) { + return NULL; + } + + /* Backtrace again to construct hypothesis string. */ + hyp = ckd_calloc(1, len); + c = hyp + len - 1; + for (p = path; p; p = p->parent) { + if (dict_real_word(ps_search_dict(search), p->node->basewid)) { + char *wstr = dict_wordstr(ps_search_dict(search), p->node->basewid); + if (wstr != NULL) { + len = strlen(wstr); + c -= len; + memcpy(c, wstr, len); + if (c > hyp) { + --c; + *c = ' '; + } + } + } + } + + nbest->hyps = glist_add_ptr(nbest->hyps, hyp); + return hyp; +} + +static void +ps_astar_node2itor(astar_seg_t *itor) +{ + ps_seg_t *seg = (ps_seg_t *)itor; + ps_latnode_t *node; + + assert(itor->cur < itor->n_nodes); + node = itor->nodes[itor->cur]; + if (itor->cur == itor->n_nodes - 1) + seg->ef = node->lef; + else + seg->ef = itor->nodes[itor->cur + 1]->sf - 1; + seg->word = dict_wordstr(ps_search_dict(seg->search), node->wid); + seg->sf = node->sf; + seg->prob = 0; /* FIXME: implement forward-backward */ +} + +static void +ps_astar_seg_free(ps_seg_t *seg) +{ + astar_seg_t *itor = (astar_seg_t *)seg; + ckd_free(itor->nodes); + ckd_free(itor); +} + +static ps_seg_t * +ps_astar_seg_next(ps_seg_t *seg) +{ + astar_seg_t *itor = (astar_seg_t *)seg; + + ++itor->cur; + if (itor->cur == itor->n_nodes) { + ps_astar_seg_free(seg); + return NULL; + } + else { + ps_astar_node2itor(itor); + } + + return seg; +} + +static ps_segfuncs_t ps_astar_segfuncs = { + /* seg_next */ ps_astar_seg_next, + /* seg_free */ ps_astar_seg_free +}; + 
+ps_seg_t * +ps_astar_seg_iter(ps_astar_t *astar, ps_latpath_t *path, float32 lwf) +{ + astar_seg_t *itor; + ps_latpath_t *p; + int cur; + + /* Backtrace and make an iterator, this should look familiar by now. */ + itor = ckd_calloc(1, sizeof(*itor)); + itor->base.vt = &ps_astar_segfuncs; + itor->base.search = astar->dag->search; + itor->base.lwf = lwf; + itor->n_nodes = itor->cur = 0; + for (p = path; p; p = p->parent) { + ++itor->n_nodes; + } + itor->nodes = ckd_calloc(itor->n_nodes, sizeof(*itor->nodes)); + cur = itor->n_nodes - 1; + for (p = path; p; p = p->parent) { + itor->nodes[cur] = p->node; + --cur; + } + + ps_astar_node2itor(itor); + return (ps_seg_t *)itor; +} + +void +ps_astar_finish(ps_astar_t *nbest) +{ + gnode_t *gn; + + /* Free all hyps. */ + for (gn = nbest->hyps; gn; gn = gnode_next(gn)) { + ckd_free(gnode_ptr(gn)); + } + glist_free(nbest->hyps); + /* Free all paths. */ + listelem_alloc_free(nbest->latpath_alloc); + /* Free the Henge. */ + ckd_free(nbest); +} + diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ps_lattice_internal.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ps_lattice_internal.h new file mode 100644 index 0000000000000000000000000000000000000000..39b4fbfc22765dd684169e6cc77daf42cc1b9bb9 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ps_lattice_internal.h @@ -0,0 +1,298 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file ps_lattice_internal.h Word graph search implementation + */ + +#ifndef __PS_LATTICE_INTERNAL_H__ +#define __PS_LATTICE_INTERNAL_H__ + +/* Local headers. */ +#include +#include "pocketsphinx_internal.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * Linked list of DAG link pointers. + * + * Because the same link structure is used for forward and reverse + * links, as well as for the agenda used in bestpath search, we can't + * store the list pointer inside latlink_t. We could use glist_t + * here, but it wastes 4 bytes per entry on 32-bit machines. 
+ */ +typedef struct latlink_list_s { + ps_latlink_t *link; + struct latlink_list_s *next; +} latlink_list_t; + +/** + * Word graph structure used in bestpath/nbest search. + */ +struct ps_lattice_s { + int refcount; /**< Reference count. */ + + logmath_t *lmath; /**< Log-math object. */ + ps_search_t *search; /**< Search (if generated by search). */ + dict_t *dict; /**< Dictionary for this DAG. */ + int32 silence; /**< Silence word ID. */ + int32 frate; /**< Frame rate. */ + + ps_latnode_t *nodes; /**< List of all nodes. */ + ps_latnode_t *start; /**< Starting node. */ + ps_latnode_t *end; /**< Ending node. */ + + frame_idx_t n_frames; /**< Number of frames for this utterance. */ + int32 n_nodes; /**< Number of nodes in this lattice. */ + int32 final_node_ascr; /**< Acoustic score of implicit link exiting final node. */ + int32 norm; /**< Normalizer for posterior probabilities. */ + char *hyp_str; /**< Current hypothesis string. */ + + listelem_alloc_t *latnode_alloc; /**< Node allocator for this DAG. */ + listelem_alloc_t *latlink_alloc; /**< Link allocator for this DAG. */ + listelem_alloc_t *latlink_list_alloc; /**< List element allocator for this DAG. */ + + /* This will probably be replaced with a heap. */ + latlink_list_t *q_head; /**< Queue of links for traversal. */ + latlink_list_t *q_tail; /**< Queue of links for traversal. */ +}; + +/** + * Links between DAG nodes. + * + * A link corresponds to a single hypothesized instance of a word with + * a given start and end point. 
+ + */ +struct ps_latlink_s { + struct ps_latnode_s *from; /**< From node */ + struct ps_latnode_s *to; /**< To node */ + struct ps_latlink_s *best_prev; + int32 ascr; /**< Score for from->wid (from->sf to this->ef) */ + int32 path_scr; /**< Best path score from root of DAG */ + frame_idx_t ef; /**< Ending frame of this word */ + int32 alpha; /**< Forward probability of this link P(w,o_1^{ef}) */ + int32 beta; /**< Backward probability of this link P(w|o_{ef+1}^T) */ +}; + +/** + * DAG nodes. + * + * A node corresponds to a number of hypothesized instances of a word + * which all share the same starting point. + */ +struct ps_latnode_s { + int32 id; /**< Unique id for this node */ + int32 wid; /**< Dictionary word id */ + int32 basewid; /**< Dictionary base word id */ + /* FIXME: These are (ab)used to store backpointer indices, therefore they MUST be 32 bits. */ + int32 fef; /**< First end frame */ + int32 lef; /**< Last end frame */ + frame_idx_t sf; /**< Start frame */ + int16 reachable; /**< From \verbatim \endverbatim or \verbatim \endverbatim */ + int32 node_id; /**< Node from fsg model, used to map lattice back to model */ + union { + glist_t velist; /**< List of history entries with different lmstate (tst only) */ + int32 fanin; /**< Number nodes with links to this node */ + int32 rem_score; /**< Estimated best score from node.sf to end */ + int32 best_exit; /**< Best exit score (used for final nodes only) */ + } info; + latlink_list_t *exits; /**< Links out of this node */ + latlink_list_t *entries; /**< Links into this node */ + + struct ps_latnode_s *alt; /**< Node with alternate pronunciation for this word */ + struct ps_latnode_s *next; /**< Next node in DAG (no ordering implied) */ +}; + +/** + * Segmentation "iterator" for backpointer table results. + */ +typedef struct dag_seg_s { + ps_seg_t base; /**< Base structure. */ + ps_latlink_t **links; /**< Array of lattice links. */ + int32 norm; /**< Normalizer for posterior probabilities. 
*/ + int16 n_links; /**< Number of lattice links. */ + int16 cur; /**< Current position in bpidx. */ +} dag_seg_t; + +/** + * Partial path structure used in N-best (A*) search. + * + * Each partial path (latpath_t) is constructed by extending another + * partial path--parent--by one node. + */ +typedef struct ps_latpath_s { + ps_latnode_t *node; /**< Node ending this path. */ + struct ps_latpath_s *parent; /**< Previous element in this path. */ + struct ps_latpath_s *next; /**< Pointer to next path in list of paths. */ + int32 score; /**< Exact score from start node up to node->sf. */ +} ps_latpath_t; + +/** + * A* search structure. + */ +typedef struct ps_astar_s { + ps_lattice_t *dag; + ngram_model_t *lmset; + float32 lwf; + + frame_idx_t sf; + frame_idx_t ef; + int32 w1; + int32 w2; + + int32 n_hyp_tried; + int32 n_hyp_insert; + int32 n_hyp_reject; + int32 insert_depth; + int32 n_path; + + ps_latpath_t *path_list; + ps_latpath_t *path_tail; + ps_latpath_t *top; + + glist_t hyps; /**< List of hypothesis strings. */ + listelem_alloc_t *latpath_alloc; /**< Path allocator for N-best search. */ +} ps_astar_t; + +/** + * Segmentation "iterator" for A* search results. + */ +typedef struct astar_seg_s { + ps_seg_t base; + ps_latnode_t **nodes; + int n_nodes; + int cur; +} astar_seg_t; + +/** + * Construct an empty word graph with reference to a search structure. + */ +ps_lattice_t *ps_lattice_init_search(ps_search_t *search, int n_frame); + +/** + * Insert penalty for fillers + */ +void ps_lattice_penalize_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen); + +/** + * Remove nodes marked as unreachable. + */ +POCKETSPHINX_EXPORT +void ps_lattice_delete_unreachable(ps_lattice_t *dag); + +/** + * Add an edge to the traversal queue. + */ +void ps_lattice_pushq(ps_lattice_t *dag, ps_latlink_t *link); + +/** + * Remove an edge from the traversal queue. + */ +ps_latlink_t *ps_lattice_popq(ps_lattice_t *dag); + +/** + * Clear and reset the traversal queue. 
+ */ +void ps_lattice_delq(ps_lattice_t *dag); + +/** + * Create a new lattice link element. + */ +latlink_list_t *latlink_list_new(ps_lattice_t *dag, ps_latlink_t *link, + latlink_list_t *next); + +/** + * Get hypothesis string after bestpath search. + */ +POCKETSPHINX_EXPORT +char const *ps_lattice_hyp(ps_lattice_t *dag, ps_latlink_t *link); + +/** + * Get hypothesis segmentation iterator after bestpath search. + */ +ps_seg_t *ps_lattice_seg_iter(ps_lattice_t *dag, ps_latlink_t *link, + float32 lwf); + +/** + * Begin N-Gram based A* search on a word graph. + * + * @param sf Starting frame for N-best search. + * @param ef Ending frame for N-best search, or -1 for last frame. + * @param w1 First context word, or -1 for none. + * @param w2 Second context word, or -1 for none. + * @return 0 for success, <0 on error. + */ +ps_astar_t *ps_astar_start(ps_lattice_t *dag, + ngram_model_t *lmset, + float32 lwf, + int sf, int ef, + int w1, int w2); + +/** + * Find next best hypothesis of A* on a word graph. + * + * @return a complete path, or NULL if no more hypotheses exist. + */ +ps_latpath_t *ps_astar_next(ps_astar_t *nbest); + +/** + * Finish N-best search, releasing resources associated with it. + */ +void ps_astar_finish(ps_astar_t *nbest); + +/** + * Get hypothesis string from A* search. + */ +char const *ps_astar_hyp(ps_astar_t *nbest, ps_latpath_t *path); + +/** + * Get hypothesis segmentation from A* search. 
+ */ +ps_seg_t *ps_astar_seg_iter(ps_astar_t *astar, ps_latpath_t *path, float32 lwf); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __PS_LATTICE_INTERNAL_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ps_mllr.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ps_mllr.c new file mode 100644 index 0000000000000000000000000000000000000000..b43f6fbdb3c99262878db3ec3a3c604460770de0 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ps_mllr.c @@ -0,0 +1,169 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2009 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file ps_mllr.c Model-space linear transforms for speaker adaptation + */ + +/* System headers. */ +#include + +/* SphinxBase headers. */ +#include + +/* Local headers. */ +#include "acmod.h" + +ps_mllr_t * +ps_mllr_read(char const *regmatfile) +{ + ps_mllr_t *mllr; + FILE *fp; + int n, i, m, j, k; + + mllr = ckd_calloc(1, sizeof(*mllr)); + mllr->refcnt = 1; + + if ((fp = fopen(regmatfile, "r")) == NULL) { + E_ERROR_SYSTEM("Failed to open MLLR file '%s' for reading", regmatfile); + goto error_out; + } + else + E_INFO("Reading MLLR transformation file '%s'\n", regmatfile); + + if ((fscanf(fp, "%d", &n) != 1) || (n < 1)) { + E_ERROR("Failed to read number of MLLR classes\n"); + goto error_out; + } + mllr->n_class = n; + + if ((fscanf(fp, "%d", &n) != 1)) { + E_ERROR("Failed to read number of feature streams\n"); + goto error_out; + } + mllr->n_feat = n; + mllr->veclen = ckd_calloc(mllr->n_feat, sizeof(*mllr->veclen)); + + mllr->A = (float32 ****) ckd_calloc(mllr->n_feat, sizeof(float32 **)); + mllr->b = (float32 ***) ckd_calloc(mllr->n_feat, sizeof(float32 *)); + mllr->h = (float32 ***) ckd_calloc(mllr->n_feat, sizeof(float32 *)); + + for (i = 0; i < mllr->n_feat; ++i) { + if (fscanf(fp, "%d", &n) != 1) { + E_ERROR("Failed to read stream length for feature %d\n", i); + goto error_out; + } + mllr->veclen[i] = n; + mllr->A[i] = + 
(float32 ***) ckd_calloc_3d(mllr->n_class, mllr->veclen[i], + mllr->veclen[i], sizeof(float32)); + mllr->b[i] = + (float32 **) ckd_calloc_2d(mllr->n_class, mllr->veclen[i], + sizeof(float32)); + mllr->h[i] = + (float32 **) ckd_calloc_2d(mllr->n_class, mllr->veclen[i], + sizeof(float32)); + + for (m = 0; m < mllr->n_class; ++m) { + for (j = 0; j < mllr->veclen[i]; ++j) { + for (k = 0; k < mllr->veclen[i]; ++k) { + if (fscanf(fp, "%f ", &mllr->A[i][m][j][k]) != 1) { + E_ERROR("Failed reading MLLR rotation (%d,%d,%d,%d)\n", + i, m, j, k); + goto error_out; + } + } + } + for (j = 0; j < mllr->veclen[i]; ++j) { + if (fscanf(fp, "%f ", &mllr->b[i][m][j]) != 1) { + E_ERROR("Failed reading MLLR bias (%d,%d,%d)\n", + i, m, j); + goto error_out; + } + } + for (j = 0; j < mllr->veclen[i]; ++j) { + if (fscanf(fp, "%f ", &mllr->h[i][m][j]) != 1) { + E_ERROR("Failed reading MLLR variance scale (%d,%d,%d)\n", + i, m, j); + goto error_out; + } + } + } + } + fclose(fp); + return mllr; + +error_out: + if (fp) + fclose(fp); + ps_mllr_free(mllr); + return NULL; +} + +ps_mllr_t * +ps_mllr_retain(ps_mllr_t *mllr) +{ + ++mllr->refcnt; + return mllr; +} + +int +ps_mllr_free(ps_mllr_t *mllr) +{ + int i; + + if (mllr == NULL) + return 0; + if (--mllr->refcnt > 0) + return mllr->refcnt; + + for (i = 0; i < mllr->n_feat; ++i) { + if (mllr->A) + ckd_free_3d(mllr->A[i]); + if (mllr->b) + ckd_free_2d(mllr->b[i]); + if (mllr->h) + ckd_free_2d(mllr->h[i]); + } + ckd_free(mllr->veclen); + ckd_free(mllr->A); + ckd_free(mllr->b); + ckd_free(mllr->h); + ckd_free(mllr); + + return 0; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ptm_mgau.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ptm_mgau.c new file mode 100644 index 0000000000000000000000000000000000000000..48d857873950785feae293a89f3fa62e76240b07 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ptm_mgau.c @@ -0,0 
+1,916 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2010 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ + +/* System headers */ +#include +#include +#include +#include +#include +#include +#if defined(__ADSPBLACKFIN__) +#elif !defined(_WIN32_WCE) +#include +#endif + +/* SphinxBase headers */ +#include +#include +#include +#include +#include +#include +#include + +/* Local headers */ +#include "tied_mgau_common.h" +#include "ptm_mgau.h" + +static ps_mgaufuncs_t ptm_mgau_funcs = { + "ptm", + ptm_mgau_frame_eval, /* frame_eval */ + ptm_mgau_mllr_transform, /* transform */ + ptm_mgau_free /* free */ +}; + +#define COMPUTE_GMM_MAP(_idx) \ + diff[_idx] = obs[_idx] - mean[_idx]; \ + sqdiff[_idx] = MFCCMUL(diff[_idx], diff[_idx]); \ + compl[_idx] = MFCCMUL(sqdiff[_idx], var[_idx]); +#define COMPUTE_GMM_REDUCE(_idx) \ + d = GMMSUB(d, compl[_idx]); + +static void +insertion_sort_topn(ptm_topn_t *topn, int i, int32 d) +{ + ptm_topn_t vtmp; + int j; + + topn[i].score = d; + if (i == 0) + return; + vtmp = topn[i]; + for (j = i - 1; j >= 0 && d > topn[j].score; j--) { + topn[j + 1] = topn[j]; + } + topn[j + 1] = vtmp; +} + +static int +eval_topn(ptm_mgau_t *s, int cb, int feat, mfcc_t *z) +{ + ptm_topn_t *topn; + int i, ceplen; + + topn = s->f->topn[cb][feat]; + ceplen = s->g->featlen[feat]; + + for (i = 0; i < s->max_topn; i++) { + mfcc_t *mean, diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */ + mfcc_t *var, d; + mfcc_t *obs; + int32 cw, j; + + cw = topn[i].cw; + mean = s->g->mean[cb][feat][0] + cw * ceplen; + var = s->g->var[cb][feat][0] + cw * ceplen; + d = s->g->det[cb][feat][cw]; + obs = z; + for (j = 0; j < ceplen % 4; ++j) { + diff[0] = *obs++ - *mean++; + sqdiff[0] = MFCCMUL(diff[0], diff[0]); + compl[0] = MFCCMUL(sqdiff[0], *var); + d = GMMSUB(d, compl[0]); + ++var; + } + /* We could vectorize this but it's unlikely to make much + * difference as the outer loop here isn't very big. 
*/ + for (;j < ceplen; j += 4) { + COMPUTE_GMM_MAP(0); + COMPUTE_GMM_MAP(1); + COMPUTE_GMM_MAP(2); + COMPUTE_GMM_MAP(3); + COMPUTE_GMM_REDUCE(0); + COMPUTE_GMM_REDUCE(1); + COMPUTE_GMM_REDUCE(2); + COMPUTE_GMM_REDUCE(3); + var += 4; + obs += 4; + mean += 4; + } + insertion_sort_topn(topn, i, (int32)d); + } + + return topn[0].score; +} + +/* This looks bad, but it actually isn't. Less than 1% of eval_cb's + * time is spent doing this. */ +static void +insertion_sort_cb(ptm_topn_t **cur, ptm_topn_t *worst, ptm_topn_t *best, + int cw, int32 intd) +{ + for (*cur = worst - 1; *cur >= best && intd >= (*cur)->score; --*cur) + memcpy(*cur + 1, *cur, sizeof(**cur)); + ++*cur; + (*cur)->cw = cw; + (*cur)->score = intd; +} + +static int +eval_cb(ptm_mgau_t *s, int cb, int feat, mfcc_t *z) +{ + ptm_topn_t *worst, *best, *topn; + mfcc_t *mean; + mfcc_t *var, *det, *detP, *detE; + int32 i, ceplen; + + best = topn = s->f->topn[cb][feat]; + worst = topn + (s->max_topn - 1); + mean = s->g->mean[cb][feat][0]; + var = s->g->var[cb][feat][0]; + det = s->g->det[cb][feat]; + detE = det + s->g->n_density; + ceplen = s->g->featlen[feat]; + + for (detP = det; detP < detE; ++detP) { + mfcc_t diff[4], sqdiff[4], compl[4]; /* diff, diff^2, component likelihood */ + mfcc_t d, thresh; + mfcc_t *obs; + ptm_topn_t *cur; + int32 cw, j; + + d = *detP; + thresh = (mfcc_t) worst->score; /* Avoid int-to-float conversions */ + obs = z; + cw = (int)(detP - det); + + /* Unroll the loop starting with the first dimension(s). In + * theory this might be a bit faster if this Gaussian gets + * "knocked out" by C0. In practice not. */ + for (j = 0; (j < ceplen % 4) && (d >= thresh); ++j) { + diff[0] = *obs++ - *mean++; + sqdiff[0] = MFCCMUL(diff[0], diff[0]); + compl[0] = MFCCMUL(sqdiff[0], *var++); + d = GMMSUB(d, compl[0]); + } + /* Now do 4 dimensions at a time. You'd think that GCC would + * vectorize this? Apparently not. And it's right, because + * that won't make this any faster, at least on x86-64. 
*/ + for (; j < ceplen && d >= thresh; j += 4) { + COMPUTE_GMM_MAP(0); + COMPUTE_GMM_MAP(1); + COMPUTE_GMM_MAP(2); + COMPUTE_GMM_MAP(3); + COMPUTE_GMM_REDUCE(0); + COMPUTE_GMM_REDUCE(1); + COMPUTE_GMM_REDUCE(2); + COMPUTE_GMM_REDUCE(3); + var += 4; + obs += 4; + mean += 4; + } + if (j < ceplen) { + /* terminated early, so not in topn */ + mean += (ceplen - j); + var += (ceplen - j); + continue; + } + if (d < thresh) + continue; + for (i = 0; i < s->max_topn; i++) { + /* already there, so don't need to insert */ + if (topn[i].cw == cw) + break; + } + if (i < s->max_topn) + continue; /* already there. Don't insert */ + insertion_sort_cb(&cur, worst, best, cw, (int32)d); + } + + return best->score; +} + +/** + * Compute top-N densities for active codebooks (and prune) + */ +static int +ptm_mgau_codebook_eval(ptm_mgau_t *s, mfcc_t **z, int frame) +{ + int i, j; + + /* First evaluate top-N from previous frame. */ + for (i = 0; i < s->g->n_mgau; ++i) + for (j = 0; j < s->g->n_feat; ++j) + eval_topn(s, i, j, z[j]); + + /* If frame downsampling is in effect, possibly do nothing else. */ + if (frame % s->ds_ratio) + return 0; + + /* Evaluate remaining codebooks. */ + for (i = 0; i < s->g->n_mgau; ++i) { + if (bitvec_is_clear(s->f->mgau_active, i)) + continue; + for (j = 0; j < s->g->n_feat; ++j) { + eval_cb(s, i, j, z[j]); + } + } + return 0; +} + +/** + * Normalize densities to produce "posterior probabilities", + * i.e. things with a reasonable dynamic range, then scale and + * clamp them to the acceptable range. This is actually done + * solely to ensure that we can use fast_logmath_add(). Note that + * unless we share the same normalizer across all codebooks for + * each feature stream we get defective scores (that's why these + * loops are inside out - doing it per-feature should give us + * greater precision). 
*/ +static int +ptm_mgau_codebook_norm(ptm_mgau_t *s, mfcc_t **z, int frame) +{ + int i, j; + + (void)z; + (void)frame; + for (j = 0; j < s->g->n_feat; ++j) { + int32 norm = WORST_SCORE; + for (i = 0; i < s->g->n_mgau; ++i) { + if (bitvec_is_clear(s->f->mgau_active, i)) + continue; + if (norm < s->f->topn[i][j][0].score >> SENSCR_SHIFT) + norm = s->f->topn[i][j][0].score >> SENSCR_SHIFT; + } + assert(norm != WORST_SCORE); + for (i = 0; i < s->g->n_mgau; ++i) { + int32 k; + if (bitvec_is_clear(s->f->mgau_active, i)) + continue; + for (k = 0; k < s->max_topn; ++k) { + s->f->topn[i][j][k].score >>= SENSCR_SHIFT; + s->f->topn[i][j][k].score -= norm; + s->f->topn[i][j][k].score = -s->f->topn[i][j][k].score; + if (s->f->topn[i][j][k].score > MAX_NEG_ASCR) + s->f->topn[i][j][k].score = MAX_NEG_ASCR; + } + } + } + + return 0; +} + +static int +ptm_mgau_calc_cb_active(ptm_mgau_t *s, uint8 *senone_active, + int32 n_senone_active, int compallsen) +{ + int i, lastsen; + + if (compallsen) { + bitvec_set_all(s->f->mgau_active, s->g->n_mgau); + return 0; + } + bitvec_clear_all(s->f->mgau_active, s->g->n_mgau); + for (lastsen = i = 0; i < n_senone_active; ++i) { + int sen = senone_active[i] + lastsen; + int cb = s->sen2cb[sen]; + bitvec_set(s->f->mgau_active, cb); + lastsen = sen; + } + E_DEBUG("Active codebooks:"); + for (i = 0; i < s->g->n_mgau; ++i) { + if (bitvec_is_clear(s->f->mgau_active, i)) + continue; + E_DEBUG(" %d", i); + } + return 0; +} + +/** + * Compute senone scores from top-N densities for active codebooks. + */ +static int +ptm_mgau_senone_eval(ptm_mgau_t *s, int16 *senone_scores, + uint8 *senone_active, int32 n_senone_active, + int compall) +{ + int i, lastsen, bestscore; + + memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores)); + /* FIXME: This is the non-cache-efficient way to do this. 
We want + * to evaluate one codeword at a time but this requires us to have + * a reverse codebook to senone mapping, which we don't have + * (yet), since different codebooks have different top-N + * codewords. */ + if (compall) + n_senone_active = s->n_sen; + bestscore = 0x7fffffff; + for (lastsen = i = 0; i < n_senone_active; ++i) { + int sen, f, cb; + int ascore; + + if (compall) + sen = i; + else + sen = senone_active[i] + lastsen; + lastsen = sen; + cb = s->sen2cb[sen]; + + if (bitvec_is_clear(s->f->mgau_active, cb)) { + int j; + /* Because senone_active is deltas we can't really "knock + * out" senones from pruned codebooks, and in any case, + * it wouldn't make any difference to the search code, + * which doesn't expect senone_active to change. */ + for (f = 0; f < s->g->n_feat; ++f) { + for (j = 0; j < s->max_topn; ++j) { + s->f->topn[cb][f][j].score = MAX_NEG_ASCR; + } + } + } + /* For each feature, log-sum codeword scores + mixw to get + * feature density, then sum (multiply) to get ascore */ + ascore = 0; + for (f = 0; f < s->g->n_feat; ++f) { + ptm_topn_t *topn; + int j, fden = 0; + topn = s->f->topn[cb][f]; + for (j = 0; j < s->max_topn; ++j) { + int mixw; + /* Find mixture weight for this codeword. */ + if (s->mixw_cb) { + /* 4-bit weights pack two senones per byte (indexed by + * sen/2), so the nibble is chosen by the parity of the + * senone index, not by the packed byte itself: odd + * senones use the high nibble, even ones the low. */ + int dcw = s->mixw[f][topn[j].cw][sen/2]; + dcw = (sen & 1) ? dcw >> 4 : dcw & 0x0f; + mixw = s->mixw_cb[dcw]; + } + else { + mixw = s->mixw[f][topn[j].cw][sen]; + } + if (j == 0) + fden = mixw + topn[j].score; + else + fden = fast_logmath_add(s->lmath_8b, fden, + mixw + topn[j].score); + E_DEBUG("fden[%d][%d] l+= %d + %d = %d\n", + sen, f, mixw, topn[j].score, fden); + } + ascore += fden; + } + if (ascore < bestscore) bestscore = ascore; + senone_scores[sen] = ascore; + } + /* Normalize the scores again (finishing the job we started above + * in ptm_mgau_codebook_norm...) */ + for (i = 0; i < s->n_sen; ++i) { + senone_scores[i] -= bestscore; + } + + return 0; +} + +/** + * Compute senone scores for the active senones.
+ */ +int32 +ptm_mgau_frame_eval(ps_mgau_t *ps, + int16 *senone_scores, + uint8 *senone_active, + int32 n_senone_active, + mfcc_t ** featbuf, int32 frame, + int32 compallsen) +{ + ptm_mgau_t *s = (ptm_mgau_t *)ps; + int fast_eval_idx; + + /* Find the appropriate frame in the rotating history buffer + * corresponding to the requested input frame. No bounds checking + * is done here, which just means you'll get semi-random crap if + * you request a frame in the future or one that's too far in the + * past. Since the history buffer is just used for fast match + * that might not be fatal. */ + fast_eval_idx = frame % s->n_fast_hist; + s->f = s->hist + fast_eval_idx; + /* Compute the top-N codewords for every codebook, unless this + * is a past frame, in which case we already have them (we + * hope!) */ + if (frame >= ps_mgau_base(ps)->frame_idx) { + ptm_fast_eval_t *lastf; + /* Get the previous frame's top-N information (on the + * first frame of the input this is just all WORST_DIST, + * no harm in that) */ + if (fast_eval_idx == 0) + lastf = s->hist + s->n_fast_hist - 1; + else + lastf = s->hist + fast_eval_idx - 1; + /* Copy in initial top-N info */ + memcpy(s->f->topn[0][0], lastf->topn[0][0], + s->g->n_mgau * s->g->n_feat * s->max_topn * sizeof(ptm_topn_t)); + /* Generate initial active codebook list (this might not be + * necessary) */ + ptm_mgau_calc_cb_active(s, senone_active, n_senone_active, compallsen); + /* Now evaluate top-N, prune, and evaluate remaining codebooks. */ + ptm_mgau_codebook_eval(s, featbuf, frame); + ptm_mgau_codebook_norm(s, featbuf, frame); + } + /* Evaluate intersection of active senones and active codebooks. 
*/ + ptm_mgau_senone_eval(s, senone_scores, senone_active, + n_senone_active, compallsen); + + return 0; +} + +static int32 +read_sendump(ptm_mgau_t *s, bin_mdef_t *mdef, char const *file) +{ + FILE *fp; + char line[1000]; + int32 i, n, r, c; + int32 do_swap, do_mmap; + size_t offset; + int n_clust = 0; + int n_feat = s->g->n_feat; + int n_density = s->g->n_density; + int n_sen = bin_mdef_n_sen(mdef); + int n_bits = 8; + + s->n_sen = n_sen; /* FIXME: Should have been done earlier */ + do_mmap = cmd_ln_boolean_r(s->config, "-mmap"); + + if ((fp = fopen(file, "rb")) == NULL) + return -1; + + E_INFO("Loading senones from dump file %s\n", file); + /* Read title size, title */ + if (fread(&n, sizeof(int32), 1, fp) != 1) { + E_ERROR_SYSTEM("Failed to read title size from %s", file); + goto error_out; + } + /* This is extremely bogus */ + do_swap = 0; + if (n < 1 || n > 999) { + SWAP_INT32(&n); + if (n < 1 || n > 999) { + E_ERROR("Title length %x in dump file %s out of range\n", n, file); + goto error_out; + } + do_swap = 1; + } + if (fread(line, sizeof(char), n, fp) != (size_t)n) { + E_ERROR_SYSTEM("Cannot read title"); + goto error_out; + } + if (line[n - 1] != '\0') { + E_ERROR("Bad title in dump file\n"); + goto error_out; + } + E_INFO("%s\n", line); + + /* Read header size, header */ + if (fread(&n, sizeof(n), 1, fp) != 1) { + E_ERROR_SYSTEM("Failed to read header size from %s", file); + goto error_out; + } + if (do_swap) SWAP_INT32(&n); + if (fread(line, sizeof(char), n, fp) != (size_t)n) { + E_ERROR_SYSTEM("Cannot read header"); + goto error_out; + } + if (line[n - 1] != '\0') { + E_ERROR("Bad header in dump file\n"); + goto error_out; + } + + /* Read other header strings until string length = 0 */ + for (;;) { + if (fread(&n, sizeof(n), 1, fp) != 1) { + E_ERROR_SYSTEM("Failed to read header string size from %s", file); + goto error_out; + } + if (do_swap) SWAP_INT32(&n); + if (n == 0) + break; + if (fread(line, sizeof(char), n, fp) != (size_t)n) { + 
E_ERROR_SYSTEM("Cannot read header"); + goto error_out; + } + /* Look for a cluster count, if present */ + if (!strncmp(line, "feature_count ", strlen("feature_count "))) { + n_feat = atoi(line + strlen("feature_count ")); + } + if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) { + n_density = atoi(line + strlen("mixture_count ")); + } + if (!strncmp(line, "model_count ", strlen("model_count "))) { + n_sen = atoi(line + strlen("model_count ")); + } + if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) { + n_clust = atoi(line + strlen("cluster_count ")); + } + if (!strncmp(line, "cluster_bits ", strlen("cluster_bits "))) { + n_bits = atoi(line + strlen("cluster_bits ")); + } + } + + /* Defaults for #rows, #columns in mixw array. */ + c = n_sen; + r = n_density; + if (n_clust == 0) { + /* Older mixw files have them here, and they might be padded. */ + if (fread(&r, sizeof(r), 1, fp) != 1) { + E_ERROR_SYSTEM("Cannot read #rows"); + goto error_out; + } + if (do_swap) SWAP_INT32(&r); + if (fread(&c, sizeof(c), 1, fp) != 1) { + E_ERROR_SYSTEM("Cannot read #columns"); + goto error_out; + } + if (do_swap) SWAP_INT32(&c); + E_INFO("Rows: %d, Columns: %d\n", r, c); + } + + if (n_feat != s->g->n_feat) { + E_ERROR("Number of feature streams mismatch: %d != %d\n", + n_feat, s->g->n_feat); + goto error_out; + } + if (n_density != s->g->n_density) { + E_ERROR("Number of densities mismatch: %d != %d\n", + n_density, s->g->n_density); + goto error_out; + } + if (n_sen != s->n_sen) { + E_ERROR("Number of senones mismatch: %d != %d\n", + n_sen, s->n_sen); + goto error_out; + } + + if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) { + E_ERROR("Cluster count must be 0, 15, or 16\n"); + goto error_out; + } + if (n_clust == 15) + ++n_clust; + + if (!((n_bits == 8) || (n_bits == 4))) { + E_ERROR("Cluster count must be 4 or 8\n"); + goto error_out; + } + + if (do_mmap) { + E_INFO("Using memory-mapped I/O for senones\n"); + } + offset = ftell(fp); + + /* 
Allocate memory for pdfs (or memory map them) */ + if (do_mmap) { + s->sendump_mmap = mmio_file_read(file); + /* Get cluster codebook if any. */ + if (n_clust) { + s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset; + offset += n_clust; + } + } + else { + /* Get cluster codebook if any. */ + if (n_clust) { + s->mixw_cb = ckd_calloc(1, n_clust); + if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) { + E_ERROR("Failed to read %d bytes from sendump\n", n_clust); + goto error_out; + } + } + } + + /* Set up pointers, or read, or whatever */ + if (s->sendump_mmap) { + s->mixw = ckd_calloc_2d(n_feat, n_density, sizeof(*s->mixw)); + for (n = 0; n < n_feat; n++) { + int step = c; + if (n_bits == 4) + step = (step + 1) / 2; + for (i = 0; i < r; i++) { + s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset; + offset += step; + } + } + } + else { + s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw)); + /* Read pdf values and ids */ + for (n = 0; n < n_feat; n++) { + int step = c; + if (n_bits == 4) + step = (step + 1) / 2; + for (i = 0; i < r; i++) { + if (fread(s->mixw[n][i], sizeof(***s->mixw), step, fp) + != (size_t) step) { + E_ERROR("Failed to read %d bytes from sendump\n", step); + goto error_out; + } + } + } + } + + fclose(fp); + return 0; +error_out: + fclose(fp); + return -1; +} + +static int32 +read_mixw(ptm_mgau_t * s, char const *file_name, double SmoothMin) +{ + char **argname, **argval; + char eofchk; + FILE *fp; + int32 byteswap, chksum_present; + uint32 chksum; + float32 *pdf; + int32 i, f, c, n; + int32 n_sen; + int32 n_feat; + int32 n_comp; + int32 n_err; + + E_INFO("Reading mixture weights file '%s'\n", file_name); + + if ((fp = fopen(file_name, "rb")) == NULL) + E_FATAL_SYSTEM("Failed to open mixture file '%s' for reading", file_name); + + /* Read header, including argument-value info and 32-bit byteorder magic */ + if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) + E_FATAL("Failed to read header 
from '%s'\n", file_name); + + /* Parse argument-value list */ + chksum_present = 0; + for (i = 0; argname[i]; i++) { + if (strcmp(argname[i], "version") == 0) { + if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0) + E_WARN("Version mismatch(%s): %s, expecting %s\n", + file_name, argval[i], MGAU_MIXW_VERSION); + } + else if (strcmp(argname[i], "chksum0") == 0) { + chksum_present = 1; /* Ignore the associated value */ + } + } + bio_hdrarg_free(argname, argval); + argname = argval = NULL; + + chksum = 0; + + /* Read #senones, #features, #codewords, arraysize */ + if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1) + || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) != + 1) + || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) != + 1) + || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) { + E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name); + } + if (n_feat != s->g->n_feat) + E_FATAL("#Features streams(%d) != %d\n", n_feat, s->g->n_feat); + /* s->mixw is allocated with s->g->n_density rows per feature but is + * filled for n_comp codewords, so n_comp must not exceed it. */ + if (n_comp > s->g->n_density) + E_FATAL("#Mixture components(%d) exceeds #densities(%d)\n", + n_comp, s->g->n_density); + if (n != n_sen * n_feat * n_comp) { + /* Report the array size read from the file (n), not the leftover + * header-loop index. */ + E_FATAL + ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n", + file_name, n, n_sen, n_feat, n_comp); + } + + /* n_sen = number of mixture weights per codeword, which is + * fixed at the number of senones since we have only one codebook. + */ + s->n_sen = n_sen; + + /* Quantized mixture weight arrays.
*/ + s->mixw = ckd_calloc_3d(s->g->n_feat, s->g->n_density, + n_sen, sizeof(***s->mixw)); + + /* Temporary structure to read in floats before conversion to (int32) logs3 */ + pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32)); + + /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */ + n_err = 0; + for (i = 0; i < n_sen; i++) { + for (f = 0; f < n_feat; f++) { + if (bio_fread((void *) pdf, sizeof(float32), + n_comp, fp, byteswap, &chksum) != n_comp) { + E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name); + } + + /* Normalize and floor */ + if (vector_sum_norm(pdf, n_comp) <= 0.0) + n_err++; + vector_floor(pdf, n_comp, SmoothMin); + vector_sum_norm(pdf, n_comp); + + /* Convert to LOG, quantize, and transpose */ + for (c = 0; c < n_comp; c++) { + int32 qscr; + + qscr = -logmath_log(s->lmath_8b, pdf[c]); + if ((qscr > MAX_NEG_MIXW) || (qscr < 0)) + qscr = MAX_NEG_MIXW; + s->mixw[f][c][i] = qscr; + } + } + } + if (n_err > 0) + E_WARN("Weight normalization failed for %d mixture weights components\n", n_err); + + ckd_free(pdf); + + if (chksum_present) + bio_verify_chksum(fp, byteswap, chksum); + + if (fread(&eofchk, 1, 1, fp) == 1) + E_FATAL("More data than expected in %s\n", file_name); + + fclose(fp); + + E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp); + return n_sen; +} + +ps_mgau_t * +ptm_mgau_init(acmod_t *acmod, bin_mdef_t *mdef) +{ + ptm_mgau_t *s; + ps_mgau_t *ps; + char const *sendump_path; + int i; + + s = ckd_calloc(1, sizeof(*s)); + s->config = acmod->config; + + s->lmath = logmath_retain(acmod->lmath); + /* Log-add table. */ + s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE); + if (s->lmath_8b == NULL) + goto error_out; + /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. 
*/ + if (logmath_get_width(s->lmath_8b) != 1) { + E_ERROR("Log base %f is too small to represent add table in 8 bits\n", + logmath_get_base(s->lmath_8b)); + goto error_out; + } + + /* Read means and variances. */ + if ((s->g = gauden_init(cmd_ln_str_r(s->config, "_mean"), + cmd_ln_str_r(s->config, "_var"), + cmd_ln_float32_r(s->config, "-varfloor"), + s->lmath)) == NULL) { + E_ERROR("Failed to read means and variances\n"); + goto error_out; + } + + /* We only support 256 codebooks or less (like 640k or 2GB, this + * should be enough for anyone) */ + if (s->g->n_mgau > 256) { + E_INFO("Number of codebooks exceeds 256: %d\n", s->g->n_mgau); + goto error_out; + } + if (s->g->n_mgau != bin_mdef_n_ciphone(mdef)) { + E_INFO("Number of codebooks doesn't match number of ciphones, doesn't look like PTM: %d != %d\n", s->g->n_mgau, bin_mdef_n_ciphone(mdef)); + goto error_out; + } + /* Verify n_feat and veclen, against acmod. */ + if (s->g->n_feat != feat_dimension1(acmod->fcb)) { + E_ERROR("Number of streams does not match: %d != %d\n", + s->g->n_feat, feat_dimension1(acmod->fcb)); + goto error_out; + } + for (i = 0; i < s->g->n_feat; ++i) { + if ((uint32)s->g->featlen[i] != feat_dimension2(acmod->fcb, i)) { + /* Three conversions in the format: stream index, then the + * two lengths being compared. */ + E_ERROR("Dimension of stream %d does not match: %d != %d\n", + i, s->g->featlen[i], feat_dimension2(acmod->fcb, i)); + goto error_out; + } + } + /* Read mixture weights. */ + if ((sendump_path = cmd_ln_str_r(s->config, "_sendump"))) { + if (read_sendump(s, acmod->mdef, sendump_path) < 0) { + goto error_out; + } + } + else { + if (read_mixw(s, cmd_ln_str_r(s->config, "_mixw"), + cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) { + goto error_out; + } + } + s->ds_ratio = cmd_ln_int32_r(s->config, "-ds"); + s->max_topn = cmd_ln_int32_r(s->config, "-topn"); + E_INFO("Maximum top-N: %d\n", s->max_topn); + + /* Assume mapping of senones to their base phones, though this + * will become more flexible in the future.
*/ + s->sen2cb = ckd_calloc(s->n_sen, sizeof(*s->sen2cb)); + for (i = 0; i < s->n_sen; ++i) + s->sen2cb[i] = bin_mdef_sen2cimap(acmod->mdef, i); + + /* Allocate fast-match history buffers. We need enough for the + * phoneme lookahead window, plus the current frame, plus one for + * good measure? (FIXME: I don't remember why) */ + s->n_fast_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2; + s->hist = ckd_calloc(s->n_fast_hist, sizeof(*s->hist)); + /* s->f will be a rotating pointer into s->hist. */ + s->f = s->hist; + for (i = 0; i < s->n_fast_hist; ++i) { + int j, k, m; + /* Top-N codewords for every codebook and feature. */ + s->hist[i].topn = ckd_calloc_3d(s->g->n_mgau, s->g->n_feat, + s->max_topn, sizeof(ptm_topn_t)); + /* Initialize them to sane (yet arbitrary) defaults. */ + for (j = 0; j < s->g->n_mgau; ++j) { + for (k = 0; k < s->g->n_feat; ++k) { + for (m = 0; m < s->max_topn; ++m) { + s->hist[i].topn[j][k][m].cw = m; + s->hist[i].topn[j][k][m].score = WORST_DIST; + } + } + } + /* Active codebook mapping (just codebook, not features, + at least not yet) */ + s->hist[i].mgau_active = bitvec_alloc(s->g->n_mgau); + /* Start with them all on, prune them later. 
*/ + bitvec_set_all(s->hist[i].mgau_active, s->g->n_mgau); + } + + ps = (ps_mgau_t *)s; + ps->vt = &ptm_mgau_funcs; + return ps; +error_out: + ptm_mgau_free(ps_mgau_base(s)); + return NULL; +} + +/** + * Apply an MLLR transform to the Gaussians of this scorer by handing + * them to gauden_mllr_transform(), whose result is returned unchanged. + */ +int +ptm_mgau_mllr_transform(ps_mgau_t *ps, + ps_mllr_t *mllr) +{ + ptm_mgau_t *s = (ptm_mgau_t *)ps; + return gauden_mllr_transform(s->g, mllr, s->config); +} + +/** + * Release a PTM scorer and everything it owns. Also used by + * ptm_mgau_init() to unwind a partially constructed object. + */ +void +ptm_mgau_free(ps_mgau_t *ps) +{ + int i; + ptm_mgau_t *s = (ptm_mgau_t *)ps; + + logmath_free(s->lmath); + logmath_free(s->lmath_8b); + if (s->sendump_mmap) { + ckd_free_2d(s->mixw); + mmio_file_unmap(s->sendump_mmap); + } + else { + ckd_free_3d(s->mixw); + /* Without a memory map the cluster codebook was heap-allocated + * by read_sendump(); with one it points into the mapping and + * must not be freed. */ + if (s->mixw_cb) + ckd_free(s->mixw_cb); + } + ckd_free(s->sen2cb); + + for (i = 0; i < s->n_fast_hist; i++) { + ckd_free_3d(s->hist[i].topn); + bitvec_free(s->hist[i].mgau_active); + } + ckd_free(s->hist); + + gauden_free(s->g); + ckd_free(s); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ptm_mgau.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ptm_mgau.h new file mode 100644 index 0000000000000000000000000000000000000000..b60f25b99b911bc64e12235d02b1001f1ad09b70 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/ptm_mgau.h @@ -0,0 +1,113 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2010 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution.
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file ptm_mgau.h Fast phonetically-tied mixture evaluation. + * @author David Huggins-Daines + */ + +#ifndef __PTM_MGAU_H__ +#define __PTM_MGAU_H__ + +/* SphinxBase headers. */ +#include +#include +#include + +/* Local headers. */ +#include "acmod.h" +#include "hmm.h" +#include "bin_mdef.h" +#include "ms_gauden.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +typedef struct ptm_mgau_s ptm_mgau_t; + +typedef struct ptm_topn_s { + int32 cw; /**< Codeword index. */ + int32 score; /**< Score. */ +} ptm_topn_t; + +typedef struct ptm_fast_eval_s { + ptm_topn_t ***topn; /**< Top-N for each codebook (mgau x feature x topn) */ + bitvec_t *mgau_active; /**< Set of active codebooks */ +} ptm_fast_eval_t; + +struct ptm_mgau_s { + ps_mgau_t base; /**< base structure. */ + cmd_ln_t *config; /**< Configuration parameters */ + gauden_t *g; /**< Set of Gaussians.
*/ + int32 n_sen; /**< Number of senones. */ + uint8 *sen2cb; /**< Senone to codebook mapping. */ + uint8 ***mixw; /**< Mixture weight distributions by feature, codeword, senone */ + mmio_file_t *sendump_mmap;/* Memory map for mixw (or NULL if not mmap) */ + uint8 *mixw_cb; /* Mixture weight codebook, if any (assume it contains 16 values) */ + int16 max_topn; + int16 ds_ratio; + + ptm_fast_eval_t *hist; /**< Fast evaluation info for past frames. */ + ptm_fast_eval_t *f; /**< Fast eval info for current frame. */ + int n_fast_hist; /**< Number of past frames tracked. */ + + /* Log-add table for compressed values. */ + logmath_t *lmath_8b; + /* Log-add object for reloading means/variances. */ + logmath_t *lmath; +}; + +ps_mgau_t *ptm_mgau_init(acmod_t *acmod, bin_mdef_t *mdef); +void ptm_mgau_free(ps_mgau_t *s); +int ptm_mgau_frame_eval(ps_mgau_t *s, + int16 *senone_scores, + uint8 *senone_active, + int32 n_senone_active, + mfcc_t **featbuf, + int32 frame, + int32 compallsen); +int ptm_mgau_mllr_transform(ps_mgau_t *s, + ps_mllr_t *mllr); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __PTM_MGAU_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/s2_semi_mgau.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/s2_semi_mgau.c new file mode 100644 index 0000000000000000000000000000000000000000..4d54af633f4c9e732d7aa587af8b32f897a58056 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/s2_semi_mgau.c @@ -0,0 +1,1359 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ + +/* System headers */ +#include +#include +#include +#include +#include +#include +#if defined(__ADSPBLACKFIN__) +#elif !defined(_WIN32_WCE) +#include +#endif + +/* SphinxBase headers */ +#include +#include +#include +#include +#include +#include +#include + +/* Local headers */ +#include "s2_semi_mgau.h" +#include "tied_mgau_common.h" + +static ps_mgaufuncs_t s2_semi_mgau_funcs = { + "s2_semi", + s2_semi_mgau_frame_eval, /* frame_eval */ + s2_semi_mgau_mllr_transform, /* transform */ + s2_semi_mgau_free /* free */ +}; + +struct vqFeature_s { + int32 score; /* score or distance */ + int32 codeword; /* codeword (vector index) */ +}; + +static void +eval_topn(s2_semi_mgau_t *s, int32 feat, mfcc_t *z) +{ + int i, ceplen; + vqFeature_t *topn; + + topn = s->f[feat]; + ceplen = s->g->featlen[feat]; + + for (i = 0; i < s->max_topn; i++) { + mfcc_t *mean, diff, sqdiff, compl; /* diff, diff^2, component likelihood */ + vqFeature_t vtmp; + mfcc_t *var, d; + mfcc_t *obs; + int32 cw, j; + + cw = topn[i].codeword; + mean = s->g->mean[0][feat][0] + cw * ceplen; + var = s->g->var[0][feat][0] + cw * ceplen; + d = s->g->det[0][feat][cw]; + obs = z; + for (j = 0; j < ceplen; j++) { + diff = *obs++ - *mean++; + sqdiff = MFCCMUL(diff, diff); + compl = MFCCMUL(sqdiff, *var); + d = GMMSUB(d, compl); + ++var; + } + topn[i].score = (int32)d; + if (i == 0) + continue; + vtmp = topn[i]; + for (j = i - 1; j >= 0 && (int32)d > topn[j].score; j--) { + topn[j + 1] = topn[j]; + } + topn[j + 1] = vtmp; + } +} + +static void +eval_cb(s2_semi_mgau_t *s, int32 feat, mfcc_t *z) +{ + vqFeature_t *worst, *best, *topn; + mfcc_t *mean; + mfcc_t *var, *det, *detP, *detE; + int32 i, ceplen; + + best = topn = s->f[feat]; + worst = topn + (s->max_topn - 1); + mean = s->g->mean[0][feat][0]; + var = s->g->var[0][feat][0]; + det = s->g->det[0][feat]; + detE = det + s->g->n_density; + ceplen = s->g->featlen[feat]; + + for 
(detP = det; detP < detE; ++detP) { + mfcc_t diff, sqdiff, compl; /* diff, diff^2, component likelihood */ + mfcc_t d; + mfcc_t *obs; + vqFeature_t *cur; + int32 cw, j; + + d = *detP; + obs = z; + cw = (int)(detP - det); + for (j = 0; (j < ceplen) && (d >= worst->score); ++j) { + diff = *obs++ - *mean++; + sqdiff = MFCCMUL(diff, diff); + compl = MFCCMUL(sqdiff, *var); + d = GMMSUB(d, compl); + ++var; + } + if (j < ceplen) { + /* terminated early, so not in topn */ + mean += (ceplen - j); + var += (ceplen - j); + continue; + } + if ((int32)d < worst->score) + continue; + for (i = 0; i < s->max_topn; i++) { + /* already there, so don't need to insert */ + if (topn[i].codeword == cw) + break; + } + if (i < s->max_topn) + continue; /* already there. Don't insert */ + /* remaining code inserts codeword and dist in correct spot */ + for (cur = worst - 1; cur >= best && (int32)d >= cur->score; --cur) + memcpy(cur + 1, cur, sizeof(vqFeature_t)); + ++cur; + cur->codeword = cw; + cur->score = (int32)d; + } +} + +static void +mgau_dist(s2_semi_mgau_t * s, int32 frame, int32 feat, mfcc_t * z) +{ + eval_topn(s, feat, z); + + /* If this frame is skipped, do nothing else. */ + if (frame % s->ds_ratio) + return; + + /* Evaluate the rest of the codebook (or subset thereof). */ + eval_cb(s, feat, z); +} + +static int +mgau_norm(s2_semi_mgau_t *s, int feat) +{ + int32 norm; + int j; + + /* Compute quantized normalizing constant. */ + norm = s->f[feat][0].score >> SENSCR_SHIFT; + + /* Normalize the scores, negate them, and clamp their dynamic range. 
*/ + for (j = 0; j < s->max_topn; ++j) { + s->f[feat][j].score = -((s->f[feat][j].score >> SENSCR_SHIFT) - norm); + if (s->f[feat][j].score > MAX_NEG_ASCR) + s->f[feat][j].score = MAX_NEG_ASCR; + if (s->topn_beam[feat] && s->f[feat][j].score > s->topn_beam[feat]) + break; + } + return j; +} + +static int32 +get_scores_8b_feat_6(s2_semi_mgau_t * s, int i, + int16 *senone_scores, uint8 *senone_active, + int32 n_senone_active) +{ + int32 j, l; + uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5; + + pid_cw0 = s->mixw[i][s->f[i][0].codeword]; + pid_cw1 = s->mixw[i][s->f[i][1].codeword]; + pid_cw2 = s->mixw[i][s->f[i][2].codeword]; + pid_cw3 = s->mixw[i][s->f[i][3].codeword]; + pid_cw4 = s->mixw[i][s->f[i][4].codeword]; + pid_cw5 = s->mixw[i][s->f[i][5].codeword]; + + for (l = j = 0; j < n_senone_active; j++) { + int sen = senone_active[j] + l; + int32 tmp = pid_cw0[sen] + s->f[i][0].score; + + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw1[sen] + s->f[i][1].score); + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw2[sen] + s->f[i][2].score); + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw3[sen] + s->f[i][3].score); + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw4[sen] + s->f[i][4].score); + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw5[sen] + s->f[i][5].score); + + senone_scores[sen] += tmp; + l = sen; + } + return 0; +} + +static int32 +get_scores_8b_feat_5(s2_semi_mgau_t * s, int i, + int16 *senone_scores, uint8 *senone_active, + int32 n_senone_active) +{ + int32 j, l; + uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4; + + pid_cw0 = s->mixw[i][s->f[i][0].codeword]; + pid_cw1 = s->mixw[i][s->f[i][1].codeword]; + pid_cw2 = s->mixw[i][s->f[i][2].codeword]; + pid_cw3 = s->mixw[i][s->f[i][3].codeword]; + pid_cw4 = s->mixw[i][s->f[i][4].codeword]; + + for (l = j = 0; j < n_senone_active; j++) { + int sen = senone_active[j] + l; + int32 tmp = pid_cw0[sen] + s->f[i][0].score; + + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw1[sen] 
+ s->f[i][1].score); + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw2[sen] + s->f[i][2].score); + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw3[sen] + s->f[i][3].score); + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw4[sen] + s->f[i][4].score); + + senone_scores[sen] += tmp; + l = sen; + } + return 0; +} + +static int32 +get_scores_8b_feat_4(s2_semi_mgau_t * s, int i, + int16 *senone_scores, uint8 *senone_active, + int32 n_senone_active) +{ + int32 j, l; + uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3; + + pid_cw0 = s->mixw[i][s->f[i][0].codeword]; + pid_cw1 = s->mixw[i][s->f[i][1].codeword]; + pid_cw2 = s->mixw[i][s->f[i][2].codeword]; + pid_cw3 = s->mixw[i][s->f[i][3].codeword]; + + for (l = j = 0; j < n_senone_active; j++) { + int sen = senone_active[j] + l; + int32 tmp = pid_cw0[sen] + s->f[i][0].score; + + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw1[sen] + s->f[i][1].score); + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw2[sen] + s->f[i][2].score); + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw3[sen] + s->f[i][3].score); + + senone_scores[sen] += tmp; + l = sen; + } + return 0; +} + +static int32 +get_scores_8b_feat_3(s2_semi_mgau_t * s, int i, + int16 *senone_scores, uint8 *senone_active, + int32 n_senone_active) +{ + int32 j, l; + uint8 *pid_cw0, *pid_cw1, *pid_cw2; + + pid_cw0 = s->mixw[i][s->f[i][0].codeword]; + pid_cw1 = s->mixw[i][s->f[i][1].codeword]; + pid_cw2 = s->mixw[i][s->f[i][2].codeword]; + + for (l = j = 0; j < n_senone_active; j++) { + int sen = senone_active[j] + l; + int32 tmp = pid_cw0[sen] + s->f[i][0].score; + + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw1[sen] + s->f[i][1].score); + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw2[sen] + s->f[i][2].score); + + senone_scores[sen] += tmp; + l = sen; + } + return 0; +} + +static int32 +get_scores_8b_feat_2(s2_semi_mgau_t * s, int i, + int16 *senone_scores, uint8 *senone_active, + int32 n_senone_active) +{ + int32 j, l; + uint8 *pid_cw0, *pid_cw1; + + 
pid_cw0 = s->mixw[i][s->f[i][0].codeword]; + pid_cw1 = s->mixw[i][s->f[i][1].codeword]; + + for (l = j = 0; j < n_senone_active; j++) { + int sen = senone_active[j] + l; + int32 tmp = pid_cw0[sen] + s->f[i][0].score; + + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw1[sen] + s->f[i][1].score); + + senone_scores[sen] += tmp; + l = sen; + } + return 0; +} + +static int32 +get_scores_8b_feat_1(s2_semi_mgau_t * s, int i, + int16 *senone_scores, uint8 *senone_active, + int32 n_senone_active) +{ + int32 j, l; + uint8 *pid_cw0; + + pid_cw0 = s->mixw[i][s->f[i][0].codeword]; + for (l = j = 0; j < n_senone_active; j++) { + int sen = senone_active[j] + l; + int32 tmp = pid_cw0[sen] + s->f[i][0].score; + senone_scores[sen] += tmp; + l = sen; + } + return 0; +} + +static int32 +get_scores_8b_feat_any(s2_semi_mgau_t * s, int i, int topn, + int16 *senone_scores, uint8 *senone_active, + int32 n_senone_active) +{ + int32 j, k, l; + + for (l = j = 0; j < n_senone_active; j++) { + int sen = senone_active[j] + l; + uint8 *pid_cw; + int32 tmp; + pid_cw = s->mixw[i][s->f[i][0].codeword]; + tmp = pid_cw[sen] + s->f[i][0].score; + for (k = 1; k < topn; ++k) { + pid_cw = s->mixw[i][s->f[i][k].codeword]; + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw[sen] + s->f[i][k].score); + } + senone_scores[sen] += tmp; + l = sen; + } + return 0; +} + +static int32 +get_scores_8b_feat(s2_semi_mgau_t * s, int i, int topn, + int16 *senone_scores, uint8 *senone_active, int32 n_senone_active) +{ + switch (topn) { + case 6: + return get_scores_8b_feat_6(s, i, senone_scores, + senone_active, n_senone_active); + case 5: + return get_scores_8b_feat_5(s, i, senone_scores, + senone_active, n_senone_active); + case 4: + return get_scores_8b_feat_4(s, i, senone_scores, + senone_active, n_senone_active); + case 3: + return get_scores_8b_feat_3(s, i, senone_scores, + senone_active, n_senone_active); + case 2: + return get_scores_8b_feat_2(s, i, senone_scores, + senone_active, n_senone_active); + case 1: + 
return get_scores_8b_feat_1(s, i, senone_scores, + senone_active, n_senone_active); + default: + return get_scores_8b_feat_any(s, i, topn, senone_scores, + senone_active, n_senone_active); + } +} + +static int32 +get_scores_8b_feat_all(s2_semi_mgau_t * s, int i, int topn, int16 *senone_scores) +{ + int32 j, k; + + for (j = 0; j < s->n_sen; j++) { + uint8 *pid_cw; + int32 tmp; + pid_cw = s->mixw[i][s->f[i][0].codeword]; + tmp = pid_cw[j] + s->f[i][0].score; + for (k = 1; k < topn; ++k) { + pid_cw = s->mixw[i][s->f[i][k].codeword]; + tmp = fast_logmath_add(s->lmath_8b, tmp, + pid_cw[j] + s->f[i][k].score); + } + senone_scores[j] += tmp; + } + return 0; +} + +static int32 +get_scores_4b_feat_6(s2_semi_mgau_t * s, int i, + int16 *senone_scores, uint8 *senone_active, + int32 n_senone_active) +{ + int32 j, l; + uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4, *pid_cw5; + uint8 w_den[6][16]; + + /* Precompute scaled densities. */ + for (j = 0; j < 16; ++j) { + w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score; + w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score; + w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score; + w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score; + w_den[4][j] = s->mixw_cb[j] + s->f[i][4].score; + w_den[5][j] = s->mixw_cb[j] + s->f[i][5].score; + } + + pid_cw0 = s->mixw[i][s->f[i][0].codeword]; + pid_cw1 = s->mixw[i][s->f[i][1].codeword]; + pid_cw2 = s->mixw[i][s->f[i][2].codeword]; + pid_cw3 = s->mixw[i][s->f[i][3].codeword]; + pid_cw4 = s->mixw[i][s->f[i][4].codeword]; + pid_cw5 = s->mixw[i][s->f[i][5].codeword]; + + for (l = j = 0; j < n_senone_active; j++) { + int n = senone_active[j] + l; + int tmp, cw; + + if (n & 1) { + cw = pid_cw0[n/2] >> 4; + tmp = w_den[0][cw]; + cw = pid_cw1[n/2] >> 4; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); + cw = pid_cw2[n/2] >> 4; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]); + cw = pid_cw3[n/2] >> 4; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]); + cw = pid_cw4[n/2] >> 4; + tmp = 
fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]); + cw = pid_cw5[n/2] >> 4; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[5][cw]); + } + else { + cw = pid_cw0[n/2] & 0x0f; + tmp = w_den[0][cw]; + cw = pid_cw1[n/2] & 0x0f; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); + cw = pid_cw2[n/2] & 0x0f; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]); + cw = pid_cw3[n/2] & 0x0f; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]); + cw = pid_cw4[n/2] & 0x0f; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]); + cw = pid_cw5[n/2] & 0x0f; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[5][cw]); + } + senone_scores[n] += tmp; + l = n; + } + return 0; +} + +static int32 +get_scores_4b_feat_5(s2_semi_mgau_t * s, int i, + int16 *senone_scores, uint8 *senone_active, + int32 n_senone_active) +{ + int32 j, l; + uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3, *pid_cw4; + uint8 w_den[5][16]; + + /* Precompute scaled densities. */ + for (j = 0; j < 16; ++j) { + w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score; + w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score; + w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score; + w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score; + w_den[4][j] = s->mixw_cb[j] + s->f[i][4].score; + } + + pid_cw0 = s->mixw[i][s->f[i][0].codeword]; + pid_cw1 = s->mixw[i][s->f[i][1].codeword]; + pid_cw2 = s->mixw[i][s->f[i][2].codeword]; + pid_cw3 = s->mixw[i][s->f[i][3].codeword]; + pid_cw4 = s->mixw[i][s->f[i][4].codeword]; + + for (l = j = 0; j < n_senone_active; j++) { + int n = senone_active[j] + l; + int tmp, cw; + + if (n & 1) { + cw = pid_cw0[n/2] >> 4; + tmp = w_den[0][cw]; + cw = pid_cw1[n/2] >> 4; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); + cw = pid_cw2[n/2] >> 4; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]); + cw = pid_cw3[n/2] >> 4; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]); + cw = pid_cw4[n/2] >> 4; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]); + } + else { + cw = pid_cw0[n/2] & 
0x0f; + tmp = w_den[0][cw]; + cw = pid_cw1[n/2] & 0x0f; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); + cw = pid_cw2[n/2] & 0x0f; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]); + cw = pid_cw3[n/2] & 0x0f; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]); + cw = pid_cw4[n/2] & 0x0f; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[4][cw]); + } + senone_scores[n] += tmp; + l = n; + } + return 0; +} + +static int32 +get_scores_4b_feat_4(s2_semi_mgau_t * s, int i, + int16 *senone_scores, uint8 *senone_active, + int32 n_senone_active) +{ + int32 j, l; + uint8 *pid_cw0, *pid_cw1, *pid_cw2, *pid_cw3; + uint8 w_den[4][16]; + + /* Precompute scaled densities. */ + for (j = 0; j < 16; ++j) { + w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score; + w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score; + w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score; + w_den[3][j] = s->mixw_cb[j] + s->f[i][3].score; + } + + pid_cw0 = s->mixw[i][s->f[i][0].codeword]; + pid_cw1 = s->mixw[i][s->f[i][1].codeword]; + pid_cw2 = s->mixw[i][s->f[i][2].codeword]; + pid_cw3 = s->mixw[i][s->f[i][3].codeword]; + + for (l = j = 0; j < n_senone_active; j++) { + int n = senone_active[j] + l; + int tmp, cw; + + if (n & 1) { + cw = pid_cw0[n/2] >> 4; + tmp = w_den[0][cw]; + cw = pid_cw1[n/2] >> 4; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); + cw = pid_cw2[n/2] >> 4; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]); + cw = pid_cw3[n/2] >> 4; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]); + } + else { + cw = pid_cw0[n/2] & 0x0f; + tmp = w_den[0][cw]; + cw = pid_cw1[n/2] & 0x0f; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); + cw = pid_cw2[n/2] & 0x0f; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]); + cw = pid_cw3[n/2] & 0x0f; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[3][cw]); + } + senone_scores[n] += tmp; + l = n; + } + return 0; +} + +static int32 +get_scores_4b_feat_3(s2_semi_mgau_t * s, int i, + int16 *senone_scores, uint8 
*senone_active, + int32 n_senone_active) +{ + int32 j, l; + uint8 *pid_cw0, *pid_cw1, *pid_cw2; + uint8 w_den[3][16]; + + /* Precompute scaled densities. */ + for (j = 0; j < 16; ++j) { + w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score; + w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score; + w_den[2][j] = s->mixw_cb[j] + s->f[i][2].score; + } + + pid_cw0 = s->mixw[i][s->f[i][0].codeword]; + pid_cw1 = s->mixw[i][s->f[i][1].codeword]; + pid_cw2 = s->mixw[i][s->f[i][2].codeword]; + + for (l = j = 0; j < n_senone_active; j++) { + int n = senone_active[j] + l; + int tmp, cw; + + if (n & 1) { + cw = pid_cw0[n/2] >> 4; + tmp = w_den[0][cw]; + cw = pid_cw1[n/2] >> 4; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); + cw = pid_cw2[n/2] >> 4; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]); + } + else { + cw = pid_cw0[n/2] & 0x0f; + tmp = w_den[0][cw]; + cw = pid_cw1[n/2] & 0x0f; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); + cw = pid_cw2[n/2] & 0x0f; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[2][cw]); + } + senone_scores[n] += tmp; + l = n; + } + return 0; +} + +static int32 +get_scores_4b_feat_2(s2_semi_mgau_t * s, int i, + int16 *senone_scores, uint8 *senone_active, + int32 n_senone_active) +{ + int32 j, l; + uint8 *pid_cw0, *pid_cw1; + uint8 w_den[2][16]; + + /* Precompute scaled densities. 
*/ + for (j = 0; j < 16; ++j) { + w_den[0][j] = s->mixw_cb[j] + s->f[i][0].score; + w_den[1][j] = s->mixw_cb[j] + s->f[i][1].score; + } + + pid_cw0 = s->mixw[i][s->f[i][0].codeword]; + pid_cw1 = s->mixw[i][s->f[i][1].codeword]; + + for (l = j = 0; j < n_senone_active; j++) { + int n = senone_active[j] + l; + int tmp, cw; + + if (n & 1) { + cw = pid_cw0[n/2] >> 4; + tmp = w_den[0][cw]; + cw = pid_cw1[n/2] >> 4; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); + } + else { + cw = pid_cw0[n/2] & 0x0f; + tmp = w_den[0][cw]; + cw = pid_cw1[n/2] & 0x0f; + tmp = fast_logmath_add(s->lmath_8b, tmp, w_den[1][cw]); + } + senone_scores[n] += tmp; + l = n; + } + return 0; +} + +static int32 +get_scores_4b_feat_1(s2_semi_mgau_t * s, int i, + int16 *senone_scores, uint8 *senone_active, + int32 n_senone_active) +{ + int32 j, l; + uint8 *pid_cw0; + uint8 w_den[16]; + + /* Precompute scaled densities. */ + for (j = 0; j < 16; ++j) { + w_den[j] = s->mixw_cb[j] + s->f[i][0].score; + } + + pid_cw0 = s->mixw[i][s->f[i][0].codeword]; + + for (l = j = 0; j < n_senone_active; j++) { + int n = senone_active[j] + l; + int tmp, cw; + + if (n & 1) { + cw = pid_cw0[n/2] >> 4; + tmp = w_den[cw]; + } + else { + cw = pid_cw0[n/2] & 0x0f; + tmp = w_den[cw]; + } + senone_scores[n] += tmp; + l = n; + } + return 0; +} + +static int32 +get_scores_4b_feat_any(s2_semi_mgau_t * s, int i, int topn, + int16 *senone_scores, uint8 *senone_active, + int32 n_senone_active) +{ + int32 j, k, l; + + for (l = j = 0; j < n_senone_active; j++) { + int n = senone_active[j] + l; + int tmp, cw; + uint8 *pid_cw; + + pid_cw = s->mixw[i][s->f[i][0].codeword]; + if (n & 1) + cw = pid_cw[n/2] >> 4; + else + cw = pid_cw[n/2] & 0x0f; + tmp = s->mixw_cb[cw] + s->f[i][0].score; + for (k = 1; k < topn; ++k) { + pid_cw = s->mixw[i][s->f[i][k].codeword]; + if (n & 1) + cw = pid_cw[n/2] >> 4; + else + cw = pid_cw[n/2] & 0x0f; + tmp = fast_logmath_add(s->lmath_8b, tmp, + s->mixw_cb[cw] + s->f[i][k].score); + } + 
senone_scores[n] += tmp; + l = n; + } + return 0; +} + +static int32 +get_scores_4b_feat(s2_semi_mgau_t * s, int i, int topn, + int16 *senone_scores, uint8 *senone_active, int32 n_senone_active) +{ + switch (topn) { + case 6: + return get_scores_4b_feat_6(s, i, senone_scores, + senone_active, n_senone_active); + case 5: + return get_scores_4b_feat_5(s, i, senone_scores, + senone_active, n_senone_active); + case 4: + return get_scores_4b_feat_4(s, i, senone_scores, + senone_active, n_senone_active); + case 3: + return get_scores_4b_feat_3(s, i, senone_scores, + senone_active, n_senone_active); + case 2: + return get_scores_4b_feat_2(s, i, senone_scores, + senone_active, n_senone_active); + case 1: + return get_scores_4b_feat_1(s, i, senone_scores, + senone_active, n_senone_active); + default: + return get_scores_4b_feat_any(s, i, topn, senone_scores, + senone_active, n_senone_active); + } +} + +static int32 +get_scores_4b_feat_all(s2_semi_mgau_t * s, int i, int topn, int16 *senone_scores) +{ + int j, last_sen; + + j = 0; + /* Number of senones is always even, but don't overrun if it isn't. */ + last_sen = s->n_sen & ~1; + while (j < last_sen) { + uint8 *pid_cw; + int32 tmp0, tmp1; + int k; + + pid_cw = s->mixw[i][s->f[i][0].codeword]; + tmp0 = s->mixw_cb[pid_cw[j/2] & 0x0f] + s->f[i][0].score; + tmp1 = s->mixw_cb[pid_cw[j/2] >> 4] + s->f[i][0].score; + for (k = 1; k < topn; ++k) { + int32 w_den0, w_den1; + + pid_cw = s->mixw[i][s->f[i][k].codeword]; + w_den0 = s->mixw_cb[pid_cw[j/2] & 0x0f] + s->f[i][k].score; + w_den1 = s->mixw_cb[pid_cw[j/2] >> 4] + s->f[i][k].score; + tmp0 = fast_logmath_add(s->lmath_8b, tmp0, w_den0); + tmp1 = fast_logmath_add(s->lmath_8b, tmp1, w_den1); + } + senone_scores[j++] += tmp0; + senone_scores[j++] += tmp1; + } + return 0; +} + +/* + * Compute senone scores for the active senones. 
+ */ +int32 +s2_semi_mgau_frame_eval(ps_mgau_t *ps, + int16 *senone_scores, + uint8 *senone_active, + int32 n_senone_active, + mfcc_t ** featbuf, int32 frame, + int32 compallsen) +{ + s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps; + int i, topn_idx; + int n_feat = s->g->n_feat; + + memset(senone_scores, 0, s->n_sen * sizeof(*senone_scores)); + /* No bounds checking is done here, which just means you'll get + * semi-random crap if you request a frame in the future or one + * that's too far in the past. */ + topn_idx = frame % s->n_topn_hist; + s->f = s->topn_hist[topn_idx]; + for (i = 0; i < n_feat; ++i) { + /* For past frames this will already be computed. */ + if (frame >= ps_mgau_base(ps)->frame_idx) { + vqFeature_t **lastf; + if (topn_idx == 0) + lastf = s->topn_hist[s->n_topn_hist-1]; + else + lastf = s->topn_hist[topn_idx-1]; + memcpy(s->f[i], lastf[i], sizeof(vqFeature_t) * s->max_topn); + mgau_dist(s, frame, i, featbuf[i]); + s->topn_hist_n[topn_idx][i] = mgau_norm(s, i); + } + if (s->mixw_cb) { + if (compallsen) + get_scores_4b_feat_all(s, i, s->topn_hist_n[topn_idx][i], senone_scores); + else + get_scores_4b_feat(s, i, s->topn_hist_n[topn_idx][i], senone_scores, + senone_active, n_senone_active); + } + else { + if (compallsen) + get_scores_8b_feat_all(s, i, s->topn_hist_n[topn_idx][i], senone_scores); + else + get_scores_8b_feat(s, i, s->topn_hist_n[topn_idx][i], senone_scores, + senone_active, n_senone_active); + } + } + + return 0; +} + +static int32 +read_sendump(s2_semi_mgau_t *s, bin_mdef_t *mdef, char const *file) +{ + FILE *fp; + char line[1000]; + int32 i, n, r, c; + int32 do_swap, do_mmap; + size_t offset; + int n_clust = 0; + int n_feat = s->g->n_feat; + int n_density = s->g->n_density; + int n_sen = bin_mdef_n_sen(mdef); + int n_bits = 8; + + s->n_sen = n_sen; /* FIXME: Should have been done earlier */ + do_mmap = cmd_ln_boolean_r(s->config, "-mmap"); + + if ((fp = fopen(file, "rb")) == NULL) + return -1; + + E_INFO("Loading senones from dump file 
%s\n", file); + /* Read title size, title */ + if (fread(&n, sizeof(int32), 1, fp) != 1) { + E_ERROR_SYSTEM("Failed to read title size from %s", file); + goto error_out; + } + /* This is extremely bogus */ + do_swap = 0; + if (n < 1 || n > 999) { + SWAP_INT32(&n); + if (n < 1 || n > 999) { + E_ERROR("Title length %x in dump file %s out of range\n", n, file); + goto error_out; + } + do_swap = 1; + } + if (fread(line, sizeof(char), n, fp) != (size_t)n) { + E_ERROR_SYSTEM("Cannot read title"); + goto error_out; + } + if (line[n - 1] != '\0') { + E_ERROR("Bad title in dump file\n"); + goto error_out; + } + E_INFO("%s\n", line); + + /* Read header size, header */ + if (fread(&n, sizeof(n), 1, fp) != 1) { + E_ERROR_SYSTEM("Failed to read header size from %s", file); + goto error_out; + } + if (do_swap) SWAP_INT32(&n); + if (fread(line, sizeof(char), n, fp) != (size_t)n) { + E_ERROR_SYSTEM("Cannot read header"); + goto error_out; + } + if (line[n - 1] != '\0') { + E_ERROR("Bad header in dump file\n"); + goto error_out; + } + + /* Read other header strings until string length = 0 */ + for (;;) { + if (fread(&n, sizeof(n), 1, fp) != 1) { + E_ERROR_SYSTEM("Failed to read header string size from %s", file); + goto error_out; + } + if (do_swap) SWAP_INT32(&n); + if (n == 0) + break; + if (fread(line, sizeof(char), n, fp) != (size_t)n) { + E_ERROR_SYSTEM("Cannot read header"); + goto error_out; + } + /* Look for a cluster count, if present */ + if (!strncmp(line, "feature_count ", strlen("feature_count "))) { + n_feat = atoi(line + strlen("feature_count ")); + } + if (!strncmp(line, "mixture_count ", strlen("mixture_count "))) { + n_density = atoi(line + strlen("mixture_count ")); + } + if (!strncmp(line, "model_count ", strlen("model_count "))) { + n_sen = atoi(line + strlen("model_count ")); + } + if (!strncmp(line, "cluster_count ", strlen("cluster_count "))) { + n_clust = atoi(line + strlen("cluster_count ")); + } + if (!strncmp(line, "cluster_bits ", strlen("cluster_bits 
"))) { + n_bits = atoi(line + strlen("cluster_bits ")); + } + } + + /* Defaults for #rows, #columns in mixw array. */ + c = n_sen; + r = n_density; + if (n_clust == 0) { + /* Older mixw files have them here, and they might be padded. */ + if (fread(&r, sizeof(r), 1, fp) != 1) { + E_ERROR_SYSTEM("Cannot read #rows"); + goto error_out; + } + if (do_swap) SWAP_INT32(&r); + if (fread(&c, sizeof(c), 1, fp) != 1) { + E_ERROR_SYSTEM("Cannot read #columns"); + goto error_out; + } + if (do_swap) SWAP_INT32(&c); + E_INFO("Rows: %d, Columns: %d\n", r, c); + } + + if (n_feat != s->g->n_feat) { + E_ERROR("Number of feature streams mismatch: %d != %d\n", + n_feat, s->g->n_feat); + goto error_out; + } + if (n_density != s->g->n_density) { + E_ERROR("Number of densities mismatch: %d != %d\n", + n_density, s->g->n_density); + goto error_out; + } + if (n_sen != s->n_sen) { + E_ERROR("Number of senones mismatch: %d != %d\n", + n_sen, s->n_sen); + goto error_out; + } + + if (!((n_clust == 0) || (n_clust == 15) || (n_clust == 16))) { + E_ERROR("Cluster count must be 0, 15, or 16\n"); + goto error_out; + } + if (n_clust == 15) + ++n_clust; + + if (!((n_bits == 8) || (n_bits == 4))) { + E_ERROR("Cluster count must be 4 or 8\n"); + goto error_out; + } + + if (do_mmap) { + E_INFO("Using memory-mapped I/O for senones\n"); + } + offset = ftell(fp); + + /* Allocate memory for pdfs (or memory map them) */ + if (do_mmap) { + s->sendump_mmap = mmio_file_read(file); + /* Get cluster codebook if any. */ + if (n_clust) { + s->mixw_cb = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset; + offset += n_clust; + } + } + else { + /* Get cluster codebook if any. 
*/ + if (n_clust) { + s->mixw_cb = ckd_calloc(1, n_clust); + if (fread(s->mixw_cb, 1, n_clust, fp) != (size_t) n_clust) { + E_ERROR("Failed to read %d bytes from sendump\n", n_clust); + goto error_out; + } + } + } + + /* Set up pointers, or read, or whatever */ + if (s->sendump_mmap) { + s->mixw = ckd_calloc_2d(n_feat, n_density, sizeof(*s->mixw)); + for (n = 0; n < n_feat; n++) { + int step = c; + if (n_bits == 4) + step = (step + 1) / 2; + for (i = 0; i < r; i++) { + s->mixw[n][i] = ((uint8 *) mmio_file_ptr(s->sendump_mmap)) + offset; + offset += step; + } + } + } + else { + s->mixw = ckd_calloc_3d(n_feat, n_density, n_sen, sizeof(***s->mixw)); + /* Read pdf values and ids */ + for (n = 0; n < n_feat; n++) { + int step = c; + if (n_bits == 4) + step = (step + 1) / 2; + for (i = 0; i < r; i++) { + if (fread(s->mixw[n][i], sizeof(***s->mixw), step, fp) + != (size_t) step) { + E_ERROR("Failed to read %d bytes from sendump\n", step); + goto error_out; + } + } + } + } + + fclose(fp); + return 0; +error_out: + fclose(fp); + return -1; +} + +static int32 +read_mixw(s2_semi_mgau_t * s, char const *file_name, double SmoothMin) +{ + char **argname, **argval; + char eofchk; + FILE *fp; + int32 byteswap, chksum_present; + uint32 chksum; + float32 *pdf; + int32 i, f, c, n; + int32 n_sen; + int32 n_feat; + int32 n_comp; + int32 n_err; + + E_INFO("Reading mixture weights file '%s'\n", file_name); + + if ((fp = fopen(file_name, "rb")) == NULL) + E_FATAL_SYSTEM("Failed to open mixture weights file '%s' for reading", file_name); + + /* Read header, including argument-value info and 32-bit byteorder magic */ + if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) + E_FATAL("Failed to read header from file '%s'\n", file_name); + + /* Parse argument-value list */ + chksum_present = 0; + for (i = 0; argname[i]; i++) { + if (strcmp(argname[i], "version") == 0) { + if (strcmp(argval[i], MGAU_MIXW_VERSION) != 0) + E_WARN("Version mismatch(%s): %s, expecting %s\n", + file_name, 
argval[i], MGAU_MIXW_VERSION); + } + else if (strcmp(argname[i], "chksum0") == 0) { + chksum_present = 1; /* Ignore the associated value */ + } + } + bio_hdrarg_free(argname, argval); + argname = argval = NULL; + + chksum = 0; + + /* Read #senones, #features, #codewords, arraysize */ + if ((bio_fread(&n_sen, sizeof(int32), 1, fp, byteswap, &chksum) != 1) + || (bio_fread(&n_feat, sizeof(int32), 1, fp, byteswap, &chksum) != + 1) + || (bio_fread(&n_comp, sizeof(int32), 1, fp, byteswap, &chksum) != + 1) + || (bio_fread(&n, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) { + E_FATAL("bio_fread(%s) (arraysize) failed\n", file_name); + } + if (n_feat != s->g->n_feat) + E_FATAL("#Features streams(%d) != %d\n", n_feat, s->g->n_feat); + if (n != n_sen * n_feat * n_comp) { + E_FATAL + ("%s: #float32s(%d) doesn't match header dimensions: %d x %d x %d\n", + file_name, i, n_sen, n_feat, n_comp); + } + + /* n_sen = number of mixture weights per codeword, which is + * fixed at the number of senones since we have only one codebook. + */ + s->n_sen = n_sen; + + /* Quantized mixture weight arrays. 
*/ + s->mixw = ckd_calloc_3d(n_feat, s->g->n_density, n_sen, sizeof(***s->mixw)); + + /* Temporary structure to read in floats before conversion to (int32) logs3 */ + pdf = (float32 *) ckd_calloc(n_comp, sizeof(float32)); + + /* Read senone probs data, normalize, floor, convert to logs3, truncate to 8 bits */ + n_err = 0; + for (i = 0; i < n_sen; i++) { + for (f = 0; f < n_feat; f++) { + if (bio_fread((void *) pdf, sizeof(float32), + n_comp, fp, byteswap, &chksum) != n_comp) { + E_FATAL("bio_fread(%s) (arraydata) failed\n", file_name); + } + + /* Normalize and floor */ + if (vector_sum_norm(pdf, n_comp) <= 0.0) + n_err++; + vector_floor(pdf, n_comp, SmoothMin); + vector_sum_norm(pdf, n_comp); + + /* Convert to LOG, quantize, and transpose */ + for (c = 0; c < n_comp; c++) { + int32 qscr; + + qscr = -logmath_log(s->lmath_8b, pdf[c]); + if ((qscr > MAX_NEG_MIXW) || (qscr < 0)) + qscr = MAX_NEG_MIXW; + s->mixw[f][c][i] = qscr; + } + } + } + if (n_err > 0) + E_WARN("Weight normalization failed for %d mixture weights components\n", n_err); + + ckd_free(pdf); + + if (chksum_present) + bio_verify_chksum(fp, byteswap, chksum); + + if (fread(&eofchk, 1, 1, fp) == 1) + E_FATAL("More data than expected in %s\n", file_name); + + fclose(fp); + + E_INFO("Read %d x %d x %d mixture weights\n", n_sen, n_feat, n_comp); + return n_sen; +} + + +static int +split_topn(char const *str, uint8 *out, int nfeat) +{ + char *topn_list = ckd_salloc(str); + char *c, *cc; + int i, maxn; + + c = topn_list; + i = 0; + maxn = 0; + while (i < nfeat && (cc = strchr(c, ',')) != NULL) { + *cc = '\0'; + out[i] = atoi(c); + if (out[i] > maxn) maxn = out[i]; + c = cc + 1; + ++i; + } + if (i < nfeat && *c != '\0') { + out[i] = atoi(c); + if (out[i] > maxn) maxn = out[i]; + ++i; + } + while (i < nfeat) + out[i++] = maxn; + + ckd_free(topn_list); + return maxn; +} + + +ps_mgau_t * +s2_semi_mgau_init(acmod_t *acmod) +{ + s2_semi_mgau_t *s; + ps_mgau_t *ps; + char const *sendump_path; + int i; + int n_feat; + 
+ s = ckd_calloc(1, sizeof(*s)); + s->config = acmod->config; + + s->lmath = logmath_retain(acmod->lmath); + /* Log-add table. */ + s->lmath_8b = logmath_init(logmath_get_base(acmod->lmath), SENSCR_SHIFT, TRUE); + if (s->lmath_8b == NULL) + goto error_out; + /* Ensure that it is only 8 bits wide so that fast_logmath_add() works. */ + if (logmath_get_width(s->lmath_8b) != 1) { + E_ERROR("Log base %f is too small to represent add table in 8 bits\n", + logmath_get_base(s->lmath_8b)); + goto error_out; + } + + /* Read means and variances. */ + if ((s->g = gauden_init(cmd_ln_str_r(s->config, "_mean"), + cmd_ln_str_r(s->config, "_var"), + cmd_ln_float32_r(s->config, "-varfloor"), + s->lmath)) == NULL) { + E_ERROR("Failed to read means and variances\n"); + goto error_out; + } + + /* Currently only a single codebook is supported. */ + if (s->g->n_mgau != 1) + goto error_out; + + n_feat = s->g->n_feat; + + /* Verify n_feat and veclen, against acmod. */ + if (n_feat != feat_dimension1(acmod->fcb)) { + E_ERROR("Number of streams does not match: %d != %d\n", + n_feat, feat_dimension1(acmod->fcb)); + goto error_out; + } + for (i = 0; i < n_feat; ++i) { + if ((uint32)s->g->featlen[i] != feat_dimension2(acmod->fcb, i)) { + E_ERROR("Dimension of stream %d does not match: %d != %d\n", + i, s->g->featlen[i], feat_dimension2(acmod->fcb, i)); + goto error_out; + } + } + /* Read mixture weights */ + if ((sendump_path = cmd_ln_str_r(s->config, "_sendump"))) { + if (read_sendump(s, acmod->mdef, sendump_path) < 0) { + goto error_out; + } + } + else { + if (read_mixw(s, cmd_ln_str_r(s->config, "_mixw"), + cmd_ln_float32_r(s->config, "-mixwfloor")) < 0) { + goto error_out; + } + } + s->ds_ratio = cmd_ln_int32_r(s->config, "-ds"); + + /* Determine top-N for each feature */ + s->topn_beam = ckd_calloc(n_feat, sizeof(*s->topn_beam)); + s->max_topn = cmd_ln_int32_r(s->config, "-topn"); + split_topn(cmd_ln_str_r(s->config, "-topn_beam"), s->topn_beam, n_feat); + E_INFO("Maximum top-N: %d ", 
s->max_topn); + E_INFOCONT("Top-N beams:"); + for (i = 0; i < n_feat; ++i) { + E_INFOCONT(" %d", s->topn_beam[i]); + } + E_INFOCONT("\n"); + + /* Top-N scores from recent frames */ + s->n_topn_hist = cmd_ln_int32_r(s->config, "-pl_window") + 2; + s->topn_hist = (vqFeature_t ***) + ckd_calloc_3d(s->n_topn_hist, n_feat, s->max_topn, + sizeof(***s->topn_hist)); + s->topn_hist_n = ckd_calloc_2d(s->n_topn_hist, n_feat, + sizeof(**s->topn_hist_n)); + for (i = 0; i < s->n_topn_hist; ++i) { + int j; + for (j = 0; j < n_feat; ++j) { + int k; + for (k = 0; k < s->max_topn; ++k) { + s->topn_hist[i][j][k].score = WORST_DIST; + s->topn_hist[i][j][k].codeword = k; + } + } + } + + ps = (ps_mgau_t *)s; + ps->vt = &s2_semi_mgau_funcs; + return ps; +error_out: + s2_semi_mgau_free(ps_mgau_base(s)); + return NULL; +} + +int +s2_semi_mgau_mllr_transform(ps_mgau_t *ps, + ps_mllr_t *mllr) +{ + s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps; + return gauden_mllr_transform(s->g, mllr, s->config); +} + +void +s2_semi_mgau_free(ps_mgau_t *ps) +{ + s2_semi_mgau_t *s = (s2_semi_mgau_t *)ps; + + logmath_free(s->lmath); + logmath_free(s->lmath_8b); + if (s->sendump_mmap) { + ckd_free_2d(s->mixw); + mmio_file_unmap(s->sendump_mmap); + } + else { + ckd_free_3d(s->mixw); + if (s->mixw_cb) + ckd_free(s->mixw_cb); + } + gauden_free(s->g); + ckd_free(s->topn_beam); + ckd_free_2d(s->topn_hist_n); + ckd_free_3d((void **)s->topn_hist); + ckd_free(s); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/s2_semi_mgau.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/s2_semi_mgau.h new file mode 100644 index 0000000000000000000000000000000000000000..b45e5e3363e688bed224f01918bc16e7cc6b6a46 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/s2_semi_mgau.h @@ -0,0 +1,108 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* 
==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * Interface for "semi-continuous vector quantization", a.k.a. Sphinx2 + * fast GMM computation. + */ + +#ifndef __S2_SEMI_MGAU_H__ +#define __S2_SEMI_MGAU_H__ + +/* SphinxBase headers. 
*/ +#include +#include +#include + +/* Local headers. */ +#include "acmod.h" +#include "hmm.h" +#include "bin_mdef.h" +#include "ms_gauden.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +typedef struct vqFeature_s vqFeature_t; + +typedef struct s2_semi_mgau_s s2_semi_mgau_t; +struct s2_semi_mgau_s { + ps_mgau_t base; /**< base structure. */ + cmd_ln_t *config; /* configuration parameters */ + + gauden_t *g; /* Set of Gaussians (pointers below point in here and will go away soon) */ + + uint8 ***mixw; /* mixture weight distributions */ + mmio_file_t *sendump_mmap;/* memory map for mixw (or NULL if not mmap) */ + + uint8 *mixw_cb; /* mixture weight codebook, if any (assume it contains 16 values) */ + int32 n_sen; /* Number of senones */ + uint8 *topn_beam; /* Beam for determining per-frame top-N densities */ + int16 max_topn; + int16 ds_ratio; + + vqFeature_t ***topn_hist; /**< Top-N scores and codewords for past frames. */ + uint8 **topn_hist_n; /**< Variable top-N for past frames. */ + vqFeature_t **f; /**< Topn-N for currently scoring frame. */ + int n_topn_hist; /**< Number of past frames tracked. */ + + /* Log-add table for compressed values. */ + logmath_t *lmath_8b; + /* Log-add object for reloading means/variances. 
*/ + logmath_t *lmath; +}; + +ps_mgau_t *s2_semi_mgau_init(acmod_t *acmod); +void s2_semi_mgau_free(ps_mgau_t *s); +int s2_semi_mgau_frame_eval(ps_mgau_t *s, + int16 *senone_scores, + uint8 *senone_active, + int32 n_senone_active, + mfcc_t **featbuf, + int32 frame, + int32 compallsen); +int s2_semi_mgau_mllr_transform(ps_mgau_t *s, + ps_mllr_t *mllr); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __S2_SEMI_MGAU_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/s3types.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/s3types.h new file mode 100644 index 0000000000000000000000000000000000000000..fb3dbd615aaf4f638cc6028015fc14a554dd34a6 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/s3types.h @@ -0,0 +1,102 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef _S3_S3TYPES_H_ +#define _S3_S3TYPES_H_ + +#include +#include + +#include +#include +#include + +/** \file s3types.h + * \brief Size definition of semantically units. Common for both s3 and s3.X decoder. + */ + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * Size definitions for more semantially meaningful units. + * Illegal value definitions, limits, and tests for specific types. + * NOTE: Types will be either int32 or smaller; only smaller ones may be unsigned (i.e., + * no type will be uint32). 
+ */ + +typedef int16 s3cipid_t; /** Ci phone id */ +#define BAD_S3CIPID ((s3cipid_t) -1) +#define NOT_S3CIPID(p) ((p)<0) +#define IS_S3CIPID(p) ((p)>=0) +#define MAX_S3CIPID 32767 + +/*#define MAX_S3CIPID 127*/ + +typedef int32 s3pid_t; /** Phone id (triphone or ciphone) */ +#define BAD_S3PID ((s3pid_t) -1) +#define NOT_S3PID(p) ((p)<0) +#define IS_S3PID(p) ((p)>=0) +#define MAX_S3PID ((int32)0x7ffffffe) + +typedef uint16 s3ssid_t; /** Senone sequence id (triphone or ciphone) */ +#define BAD_S3SSID ((s3ssid_t) 0xffff) +#define NOT_S3SSID(p) ((p) == BAD_S3SSID) +#define IS_S3SSID(p) ((p) != BAD_S3SSID) +#define MAX_S3SSID ((s3ssid_t)0xfffe) + +typedef int32 s3tmatid_t; /** Transition matrix id; there can be as many as pids */ +#define BAD_S3TMATID ((s3tmatid_t) -1) +#define NOT_S3TMATID(t) ((t)<0) +#define IS_S3TMATID(t) ((t)>=0) +#define MAX_S3TMATID ((int32)0x7ffffffe) + +typedef int32 s3wid_t; /** Dictionary word id */ +#define BAD_S3WID ((s3wid_t) -1) +#define NOT_S3WID(w) ((w)<0) +#define IS_S3WID(w) ((w)>=0) +#define MAX_S3WID ((int32)0x7ffffffe) + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/state_align_search.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/state_align_search.c new file mode 100644 index 0000000000000000000000000000000000000000..33c851dd822752a01545c632c265386c5dd4e88b --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/state_align_search.c @@ -0,0 +1,433 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2010 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file state_align_search.c State (and phone and word) alignment search. + */ + +#include "state_align_search.h" + +static int +state_align_search_start(ps_search_t *search) +{ + state_align_search_t *sas = (state_align_search_t *)search; + + /* Activate the initial state. 
*/ + hmm_enter(sas->hmms, 0, 0, 0); + + return 0; +} + +static void +renormalize_hmms(state_align_search_t *sas, int frame_idx, int32 norm) +{ + int i; + (void) frame_idx; + for (i = 0; i < sas->n_phones; ++i) + hmm_normalize(sas->hmms + i, norm); +} + +static int32 +evaluate_hmms(state_align_search_t *sas, int16 const *senscr, int frame_idx) +{ + int32 bs = WORST_SCORE; + int i; + + hmm_context_set_senscore(sas->hmmctx, senscr); + + for (i = 0; i < sas->n_phones; ++i) { + hmm_t *hmm = sas->hmms + i; + int32 score; + + if (hmm_frame(hmm) < frame_idx) + continue; + score = hmm_vit_eval(hmm); + if (score BETTER_THAN bs) { + bs = score; + } + } + return bs; +} + +static void +prune_hmms(state_align_search_t *sas, int frame_idx) +{ + int nf = frame_idx + 1; + int i; + + /* Check all phones to see if they remain active in the next frame. */ + for (i = 0; i < sas->n_phones; ++i) { + hmm_t *hmm = sas->hmms + i; + if (hmm_frame(hmm) < frame_idx) + continue; + hmm_frame(hmm) = nf; + } +} + +static void +phone_transition(state_align_search_t *sas, int frame_idx) +{ + int nf = frame_idx + 1; + int i; + + for (i = 0; i < sas->n_phones - 1; ++i) { + hmm_t *hmm, *nhmm; + int32 newphone_score; + + hmm = sas->hmms + i; + if (hmm_frame(hmm) != nf) + continue; + + newphone_score = hmm_out_score(hmm); + /* Transition into next phone using the usual Viterbi rule. 
*/ + nhmm = hmm + 1; + if (hmm_frame(nhmm) < frame_idx + || newphone_score BETTER_THAN hmm_in_score(nhmm)) { + hmm_enter(nhmm, newphone_score, hmm_out_history(hmm), nf); + } + } +} + +#define TOKEN_STEP 20 +static void +extend_tokenstack(state_align_search_t *sas, int frame_idx) +{ + if (frame_idx >= sas->n_fr_alloc) { + sas->n_fr_alloc = frame_idx + TOKEN_STEP + 1; + sas->tokens = ckd_realloc(sas->tokens, + sas->n_emit_state * sas->n_fr_alloc + * sizeof(*sas->tokens)); + } + memset(sas->tokens + frame_idx * sas->n_emit_state, 0xff, + sas->n_emit_state * sizeof(*sas->tokens)); +} + +static void +record_transitions(state_align_search_t *sas, int frame_idx) +{ + state_align_hist_t *tokens; + int i; + + /* Push another frame of tokens on the stack. */ + extend_tokenstack(sas, frame_idx); + tokens = sas->tokens + frame_idx * sas->n_emit_state; + + /* Scan all active HMMs */ + for (i = 0; i < sas->n_phones; ++i) { + hmm_t *hmm = sas->hmms + i; + int j; + + if (hmm_frame(hmm) < frame_idx) + continue; + for (j = 0; j < sas->hmmctx->n_emit_state; ++j) { + int state_idx = i * sas->hmmctx->n_emit_state + j; + /* Record their backpointers on the token stack. */ + tokens[state_idx].id = hmm_history(hmm, j); + tokens[state_idx].score = hmm_score(hmm, j); + /* Update backpointer fields with state index. */ + hmm_history(hmm, j) = state_idx; + } + } +} + +static int +state_align_search_step(ps_search_t *search, int frame_idx) +{ + state_align_search_t *sas = (state_align_search_t *)search; + acmod_t *acmod = ps_search_acmod(search); + int16 const *senscr; + int i; + + /* Calculate senone scores. */ + for (i = 0; i < sas->n_phones; ++i) + acmod_activate_hmm(acmod, sas->hmms + i); + senscr = acmod_score(acmod, &frame_idx); + + /* Renormalize here if needed. */ + /* FIXME: Make sure to (unit-)test this!!! 
*/ + if ((sas->best_score - 0x300000) WORSE_THAN WORST_SCORE) { + E_INFO("Renormalizing Scores at frame %d, best score %d\n", + frame_idx, sas->best_score); + renormalize_hmms(sas, frame_idx, sas->best_score); + } + + /* Viterbi step. */ + sas->best_score = evaluate_hmms(sas, senscr, frame_idx); + prune_hmms(sas, frame_idx); + + /* Transition out of non-emitting states. */ + phone_transition(sas, frame_idx); + + /* Generate new tokens from best path results. */ + record_transitions(sas, frame_idx); + + /* Update frame counter */ + sas->frame = frame_idx; + + return 0; +} + +static int +state_align_search_finish(ps_search_t *search) +{ + state_align_search_t *sas = (state_align_search_t *)search; + hmm_t *final_phone = sas->hmms + sas->n_phones - 1; + ps_alignment_iter_t *itor; + ps_alignment_entry_t *ent; + + int last_frame, cur_frame; + state_align_hist_t last, cur; + + /* Best state exiting the last cur_frame. */ + last.id = cur.id = hmm_out_history(final_phone); + last.score = hmm_out_score(final_phone); + if (last.id == 0xffff) { + E_ERROR("Failed to reach final state in alignment\n"); + return -1; + } + itor = ps_alignment_states(sas->al); + last_frame = sas->frame + 1; + for (cur_frame = sas->frame - 1; cur_frame >= 0; --cur_frame) { + cur = sas->tokens[cur_frame * sas->n_emit_state + cur.id]; + /* State boundary, update alignment entry for next state. */ + if (cur.id != last.id) { + itor = ps_alignment_iter_goto(itor, last.id); + assert(itor != NULL); + ent = ps_alignment_iter_get(itor); + ent->start = cur_frame + 1; + ent->duration = last_frame - ent->start; + ent->score = last.score - cur.score; + E_DEBUG("state %d start %d end %d\n", last.id, + ent->start, last_frame); + last = cur; + last_frame = cur_frame + 1; + } + } + /* Update alignment entry for initial state. 
*/ + itor = ps_alignment_iter_goto(itor, 0); + assert(itor != NULL); + ent = ps_alignment_iter_get(itor); + ent->start = 0; + ent->duration = last_frame; + E_DEBUG("state %d start %d end %d\n", 0, + ent->start, last_frame); + ps_alignment_iter_free(itor); + ps_alignment_propagate(sas->al); + + return 0; +} + +static int +state_align_search_reinit(ps_search_t *search, dict_t *dict, dict2pid_t *d2p) +{ + /* This does nothing, you need to make a new search for each utterance. */ + (void) search; + (void) dict; + (void) d2p; + return 0; +} + +static void +state_align_search_free(ps_search_t *search) +{ + state_align_search_t *sas = (state_align_search_t *)search; + ps_search_base_free(search); + ckd_free(sas->hmms); + ckd_free(sas->tokens); + hmm_context_free(sas->hmmctx); + ps_alignment_free(sas->al); + ckd_free(sas); +} + +struct state_align_seg_s { + ps_seg_t base; + ps_alignment_iter_t *itor; +}; +typedef struct state_align_seg_s state_align_seg_t; + +static void +state_align_search_seg_free(ps_seg_t * seg) +{ + state_align_seg_t *itor = (state_align_seg_t *)seg; + ps_alignment_iter_free(itor->itor); + ckd_free(itor); +} + +static void +state_align_search_fill_iter(ps_seg_t *seg) +{ + state_align_seg_t *itor = (state_align_seg_t *)seg; + ps_alignment_entry_t *entry = ps_alignment_iter_get(itor->itor); + + seg->sf = entry->start; + seg->ef = entry->start + entry->duration - 1; + seg->ascr = entry->score; + seg->lscr = 0; + seg->word = dict_wordstr(ps_search_dict(seg->search), entry->id.wid); +} + +static ps_seg_t * +state_align_search_seg_next(ps_seg_t * seg) +{ + state_align_seg_t *itor = (state_align_seg_t *)seg; + + itor->itor = ps_alignment_iter_next(itor->itor); + if (itor->itor == NULL) { + state_align_search_seg_free(seg); + return NULL; + } + state_align_search_fill_iter(seg); + return seg; +} + +static ps_segfuncs_t state_align_segfuncs = { + /* seg_next */ state_align_search_seg_next, + /* seg_free */ state_align_search_seg_free +}; + + +static ps_seg_t * 
+state_align_search_seg_iter(ps_search_t * search) +{ + state_align_search_t *sas = (state_align_search_t *) search; + state_align_seg_t *seg; + ps_alignment_iter_t *itor; + + if (sas->al == NULL) + return NULL; + /* Even though the alignment has a bunch of levels, for the + purposes of the decoder API we will just iterate over words, + which is the most likely/useful use case. We will also expose + the rest of the alignment API separately. */ + + itor = ps_alignment_words(sas->al); + if (itor == NULL) + return NULL; + seg = ckd_calloc(1, sizeof(state_align_seg_t)); + seg->base.vt = &state_align_segfuncs; + seg->base.search = search; + seg->itor = itor; + state_align_search_fill_iter((ps_seg_t *)seg); + + return (ps_seg_t *)seg; +} + +static char const * +state_align_search_hyp(ps_search_t *search, int32 *out_score) +{ + state_align_search_t *sas = (state_align_search_t *)search; + ps_alignment_iter_t *itor; + size_t hyp_len; + + if (search->hyp_str) + ckd_free(search->hyp_str); + search->hyp_str = NULL; + if (sas->al == NULL) + return NULL; + itor = ps_alignment_words(sas->al); + if (itor == NULL) + return NULL; + for (hyp_len = 0; itor; itor = ps_alignment_iter_next(itor)) { + const char *word = dict_wordstr(ps_search_dict(search), + ps_alignment_iter_get(itor)->id.wid); + if (word == NULL) { + E_ERROR("Unknown word id %d in alignment", + ps_alignment_iter_get(itor)->id.wid); + return NULL; + } + hyp_len += strlen(word) + 1; + } + search->hyp_str = ckd_calloc(hyp_len + 1, sizeof(*search->hyp_str)); + for (itor = ps_alignment_words(sas->al); + itor; itor = ps_alignment_iter_next(itor)) { + ps_alignment_entry_t *ent = ps_alignment_iter_get(itor); + const char *word = dict_wordstr(ps_search_dict(search), + ent->id.wid); + strcat(search->hyp_str, word); + strcat(search->hyp_str, " "); + *out_score = ent->score; + } + search->hyp_str[strlen(search->hyp_str) - 1] = '\0'; + return search->hyp_str; +} + +static ps_searchfuncs_t state_align_search_funcs = { + /* start: */ 
state_align_search_start, + /* step: */ state_align_search_step, + /* finish: */ state_align_search_finish, + /* reinit: */ state_align_search_reinit, + /* free: */ state_align_search_free, + /* lattice: */ NULL, + /* hyp: */ state_align_search_hyp, + /* prob: */ NULL, + /* seg_iter: */ state_align_search_seg_iter, +}; + +ps_search_t * +state_align_search_init(const char *name, + cmd_ln_t *config, + acmod_t *acmod, + ps_alignment_t *al) +{ + state_align_search_t *sas; + ps_alignment_iter_t *itor; + hmm_t *hmm; + + sas = ckd_calloc(1, sizeof(*sas)); + ps_search_init(ps_search_base(sas), &state_align_search_funcs, + PS_SEARCH_TYPE_STATE_ALIGN, name, + config, acmod, al->d2p->dict, al->d2p); + sas->hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->mdef), + acmod->tmat->tp, NULL, acmod->mdef->sseq); + if (sas->hmmctx == NULL) { + ckd_free(sas); + return NULL; + } + sas->al = ps_alignment_retain(al); + + /* Generate HMM vector from phone level of alignment. */ + sas->n_phones = ps_alignment_n_phones(al); + sas->n_emit_state = ps_alignment_n_states(al); + sas->hmms = ckd_calloc(sas->n_phones, sizeof(*sas->hmms)); + for (hmm = sas->hmms, itor = ps_alignment_phones(al); itor; + ++hmm, itor = ps_alignment_iter_next(itor)) { + ps_alignment_entry_t *ent = ps_alignment_iter_get(itor); + hmm_init(sas->hmmctx, hmm, FALSE, + ent->id.pid.ssid, ent->id.pid.tmatid); + } + return ps_search_base(sas); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/state_align_search.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/state_align_search.h new file mode 100644 index 0000000000000000000000000000000000000000..95bd19bd25cd251f14ff01a74737e6625539029c --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/state_align_search.h @@ -0,0 +1,99 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* 
==================================================================== + * Copyright (c) 2010 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file state_align_search.h State (and phone and word) alignment search. + */ + +#ifndef __STATE_ALIGN_SEARCH_H__ +#define __STATE_ALIGN_SEARCH_H__ + +/* SphinxBase headers. */ +#include + +/* Local headers. 
*/ +#include +#include "pocketsphinx_internal.h" +#include "ps_alignment.h" +#include "hmm.h" + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * History structure + */ +struct state_align_hist_s { + uint16 id; + int32 score; +}; +typedef struct state_align_hist_s state_align_hist_t; + +/** + * Forced alignment search structure. + */ +struct state_align_search_s { + ps_search_t base; /**< Base search structure. */ + hmm_context_t *hmmctx; /**< HMM context structure. */ + ps_alignment_t *al; /**< Alignment structure being operated on. */ + hmm_t *hmms; /**< Vector of HMMs corresponding to phone level. */ + int n_phones; /**< Number of HMMs (phones). */ + + int frame; /**< Current frame being processed. */ + int32 best_score; /**< Best score in current frame. */ + + int n_emit_state; /**< Number of emitting states (tokens per frame) */ + state_align_hist_t *tokens; /**< Tokens (backpointers) for state alignment. */ + int n_fr_alloc; /**< Number of frames of tokens allocated. */ +}; +typedef struct state_align_search_s state_align_search_t; + +POCKETSPHINX_EXPORT +ps_search_t *state_align_search_init(const char *name, + cmd_ln_t *config, + acmod_t *acmod, + ps_alignment_t *al); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __STATE_ALIGN_SEARCH_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/tied_mgau_common.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/tied_mgau_common.h new file mode 100644 index 0000000000000000000000000000000000000000..1ddd995060a75435d90500054224f79ea043845c --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/tied_mgau_common.h @@ -0,0 +1,132 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2010 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file tied_mgau_common.h + * @brief Common code shared between SC and PTM (tied-state) models. 
+ */ + +#ifndef __TIED_MGAU_COMMON_H__ +#define __TIED_MGAU_COMMON_H__ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +#define MGAU_MIXW_VERSION "1.0" /* Sphinx-3 file format version for mixw */ +#define MGAU_PARAM_VERSION "1.0" /* Sphinx-3 file format version for mean/var */ +#define NONE -1 +#define WORST_DIST (int32)(0x80000000) + +/** Subtract GMM component b (assumed to be positive) and saturate */ +#ifdef FIXED_POINT +#define GMMSUB(a,b) \ + (((a)-(b) > a) ? (INT_MIN) : ((a)-(b))) +/** Add GMM component b (assumed to be positive) and saturate */ +#define GMMADD(a,b) \ + (((a)+(b) < a) ? (INT_MAX) : ((a)+(b))) +#else +#define GMMSUB(a,b) ((a)-(b)) +#define GMMADD(a,b) ((a)+(b)) +#endif + +#ifndef MIN +#define MIN(a,b) ((a) < (b) ? (a) : (b)) +#endif + + +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) +#define LOGMATH_INLINE static inline +#elif defined(_MSC_VER) +#define LOGMATH_INLINE __inline +#else +#define LOGMATH_INLINE static +#endif + +/* Allocate 0..159 for negated quantized mixture weights and 0..96 for + * negated normalized acoustic scores, so that the combination of the + * two (for a single mixture) can never exceed 255. */ +#define MAX_NEG_MIXW 159 /**< Maximum negated mixture weight value. */ +#define MAX_NEG_ASCR 96 /**< Maximum negated acoustic score value. */ + +/** + * Quickly log-add two negated log probabilities. + * + * @param lmath The log-math object + * @param mlx A negative log probability (0 < mlx < 255) + * @param mly A negative log probability (0 < mly < 255) + * @return -log(exp(-mlx)+exp(-mly)) + * + * We can do some extra-fast log addition since we know that + * mixw+ascr is always less than 256 and hence x-y is also always less + * than 256. This relies on some cooperation from logmath_t which + * will never produce a logmath table smaller than 256 entries. 
+ * + * Note that the parameters are *negated* log probabilities (and + * hence, are positive numbers), as is the return value. This is the + * key to the "fastness" of this function. + */ +LOGMATH_INLINE int +fast_logmath_add(logmath_t *lmath, int mlx, int mly) +{ + logadd_t *t = LOGMATH_TABLE(lmath); + int d, r; + + /* d must be positive, obviously. */ + if (mlx > mly) { + d = (mlx - mly); + r = mly; + } + else { + d = (mly - mlx); + r = mlx; + } + + return r - (((uint8 *)t->table)[d]); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __TIED_MGAU_COMMON_H__ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/tmat.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/tmat.c new file mode 100644 index 0000000000000000000000000000000000000000..cd32ea5631955afe96d08fb5d10453667be801fe --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/tmat.c @@ -0,0 +1,284 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* System headers. */ +#include + +/* SphinxBase headers. */ +#include +#include +#include +#include + +/* Local headers. */ +#include "tmat.h" +#include "hmm.h" +#include "vector.h" + +#define TMAT_PARAM_VERSION "1.0" + + +/** + * Checks that no transition matrix in the given object contains backward arcs. + * @returns 0 if successful, -1 if check failed. + */ +static int32 tmat_chk_uppertri(tmat_t *tmat, logmath_t *lmath); + + +/** + * Checks that transition matrix arcs in the given object skip over + * at most 1 state. + * @returns 0 if successful, -1 if check failed. 
+ */ + +static int32 tmat_chk_1skip(tmat_t *tmat, logmath_t *lmath); + + +void +tmat_dump(tmat_t * tmat, FILE * fp) +{ + int32 i, src, dst; + + for (i = 0; i < tmat->n_tmat; i++) { + fprintf(fp, "TMAT %d = %d x %d\n", i, tmat->n_state, + tmat->n_state + 1); + for (src = 0; src < tmat->n_state; src++) { + for (dst = 0; dst <= tmat->n_state; dst++) + fprintf(fp, " %12d", tmat->tp[i][src][dst]); + fprintf(fp, "\n"); + } + fprintf(fp, "\n"); + } + fflush(fp); +} + + +/* + * Check model tprob matrices that they conform to upper-triangular assumption; + * i.e. no "backward" transitions allowed. + */ +int32 +tmat_chk_uppertri(tmat_t * tmat, logmath_t *lmath) +{ + int32 i, src, dst; + + (void) lmath; + /* Check that each tmat is upper-triangular */ + for (i = 0; i < tmat->n_tmat; i++) { + for (dst = 0; dst < tmat->n_state; dst++) + for (src = dst + 1; src < tmat->n_state; src++) + if (tmat->tp[i][src][dst] < 255) { + E_ERROR("tmat[%d][%d][%d] = %d\n", + i, src, dst, tmat->tp[i][src][dst]); + return -1; + } + } + + return 0; +} + + +int32 +tmat_chk_1skip(tmat_t * tmat, logmath_t *lmath) +{ + int32 i, src, dst; + + (void) lmath; + for (i = 0; i < tmat->n_tmat; i++) { + for (src = 0; src < tmat->n_state; src++) + for (dst = src + 3; dst <= tmat->n_state; dst++) + if (tmat->tp[i][src][dst] < 255) { + E_ERROR("tmat[%d][%d][%d] = %d\n", + i, src, dst, tmat->tp[i][src][dst]); + return -1; + } + } + + return 0; +} + + +tmat_t * +tmat_init(char const *file_name, logmath_t *lmath, float64 tpfloor, int32 breport) +{ + char tmp; + int32 n_src, n_dst, n_tmat; + FILE *fp; + int32 byteswap, chksum_present; + uint32 chksum; + float32 **tp; + int32 i, j, k, tp_per_tmat; + char **argname, **argval; + tmat_t *t; + + + if (breport) { + E_INFO("Reading HMM transition probability matrices: %s\n", + file_name); + } + + t = (tmat_t *) ckd_calloc(1, sizeof(tmat_t)); + + if ((fp = fopen(file_name, "rb")) == NULL) + E_FATAL_SYSTEM("Failed to open transition file '%s' for reading", file_name); + + /* 
Read header, including argument-value info and 32-bit byteorder magic */ + if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) + E_FATAL("Failed to read header from file '%s'\n", file_name); + + /* Parse argument-value list */ + chksum_present = 0; + for (i = 0; argname[i]; i++) { + if (strcmp(argname[i], "version") == 0) { + if (strcmp(argval[i], TMAT_PARAM_VERSION) != 0) + E_WARN("Version mismatch(%s): %s, expecting %s\n", + file_name, argval[i], TMAT_PARAM_VERSION); + } + else if (strcmp(argname[i], "chksum0") == 0) { + chksum_present = 1; /* Ignore the associated value */ + } + } + bio_hdrarg_free(argname, argval); + argname = argval = NULL; + + chksum = 0; + + /* Read #tmat, #from-states, #to-states, arraysize */ + if ((bio_fread(&n_tmat, sizeof(int32), 1, fp, byteswap, &chksum) + != 1) + || (bio_fread(&n_src, sizeof(int32), 1, fp, byteswap, &chksum) != + 1) + || (bio_fread(&n_dst, sizeof(int32), 1, fp, byteswap, &chksum) != + 1) + || (bio_fread(&i, sizeof(int32), 1, fp, byteswap, &chksum) != 1)) { + E_FATAL("Failed to read header from '%s'\n", file_name); + } + if (n_tmat >= MAX_INT16) + E_FATAL("%s: Number of transition matrices (%d) exceeds limit (%d)\n", file_name, + n_tmat, MAX_INT16); + t->n_tmat = n_tmat; + + if (n_dst != n_src + 1) + E_FATAL("%s: Unsupported transition matrix. Number of source states (%d) != number of target states (%d)-1\n", file_name, + n_src, n_dst); + t->n_state = n_src; + + if (i != t->n_tmat * n_src * n_dst) { + E_FATAL + ("%s: Invalid transitions. 
Number of coefficients (%d) doesn't match expected array dimension: %d x %d x %d\n", + file_name, i, t->n_tmat, n_src, n_dst); + } + + /* Allocate memory for tmat data */ + t->tp = ckd_calloc_3d(t->n_tmat, n_src, n_dst, sizeof(***t->tp)); + + /* Temporary structure to read in the float data */ + tp = ckd_calloc_2d(n_src, n_dst, sizeof(**tp)); + + /* Read transition matrices, normalize and floor them, and convert to log domain */ + tp_per_tmat = n_src * n_dst; + for (i = 0; i < t->n_tmat; i++) { + if (bio_fread(tp[0], sizeof(float32), tp_per_tmat, fp, + byteswap, &chksum) != tp_per_tmat) { + E_FATAL("Failed to read transition matrix %d from '%s'\n", i, file_name); + } + + /* Normalize and floor */ + for (j = 0; j < n_src; j++) { + if (vector_sum_norm(tp[j], n_dst) == 0.0) + E_WARN("Normalization failed for transition matrix %d from state %d\n", + i, j); + vector_nz_floor(tp[j], n_dst, tpfloor); + vector_sum_norm(tp[j], n_dst); + + /* Convert to logs3. */ + for (k = 0; k < n_dst; k++) { + int ltp; +#if 0 /* No, don't do this! It will subtly break 3-state HMMs. */ + /* For these ones, we floor them even if they are + * zero, otherwise HMM evaluation goes nuts. */ + if (k >= j && k-j < 3 && tp[j][k] == 0.0f) + tp[j][k] = tpfloor; +#endif + /* Log and quantize them. 
*/ + ltp = -logmath_log(lmath, tp[j][k]) >> SENSCR_SHIFT; + if (ltp > 255) ltp = 255; + t->tp[i][j][k] = (uint8)ltp; + } + } + } + + ckd_free_2d(tp); + + if (chksum_present) + bio_verify_chksum(fp, byteswap, chksum); + + if (fread(&tmp, 1, 1, fp) == 1) + E_ERROR("Non-empty file beyond end of data\n"); + + fclose(fp); + + if (tmat_chk_uppertri(t, lmath) < 0) + E_FATAL("Tmat not upper triangular\n"); + if (tmat_chk_1skip(t, lmath) < 0) + E_FATAL("Topology not Left-to-Right or Bakis\n"); + + return t; +} + +void +tmat_report(tmat_t * t) +{ + E_INFO_NOFN("Initialization of tmat_t, report:\n"); + E_INFO_NOFN("Read %d transition matrices of size %dx%d\n", + t->n_tmat, t->n_state, t->n_state + 1); + E_INFO_NOFN("\n"); + +} + +/* + * RAH, Free memory allocated in tmat_init () + */ +void +tmat_free(tmat_t * t) +{ + if (t) { + if (t->tp) + ckd_free_3d(t->tp); + ckd_free(t); + } +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/tmat.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/tmat.h new file mode 100644 index 0000000000000000000000000000000000000000..a8929ace60d75f63dcc3d6a9662d78f4bc0e2b69 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/tmat.h @@ -0,0 +1,101 @@ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifndef _S3_TMAT_H_ +#define _S3_TMAT_H_ + +#include +#include + +/** \file tmat.h + * \brief Transition matrix data structure. + */ +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +/** + * \struct tmat_t + * \brief Transition matrix data structure. All phone HMMs are assumed to have the same + * topology. 
+ */ +typedef struct tmat_s { + uint8 ***tp; /**< The transition matrices; kept in the same scale as acoustic scores; + tp[tmatid][from-state][to-state] */ + int16 n_tmat; /**< Number of transition matrices */ + int16 n_state; /**< Number of source states in each matrix (only the emitting states); + the number of destination states is n_state+1, because it includes the exit state */ +} tmat_t; + + +/** Initialize transition matrix */ + +tmat_t *tmat_init (char const *tmatfile,/**< In: input file */ + logmath_t *lmath, /**< In: log math parameters */ + float64 tpfloor, /**< In: floor value for each non-zero transition probability */ + int32 breport /**< In: whether reporting the process of tmat_t */ + ); + + + +/** Dump the transition matrices for debugging */ + +void tmat_dump (tmat_t *tmat, /**< In: transition matrix */ + FILE *fp /**< In: file pointer */ + ); + + +/** + * Free the memory allocated by tmat_init. + */ + +void tmat_free (tmat_t *t /**< In: transition matrix */ + ); + +/** + * Report the details of the transition matrix structure. + */ +void tmat_report(tmat_t *t /**< In: transition matrix*/ + ); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/README.python b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/README.python new file mode 100644 index 0000000000000000000000000000000000000000..cfcca6bec8f6cf1bcfa1bfc2bed7eb81b6c41056 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/README.python @@ -0,0 +1,41 @@ +Regenerating lapack_lite source +=============================== + +:Author: David M. Cooke +:Modified by David Huggins-Daines for Sphinx + +``blas_lite.c``, ``slapack_lite.c``, are ``f2c``'d versions of the +LAPACK routines required by the ``LinearAlgebra`` module, and wrapped +by the ``lapack_lite`` module. 
The scripts in this directory can be +used to create these files automatically from a directory of LAPACK +source files. + +You'll need `Plex 1.1.4`_ installed to do the appropriate scrubbing. + +.. _Plex 1.1.4: http://www.cosc.canterbury.ac.nz/~greg/python/Plex/ + +The routines that ``lapack_litemodule.c`` wraps are listed in +``wrapped_routines``, along with a few exceptions that aren't picked up +properly. Assuming that you have an unpacked LAPACK source tree in +``~/LAPACK``, you generate the new routines in a directory ``new-lite/`` with:: + +$ python ./make_lite.py wrapped_routines ~/LAPACK new-lite/ + +This will grab the right routines, with dependencies, put them into the +appropriate ``blas_lite.f``, ``dlapack_lite.f``, or ``zlapack_lite.f`` files, +run ``f2c`` over them, then do some scrubbing similar to that done to +generate the CLAPACK_ distribution. + +.. _CLAPACK: http://netlib.org/clapack/index.html + +The versions in CVS as of 2005-04-12 use the LAPACK source from the +`Debian package lapack3`_, version 3.0.20000531a-6. It was found that these +(being regularly maintained) worked better than the patches to the last +released version of LAPACK available at the LAPACK_ page. + +.. _Debian package lapack3: http://packages.debian.org/unstable/libs/lapack3 +.. _LAPACK: http://netlib.org/lapack/index.html + +A slightly-patched ``f2c`` was used to add parentheses around ``||`` expressions +and the arguments to ``<<`` to silence gcc warnings. Edit +the ``src/output.c`` in the ``f2c`` source to do this. 
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/bio.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/bio.c new file mode 100644 index 0000000000000000000000000000000000000000..8a66a48a2d50344b497d13f9f0da448d8b19cdb9 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/bio.c @@ -0,0 +1,645 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * bio.c -- Sphinx-3 binary file I/O functions. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1996 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log$ + * Revision 1.4 2005/06/21 20:40:46 arthchan2003 + * 1, Fixed doxygen documentation, 2, Add the $ keyword. + * + * Revision 1.3 2005/03/30 01:22:46 archan + * Fixed mistakes in last updates. Add + * + * + * 02-Jul-1997 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Bugfix: Added byteswapping in bio_verify_chksum(). + * + * 18-Dec-1996 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created. 
+ */ + +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning (disable: 4996) +#endif + +#include "sphinxbase/bio.h" +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" + + +#define BIO_HDRARG_MAX 32 +#define END_COMMENT "*end_comment*\n" + + +static void +bcomment_read(FILE * fp) +{ + __BIGSTACKVARIABLE__ char iline[16384]; + + while (fgets(iline, sizeof(iline), fp) != NULL) { + if (strcmp(iline, END_COMMENT) == 0) + return; + } + E_FATAL("Missing %s marker\n", END_COMMENT); +} + + +static int32 +swap_check(FILE * fp) +{ + uint32 magic; + + if (fread(&magic, sizeof(uint32), 1, fp) != 1) { + E_ERROR("Cannot read BYTEORDER MAGIC NO.\n"); + return -1; + } + + if (magic != BYTE_ORDER_MAGIC) { + /* either need to swap or got bogus magic number */ + SWAP_INT32(&magic); + + if (magic == BYTE_ORDER_MAGIC) + return 1; + + SWAP_INT32(&magic); + E_ERROR("Bad BYTEORDER MAGIC NO: %08x, expecting %08x\n", + magic, BYTE_ORDER_MAGIC); + return -1; + } + + return 0; +} + + +void +bio_hdrarg_free(char **argname, char **argval) +{ + int32 i; + + if (argname == NULL) + return; + for (i = 0; argname[i]; i++) { + ckd_free(argname[i]); + ckd_free(argval[i]); + } + ckd_free(argname); + ckd_free(argval); +} + + +int32 +bio_writehdr_version(FILE * fp, char *version) +{ + uint32 b; + + fprintf(fp, "s3\n"); + fprintf(fp, "version %s\n", version); + fprintf(fp, "endhdr\n"); + fflush(fp); + + b = (uint32) BYTE_ORDER_MAGIC; + fwrite(&b, sizeof(uint32), 1, fp); + fflush(fp); + + return 0; +} + + +int32 +bio_writehdr(FILE *fp, ...) 
+{ + char const *key; + va_list args; + uint32 b; + + fprintf(fp, "s3\n"); + va_start(args, fp); + while ((key = va_arg(args, char const *)) != NULL) { + char const *val = va_arg(args, char const *); + if (val == NULL) { + E_ERROR("Wrong number of arguments\n"); + va_end(args); + return -1; + } + fprintf(fp, "%s %s\n", key, val); + } + va_end(args); + + fprintf(fp, "endhdr\n"); + fflush(fp); + + b = (uint32) BYTE_ORDER_MAGIC; + if (fwrite(&b, sizeof(uint32), 1, fp) != 1) + return -1; + fflush(fp); + + return 0; +} + + +int32 +bio_readhdr(FILE * fp, char ***argname, char ***argval, int32 * swap) +{ + __BIGSTACKVARIABLE__ char line[16384], word[4096]; + int32 i, l; + int32 lineno; + + *argname = (char **) ckd_calloc(BIO_HDRARG_MAX + 1, sizeof(char *)); + *argval = (char **) ckd_calloc(BIO_HDRARG_MAX, sizeof(char *)); + + lineno = 0; + if (fgets(line, sizeof(line), fp) == NULL){ + E_ERROR("Premature EOF, line %d\n", lineno); + goto error_out; + } + lineno++; + + if ((line[0] == 's') && (line[1] == '3') && (line[2] == '\n')) { + /* New format (post Dec-1996, including checksums); read argument-value pairs */ + for (i = 0;;) { + if (fgets(line, sizeof(line), fp) == NULL) { + E_ERROR("Premature EOF, line %d\n", lineno); + goto error_out; + } + lineno++; + + if (sscanf(line, "%s%n", word, &l) != 1) { + E_ERROR("Header format error, line %d\n", lineno); + goto error_out; + } + if (strcmp(word, "endhdr") == 0) + break; + if (word[0] == '#') /* Skip comments */ + continue; + + if (i >= BIO_HDRARG_MAX) { + E_ERROR + ("Max arg-value limit(%d) exceeded; increase BIO_HDRARG_MAX\n", + BIO_HDRARG_MAX); + goto error_out; + } + + (*argname)[i] = ckd_salloc(word); + if (sscanf(line + l, "%s", word) != 1) { /* Multi-word values not allowed */ + E_ERROR("Header format error, line %d\n", lineno); + goto error_out; + } + (*argval)[i] = ckd_salloc(word); + i++; + } + } + else { + /* Old format (without checksums); the first entry must be the version# */ + if (sscanf(line, "%s", word) != 
1) { + E_ERROR("Header format error, line %d\n", lineno); + goto error_out; + } + + (*argname)[0] = ckd_salloc("version"); + (*argval)[0] = ckd_salloc(word); + i = 1; + + bcomment_read(fp); + } + (*argname)[i] = NULL; + + if ((*swap = swap_check(fp)) < 0) { + E_ERROR("swap_check failed\n"); + goto error_out; + } + + return 0; +error_out: + bio_hdrarg_free(*argname, *argval); + *argname = *argval = NULL; + return -1; +} + + +static uint32 +chksum_accum(const void *buf, int32 el_sz, int32 n_el, uint32 sum) +{ + int32 i; + uint8 *i8; + uint16 *i16; + uint32 *i32; + + switch (el_sz) { + case 1: + i8 = (uint8 *) buf; + for (i = 0; i < n_el; i++) + sum = (sum << 5 | sum >> 27) + i8[i]; + break; + case 2: + i16 = (uint16 *) buf; + for (i = 0; i < n_el; i++) + sum = (sum << 10 | sum >> 22) + i16[i]; + break; + case 4: + i32 = (uint32 *) buf; + for (i = 0; i < n_el; i++) + sum = (sum << 20 | sum >> 12) + i32[i]; + break; + default: + E_FATAL("Unsupported elemsize for checksum: %d\n", el_sz); + break; + } + + return sum; +} + + +static void +swap_buf(void *buf, int32 el_sz, int32 n_el) +{ + int32 i; + uint16 *buf16; + uint32 *buf32; + + switch (el_sz) { + case 1: + break; + case 2: + buf16 = (uint16 *) buf; + for (i = 0; i < n_el; i++) + SWAP_INT16(buf16 + i); + break; + case 4: + buf32 = (uint32 *) buf; + for (i = 0; i < n_el; i++) + SWAP_INT32(buf32 + i); + break; + default: + E_FATAL("Unsupported elemsize for byteswapping: %d\n", el_sz); + break; + } +} + + +int32 +bio_fread(void *buf, int32 el_sz, int32 n_el, FILE * fp, int32 swap, + uint32 * chksum) +{ + if (fread(buf, el_sz, n_el, fp) != (size_t) n_el) + return -1; + + if (swap) + swap_buf(buf, el_sz, n_el); + + if (chksum) + *chksum = chksum_accum(buf, el_sz, n_el, *chksum); + + return n_el; +} + +int32 +bio_fwrite(const void *buf, int32 el_sz, int32 n_el, FILE *fp, + int32 swap, uint32 *chksum) +{ + if (chksum) + *chksum = chksum_accum(buf, el_sz, n_el, *chksum); + if (swap) { + void *nbuf; + int rv; + + nbuf = 
ckd_calloc(n_el, el_sz); + memcpy(nbuf, buf, n_el * el_sz); + swap_buf(nbuf, el_sz, n_el); + rv = fwrite(nbuf, el_sz, n_el, fp); + ckd_free(nbuf); + return rv; + } + else { + return fwrite(buf, el_sz, n_el, fp); + } +} + +int32 +bio_fread_1d(void **buf, size_t el_sz, uint32 * n_el, FILE * fp, + int32 sw, uint32 * ck) +{ + /* Read 1-d array size */ + if (bio_fread(n_el, sizeof(int32), 1, fp, sw, ck) != 1) + E_FATAL("fread(arraysize) failed\n"); + if (*n_el <= 0) + E_FATAL("Bad arraysize: %d\n", *n_el); + + /* Allocate memory for array data */ + *buf = (void *) ckd_calloc(*n_el, el_sz); + + /* Read array data */ + if (bio_fread(*buf, el_sz, *n_el, fp, sw, ck) != (int32)*n_el) + E_FATAL("fread(arraydata) failed\n"); + + return *n_el; +} + +int32 +bio_fread_2d(void ***arr, + size_t e_sz, + uint32 *d1, + uint32 *d2, + FILE *fp, + uint32 swap, + uint32 *chksum) +{ + uint32 l_d1, l_d2; + uint32 n; + size_t ret; + void *raw; + + ret = bio_fread(&l_d1, sizeof(uint32), 1, fp, swap, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to read complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fread_2d"); + } + return -1; + } + ret = bio_fread(&l_d2, sizeof(uint32), 1, fp, swap, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to read complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fread_2d"); + } + return -1; + } + if (bio_fread_1d(&raw, e_sz, &n, fp, swap, chksum) != (int32)n) + return -1; + + assert(n == l_d1*l_d2); + + *d1 = l_d1; + *d2 = l_d2; + *arr = ckd_alloc_2d_ptr(l_d1, l_d2, raw, e_sz); + + return n; +} + +int32 +bio_fread_3d(void ****arr, + size_t e_sz, + uint32 *d1, + uint32 *d2, + uint32 *d3, + FILE *fp, + uint32 swap, + uint32 *chksum) +{ + uint32 l_d1; + uint32 l_d2; + uint32 l_d3; + uint32 n; + void *raw; + size_t ret; + + ret = bio_fread(&l_d1, sizeof(uint32), 1, fp, swap, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to read complete data"); + } + else { + 
E_ERROR_SYSTEM("OS error in bio_fread_3d"); + } + return -1; + } + ret = bio_fread(&l_d2, sizeof(uint32), 1, fp, swap, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to read complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fread_3d"); + } + return -1; + } + ret = bio_fread(&l_d3, sizeof(uint32), 1, fp, swap, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to read complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fread_3d"); + } + return -1; + } + + if (bio_fread_1d(&raw, e_sz, &n, fp, swap, chksum) != (int32)n) { + return -1; + } + + assert(n == l_d1 * l_d2 * l_d3); + + *arr = ckd_alloc_3d_ptr(l_d1, l_d2, l_d3, raw, e_sz); + *d1 = l_d1; + *d2 = l_d2; + *d3 = l_d3; + + return n; +} + +void +bio_verify_chksum(FILE * fp, int32 byteswap, uint32 chksum) +{ + uint32 file_chksum; + + if (fread(&file_chksum, sizeof(uint32), 1, fp) != 1) + E_FATAL("fread(chksum) failed\n"); + if (byteswap) + SWAP_INT32(&file_chksum); + if (file_chksum != chksum) + E_FATAL + ("Checksum error; file-checksum %08x, computed %08x\n", + file_chksum, chksum); +} + +int +bio_fwrite_3d(void ***arr, + size_t e_sz, + uint32 d1, + uint32 d2, + uint32 d3, + FILE *fp, + uint32 *chksum) +{ + size_t ret; + + /* write out first dimension 1 */ + ret = bio_fwrite(&d1, sizeof(uint32), 1, fp, 0, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to write complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fwrite_3d"); + } + return -1; + } + + /* write out first dimension 2 */ + ret = bio_fwrite(&d2, sizeof(uint32), 1, fp, 0, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to write complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fwrite_3d"); + } + return -1; + } + + /* write out first dimension 3 */ + ret = bio_fwrite(&d3, sizeof(uint32), 1, fp, 0, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to write complete data"); + } + else { + 
E_ERROR_SYSTEM("OS error in bio_fwrite_3d"); + } + return -1; + } + + /* write out the data in the array as one big block */ + return bio_fwrite_1d(arr[0][0], e_sz, d1 * d2 * d3, fp, chksum); +} + +int +bio_fwrite_1d(void *arr, + size_t e_sz, + uint32 d1, + FILE *fp, + uint32 *chksum) +{ + size_t ret; + ret = bio_fwrite(&d1, sizeof(uint32), 1, fp, 0, chksum); + if (ret != 1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to write complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fwrite_1d"); + } + return -1; + } + + ret = bio_fwrite(arr, e_sz, d1, fp, 0, chksum); + if (ret != d1) { + if (ret == 0) { + E_ERROR_SYSTEM("Unable to write complete data"); + } + else { + E_ERROR_SYSTEM("OS error in bio_fwrite_1d"); + } + + return -1; + } + + return ret; +} + +int16* +bio_read_wavfile(char const *directory, + char const *filename, + char const *extension, + int32 header, + int32 endian, + size_t *nsamps) +{ + FILE *uttfp; + char *inputfile; + size_t n, l; + int16 *data; + + (void)endian; + n = strlen(extension); + l = strlen(filename); + if ((n <= l) && (0 == strcmp(filename + l - n, extension))) + extension = ""; + inputfile = ckd_calloc(strlen(directory) + l + n + 2, 1); + if (directory) { + sprintf(inputfile, "%s/%s%s", directory, filename, extension); + } else { + sprintf(inputfile, "%s%s", filename, extension); + } + + if ((uttfp = fopen(inputfile, "rb")) == NULL) { + E_FATAL_SYSTEM("Failed to open file '%s' for reading", inputfile); + } + fseek(uttfp, 0, SEEK_END); + n = ftell(uttfp); + fseek(uttfp, 0, SEEK_SET); + if (header > 0) { + if (fseek(uttfp, header, SEEK_SET) < 0) { + E_ERROR_SYSTEM("Failed to move to an offset %d in a file '%s'", header, inputfile); + fclose(uttfp); + ckd_free(inputfile); + return NULL; + } + n -= header; + } + n /= sizeof(int16); + data = ckd_calloc(n, sizeof(*data)); + if ((l = fread(data, sizeof(int16), n, uttfp)) < n) { + E_ERROR_SYSTEM("Failed to read %d samples from %s: %d", n, inputfile, l); + ckd_free(data); + 
ckd_free(inputfile); + fclose(uttfp); + return NULL; + } + ckd_free(inputfile); + fclose(uttfp); + if (nsamps) *nsamps = n; + + return data; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/bitarr.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/bitarr.c new file mode 100644 index 0000000000000000000000000000000000000000..bb880a9677139f38c44643409e1aca84e46bf5f6 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/bitarr.c @@ -0,0 +1,108 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2015 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * bitarr.c -- Bit array manipulations implementation. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sphinxbase/err.h" +#include "sphinxbase/bitarr.h" +#include "sphinxbase/byteorder.h" + +uint64 bitarr_read_int57(bitarr_address_t address, uint8 length, uint64 mask) +{ + uint64 value64; + const uint8 *base_off = (const uint8 *)(address.base) + (address.offset >> 3); + (void)length; /* Yeah, what is this for anyway? */ + memcpy(&value64, base_off, sizeof(value64)); + SWAP_LE_64(&value64); + return (value64 >> (address.offset & 7)) & mask; +} + +void bitarr_write_int57(bitarr_address_t address, uint8 length, uint64 value) +{ + uint64 value64; + uint8 *base_off = (uint8 *)(address.base) + (address.offset >> 3); + (void)length; /* Yeah, what is this for anyway? */ + memcpy(&value64, base_off, sizeof(value64)); + SWAP_LE_64(&value64); + value64 |= (value << (address.offset & 7)); + SWAP_LE_64(&value64); + memcpy(base_off, &value64, sizeof(value64)); +} + +uint32 bitarr_read_int25(bitarr_address_t address, uint8 length, uint32 mask) +{ + uint32 value32; + const uint8 *base_off = (const uint8*)(address.base) + (address.offset >> 3); + (void)length; /* Yeah, what is this for anyway? 
*/ + memcpy(&value32, base_off, sizeof(value32)); + SWAP_LE_32(&value32); + return (value32 >> (address.offset & 7)) & mask; +} + +void bitarr_write_int25(bitarr_address_t address, uint8 length, uint32 value) +{ + uint32 value32; + uint8 *base_off = (uint8 *)(address.base) + (address.offset >> 3); + (void)length; /* Yeah, what is this for anyway? */ + memcpy(&value32, base_off, sizeof(value32)); + SWAP_LE_32(&value32); + value32 |= (value << (address.offset & 7)); + SWAP_LE_32(&value32); + memcpy(base_off, &value32, sizeof(value32)); +} + +void bitarr_mask_from_max(bitarr_mask_t *bit_mask, uint32 max_value) +{ + bit_mask->bits = bitarr_required_bits(max_value); + bit_mask->mask = (uint32)((1ULL << bit_mask->bits) - 1); +} + +uint8 bitarr_required_bits(uint32 max_value) +{ + uint8 res; + + if (!max_value) return 0; + res = 1; + while (max_value >>= 1) res++; + return res; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/bitvec.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/bitvec.c new file mode 100644 index 0000000000000000000000000000000000000000..2d139010e2dc22b62b618e7387db7eaeaefc738f --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/bitvec.c @@ -0,0 +1,101 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * bitvec.c -- Bit vector type. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: bitvec.c,v $ + * Revision 1.4 2005/06/22 02:58:22 arthchan2003 + * Added keyword + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 05-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Started. 
+ */ + + +#include "sphinxbase/bitvec.h" + +/* + * Resize a bit vector from old_len to new_len bits. Both lengths are + * converted to storage words with bitvec_size(); when the vector grows, + * the newly added words are zeroed. Returns the pointer handed back by + * ckd_realloc(). + */ +bitvec_t * +bitvec_realloc(bitvec_t *vec, + size_t old_len, + size_t new_len) +{ + bitvec_t *new_vec; + size_t old_size = bitvec_size(old_len); + size_t new_size = bitvec_size(new_len); + + new_vec = ckd_realloc(vec, new_size * sizeof(bitvec_t)); + if (new_size > old_size) + memset(new_vec + old_size, 0, (new_size - old_size) * sizeof(bitvec_t)); + + return new_vec; +} + +size_t +bitvec_count_set(bitvec_t *vec, size_t len) +{ + size_t words, bits, w, b, n; + bitvec_t *v; + + words = len / BITVEC_BITS; + bits = len % BITVEC_BITS; + v = vec; + n = 0; + for (w = 0; w < words; ++w, ++v) { + if (*v == 0) + continue; + for (b = 0; b < BITVEC_BITS; ++b) + if (*v & (1<= 97 && inta <= 122) { + inta += -32; + } + if (intb >= 97 && intb <= 122) { + intb += -32; + } + + } else if (zcode == 233 || zcode == 169) { + +/* + EBCDIC is assumed - ZCODE is the EBCDIC code of either lower or + upper case 'Z'. +*/ + + if ((inta >= 129 && inta <= 137) || (inta >= 145 && inta <= 153) || + (inta >= 162 && inta <= 169)) { + inta += 64; + } + if ((intb >= 129 && intb <= 137) || (intb >= 145 && intb <= 153) || + (intb >= 162 && intb <= 169)) { + intb += 64; + } + + } else if (zcode == 218 || zcode == 250) { + +/* + ASCII is assumed, on Prime machines - ZCODE is the ASCII code + plus 128 of either lower or upper case 'Z'. +*/ + + if (inta >= 225 && inta <= 250) { + inta += -32; + } + if (intb >= 225 && intb <= 250) { + intb += -32; + } + } + ret_val = inta == intb; + +/* + RETURN + + End of LSAME +*/ + + return ret_val; +} /* lsame_ */ + +/* + Dot product of the n-element single-precision vectors sx and sy, + stepping through them with increments incx and incy. Returns 0 + when *n <= 0. The working variables are declared static, so the + routine keeps state between calls and is not reentrant. +*/ +doublereal sdot_(integer *n, real *sx, integer *incx, real *sy, integer *incy) +{ + /* System generated locals */ + integer i__1; + real ret_val; + + /* Local variables */ + static integer i__, m, ix, iy, mp1; + static real stemp; + + +/* + forms the dot product of two vectors. + uses unrolled loops for increments equal to one. + jack dongarra, linpack, 3/11/78. 
+ modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sy; + --sx; + + /* Function Body */ + stemp = 0.f; + ret_val = 0.f; + if (*n <= 0) { + return ret_val; + } + if (*incx == 1 && *incy == 1) { + goto L20; + } + +/* + code for unequal increments or equal increments + not equal to 1 +*/ + + ix = 1; + iy = 1; + if (*incx < 0) { + ix = (-(*n) + 1) * *incx + 1; + } + if (*incy < 0) { + iy = (-(*n) + 1) * *incy + 1; + } + i__1 = *n; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += sx[ix] * sy[iy]; + ix += *incx; + iy += *incy; +/* L10: */ + } + ret_val = stemp; + return ret_val; + +/* + code for both increments equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__1 = m; + for (i__ = 1; i__ <= i__1; ++i__) { + stemp += sx[i__] * sy[i__]; +/* L30: */ + } + if (*n < 5) { + goto L60; + } +L40: + mp1 = m + 1; + i__1 = *n; + for (i__ = mp1; i__ <= i__1; i__ += 5) { + stemp = stemp + sx[i__] * sy[i__] + sx[i__ + 1] * sy[i__ + 1] + sx[ + i__ + 2] * sy[i__ + 2] + sx[i__ + 3] * sy[i__ + 3] + sx[i__ + + 4] * sy[i__ + 4]; +/* L50: */ + } +L60: + ret_val = stemp; + return ret_val; +} /* sdot_ */ + +/* Subroutine */ int sgemm_(char *transa, char *transb, integer *m, integer * + n, integer *k, real *alpha, real *a, integer *lda, real *b, integer * + ldb, real *beta, real *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__, j, l, info; + static logical nota, notb; + static real temp; + static integer ncola; + extern logical lsame_(char *, char *); + static integer nrowa, nrowb; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SGEMM performs one of the matrix-matrix operations + + C := alpha*op( A )*op( B ) + beta*C, + + where op( X ) is one of + + op( X ) = X or op( X ) = X', + + alpha and beta are scalars, and A, B and C 
are matrices, with op( A ) + an m by k matrix, op( B ) a k by n matrix and C an m by n matrix. + + Parameters + ========== + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n', op( A ) = A. + + TRANSA = 'T' or 't', op( A ) = A'. + + TRANSA = 'C' or 'c', op( A ) = A'. + + Unchanged on exit. + + TRANSB - CHARACTER*1. + On entry, TRANSB specifies the form of op( B ) to be used in + the matrix multiplication as follows: + + TRANSB = 'N' or 'n', op( B ) = B. + + TRANSB = 'T' or 't', op( B ) = B'. + + TRANSB = 'C' or 'c', op( B ) = B'. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix + op( A ) and of the matrix C. M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix + op( B ) and the number of columns of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry, K specifies the number of columns of the matrix + op( A ) and the number of rows of the matrix op( B ). K must + be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, ka ), where ka is + k when TRANSA = 'N' or 'n', and is m otherwise. + Before entry with TRANSA = 'N' or 'n', the leading m by k + part of the array A must contain the matrix A, otherwise + the leading k by m part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANSA = 'N' or 'n' then + LDA must be at least max( 1, m ), otherwise LDA must be at + least max( 1, k ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, kb ), where kb is + n when TRANSB = 'N' or 'n', and is k otherwise. 
+ Before entry with TRANSB = 'N' or 'n', the leading k by n + part of the array B must contain the matrix B, otherwise + the leading n by k part of the array B must contain the + matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. When TRANSB = 'N' or 'n' then + LDB must be at least max( 1, k ), otherwise LDB must be at + least max( 1, n ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then C need not be set on input. + Unchanged on exit. + + C - REAL array of DIMENSION ( LDC, n ). + Before entry, the leading m by n part of the array C must + contain the matrix C, except when beta is zero, in which + case C need not be set on entry. + On exit, the array C is overwritten by the m by n matrix + ( alpha*op( A )*op( B ) + beta*C ). + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Set NOTA and NOTB as true if A and B respectively are not + transposed and set NROWA, NCOLA and NROWB as the number of rows + and columns of A and the number of rows of B respectively. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + nota = lsame_(transa, "N"); + notb = lsame_(transb, "N"); + if (nota) { + nrowa = *m; + ncola = *k; + } else { + nrowa = *k; + ncola = *m; + } + if (notb) { + nrowb = *k; + } else { + nrowb = *n; + } + (void) ncola; +/* Test the input parameters. 
*/ + + info = 0; + if (! nota && ! lsame_(transa, "C") && ! lsame_( + transa, "T")) { + info = 1; + } else if (! notb && ! lsame_(transb, "C") && ! + lsame_(transb, "T")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*k < 0) { + info = 5; + } else if (*lda < max(1,nrowa)) { + info = 8; + } else if (*ldb < max(1,nrowb)) { + info = 10; + } else if (*ldc < max(1,*m)) { + info = 13; + } + if (info != 0) { + xerbla_("SGEMM ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || ((*alpha == 0.f || *k == 0) && *beta == 1.f)) { + return 0; + } + +/* And if alpha.eq.zero. */ + + if (*alpha == 0.f) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (notb) { + if (nota) { + +/* Form C := alpha*A*B + beta*C. 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L50: */ + } + } else if (*beta != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L60: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[l + j * b_dim1] != 0.f) { + temp = *alpha * b[l + j * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L70: */ + } + } +/* L80: */ + } +/* L90: */ + } + } else { + +/* Form C := alpha*A'*B + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[l + j * b_dim1]; +/* L100: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L110: */ + } +/* L120: */ + } + } + } else { + if (nota) { + +/* Form C := alpha*A*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L130: */ + } + } else if (*beta != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L140: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (b[j + l * b_dim1] != 0.f) { + temp = *alpha * b[j + l * b_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L150: */ + } + } +/* L160: */ + } +/* L170: */ + } + } else { + +/* Form C := alpha*A'*B' + beta*C */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * b[j + l * b_dim1]; 
+/* L180: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L190: */ + } +/* L200: */ + } + } + } + + return 0; + +/* End of SGEMM . */ + +} /* sgemm_ */ + +/* Subroutine */ int sgemv_(char *trans, integer *m, integer *n, real *alpha, + real *a, integer *lda, real *x, integer *incx, real *beta, real *y, + integer *incy) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2; + + /* Local variables */ + static integer i__, j, ix, iy, jx, jy, kx, ky, info; + static real temp; + static integer lenx, leny; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SGEMV performs one of the matrix-vector operations + + y := alpha*A*x + beta*y, or y := alpha*A'*x + beta*y, + + where alpha and beta are scalars, x and y are vectors and A is an + m by n matrix. + + Parameters + ========== + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' y := alpha*A*x + beta*y. + + TRANS = 'T' or 't' y := alpha*A'*x + beta*y. + + TRANS = 'C' or 'c' y := alpha*A'*x + beta*y. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix A. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix A. + N must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, n ). + Before entry, the leading m by n part of the array A must + contain the matrix of coefficients. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. LDA must be at least + max( 1, m ). + Unchanged on exit. 
+ + X - REAL array of DIMENSION at least + ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( m - 1 )*abs( INCX ) ) otherwise. + Before entry, the incremented array X must contain the + vector x. + Unchanged on exit. + + INCX - INTEGER. + On entry, INCX specifies the increment for the elements of + X. INCX must not be zero. + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then Y need not be set on input. + Unchanged on exit. + + Y - REAL array of DIMENSION at least + ( 1 + ( m - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n' + and at least + ( 1 + ( n - 1 )*abs( INCY ) ) otherwise. + Before entry with BETA non-zero, the incremented array Y + must contain the vector y. On exit, Y is overwritten by the + updated vector y. + + INCY - INTEGER. + On entry, INCY specifies the increment for the elements of + Y. INCY must not be zero. + Unchanged on exit. + + + Level 2 Blas routine. + + -- Written on 22-October-1986. + Jack Dongarra, Argonne National Lab. + Jeremy Du Croz, Nag Central Office. + Sven Hammarling, Nag Central Office. + Richard Hanson, Sandia National Labs. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + --x; + --y; + + /* Function Body */ + info = 0; + if (! lsame_(trans, "N") && ! lsame_(trans, "T") && ! lsame_(trans, "C") + ) { + info = 1; + } else if (*m < 0) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*lda < max(1,*m)) { + info = 6; + } else if (*incx == 0) { + info = 8; + } else if (*incy == 0) { + info = 11; + } + if (info != 0) { + xerbla_("SGEMV ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*m == 0 || *n == 0 || (*alpha == 0.f && *beta == 1.f)) { + return 0; + } + +/* + Set LENX and LENY, the lengths of the vectors x and y, and set + up the start points in X and Y. 
+*/ + + if (lsame_(trans, "N")) { + lenx = *n; + leny = *m; + } else { + lenx = *m; + leny = *n; + } + if (*incx > 0) { + kx = 1; + } else { + kx = 1 - (lenx - 1) * *incx; + } + if (*incy > 0) { + ky = 1; + } else { + ky = 1 - (leny - 1) * *incy; + } + +/* + Start the operations. In this version the elements of A are + accessed sequentially with one pass through A. + + First form y := beta*y. +*/ + + if (*beta != 1.f) { + if (*incy == 1) { + if (*beta == 0.f) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = 0.f; +/* L10: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[i__] = *beta * y[i__]; +/* L20: */ + } + } + } else { + iy = ky; + if (*beta == 0.f) { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = 0.f; + iy += *incy; +/* L30: */ + } + } else { + i__1 = leny; + for (i__ = 1; i__ <= i__1; ++i__) { + y[iy] = *beta * y[iy]; + iy += *incy; +/* L40: */ + } + } + } + } + if (*alpha == 0.f) { + return 0; + } + if (lsame_(trans, "N")) { + +/* Form y := alpha*A*x + y. */ + + jx = kx; + if (*incy == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f) { + temp = *alpha * x[jx]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[i__] += temp * a[i__ + j * a_dim1]; +/* L50: */ + } + } + jx += *incx; +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (x[jx] != 0.f) { + temp = *alpha * x[jx]; + iy = ky; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + y[iy] += temp * a[i__ + j * a_dim1]; + iy += *incy; +/* L70: */ + } + } + jx += *incx; +/* L80: */ + } + } + } else { + +/* Form y := alpha*A'*x + y. 
*/ + + jy = ky; + if (*incx == 1) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.f; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[i__]; +/* L90: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L100: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp = 0.f; + ix = kx; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp += a[i__ + j * a_dim1] * x[ix]; + ix += *incx; +/* L110: */ + } + y[jy] += *alpha * temp; + jy += *incy; +/* L120: */ + } + } + } + + return 0; + +/* End of SGEMV . */ + +} /* sgemv_ */ + +/* Subroutine */ int sscal_(integer *n, real *sa, real *sx, integer *incx) +{ + /* System generated locals */ + integer i__1, i__2; + + /* Local variables */ + static integer i__, m, mp1, nincx; + + +/* + scales a vector by a constant. + uses unrolled loops for increment equal to 1. + jack dongarra, linpack, 3/11/78. + modified 3/93 to return if incx .le. 0. + modified 12/3/93, array(1) declarations changed to array(*) +*/ + + + /* Parameter adjustments */ + --sx; + + /* Function Body */ + if (*n <= 0 || *incx <= 0) { + return 0; + } + if (*incx == 1) { + goto L20; + } + +/* code for increment not equal to 1 */ + + nincx = *n * *incx; + i__1 = nincx; + i__2 = *incx; + for (i__ = 1; i__2 < 0 ? 
i__ >= i__1 : i__ <= i__1; i__ += i__2) { + sx[i__] = *sa * sx[i__]; +/* L10: */ + } + return 0; + +/* + code for increment equal to 1 + + + clean-up loop +*/ + +L20: + m = *n % 5; + if (m == 0) { + goto L40; + } + i__2 = m; + for (i__ = 1; i__ <= i__2; ++i__) { + sx[i__] = *sa * sx[i__]; +/* L30: */ + } + if (*n < 5) { + return 0; + } +L40: + mp1 = m + 1; + i__2 = *n; + for (i__ = mp1; i__ <= i__2; i__ += 5) { + sx[i__] = *sa * sx[i__]; + sx[i__ + 1] = *sa * sx[i__ + 1]; + sx[i__ + 2] = *sa * sx[i__ + 2]; + sx[i__ + 3] = *sa * sx[i__ + 3]; + sx[i__ + 4] = *sa * sx[i__ + 4]; +/* L50: */ + } + return 0; +} /* sscal_ */ + +/* Subroutine */ int ssymm_(char *side, char *uplo, integer *m, integer *n, + real *alpha, real *a, integer *lda, real *b, integer *ldb, real *beta, + real *c__, integer *ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2, + i__3; + + /* Local variables */ + static integer i__, j, k, info; + static real temp1, temp2; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SSYMM performs one of the matrix-matrix operations + + C := alpha*A*B + beta*C, + + or + + C := alpha*B*A + beta*C, + + where alpha and beta are scalars, A is a symmetric matrix and B and + C are m by n matrices. + + Parameters + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether the symmetric matrix A + appears on the left or right in the operation as follows: + + SIDE = 'L' or 'l' C := alpha*A*B + beta*C, + + SIDE = 'R' or 'r' C := alpha*B*A + beta*C, + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the upper or lower + triangular part of the symmetric matrix A is to be + referenced as follows: + + UPLO = 'U' or 'u' Only the upper triangular part of the + symmetric matrix is to be referenced. 
+ + UPLO = 'L' or 'l' Only the lower triangular part of the + symmetric matrix is to be referenced. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of the matrix C. + M must be at least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of the matrix C. + N must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, ka ), where ka is + m when SIDE = 'L' or 'l' and is n otherwise. + Before entry with SIDE = 'L' or 'l', the m by m part of + the array A must contain the symmetric matrix, such that + when UPLO = 'U' or 'u', the leading m by m upper triangular + part of the array A must contain the upper triangular part + of the symmetric matrix and the strictly lower triangular + part of A is not referenced, and when UPLO = 'L' or 'l', + the leading m by m lower triangular part of the array A + must contain the lower triangular part of the symmetric + matrix and the strictly upper triangular part of A is not + referenced. + Before entry with SIDE = 'R' or 'r', the n by n part of + the array A must contain the symmetric matrix, such that + when UPLO = 'U' or 'u', the leading n by n upper triangular + part of the array A must contain the upper triangular part + of the symmetric matrix and the strictly lower triangular + part of A is not referenced, and when UPLO = 'L' or 'l', + the leading n by n lower triangular part of the array A + must contain the lower triangular part of the symmetric + matrix and the strictly upper triangular part of A is not + referenced. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), otherwise LDA must be at + least max( 1, n ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, n ). 
+ Before entry, the leading m by n part of the array B must + contain the matrix B. + Unchanged on exit. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. When BETA is + supplied as zero then C need not be set on input. + Unchanged on exit. + + C - REAL array of DIMENSION ( LDC, n ). + Before entry, the leading m by n part of the array C must + contain the matrix C, except when beta is zero, in which + case C need not be set on entry. + On exit, the array C is overwritten by the m by n updated + matrix. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Set NROWA as the number of rows of A. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(side, "L")) { + nrowa = *m; + } else { + nrowa = *n; + } + upper = lsame_(uplo, "U"); + +/* Test the input parameters. */ + + info = 0; + if (! lsame_(side, "L") && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (*m < 0) { + info = 3; + } else if (*n < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldb < max(1,*m)) { + info = 9; + } else if (*ldc < max(1,*m)) { + info = 12; + } + if (info != 0) { + xerbla_("SSYMM ", &info); + return 0; + } + +/* Quick return if possible. 
*/ + + if (*m == 0 || *n == 0 || (*alpha == 0.f && *beta == 1.f)) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.f) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(side, "L")) { + +/* Form C := alpha*A*B + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp1 = *alpha * b[i__ + j * b_dim1]; + temp2 = 0.f; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1]; + temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1]; +/* L50: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] + + *alpha * temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + temp1 * a[i__ + i__ * a_dim1] + *alpha * + temp2; + } +/* L60: */ + } +/* L70: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp1 = *alpha * b[i__ + j * b_dim1]; + temp2 = 0.f; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + c__[k + j * c_dim1] += temp1 * a[k + i__ * a_dim1]; + temp2 += b[k + j * b_dim1] * a[k + i__ * a_dim1]; +/* L80: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = temp1 * a[i__ + i__ * a_dim1] + + *alpha * temp2; + } else { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + temp1 * a[i__ + i__ * a_dim1] + *alpha * + temp2; + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* Form C := alpha*B*A + beta*C. 
*/ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + temp1 = *alpha * a[j + j * a_dim1]; + if (*beta == 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = temp1 * b[i__ + j * b_dim1]; +/* L110: */ + } + } else { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1] + + temp1 * b[i__ + j * b_dim1]; +/* L120: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (upper) { + temp1 = *alpha * a[k + j * a_dim1]; + } else { + temp1 = *alpha * a[j + k * a_dim1]; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1]; +/* L130: */ + } +/* L140: */ + } + i__2 = *n; + for (k = j + 1; k <= i__2; ++k) { + if (upper) { + temp1 = *alpha * a[j + k * a_dim1]; + } else { + temp1 = *alpha * a[k + j * a_dim1]; + } + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp1 * b[i__ + k * b_dim1]; +/* L150: */ + } +/* L160: */ + } +/* L170: */ + } + } + + return 0; + +/* End of SSYMM . */ + +} /* ssymm_ */ + +/* Subroutine */ int ssyrk_(char *uplo, char *trans, integer *n, integer *k, + real *alpha, real *a, integer *lda, real *beta, real *c__, integer * + ldc) +{ + /* System generated locals */ + integer a_dim1, a_offset, c_dim1, c_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, l, info; + static real temp; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + Purpose + ======= + + SSYRK performs one of the symmetric rank k operations + + C := alpha*A*A' + beta*C, + + or + + C := alpha*A'*A + beta*C, + + where alpha and beta are scalars, C is an n by n symmetric matrix + and A is an n by k matrix in the first case and a k by n matrix + in the second case. + + Parameters + ========== + + UPLO - CHARACTER*1. 
+ On entry, UPLO specifies whether the upper or lower + triangular part of the array C is to be referenced as + follows: + + UPLO = 'U' or 'u' Only the upper triangular part of C + is to be referenced. + + UPLO = 'L' or 'l' Only the lower triangular part of C + is to be referenced. + + Unchanged on exit. + + TRANS - CHARACTER*1. + On entry, TRANS specifies the operation to be performed as + follows: + + TRANS = 'N' or 'n' C := alpha*A*A' + beta*C. + + TRANS = 'T' or 't' C := alpha*A'*A + beta*C. + + TRANS = 'C' or 'c' C := alpha*A'*A + beta*C. + + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the order of the matrix C. N must be + at least zero. + Unchanged on exit. + + K - INTEGER. + On entry with TRANS = 'N' or 'n', K specifies the number + of columns of the matrix A, and on entry with + TRANS = 'T' or 't' or 'C' or 'c', K specifies the number + of rows of the matrix A. K must be at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, ka ), where ka is + k when TRANS = 'N' or 'n', and is n otherwise. + Before entry with TRANS = 'N' or 'n', the leading n by k + part of the array A must contain the matrix A, otherwise + the leading k by n part of the array A must contain the + matrix A. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When TRANS = 'N' or 'n' + then LDA must be at least max( 1, n ), otherwise LDA must + be at least max( 1, k ). + Unchanged on exit. + + BETA - REAL . + On entry, BETA specifies the scalar beta. + Unchanged on exit. + + C - REAL array of DIMENSION ( LDC, n ). + Before entry with UPLO = 'U' or 'u', the leading n by n + upper triangular part of the array C must contain the upper + triangular part of the symmetric matrix and the strictly + lower triangular part of C is not referenced. 
On exit, the + upper triangular part of the array C is overwritten by the + upper triangular part of the updated matrix. + Before entry with UPLO = 'L' or 'l', the leading n by n + lower triangular part of the array C must contain the lower + triangular part of the symmetric matrix and the strictly + upper triangular part of C is not referenced. On exit, the + lower triangular part of the array C is overwritten by the + lower triangular part of the updated matrix. + + LDC - INTEGER. + On entry, LDC specifies the first dimension of C as declared + in the calling (sub) program. LDC must be at least + max( 1, n ). + Unchanged on exit. + + + Level 3 Blas routine. + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + c_dim1 = *ldc; + c_offset = 1 + c_dim1; + c__ -= c_offset; + + /* Function Body */ + if (lsame_(trans, "N")) { + nrowa = *n; + } else { + nrowa = *k; + } + upper = lsame_(uplo, "U"); + + info = 0; + if (! upper && ! lsame_(uplo, "L")) { + info = 1; + } else if (! lsame_(trans, "N") && ! lsame_(trans, + "T") && ! lsame_(trans, "C")) { + info = 2; + } else if (*n < 0) { + info = 3; + } else if (*k < 0) { + info = 4; + } else if (*lda < max(1,nrowa)) { + info = 7; + } else if (*ldc < max(1,*n)) { + info = 10; + } + if (info != 0) { + xerbla_("SSYRK ", &info); + return 0; + } + +/* Quick return if possible. */ + + if (*n == 0 || ((*alpha == 0.f || *k == 0) && *beta == 1.f)) { + return 0; + } + +/* And when alpha.eq.zero. 
*/ + + if (*alpha == 0.f) { + if (upper) { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L30: */ + } +/* L40: */ + } + } + } else { + if (*beta == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L70: */ + } +/* L80: */ + } + } + } + return 0; + } + +/* Start the operations. */ + + if (lsame_(trans, "N")) { + +/* Form C := alpha*A*A' + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L90: */ + } + } else if (*beta != 1.f) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L100: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.f) { + temp = *alpha * a[j + l * a_dim1]; + i__3 = j; + for (i__ = 1; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L110: */ + } + } +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*beta == 0.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = 0.f; +/* L140: */ + } + } else if (*beta != 1.f) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + c__[i__ + j * c_dim1] = *beta * c__[i__ + j * c_dim1]; +/* L150: */ + } + } + i__2 = *k; + for (l = 1; l <= i__2; ++l) { + if (a[j + l * a_dim1] != 0.f) { + temp = *alpha * a[j + l * 
a_dim1]; + i__3 = *n; + for (i__ = j; i__ <= i__3; ++i__) { + c__[i__ + j * c_dim1] += temp * a[i__ + l * + a_dim1]; +/* L160: */ + } + } +/* L170: */ + } +/* L180: */ + } + } + } else { + +/* Form C := alpha*A'*A + beta*C. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = j; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L190: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L200: */ + } +/* L210: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *n; + for (i__ = j; i__ <= i__2; ++i__) { + temp = 0.f; + i__3 = *k; + for (l = 1; l <= i__3; ++l) { + temp += a[l + i__ * a_dim1] * a[l + j * a_dim1]; +/* L220: */ + } + if (*beta == 0.f) { + c__[i__ + j * c_dim1] = *alpha * temp; + } else { + c__[i__ + j * c_dim1] = *alpha * temp + *beta * c__[ + i__ + j * c_dim1]; + } +/* L230: */ + } +/* L240: */ + } + } + } + + return 0; + +/* End of SSYRK . */ + +} /* ssyrk_ */ + +/* Subroutine */ int strsm_(char *side, char *uplo, char *transa, char *diag, + integer *m, integer *n, real *alpha, real *a, integer *lda, real *b, + integer *ldb) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3; + + /* Local variables */ + static integer i__, j, k, info; + static real temp; + static logical lside; + extern logical lsame_(char *, char *); + static integer nrowa; + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + static logical nounit; + + +/* + Purpose + ======= + + STRSM solves one of the matrix equations + + op( A )*X = alpha*B, or X*op( A ) = alpha*B, + + where alpha is a scalar, X and B are m by n matrices, A is a unit, or + non-unit, upper or lower triangular matrix and op( A ) is one of + + op( A ) = A or op( A ) = A'. 
+ + The matrix X is overwritten on B. + + Parameters + ========== + + SIDE - CHARACTER*1. + On entry, SIDE specifies whether op( A ) appears on the left + or right of X as follows: + + SIDE = 'L' or 'l' op( A )*X = alpha*B. + + SIDE = 'R' or 'r' X*op( A ) = alpha*B. + + Unchanged on exit. + + UPLO - CHARACTER*1. + On entry, UPLO specifies whether the matrix A is an upper or + lower triangular matrix as follows: + + UPLO = 'U' or 'u' A is an upper triangular matrix. + + UPLO = 'L' or 'l' A is a lower triangular matrix. + + Unchanged on exit. + + TRANSA - CHARACTER*1. + On entry, TRANSA specifies the form of op( A ) to be used in + the matrix multiplication as follows: + + TRANSA = 'N' or 'n' op( A ) = A. + + TRANSA = 'T' or 't' op( A ) = A'. + + TRANSA = 'C' or 'c' op( A ) = A'. + + Unchanged on exit. + + DIAG - CHARACTER*1. + On entry, DIAG specifies whether or not A is unit triangular + as follows: + + DIAG = 'U' or 'u' A is assumed to be unit triangular. + + DIAG = 'N' or 'n' A is not assumed to be unit + triangular. + + Unchanged on exit. + + M - INTEGER. + On entry, M specifies the number of rows of B. M must be at + least zero. + Unchanged on exit. + + N - INTEGER. + On entry, N specifies the number of columns of B. N must be + at least zero. + Unchanged on exit. + + ALPHA - REAL . + On entry, ALPHA specifies the scalar alpha. When alpha is + zero then A is not referenced and B need not be set before + entry. + Unchanged on exit. + + A - REAL array of DIMENSION ( LDA, k ), where k is m + when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'. + Before entry with UPLO = 'U' or 'u', the leading k by k + upper triangular part of the array A must contain the upper + triangular matrix and the strictly lower triangular part of + A is not referenced. + Before entry with UPLO = 'L' or 'l', the leading k by k + lower triangular part of the array A must contain the lower + triangular matrix and the strictly upper triangular part of + A is not referenced. 
+ Note that when DIAG = 'U' or 'u', the diagonal elements of + A are not referenced either, but are assumed to be unity. + Unchanged on exit. + + LDA - INTEGER. + On entry, LDA specifies the first dimension of A as declared + in the calling (sub) program. When SIDE = 'L' or 'l' then + LDA must be at least max( 1, m ), when SIDE = 'R' or 'r' + then LDA must be at least max( 1, n ). + Unchanged on exit. + + B - REAL array of DIMENSION ( LDB, n ). + Before entry, the leading m by n part of the array B must + contain the right-hand side matrix B, and on exit is + overwritten by the solution matrix X. + + LDB - INTEGER. + On entry, LDB specifies the first dimension of B as declared + in the calling (sub) program. LDB must be at least + max( 1, m ). + Unchanged on exit. + + + Level 3 Blas routine. + + + -- Written on 8-February-1989. + Jack Dongarra, Argonne National Laboratory. + Iain Duff, AERE Harwell. + Jeremy Du Croz, Numerical Algorithms Group Ltd. + Sven Hammarling, Numerical Algorithms Group Ltd. + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + lside = lsame_(side, "L"); + if (lside) { + nrowa = *m; + } else { + nrowa = *n; + } + nounit = lsame_(diag, "N"); + upper = lsame_(uplo, "U"); + + info = 0; + if (! lside && ! lsame_(side, "R")) { + info = 1; + } else if (! upper && ! lsame_(uplo, "L")) { + info = 2; + } else if (! lsame_(transa, "N") && ! lsame_(transa, + "T") && ! lsame_(transa, "C")) { + info = 3; + } else if (! lsame_(diag, "U") && ! lsame_(diag, + "N")) { + info = 4; + } else if (*m < 0) { + info = 5; + } else if (*n < 0) { + info = 6; + } else if (*lda < max(1,nrowa)) { + info = 9; + } else if (*ldb < max(1,*m)) { + info = 11; + } + if (info != 0) { + xerbla_("STRSM ", &info); + return 0; + } + +/* Quick return if possible. 
*/ + + if (*n == 0) { + return 0; + } + +/* And when alpha.eq.zero. */ + + if (*alpha == 0.f) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = 0.f; +/* L10: */ + } +/* L20: */ + } + return 0; + } + +/* Start the operations. */ + + if (lside) { + if (lsame_(transa, "N")) { + +/* Form B := alpha*inv( A )*B. */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L30: */ + } + } + for (k = *m; k >= 1; --k) { + if (b[k + j * b_dim1] != 0.f) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__2 = k - 1; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L40: */ + } + } +/* L50: */ + } +/* L60: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L70: */ + } + } + i__2 = *m; + for (k = 1; k <= i__2; ++k) { + if (b[k + j * b_dim1] != 0.f) { + if (nounit) { + b[k + j * b_dim1] /= a[k + k * a_dim1]; + } + i__3 = *m; + for (i__ = k + 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= b[k + j * b_dim1] * a[ + i__ + k * a_dim1]; +/* L80: */ + } + } +/* L90: */ + } +/* L100: */ + } + } + } else { + +/* Form B := alpha*inv( A' )*B. 
*/ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__3 = i__ - 1; + for (k = 1; k <= i__3; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L110: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L120: */ + } +/* L130: */ + } + } else { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + for (i__ = *m; i__ >= 1; --i__) { + temp = *alpha * b[i__ + j * b_dim1]; + i__2 = *m; + for (k = i__ + 1; k <= i__2; ++k) { + temp -= a[k + i__ * a_dim1] * b[k + j * b_dim1]; +/* L140: */ + } + if (nounit) { + temp /= a[i__ + i__ * a_dim1]; + } + b[i__ + j * b_dim1] = temp; +/* L150: */ + } +/* L160: */ + } + } + } + } else { + if (lsame_(transa, "N")) { + +/* Form B := alpha*B*inv( A ). */ + + if (upper) { + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L170: */ + } + } + i__2 = j - 1; + for (k = 1; k <= i__2; ++k) { + if (a[k + j * a_dim1] != 0.f) { + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L180: */ + } + } +/* L190: */ + } + if (nounit) { + temp = 1.f / a[j + j * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L200: */ + } + } +/* L210: */ + } + } else { + for (j = *n; j >= 1; --j) { + if (*alpha != 1.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = *alpha * b[i__ + j * b_dim1] + ; +/* L220: */ + } + } + i__1 = *n; + for (k = j + 1; k <= i__1; ++k) { + if (a[k + j * a_dim1] != 0.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= a[k + j * a_dim1] * b[ + i__ + k * b_dim1]; +/* L230: */ + } + } +/* L240: */ + } + if (nounit) { + temp = 1.f / a[j + j * a_dim1]; + i__1 = *m; + for (i__ = 1; 
i__ <= i__1; ++i__) { + b[i__ + j * b_dim1] = temp * b[i__ + j * b_dim1]; +/* L250: */ + } + } +/* L260: */ + } + } + } else { + +/* Form B := alpha*B*inv( A' ). */ + + if (upper) { + for (k = *n; k >= 1; --k) { + if (nounit) { + temp = 1.f / a[k + k * a_dim1]; + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L270: */ + } + } + i__1 = k - 1; + for (j = 1; j <= i__1; ++j) { + if (a[j + k * a_dim1] != 0.f) { + temp = a[j + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L280: */ + } + } +/* L290: */ + } + if (*alpha != 1.f) { + i__1 = *m; + for (i__ = 1; i__ <= i__1; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L300: */ + } + } +/* L310: */ + } + } else { + i__1 = *n; + for (k = 1; k <= i__1; ++k) { + if (nounit) { + temp = 1.f / a[k + k * a_dim1]; + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = temp * b[i__ + k * b_dim1]; +/* L320: */ + } + } + i__2 = *n; + for (j = k + 1; j <= i__2; ++j) { + if (a[j + k * a_dim1] != 0.f) { + temp = a[j + k * a_dim1]; + i__3 = *m; + for (i__ = 1; i__ <= i__3; ++i__) { + b[i__ + j * b_dim1] -= temp * b[i__ + k * + b_dim1]; +/* L330: */ + } + } +/* L340: */ + } + if (*alpha != 1.f) { + i__2 = *m; + for (i__ = 1; i__ <= i__2; ++i__) { + b[i__ + k * b_dim1] = *alpha * b[i__ + k * b_dim1] + ; +/* L350: */ + } + } +/* L360: */ + } + } + } + } + + return 0; + +/* End of STRSM . 
*/ + +} /* strsm_ */ + +/* Subroutine */ int xerbla_(char *srname, integer *info) +{ + /* Format strings */ + static char fmt_9999[] = "(\002 ** On entry to \002,a6,\002 parameter nu" + "mber \002,i2,\002 had \002,\002an illegal value\002)"; + + /* Builtin functions */ + integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), e_wsfe(void); + /* Subroutine */ int s_stop(char *, ftnlen); + + /* Fortran I/O blocks */ + static cilist io___60 = { 0, 6, 0, fmt_9999, 0 }; + + +/* + -- LAPACK auxiliary routine (preliminary version) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + XERBLA is an error handler for the LAPACK routines. + It is called by an LAPACK routine if an input parameter has an + invalid value. A message is printed and execution stops. + + Installers may consider modifying the STOP statement in order to + call system-specific exception-handling facilities. + + Arguments + ========= + + SRNAME (input) CHARACTER*6 + The name of the routine which called XERBLA. + + INFO (input) INTEGER + The position of the invalid parameter in the parameter list + of the calling routine. 
+*/ + + + s_wsfe(&io___60); + do_fio(&c__1, srname, (ftnlen)6); + do_fio(&c__1, (char *)&(*info), (ftnlen)sizeof(integer)); + e_wsfe(); + + s_stop("", (ftnlen)0); + + +/* End of XERBLA */ + + return 0; +} /* xerbla_ */ + diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/case.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/case.c new file mode 100644 index 0000000000000000000000000000000000000000..f9e24ac069c8f53547c16ef593af9e548664061d --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/case.c @@ -0,0 +1,141 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
/*
 * Convert the NUL-terminated string at cp to lower case, in place.
 * A NULL pointer is accepted and ignored.
 */
void
lcase(register char *cp)
{
    if (!cp)
        return;

    /* walk to the terminator, rewriting each character through LOWER_CASE */
    for (; *cp; ++cp)
        *cp = LOWER_CASE(*cp);
}
-1 : 1; + + return 0; +} + +int32 +strncmp_nocase(const char *str1, const char *str2, size_t len) +{ + char c1, c2; + + if (str1 && str2) { + size_t n; + + for (n = 0; n < len; ++n) { + c1 = *(str1++); + c1 = UPPER_CASE(c1); + c2 = *(str2++); + c2 = UPPER_CASE(c2); + if (c1 != c2) + return (c1 - c2); + if (c1 == '\0') + return 0; + } + } + else + return (str1 == NULL) ? -1 : 1; + + return 0; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/ckd_alloc.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/ckd_alloc.c new file mode 100644 index 0000000000000000000000000000000000000000..45dc84aaec6248b00e64f81e6c0c85affecd1f24 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/ckd_alloc.c @@ -0,0 +1,427 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * ckd_alloc.c -- Memory allocation package. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: ckd_alloc.c,v $ + * Revision 1.6 2005/06/22 02:59:25 arthchan2003 + * Added keyword + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 19-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Removed file,line arguments from free functions. + * Removed debugging stuff. + * + * 01-Jan-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created. + */ + + +/********************************************************************* + * + * $Header: /cvsroot/cmusphinx/sphinx3/src/libutil/ckd_alloc.c,v 1.6 2005/06/22 02:59:25 arthchan2003 Exp $ + * + * Carnegie Mellon ARPA Speech Group + * + * Copyright (c) 1994 Carnegie Mellon University. + * All rights reserved. 
/*
 * Checked calloc(): allocate a zero-filled array of n_elem elements of
 * elem_size bytes each.
 *
 *   n_elem, elem_size  array geometry, forwarded to the allocator
 *   caller_file/line   call site, used only in the failure message
 *
 * On allocation failure ckd_fail() is called with a diagnostic; the value
 * returned in that case is whatever the allocator produced (NULL) if
 * ckd_fail() returns at all.
 *
 * The size_t arguments are cast to unsigned long and printed with %lu:
 * passing size_t to a %d conversion is undefined behaviour where size_t is
 * wider than int, and %zu is avoided because this file is also built with
 * older Microsoft runtimes (see the _MSC_VER / _WIN32_WCE handling above).
 */
void *
__ckd_calloc__(size_t n_elem, size_t elem_size,
               const char *caller_file, int caller_line)
{
    void *mem;

#if defined(__ADSPBLACKFIN__) && !defined(__linux__)
    /* try heap 1 first, then fall back to heap 0 */
    if ((mem = heap_calloc(heap_lookup(1), n_elem, elem_size)) == NULL)
        if ((mem = heap_calloc(heap_lookup(0), n_elem, elem_size)) == NULL) {
            ckd_fail("calloc(%lu,%lu) failed from %s(%d), free space: %d\n",
                     (unsigned long) n_elem, (unsigned long) elem_size,
                     caller_file, caller_line, space_unused());
        }
#else
    if ((mem = calloc(n_elem, elem_size)) == NULL) {
        ckd_fail("calloc(%lu,%lu) failed from %s(%d)\n",
                 (unsigned long) n_elem, (unsigned long) elem_size,
                 caller_file, caller_line);
    }
#endif

    return mem;
}
/*
 * Checked realloc(): resize the allocation at ptr to new_size bytes.
 *
 *   ptr               block to resize, forwarded unchanged to the allocator
 *   new_size          requested size in bytes
 *   caller_file/line  call site, used only in the failure message
 *
 * On failure ckd_fail() is called.  The diagnostic now names realloc (it
 * used to say "malloc", which pointed at the wrong call when debugging) and
 * prints the size through %lu with an explicit cast, because passing a
 * size_t to %d is undefined behaviour where size_t is wider than int.
 */
void *
__ckd_realloc__(void *ptr, size_t new_size,
                const char *caller_file, int caller_line)
{
    void *mem;

    /* pick the allocator; the failure check below is shared by both */
#if defined(__ADSPBLACKFIN__) && !defined(__linux__)
    mem = heap_realloc(heap_lookup(0), ptr, new_size);
#else
    mem = realloc(ptr, new_size);
#endif

    if (mem == NULL) {
        ckd_fail("realloc(%lu) failed from %s(%d)\n",
                 (unsigned long) new_size, caller_file, caller_line);
    }

    return mem;
}
/*
 * Release a 4-d array built by __ckd_calloc_4d__: the element store plus
 * the three levels of pointer tables layered over it.  NULL is ignored.
 */
void
ckd_free_4d(void *inptr)
{
    void ****level3 = (void ****) inptr;

    if (level3 != NULL) {
        /* fetch the lower tables before anything is released */
        void ***level2 = level3[0];
        void **level1 = level2[0];

        /* the underlying element store comes first ... */
        ckd_free(level1[0]);

        /* ... then the access tables, innermost to outermost */
        ckd_free(level1);
        ckd_free(level2);
        ckd_free(level3);
    }
}
/*
 * Layer a 2-d access structure over a preallocated storage area.
 *
 * Allocates a table of d1 row pointers; entry r points at byte offset
 * r * d2 * elem_size inside store.  Only the table is allocated here, the
 * storage itself is used as given.
 *
 *   d1, d2      number of rows and of elements per row
 *   store       existing element storage
 *   elem_size   size of one element in bytes
 *   file, line  call site, forwarded to __ckd_calloc__
 */
void *
__ckd_alloc_2d_ptr(size_t d1,
                   size_t d2,
                   void *store,
                   size_t elem_size,
                   char *file,
                   int line)
{
    char *base = (char *) store;
    size_t row_bytes = d2 * elem_size;
    size_t row;
    void **rows;

    rows = __ckd_calloc__(d1, sizeof(void *), file, line);

    for (row = 0; row < d1; row++)
        rows[row] = base + row * row_bytes;

    return rows;
}
class LenSubsScanner(MyScanner):
    """Following clapack, we remove ftnlen arguments, which f2c puts after
    a char * argument to hold the length of the passed string. This is just
    a nuisance in C.
    """
    def __init__(self, info, name=''):
        MyScanner.__init__(self, info, name)
        # Depth of '(' nesting seen since a keep_ftnlen call was matched.
        self.paren_count = 0

    def beginArgs(self, text):
        # Action for keep_ftnlen and for '(' inside the 'args' state: the
        # outermost open paren switches the scanner into 'args'; every open
        # paren bumps the depth.  The matched text is returned unchanged.
        if self.paren_count == 0:
            self.begin('args')
        self.paren_count += 1
        return text

    def endArgs(self, text):
        # Action for ')' inside the 'args' state: once the depth is back to
        # zero the scanner returns to the default ('') state.
        self.paren_count -= 1
        if self.paren_count == 0:
            self.begin('')
        return text

    # Pattern building blocks (class attributes, evaluated at class creation).
    digits = Re('[0-9]+')
    iofun = Re(r'\([^;]*;')            # from '(' up to the next ';'
    decl = Re(r'\([^)]*\)[,;'+'\n]')   # not referenced by the lexicon below
    any = Re('[.]*')                   # not referenced by the lexicon below
    S = Re('[ \t\n]*')                 # optional whitespace
    cS = Str(',') + S                  # a comma plus optional whitespace
    len_ = Re('[a-z][a-z0-9]*_len')    # identifiers ending in _len

    # A call to one of these f2c runtime functions, through its terminating
    # ';', is matched as a single token by the first lexicon rule.
    iofunctions = Str("s_cat", "s_copy", "s_stop", "s_cmp",
                      "i_len", "do_fio", "do_lio") + iofun

    # Routines to not scrub the ftnlen argument from
    keep_ftnlen = (Str('ilaenv_') | Str('s_rnge')) + Str('(')

    # NOTE(review): TEXT and IGNORE are Plex actions; runScanner() writes
    # every returned value except IGNORE to the output, so IGNORE rules are
    # the ones that delete text.  TEXT presumably yields the matched text
    # as-is -- confirm against the Plex documentation.
    lexicon = Lexicon([
        # runtime calls listed in iofunctions are passed through whole
        (iofunctions,                           TEXT),
        # ilaenv_( / s_rnge( : enter 'args', where only the rules of that
        # state apply, so nothing inside the call is scrubbed
        (keep_ftnlen,                           beginArgs),
        State('args', [
            (Str(')'),   endArgs),
            (Str('('),   beginArgs),
            (AnyChar,    TEXT),
        ]),
        # ", <digits>L" (must not start with 0) is dropped
        (cS+Re(r'[1-9][0-9]*L'),                IGNORE),
        # ", ftnlen" with an optional *_len name is dropped
        (cS+Str('ftnlen')+Opt(S+len_),          IGNORE),
        # ", (ftnlen) <digits>" is dropped
        (cS+sep_seq(['(', 'ftnlen', ')'], S)+S+digits,      IGNORE),
        # a line that is exactly "ftnlen <name>_len;" is dropped
        (Bol+Str('ftnlen ')+len_+Str(';\n'),    IGNORE),
        # any other ", <name>_len" is emitted like ordinary text
        (cS+len_,                               TEXT),
        # everything else is copied one character at a time
        (AnyChar,                               TEXT),
    ])
class LineQueue(object):
    """Ordered buffer of text lines.

    Lines are appended with add(), handed over to another queue with
    flushTo(), or joined into a single string with getValue().  Both
    flushTo() and getValue() leave this queue empty.
    """

    def __init__(self):
        object.__init__(self)
        self._queue = []

    def add(self, line):
        """Append one line to the end of the buffer."""
        self._queue.append(line)

    def clear(self):
        """Forget every buffered line (a fresh list replaces the old one)."""
        self._queue = []

    def flushTo(self, other_queue):
        """Feed each buffered line, in order, to other_queue.add(), then empty
        this queue."""
        for pending in self._queue:
            other_queue.add(pending)
        self.clear()

    def getValue(self):
        """Return the buffered text as one string and empty the queue.

        The lines travel through self.flushTo(), so a subclass that overrides
        flushTo() decides what ends up in the result.
        """
        collector = LineQueue()
        self.flushTo(collector)
        self.clear()
        return ''.join(collector._queue)
if __name__ == '__main__':
    # Usage: clapack_scrub.py <input file> <output directory> [nsteps]
    # The scrubbed text is written to <output directory>/<basename of input>.
    filename = sys.argv[1]
    outfilename = os.path.join(sys.argv[2], os.path.basename(filename))

    fo = open(filename, 'r')
    try:
        source = fo.read()
    finally:
        # release the handle even when the read fails
        fo.close()

    # Optional third argument: run only the first nsteps scrubbing steps.
    if len(sys.argv) > 3:
        nsteps = int(sys.argv[3])
    else:
        nsteps = None

    # The pipeline function in this module is scrubSource; the former call to
    # scrub_source raised NameError, so the script could never produce output.
    source = scrubSource(source, nsteps, verbose=True)

    writefo = open(outfilename, 'w')
    try:
        writefo.write(source)
    finally:
        writefo.close()
b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/cmd_ln.c new file mode 100644 index 0000000000000000000000000000000000000000..ba776940c4a2bb068c4eb390daaf70f614a00925 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/cmd_ln.c @@ -0,0 +1,999 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * cmd_ln.c -- Command line argument parsing. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * + * 10-Sep-1998 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Changed strcasecmp() call in cmp_name() to strcmp_nocase() call. + * + * 15-Jul-1997 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added required arguments handling. + * + * 07-Dec-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created, based on Eric's implementation. Basically, combined several + * functions into one, eliminated validation, and simplified the interface. 
+ */ + + +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning (disable: 4996 4018) +#endif + +#ifdef HAVE_CONFIG_H +#include +#endif + +#ifdef HAVE_UNISTD_H +#include +#endif + +#include "sphinxbase/cmd_ln.h" +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/hash_table.h" +#include "sphinxbase/case.h" +#include "sphinxbase/strfuncs.h" + +static void +arg_log_r(cmd_ln_t *, arg_t const *, int32, int32); + +static cmd_ln_t * +parse_options(cmd_ln_t *, const arg_t *, int32, char* [], int32); + +/* + * Find max length of name and default fields in the given defn array. + * Return #items in defn array. + */ +static int32 +arg_strlen(const arg_t * defn, int32 * namelen, int32 * deflen) +{ + int32 i, l; + + *namelen = *deflen = 0; + for (i = 0; defn[i].name; i++) { + l = strlen(defn[i].name); + if (*namelen < l) + *namelen = l; + + if (defn[i].deflt) + l = strlen(defn[i].deflt); + else + l = strlen("(null)"); + /* E_INFO("string default, %s , name %s, length %d\n",defn[i].deflt,defn[i].name,l); */ + if (*deflen < l) + *deflen = l; + } + + return i; +} + + +static int32 +cmp_name(const void *a, const void *b) +{ + return (strcmp_nocase + ((* (arg_t**) a)->name, + (* (arg_t**) b)->name)); +} + +static arg_t const ** +arg_sort(const arg_t * defn, int32 n) +{ + const arg_t ** pos; + int32 i; + + pos = (arg_t const **) ckd_calloc(n, sizeof(arg_t *)); + for (i = 0; i < n; ++i) + pos[i] = &defn[i]; + qsort(pos, n, sizeof(arg_t *), cmp_name); + + return pos; +} + +static size_t +strnappend(char **dest, size_t *dest_allocation, + const char *source, size_t n) +{ + size_t source_len, required_allocation; + + if (dest == NULL || dest_allocation == NULL) + return -1; + if (*dest == NULL && *dest_allocation != 0) + return -1; + if (source == NULL) + return *dest_allocation; + + source_len = strlen(source); + if (n && n < source_len) + source_len = n; + + required_allocation = (*dest ? 
strlen(*dest) : 0) + source_len + 1; + if (*dest_allocation < required_allocation) { + if (*dest_allocation == 0) { + *dest = (char *)ckd_calloc(required_allocation * 2, 1); + } else { + *dest = (char *)ckd_realloc(*dest, required_allocation * 2); + } + *dest_allocation = required_allocation * 2; + } + + strncat(*dest, source, source_len); + + return *dest_allocation; +} + +static size_t +strappend(char **dest, size_t *dest_allocation, + const char *source) +{ + return strnappend(dest, dest_allocation, source, 0); +} + +static char* +arg_resolve_env(const char *str) +{ + char *resolved_str = NULL; + char env_name[100]; + const char *env_val; + size_t alloced = 0; + const char *i = str, *j; + + /* calculate required resolved_str size */ + do { + j = strstr(i, "$("); + if (j != NULL) { + if (j != i) { + strnappend(&resolved_str, &alloced, i, j - i); + i = j; + } + j = strchr(i + 2, ')'); + if (j != NULL) { + if (j - (i + 2) < 100) { + strncpy(env_name, i + 2, j - (i + 2)); + env_name[j - (i + 2)] = '\0'; + #if !defined(_WIN32_WCE) + env_val = getenv(env_name); + if (env_val) + strappend(&resolved_str, &alloced, env_val); + #else + env_val = 0; + #endif + } + i = j + 1; + } else { + /* unclosed, copy and skip */ + j = i + 2; + strnappend(&resolved_str, &alloced, i, j - i); + i = j; + } + } else { + strappend(&resolved_str, &alloced, i); + } + } while(j != NULL); + + return resolved_str; +} + +static void +arg_log_r(cmd_ln_t *cmdln, const arg_t * defn, int32 doc, int32 lineno) +{ + arg_t const **pos; + int32 i, n; + size_t l; + int32 namelen, deflen; + cmd_ln_val_t const *vp; + char const **array; + + /* No definitions, do nothing. 
*/ + if (defn == NULL) + return; + + /* Find max lengths of name and default value fields, and #entries in defn */ + n = arg_strlen(defn, &namelen, &deflen); + namelen += 4; + deflen += 4; + if (lineno) + E_INFO("%-*s", namelen, "[NAME]"); + else + E_INFOCONT("%-*s", namelen, "[NAME]"); + E_INFOCONT("%-*s", deflen, "[DEFLT]"); + if (doc) { + E_INFOCONT(" [DESCR]\n"); + } + else { + E_INFOCONT(" [VALUE]\n"); + } + + /* Print current configuration, sorted by name */ + pos = arg_sort(defn, n); + for (i = 0; i < n; i++) { + if (lineno) + E_INFO("%-*s", namelen, pos[i]->name); + else + E_INFOCONT("%-*s", namelen, pos[i]->name); + if (pos[i]->deflt) + E_INFOCONT("%-*s", deflen, pos[i]->deflt); + else + E_INFOCONT("%-*s", deflen, ""); + if (doc) { + if (pos[i]->doc) + E_INFOCONT(" %s", pos[i]->doc); + } + else { + vp = cmd_ln_access_r(cmdln, pos[i]->name); + if (vp) { + switch (pos[i]->type) { + case ARG_INTEGER: + case REQARG_INTEGER: + E_INFOCONT(" %ld", vp->val.i); + break; + case ARG_FLOATING: + case REQARG_FLOATING: + E_INFOCONT(" %e", vp->val.fl); + break; + case ARG_STRING: + case REQARG_STRING: + if (vp->val.ptr) + E_INFOCONT(" %s", (char *)vp->val.ptr); + break; + case ARG_STRING_LIST: + array = (char const**)vp->val.ptr; + if (array) + E_INFOCONT(" "); + /* The value pointer is NULL when the argument had no value text + * (cmd_ln_val_init zeroes the value), so never walk a NULL list. */ + for (l = 0; array != NULL && array[l] != 0; l++) { + E_INFOCONT("%s,", array[l]); + } + break; + case ARG_BOOLEAN: + case REQARG_BOOLEAN: + E_INFOCONT(" %s", vp->val.i ? 
"yes" : "no"); + break; + default: + E_ERROR("Unknown argument type: %d\n", pos[i]->type); + } + } + } + + E_INFOCONT("\n"); + } + ckd_free(pos); + E_INFOCONT("\n"); +} + +/* + * Split a comma-separated string into a NULL-terminated array of newly + * allocated strings (one more entry than there are commas in str). + * The array and every element are allocated with ckd_calloc. + */ +static char ** +parse_string_list(const char *str) +{ + int count, i, j; + const char *p; + char **result; + + p = str; + count = 1; + while (*p) { + if (*p == ',') + count++; + p++; + } + /* Should end with NULL */ + result = (char **) ckd_calloc(count + 1, sizeof(char *)); + p = str; + for (i = 0; i < count; i++) { + for (j = 0; p[j] != ',' && p[j] != 0; j++); + result[i] = (char *)ckd_calloc(j + 1, sizeof(char)); + strncpy( result[i], p, j); + p = p + j + 1; + } + return result; +} + +/* + * Create a value object of type t called name from the text str. + * A NULL str yields an all-zero value. Otherwise "$(NAME)" references + * in str are expanded by arg_resolve_env before the text is converted. + * Returns NULL when the text cannot be converted to the requested type. + */ +static cmd_ln_val_t * +cmd_ln_val_init(int t, const char *name, const char *str) +{ + cmd_ln_val_t *v; + anytype_t val; + char *e_str; + + if (!str) { + /* For lack of a better default value. */ + memset(&val, 0, sizeof(val)); + } + else { + int valid = 1; + e_str = arg_resolve_env(str); + + switch (t) { + case ARG_INTEGER: + case REQARG_INTEGER: + if (sscanf(e_str, "%ld", &val.i) != 1) + valid = 0; + break; + case ARG_FLOATING: + case REQARG_FLOATING: + if (e_str == NULL || e_str[0] == 0) + valid = 0; + val.fl = atof_c(e_str); + break; + case ARG_BOOLEAN: + case REQARG_BOOLEAN: + /* Only the first character decides: y/t/1 is true, n/f/0 is false. 
*/ + if ((e_str[0] == 'y') || (e_str[0] == 't') || + (e_str[0] == 'Y') || (e_str[0] == 'T') || (e_str[0] == '1')) { + val.i = TRUE; + } + else if ((e_str[0] == 'n') || (e_str[0] == 'f') || + (e_str[0] == 'N') || (e_str[0] == 'F') || + (e_str[0] == '0')) { + val.i = FALSE; + } + else { + E_ERROR("Unparsed boolean value '%s'\n", str); + valid = 0; + } + break; + case ARG_STRING: + case REQARG_STRING: + val.ptr = ckd_salloc(e_str); + break; + case ARG_STRING_LIST: + val.ptr = parse_string_list(e_str); + break; + default: + E_ERROR("Unknown argument type: %d\n", t); + valid = 0; + } + + ckd_free(e_str); + if (valid == 0) + return NULL; + } + + v = (cmd_ln_val_t *)ckd_calloc(1, sizeof(*v)); + memcpy(v, &val, sizeof(val)); + v->type = t; + v->name 
= ckd_salloc(name); + + return v; +} + +/* + * Handles option parsing for cmd_ln_parse_file_r() and cmd_ln_init() + * also takes care of storing argv. + * DO NOT call it from cmd_ln_parse_r() + */ +static cmd_ln_t * +parse_options(cmd_ln_t *cmdln, const arg_t *defn, int32 argc, char* argv[], int32 strict) +{ + cmd_ln_t *new_cmdln; + + new_cmdln = cmd_ln_parse_r(cmdln, defn, argc, argv, strict); + /* If this failed then clean up and return NULL. */ + if (new_cmdln == NULL) { + int32 i; + for (i = 0; i < argc; ++i) + ckd_free(argv[i]); + ckd_free(argv); + return NULL; + } + + /* Otherwise, we need to add the contents of f_argv to the new object. */ + if (new_cmdln == cmdln) { + /* If we are adding to a previously passed-in cmdln, then + * store our allocated strings in its f_argv. */ + new_cmdln->f_argv = (char **)ckd_realloc(new_cmdln->f_argv, + (new_cmdln->f_argc + argc) + * sizeof(*new_cmdln->f_argv)); + memcpy(new_cmdln->f_argv + new_cmdln->f_argc, argv, + argc * sizeof(*argv)); + ckd_free(argv); + new_cmdln->f_argc += argc; + } + else { + /* Otherwise, store f_argc and f_argv. 
*/ + new_cmdln->f_argc = argc; + new_cmdln->f_argv = argv; + } + + return new_cmdln; +} + +void +cmd_ln_val_free(cmd_ln_val_t *val) +{ + int i; + if (val->type & ARG_STRING_LIST) { + char ** array = (char **)val->val.ptr; + if (array) { + for (i = 0; array[i] != NULL; i++) { + ckd_free(array[i]); + } + ckd_free(array); + } + } + if (val->type & ARG_STRING) + ckd_free(val->val.ptr); + ckd_free(val->name); + ckd_free(val); +} + + +cmd_ln_t * +cmd_ln_parse_r(cmd_ln_t *inout_cmdln, const arg_t * defn, + int32 argc, char *argv[], int strict) +{ + int32 i, j, n, argstart; + hash_table_t *defidx = NULL; + cmd_ln_t *cmdln; + + /* Construct command-line object */ + if (inout_cmdln == NULL) { + cmdln = (cmd_ln_t*)ckd_calloc(1, sizeof(*cmdln)); + cmdln->refcount = 1; + } + else + cmdln = inout_cmdln; + cmdln->defn = defn; + + /* Build a hash table for argument definitions */ + defidx = hash_table_new(50, 0); + if (defn) { + for (n = 0; defn[n].name; n++) { + void *v; + + v = hash_table_enter(defidx, defn[n].name, (void *)&defn[n]); + if (strict && (v != &defn[n])) { + E_ERROR("Duplicate argument name in definition: %s\n", defn[n].name); + goto error; + } + } + } + else { + /* No definitions. 
*/ + n = 0; + } + + /* Allocate memory for argument values */ + if (cmdln->ht == NULL) + cmdln->ht = hash_table_new(n, 0 /* argument names are case-sensitive */ ); + + + /* skip argv[0] if it doesn't start with dash */ + argstart = 0; + if (argc > 0 && argv[0][0] != '-') { + argstart = 1; + } + + /* Parse command line arguments (name-value pairs) */ + for (j = argstart; j < argc; j += 2) { + arg_t *argdef; + cmd_ln_val_t *val; + void *v; + + if (hash_table_lookup(defidx, argv[j], &v) < 0) { + if (strict) { + E_ERROR("Unknown argument name '%s'\n", argv[j]); + goto error; + } + else if (defn == NULL) + v = NULL; + else + continue; + } + argdef = (arg_t *)v; + + /* Enter argument value */ + if (j + 1 >= argc) { + E_ERROR("Argument value for '%s' missing\n", argv[j]); + goto error; + } + + if (argdef == NULL) + val = cmd_ln_val_init(ARG_STRING, argv[j], argv[j + 1]); + else { + if ((val = cmd_ln_val_init(argdef->type, argv[j], argv[j + 1])) == NULL) { + E_ERROR("Bad argument value for %s: %s\n", argv[j], + argv[j + 1]); + goto error; + } + } + + if ((v = hash_table_enter(cmdln->ht, val->name, (void *)val)) != + (void *)val) + { + if (strict) { + cmd_ln_val_free(val); + E_ERROR("Duplicate argument name in arguments: %s\n", + argdef->name); + goto error; + } + else { + v = hash_table_replace(cmdln->ht, val->name, (void *)val); + cmd_ln_val_free((cmd_ln_val_t *)v); + } + } + } + + /* Fill in default values, if any, for unspecified arguments */ + for (i = 0; i < n; i++) { + cmd_ln_val_t *val; + void *v; + + if (hash_table_lookup(cmdln->ht, defn[i].name, &v) < 0) { + if ((val = cmd_ln_val_init(defn[i].type, defn[i].name, defn[i].deflt)) == NULL) { + E_ERROR + ("Bad default argument value for %s: %s\n", + defn[i].name, defn[i].deflt); + goto error; + } + hash_table_enter(cmdln->ht, val->name, (void *)val); + } + } + + /* Check for required arguments; exit if any missing */ + j = 0; + for (i = 0; i < n; i++) { + if (defn[i].type & ARG_REQUIRED) { + void *v; + if 
(hash_table_lookup(cmdln->ht, defn[i].name, &v) != 0) + E_ERROR("Missing required argument %s\n", defn[i].name); + } + } + if (j > 0) { + goto error; + } + + if (strict && argc == 1) { + E_ERROR("No arguments given\n"); + if (defidx) + hash_table_free(defidx); + if (inout_cmdln == NULL) + cmd_ln_free_r(cmdln); + return NULL; + } + + hash_table_free(defidx); + return cmdln; + + error: + if (defidx) + hash_table_free(defidx); + if (inout_cmdln == NULL) + cmd_ln_free_r(cmdln); + E_ERROR("Failed to parse arguments list\n"); + return NULL; +} + +cmd_ln_t * +cmd_ln_init(cmd_ln_t *inout_cmdln, const arg_t *defn, int32 strict, ...) +{ + va_list args; + const char *arg, *val; + char **f_argv; + int32 f_argc; + + va_start(args, strict); + f_argc = 0; + while ((arg = va_arg(args, const char *))) { + ++f_argc; + val = va_arg(args, const char*); + if (val == NULL) { + E_ERROR("Number of arguments must be even!\n"); + return NULL; + } + ++f_argc; + } + va_end(args); + + /* Now allocate f_argv */ + f_argv = (char**)ckd_calloc(f_argc, sizeof(*f_argv)); + va_start(args, strict); + f_argc = 0; + while ((arg = va_arg(args, const char *))) { + f_argv[f_argc] = ckd_salloc(arg); + ++f_argc; + val = va_arg(args, const char*); + f_argv[f_argc] = ckd_salloc(val); + ++f_argc; + } + va_end(args); + + return parse_options(inout_cmdln, defn, f_argc, f_argv, strict); +} + +cmd_ln_t * +cmd_ln_parse_file_r(cmd_ln_t *inout_cmdln, const arg_t * defn, const char *filename, int32 strict) +{ + FILE *file; + int argc; + int argv_size; + char *str; + int arg_max_length = 512; + int len = 0; + int quoting, ch; + char **f_argv; + int rv = 0; + const char separator[] = " \t\r\n"; + + if ((file = fopen(filename, "r")) == NULL) { + E_ERROR("Cannot open configuration file %s for reading\n", + filename); + return NULL; + } + + ch = fgetc(file); + /* Skip to the next interesting character */ + for (; ch != EOF && strchr(separator, ch); ch = fgetc(file)) ; + + if (ch == EOF) { + fclose(file); + return NULL; + } 
+ + /* + * Initialize default argv, argc, and argv_size. + */ + argv_size = 30; + argc = 0; + f_argv = (char **)ckd_calloc(argv_size, sizeof(char *)); + /* Silently make room for \0 */ + str = (char* )ckd_calloc(arg_max_length + 1, sizeof(char)); + quoting = 0; + + do { + /* Handle arguments that are commented out */ + if (len == 0 && argc % 2 == 0) { + while (ch == '#') { + /* Skip everything until newline */ + for (ch = fgetc(file); ch != EOF && ch != '\n'; ch = fgetc(file)) ; + /* Skip to the next interesting character */ + for (ch = fgetc(file); ch != EOF && strchr(separator, ch); ch = fgetc(file)) ; + } + + /* Check if we are at the last line (without anything interesting in it) */ + if (ch == EOF) + break; + } + + /* Handle quoted arguments */ + if (ch == '"' || ch == '\'') { + if (quoting == ch) /* End a quoted section with the same type */ + quoting = 0; + else if (quoting) { + E_ERROR("Nesting quotations is not supported!\n"); + rv = 1; + break; + } + else + quoting = ch; /* Start a quoted section */ + } + else if (ch == EOF || (!quoting && strchr(separator, ch))) { + /* Reallocate argv so it is big enough to contain all the arguments */ + if (argc >= argv_size) { + char **tmp_argv; + if (!(tmp_argv = + (char **)ckd_realloc(f_argv, argv_size * 2 * sizeof(char *)))) { + rv = 1; + break; + } + f_argv = tmp_argv; + argv_size *= 2; + } + + /* Add the string to the list of arguments */ + f_argv[argc] = ckd_salloc(str); + len = 0; + str[0] = '\0'; + argc++; + + if (quoting) + E_WARN("Unclosed quotation, having EOF close it...\n"); + + /* Skip to the next interesting character */ + for (; ch != EOF && strchr(separator, ch); ch = fgetc(file)) ; + + if (ch == EOF) + break; + + /* We already have the next character */ + continue; + } + else { + if (len >= arg_max_length) { + /* Make room for more chars (including the \0 !) 
*/ + char *tmp_str = str; + if ((tmp_str = (char *)ckd_realloc(str, (1 + arg_max_length * 2) * sizeof(char))) == NULL) { + rv = 1; + break; + } + str = tmp_str; + arg_max_length *= 2; + } + /* Add the char to the argument string */ + str[len++] = ch; + /* Always null terminate */ + str[len] = '\0'; + } + + ch = fgetc(file); + } while (1); + + fclose(file); + + ckd_free(str); + + if (rv) { + for (ch = 0; ch < argc; ++ch) + ckd_free(f_argv[ch]); + ckd_free(f_argv); + return NULL; + } + + return parse_options(inout_cmdln, defn, argc, f_argv, strict); +} + +void +cmd_ln_log_help_r(cmd_ln_t *cmdln, arg_t const* defn) +{ + if (defn == NULL) + return; + E_INFO("Arguments list definition:\n"); + if (cmdln == NULL) { + cmdln = cmd_ln_parse_r(NULL, defn, 0, NULL, FALSE); + arg_log_r(cmdln, defn, TRUE, FALSE); + cmd_ln_free_r(cmdln); + } + else + arg_log_r(cmdln, defn, TRUE, FALSE); +} + +void +cmd_ln_log_values_r(cmd_ln_t *cmdln, arg_t const* defn) +{ + if (defn == NULL) + return; + E_INFO("Current configuration:\n"); + arg_log_r(cmdln, defn, FALSE, FALSE); +} + +int +cmd_ln_exists_r(cmd_ln_t *cmdln, const char *name) +{ + void *val; + if (cmdln == NULL) + return FALSE; + return (hash_table_lookup(cmdln->ht, name, &val) == 0); +} + +cmd_ln_val_t * +cmd_ln_access_r(cmd_ln_t *cmdln, const char *name) +{ + void *val; + if (hash_table_lookup(cmdln->ht, name, &val) < 0) { + E_ERROR("Unknown argument: %s\n", name); + return NULL; + } + return (cmd_ln_val_t *)val; +} + +int +cmd_ln_type_r(cmd_ln_t *cmdln, char const *name) +{ + cmd_ln_val_t *val = cmd_ln_access_r(cmdln, name); + if (val == NULL) + return 0; + return val->type; +} + + +char const * +cmd_ln_str_r(cmd_ln_t *cmdln, char const *name) +{ + cmd_ln_val_t *val; + val = cmd_ln_access_r(cmdln, name); + if (val == NULL) + return NULL; + if (!(val->type & ARG_STRING)) { + E_ERROR("Argument %s does not have string type\n", name); + return NULL; + } + return (char const *)val->val.ptr; +} + +char const ** 
+cmd_ln_str_list_r(cmd_ln_t *cmdln, char const *name) +{ + cmd_ln_val_t *val; + val = cmd_ln_access_r(cmdln, name); + if (val == NULL) + return NULL; + if (!(val->type & ARG_STRING_LIST)) { + E_ERROR("Argument %s does not have string list type\n", name); + return NULL; + } + return (char const **)val->val.ptr; +} + +long +cmd_ln_int_r(cmd_ln_t *cmdln, char const *name) +{ + cmd_ln_val_t *val; + val = cmd_ln_access_r(cmdln, name); + if (val == NULL) + return 0L; + if (!(val->type & (ARG_INTEGER | ARG_BOOLEAN))) { + E_ERROR("Argument %s does not have integer type\n", name); + return 0L; + } + return val->val.i; +} + +double +cmd_ln_float_r(cmd_ln_t *cmdln, char const *name) +{ + cmd_ln_val_t *val; + val = cmd_ln_access_r(cmdln, name); + if (val == NULL) + return 0.0; + if (!(val->type & ARG_FLOATING)) { + E_ERROR("Argument %s does not have floating-point type\n", name); + return 0.0; + } + return val->val.fl; +} + +void +cmd_ln_set_str_r(cmd_ln_t *cmdln, char const *name, char const *str) +{ + cmd_ln_val_t *val; + val = cmd_ln_access_r(cmdln, name); + if (val == NULL) { + E_ERROR("Unknown argument: %s\n", name); + return; + } + if (!(val->type & ARG_STRING)) { + E_ERROR("Argument %s does not have string type\n", name); + return; + } + ckd_free(val->val.ptr); + val->val.ptr = ckd_salloc(str); +} + +void +cmd_ln_set_str_extra_r(cmd_ln_t *cmdln, char const *name, char const *str) +{ + cmd_ln_val_t *val; + if (hash_table_lookup(cmdln->ht, name, (void **)&val) < 0) { + val = cmd_ln_val_init(ARG_STRING, name, str); + hash_table_enter(cmdln->ht, val->name, (void *)val); + } else { + if (!(val->type & ARG_STRING)) { + E_ERROR("Argument %s does not have string type\n", name); + return; + } + ckd_free(val->val.ptr); + val->val.ptr = ckd_salloc(str); + } +} + +void +cmd_ln_set_int_r(cmd_ln_t *cmdln, char const *name, long iv) +{ + cmd_ln_val_t *val; + val = cmd_ln_access_r(cmdln, name); + if (val == NULL) { + E_ERROR("Unknown argument: %s\n", name); + return; + } + if 
(!(val->type & (ARG_INTEGER | ARG_BOOLEAN))) { + E_ERROR("Argument %s does not have integer type\n", name); + return; + } + val->val.i = iv; +} + +void +cmd_ln_set_float_r(cmd_ln_t *cmdln, char const *name, double fv) +{ + cmd_ln_val_t *val; + val = cmd_ln_access_r(cmdln, name); + if (val == NULL) { + E_ERROR("Unknown argument: %s\n", name); + return; + } + if (!(val->type & ARG_FLOATING)) { + E_ERROR("Argument %s does not have floating-point type\n", name); + return; + } + val->val.fl = fv; +} + +cmd_ln_t * +cmd_ln_retain(cmd_ln_t *cmdln) +{ + ++cmdln->refcount; + return cmdln; +} + +int +cmd_ln_free_r(cmd_ln_t *cmdln) +{ + if (cmdln == NULL) + return 0; + if (--cmdln->refcount > 0) + return cmdln->refcount; + + if (cmdln->ht) { + glist_t entries; + gnode_t *gn; + int32 n; + + entries = hash_table_tolist(cmdln->ht, &n); + for (gn = entries; gn; gn = gnode_next(gn)) { + hash_entry_t *e = (hash_entry_t *)gnode_ptr(gn); + cmd_ln_val_free((cmd_ln_val_t *)e->val); + } + glist_free(entries); + hash_table_free(cmdln->ht); + cmdln->ht = NULL; + } + + if (cmdln->f_argv) { + int32 i; + for (i = 0; i < (int32)cmdln->f_argc; ++i) { + ckd_free(cmdln->f_argv[i]); + } + ckd_free(cmdln->f_argv); + cmdln->f_argv = NULL; + cmdln->f_argc = 0; + } + ckd_free(cmdln); + return 0; +} + +/* vim: set ts=4 sw=4: */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/dtoa.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/dtoa.c new file mode 100644 index 0000000000000000000000000000000000000000..f8801d6028243e02e3e8723fb3f4e11de56b6e87 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/dtoa.c @@ -0,0 +1,2985 @@ +/**************************************************************** + * + * The author of this software is David M. Gay. + * + * Copyright (c) 1991, 2000, 2001 by Lucent Technologies. 
+ * + * Permission to use, copy, modify, and distribute this software for any + * purpose without fee is hereby granted, provided that this entire notice + * is included in all copies of any software which is or includes a copy + * or modification of this software and in all copies of the supporting + * documentation for such software. + * + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR LUCENT MAKES ANY + * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY + * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE. + * + ***************************************************************/ + +/**************************************************************** + * This is dtoa.c by David M. Gay, downloaded from + * http://www.netlib.org/fp/dtoa.c on April 15, 2009 and modified for + * inclusion into the Python core by Mark E. T. Dickinson and Eric V. Smith. + * It was taken from Python distribution then and imported into sphinxbase. + * Python version is preferred due to cleanups, though original + * version at netlib is still maintained. + * + * Please remember to check http://www.netlib.org/fp regularly for bugfixes and updates. + * + * The major modifications from Gay's original code are as follows: + * + * 0. The original code has been specialized to Sphinxbase's needs by removing + * many of the #ifdef'd sections. In particular, code to support VAX and + * IBM floating-point formats, hex NaNs, hex floats, locale-aware + * treatment of the decimal point, and setting of the inexact flag have + * been removed. + * + * 1. We use ckd_malloc and ckd_free in place of malloc and free. + * + * 2. The public functions strtod, dtoa and freedtoa all now have + * a sb_ prefix. + * + * 3. Instead of assuming that malloc always succeeds, we thread + * malloc failures through the code. 
The functions + * + * Balloc, multadd, s2b, i2b, mult, pow5mult, lshift, diff, d2b + * + * of return type *Bigint all return NULL to indicate a malloc failure. + * Similarly, rv_alloc and nrv_alloc (return type char *) return NULL on + * failure. bigcomp now has return type int (it used to be void) and + * returns -1 on failure and 0 otherwise. sb_dtoa returns NULL + * on failure. sb_strtod indicates failure due to malloc failure + * by returning -1.0, setting errno=ENOMEM and *se to s00. + * + * 4. The static variable dtoa_result has been removed. Callers of + * sb_dtoa are expected to call sb_freedtoa to free the memory allocated + * by sb_dtoa. + * + * 5. The code has been reformatted to better fit with C style. + * + * 6. A bug in the memory allocation has been fixed: to avoid FREEing memory + * that hasn't been MALLOC'ed, private_mem should only be used when k <= + * Kmax. + * + * 7. sb_strtod has been modified so that it doesn't accept strings with + * leading whitespace. + * + * 8. Global static variables are not used due to memory access issues. Fixes + * usage from multiple threads. + * + ***************************************************************/ + +/* Please send bug reports for the original dtoa.c code to David M. Gay (dmg + * at acm dot org, with " at " changed at "@" and " dot " changed to "."). + */ + +/* On a machine with IEEE extended-precision registers, it is + * necessary to specify double-precision (53-bit) rounding precision + * before invoking strtod or dtoa. If the machine uses (the equivalent + * of) Intel 80x87 arithmetic, the call + * _control87(PC_53, MCW_PC); + * does this with many compilers. Whether this or another call is + * appropriate depends on the compiler; for this to work, it may be + * necessary to #include "float.h" or another system-dependent header + * file. + */ + +/* strtod for IEEE-, VAX-, and IBM-arithmetic machines. 
+ * + * This strtod returns a nearest machine number to the input decimal + * string (or sets errno to ERANGE). With IEEE arithmetic, ties are + * broken by the IEEE round-even rule. Otherwise ties are broken by + * biased rounding (add half and chop). + * + * Inspired loosely by William D. Clinger's paper "How to Read Floating + * Point Numbers Accurately" [Proc. ACM SIGPLAN '90, pp. 92-101]. + * + * Modifications: + * + * 1. We only require IEEE, IBM, or VAX double-precision + * arithmetic (not IEEE double-extended). + * 2. We get by with floating-point arithmetic in a case that + * Clinger missed -- when we're computing d * 10^n + * for a small integer d and the integer n is not too + * much larger than 22 (the maximum integer k for which + * we can represent 10^k exactly), we may be able to + * compute (d*10^k) * 10^(e-k) with just one roundoff. + * 3. Rather than a bit-at-a-time adjustment of the binary + * result in the hard case, we use floating-point + * arithmetic to determine the adjustment to within + * one bit; only in really hard cases do we need to + * compute a second residual. + * 4. Because of 3., we don't need a large table of powers of 10 + * for ten-to-e (just some small tables, e.g. of 10^k + * for 0 <= k <= 22). + */ + +/* Linking of sphinxbase's #defines to Gay's #defines starts here. */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include + +#include +#include + +#ifdef WORDS_BIGENDIAN +#define IEEE_MC68k +#else +#define IEEE_8087 +#endif + +#define Long int32 /* ZOMG */ +#define ULong uint32 /* WTF */ +#ifdef HAVE_LONG_LONG +#define ULLong uint64 +#endif + +#define MALLOC ckd_malloc +#define FREE ckd_free + +#define DBL_DIG 15 +#define DBL_MAX_10_EXP 308 +#define DBL_MAX_EXP 1024 +#define FLT_RADIX 2 + +/* maximum permitted exponent value for strtod; exponents larger than + MAX_ABS_EXP in absolute value get truncated to +-MAX_ABS_EXP. MAX_ABS_EXP + should fit into an int. 
*/ +#ifndef MAX_ABS_EXP +#define MAX_ABS_EXP 1100000000U +#endif +/* Bound on length of pieces of input strings in sb_strtod; specifically, + this is used to bound the total number of digits ignoring leading zeros and + the number of digits that follow the decimal point. Ideally, MAX_DIGITS + should satisfy MAX_DIGITS + 400 < MAX_ABS_EXP; that ensures that the + exponent clipping in sb_strtod can't affect the value of the output. */ +#ifndef MAX_DIGITS +#define MAX_DIGITS 1000000000U +#endif + +/* End sphinxbase #define linking */ + +#ifdef DEBUG +#define Bug(x) {fprintf(stderr, "%s\n", x); exit(1);} +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + +typedef union { double d; ULong L[2]; } U; + +#ifdef IEEE_8087 +#define word0(x) (x)->L[1] +#define word1(x) (x)->L[0] +#else +#define word0(x) (x)->L[0] +#define word1(x) (x)->L[1] +#endif +#define dval(x) (x)->d + +#ifndef STRTOD_DIGLIM +#define STRTOD_DIGLIM 40 +#endif + +/* maximum permitted exponent value for strtod; exponents larger than + MAX_ABS_EXP in absolute value get truncated to +-MAX_ABS_EXP. MAX_ABS_EXP + should fit into an int. */ +#ifndef MAX_ABS_EXP +#define MAX_ABS_EXP 1100000000U +#endif +/* Bound on length of pieces of input strings in sb_strtod; specifically, + this is used to bound the total number of digits ignoring leading zeros and + the number of digits that follow the decimal point. Ideally, MAX_DIGITS + should satisfy MAX_DIGITS + 400 < MAX_ABS_EXP; that ensures that the + exponent clipping in sb_strtod can't affect the value of the output. */ +#ifndef MAX_DIGITS +#define MAX_DIGITS 1000000000U +#endif + +/* Guard against trying to use the above values on unusual platforms with ints + * of width less than 32 bits. */ +#if MAX_ABS_EXP > 0x7fffffff +#error "MAX_ABS_EXP should fit in an int" +#endif +#if MAX_DIGITS > 0x7fffffff +#error "MAX_DIGITS should fit in an int" +#endif + +/* The following definition of Storeinc is appropriate for MIPS processors. 
+ * An alternative that might be better on some machines is + * #define Storeinc(a,b,c) (*a++ = b << 16 | c & 0xffff) + */ +#if defined(IEEE_8087) +#define Storeinc(a,b,c) (((unsigned short *)a)[1] = (unsigned short)b, \ + ((unsigned short *)a)[0] = (unsigned short)c, a++) +#else +#define Storeinc(a,b,c) (((unsigned short *)a)[0] = (unsigned short)b, \ + ((unsigned short *)a)[1] = (unsigned short)c, a++) +#endif + +/* #define P DBL_MANT_DIG */ +/* Ten_pmax = floor(P*log(2)/log(5)) */ +/* Bletch = (highest power of 2 < DBL_MAX_10_EXP) / 16 */ +/* Quick_max = floor((P-1)*log(FLT_RADIX)/log(10) - 1) */ +/* Int_max = floor(P*log(FLT_RADIX)/log(10) - 1) */ + +#define Exp_shift 20 +#define Exp_shift1 20 +#define Exp_msk1 0x100000 +#define Exp_msk11 0x100000 +#define Exp_mask 0x7ff00000 +#define P 53 +#define Nbits 53 +#define Bias 1023 +#define Emax 1023 +#define Emin (-1022) +#define Etiny (-1074) /* smallest denormal is 2**Etiny */ +#define Exp_1 0x3ff00000 +#define Exp_11 0x3ff00000 +#define Ebits 11 +#define Frac_mask 0xfffff +#define Frac_mask1 0xfffff +#define Ten_pmax 22 +#define Bletch 0x10 +#define Bndry_mask 0xfffff +#define Bndry_mask1 0xfffff +#define Sign_bit 0x80000000 +#define Log2P 1 +#define Tiny0 0 +#define Tiny1 1 +#define Quick_max 14 +#define Int_max 14 + +#ifndef Flt_Rounds +#ifdef FLT_ROUNDS +#define Flt_Rounds FLT_ROUNDS +#else +#define Flt_Rounds 1 +#endif +#endif /*Flt_Rounds*/ + +#define Rounding Flt_Rounds + +#define Big0 (Frac_mask1 | Exp_msk1*(DBL_MAX_EXP+Bias-1)) +#define Big1 0xffffffff + +/* Standard NaN used by sb_stdnan. */ + +#define NAN_WORD0 0x7ff80000 +#define NAN_WORD1 0 + +/* Bits of the representation of positive infinity. 
*/ + +#define POSINF_WORD0 0x7ff00000 +#define POSINF_WORD1 0 + +/* struct BCinfo is used to pass information from sb_strtod to bigcomp */ + +typedef struct BCinfo BCinfo; +struct +BCinfo { + int e0, nd, nd0, scale; +}; + +#define FFFFFFFF 0xffffffffUL + +#define Kmax 7 + +/* struct Bigint is used to represent arbitrary-precision integers. These + integers are stored in sign-magnitude format, with the magnitude stored as + an array of base 2**32 digits. Bigints are always normalized: if x is a + Bigint then x->wds >= 1, and either x->wds == 1 or x[wds-1] is nonzero. + + The Bigint fields are as follows: + + - next is a header used by Balloc and Bfree to keep track of lists + of freed Bigints; it's also used for the linked list of + powers of 5 of the form 5**2**i used by pow5mult. + - k indicates which pool this Bigint was allocated from + - maxwds is the maximum number of words space was allocated for + (usually maxwds == 2**k) + - sign is 1 for negative Bigints, 0 for positive. The sign is unused + (ignored on inputs, set to 0 on outputs) in almost all operations + involving Bigints: a notable exception is the diff function, which + ignores signs on inputs but sets the sign of the output correctly. + - wds is the actual number of significant words + - x contains the vector of words (digits) for this Bigint, from least + significant (x[0]) to most significant (x[wds-1]). +*/ + +struct +Bigint { + struct Bigint *next; + int k, maxwds, sign, wds; + ULong x[1]; +}; + +typedef struct Bigint Bigint; + +#define SPHINXBASE_USING_MEMORY_DEBUGGER 1 + +#ifndef SPHINXBASE_USING_MEMORY_DEBUGGER + +#ifndef PRIVATE_MEM +#define PRIVATE_MEM 2304 +#endif +#define PRIVATE_mem ((PRIVATE_MEM+sizeof(double)-1)/sizeof(double)) +static double private_mem[PRIVATE_mem], *pmem_next = private_mem; + +/* Memory management: memory is allocated from, and returned to, Kmax+1 pools + of memory, where pool k (0 <= k <= Kmax) is for Bigints b with b->maxwds == + 1 << k. 
These pools are maintained as linked lists, with freelist[k] + pointing to the head of the list for pool k. + + On allocation, if there's no free slot in the appropriate pool, MALLOC is + called to get more memory. This memory is not returned to the system until + Python quits. There's also a private memory pool that's allocated from + in preference to using MALLOC. + + For Bigints with more than (1 << Kmax) digits (which implies at least 1233 + decimal digits), memory is directly allocated using MALLOC, and freed using + FREE. + + XXX: it would be easy to bypass this memory-management system and + translate each call to Balloc into a call to PyMem_Malloc, and each + Bfree to PyMem_Free. Investigate whether this has any significant + performance on impact. */ + +static Bigint *freelist[Kmax+1]; + +/* Allocate space for a Bigint with up to 1<next; + else { + x = 1 << k; + len = (sizeof(Bigint) + (x-1)*sizeof(ULong) + sizeof(double) - 1) + /sizeof(double); + if (k <= Kmax && pmem_next - private_mem + len <= PRIVATE_mem) { + rv = (Bigint*)pmem_next; + pmem_next += len; + } + else { + rv = (Bigint*)MALLOC(len*sizeof(double)); + if (rv == NULL) + return NULL; + } + rv->k = k; + rv->maxwds = x; + } + rv->sign = rv->wds = 0; + return rv; +} + +/* Free a Bigint allocated with Balloc */ + +static void +Bfree(Bigint *v) +{ + if (v) { + if (v->k > Kmax) + FREE((void*)v); + else { + v->next = freelist[v->k]; + freelist[v->k] = v; + } + } +} + +#else + +/* Alternative versions of Balloc and Bfree that use PyMem_Malloc and + PyMem_Free directly in place of the custom memory allocation scheme above. + These are provided for the benefit of memory debugging tools like + Valgrind. 
*/ + +/* Allocate space for a Bigint with up to 1 << k digits. Every call goes straight to MALLOC; returns NULL if that fails. On success k and maxwds are set and sign and wds are zero. */ + +static Bigint * +Balloc(int k) +{ + int x; + Bigint *rv; + unsigned int len; + + x = 1 << k; + len = (sizeof(Bigint) + (x-1)*sizeof(ULong) + sizeof(double) - 1) + /sizeof(double); /* allocation size measured in doubles, rounded up */ + + rv = (Bigint*)MALLOC(len*sizeof(double)); + if (rv == NULL) + return NULL; + + rv->k = k; + rv->maxwds = x; + rv->sign = rv->wds = 0; + return rv; +} + +/* Free a Bigint allocated with Balloc. A NULL argument is ignored. */ + +static void +Bfree(Bigint *v) +{ + if (v) { + FREE((void*)v); + } +} + +#endif /* SPHINXBASE_USING_MEMORY_DEBUGGER */ + +/* Bcopy(x,y) copies the sign and wds fields and the first y->wds words of y into x; the next, k and maxwds fields of x are left untouched. */ +#define Bcopy(x,y) memcpy((char *)&x->sign, (char *)&y->sign, \ + y->wds*sizeof(Long) + 2*sizeof(int)) + +/* Multiply a Bigint b by m and add a. Either modifies b in place and returns + a pointer to the modified b, or Bfrees b and returns a pointer to a copy. + On failure, return NULL. In this case, b will have been already freed. */ + +static Bigint * +multadd(Bigint *b, int m, int a) /* multiply by m and add a */ +{ + int i, wds; +#ifdef ULLong + ULong *x; + ULLong carry, y; +#else + ULong carry, *x, y; + ULong xi, z; +#endif + Bigint *b1; + + wds = b->wds; + x = b->x; + i = 0; + carry = a; /* the addend enters as the carry into the least significant word */ + do { +#ifdef ULLong + y = *x * (ULLong)m + carry; + carry = y >> 32; + *x++ = (ULong)(y & FFFFFFFF); +#else + xi = *x; + y = (xi & 0xffff) * m + carry; + z = (xi >> 16) * m + (y >> 16); + carry = z >> 16; + *x++ = (z << 16) + (y & 0xffff); +#endif + } + while(++i < wds); + if (carry) { + if (wds >= b->maxwds) { /* no room for one more word: move the value into a Bigint from the next size class */ + b1 = Balloc(b->k+1); + if (b1 == NULL){ + Bfree(b); + return NULL; + } + Bcopy(b1, b); + Bfree(b); + b = b1; + } + b->x[wds++] = (ULong)carry; + b->wds = wds; + } + return b; +} + +/* convert a string s containing nd decimal digits (possibly containing a + decimal separator at position nd0, which is ignored) to a Bigint. This + function carries on where the parsing code in sb_strtod leaves off: on + entry, y9 contains the result of converting the first 9 digits. Returns + NULL on failure.
*/ + +static Bigint * +s2b(const char *s, int nd0, int nd, ULong y9) +{ + Bigint *b; + int i, k; + Long x, y; + + x = (nd + 8) / 9; /* nd / 9 rounded up */ + for(k = 0, y = 1; x > y; y <<= 1, k++) ; /* k = smallest value with (1 << k) >= x; passed to Balloc as the size class */ + b = Balloc(k); + if (b == NULL) + return NULL; + b->x[0] = y9; + b->wds = 1; + + if (nd <= 9) + return b; + + s += 9; /* the first 9 digits are already accounted for by y9 */ + for (i = 9; i < nd0; i++) { + b = multadd(b, 10, *s++ - '0'); + if (b == NULL) + return NULL; + } + s++; /* advance one extra character to account for the separator slot at index nd0 */ + for(; i < nd; i++) { + b = multadd(b, 10, *s++ - '0'); /* on failure multadd has already freed b */ + if (b == NULL) + return NULL; + } + return b; +} + +/* count leading 0 bits in the 32-bit integer x. Returns 32 when x is 0. */ + +static int +hi0bits(ULong x) +{ + int k = 0; + + if (!(x & 0xffff0000)) { + k = 16; + x <<= 16; + } + if (!(x & 0xff000000)) { + k += 8; + x <<= 8; + } + if (!(x & 0xf0000000)) { + k += 4; + x <<= 4; + } + if (!(x & 0xc0000000)) { + k += 2; + x <<= 2; + } + if (!(x & 0x80000000)) { + k++; + if (!(x & 0x40000000)) /* both top bits clear after all the shifts, so the input was 0 */ + return 32; + } + return k; +} + +/* count trailing 0 bits in the 32-bit integer y, and shift y right by that + number of bits. Returns 32 and leaves *y unchanged when *y is 0. */ + +static int +lo0bits(ULong *y) +{ + int k; + ULong x = *y; + + if (x & 7) { /* fast path: at most two trailing zero bits */ + if (x & 1) + return 0; + if (x & 2) { + *y = x >> 1; + return 1; + } + *y = x >> 2; + return 2; + } + k = 0; + if (!(x & 0xffff)) { + k = 16; + x >>= 16; + } + if (!(x & 0xff)) { + k += 8; + x >>= 8; + } + if (!(x & 0xf)) { + k += 4; + x >>= 4; + } + if (!(x & 0x3)) { + k += 2; + x >>= 2; + } + if (!(x & 1)) { + k++; + x >>= 1; + if (!x) + return 32; + } + *y = x; + return k; +} + +/* convert a small nonnegative integer to a Bigint. Returns NULL if Balloc fails. */ + +static Bigint * +i2b(int i) +{ + Bigint *b; + + b = Balloc(1); + if (b == NULL) + return NULL; + b->x[0] = i; + b->wds = 1; + return b; +} + +/* multiply two Bigints. Returns a new Bigint, or NULL on failure. Ignores + the signs of a and b.
*/ + +static Bigint * +mult(Bigint *a, Bigint *b) +{ + Bigint *c; + int k, wa, wb, wc; + ULong *x, *xa, *xae, *xb, *xbe, *xc, *xc0; + ULong y; +#ifdef ULLong + ULLong carry, z; +#else + ULong carry, z; + ULong z2; +#endif + + if ((!a->x[0] && a->wds == 1) || (!b->x[0] && b->wds == 1)) { /* either operand is zero: return a fresh zero */ + c = Balloc(0); + if (c == NULL) + return NULL; + c->wds = 1; + c->x[0] = 0; + return c; + } + + if (a->wds < b->wds) { /* swap so that a is the operand with at least as many words */ + c = a; + a = b; + b = c; + } + k = a->k; + wa = a->wds; + wb = b->wds; + wc = wa + wb; /* upper bound on the number of words in the product */ + if (wc > a->maxwds) + k++; /* does not fit in the size class of a, so use the next one up */ + c = Balloc(k); + if (c == NULL) + return NULL; + for(x = c->x, xa = x + wc; x < xa; x++) + *x = 0; + xa = a->x; + xae = xa + wa; + xb = b->x; + xbe = xb + wb; + xc0 = c->x; +#ifdef ULLong + for(; xb < xbe; xc0++) { /* one pass per word of b, accumulating a * (that word) into c starting at xc0 */ + if ((y = *xb++)) { + x = xa; + xc = xc0; + carry = 0; + do { + z = *x++ * (ULLong)y + *xc + carry; + carry = z >> 32; + *xc++ = (ULong)(z & FFFFFFFF); + } + while(x < xae); + *xc = (ULong)carry; + } + } +#else + for(; xb < xbe; xb++, xc0++) { /* without a 64-bit type each word of b is handled as two 16-bit halves */ + if (y = *xb & 0xffff) { + x = xa; + xc = xc0; + carry = 0; + do { + z = (*x & 0xffff) * y + (*xc & 0xffff) + carry; + carry = z >> 16; + z2 = (*x++ >> 16) * y + (*xc >> 16) + carry; + carry = z2 >> 16; + Storeinc(xc, z2, z); + } + while(x < xae); + *xc = carry; + } + if (y = *xb >> 16) { + x = xa; + xc = xc0; + carry = 0; + z2 = *xc; + do { + z = (*x & 0xffff) * y + (*xc >> 16) + carry; + carry = z >> 16; + Storeinc(xc, z, z2); + z2 = (*x++ >> 16) * y + (*xc & 0xffff) + carry; + carry = z2 >> 16; + } + while(x < xae); + *xc = z2; + } + } +#endif + for(xc0 = c->x, xc = xc0 + wc; wc > 0 && !*--xc; --wc) ; /* drop leading zero words so the result is normalized */ + c->wds = wc; + return c; +} + +#ifndef SPHINXBASE_USING_MEMORY_DEBUGGER + +/* p5s is a linked list of powers of 5 of the form 5**(2**i), i >= 2 */ + +static Bigint *p5s; + +/* multiply the Bigint b by 5**k. Returns a pointer to the result, or NULL on + failure; if the returned pointer is distinct from b then the original + Bigint b will have been Bfree'd. Ignores the sign of b.
*/ + +static Bigint * +pow5mult(Bigint *b, int k) +{ + Bigint *b1, *p5, *p51; + int i; + static int p05[3] = { 5, 25, 125 }; + + if ((i = k & 3)) { /* the k mod 4 part is handled with one small multiplication */ + b = multadd(b, p05[i-1], 0); + if (b == NULL) + return NULL; + } + + if (!(k >>= 2)) /* what remains is a power of 5**4 == 625 */ + return b; + p5 = p5s; + if (!p5) { + /* first time */ + p5 = i2b(625); + if (p5 == NULL) { + Bfree(b); + return NULL; + } + p5s = p5; /* 625 becomes the head of the cached list */ + p5->next = 0; + } + for(;;) { /* binary exponentiation: p5 steps through successive squares of 625 */ + if (k & 1) { + b1 = mult(b, p5); + Bfree(b); + b = b1; + if (b == NULL) + return NULL; + } + if (!(k >>= 1)) + break; + p51 = p5->next; + if (!p51) { /* next square not cached yet: compute it and append it to the list */ + p51 = mult(p5,p5); + if (p51 == NULL) { + Bfree(b); + return NULL; + } + p51->next = 0; + p5->next = p51; + } + p5 = p51; + } + return b; +} + +#else + +/* Version of pow5mult that doesn't cache powers of 5. Provided for + the benefit of memory debugging tools like Valgrind. */ + +static Bigint * +pow5mult(Bigint *b, int k) +{ + Bigint *b1, *p5, *p51; + int i; + static int p05[3] = { 5, 25, 125 }; + + if ((i = k & 3)) { /* the k mod 4 part is handled with one small multiplication */ + b = multadd(b, p05[i-1], 0); + if (b == NULL) + return NULL; + } + + if (!(k >>= 2)) /* what remains is a power of 5**4 == 625 */ + return b; + p5 = i2b(625); + if (p5 == NULL) { + Bfree(b); + return NULL; + } + + for(;;) { /* binary exponentiation: p5 steps through successive squares of 625 */ + if (k & 1) { + b1 = mult(b, p5); + Bfree(b); + b = b1; + if (b == NULL) { + Bfree(p5); + return NULL; + } + } + if (!(k >>= 1)) + break; + p51 = mult(p5, p5); + Bfree(p5); /* the previous power is not kept in this version */ + p5 = p51; + if (p5 == NULL) { + Bfree(b); + return NULL; + } + } + Bfree(p5); /* p5 is owned by this call, so release it before returning */ + return b; +} + +#endif /* SPHINXBASE_USING_MEMORY_DEBUGGER */ + +/* shift a Bigint b left by k bits. Return a pointer to the shifted result, + or NULL on failure. If the returned pointer is distinct from b then the + original b will have been Bfree'd. Ignores the sign of b.
*/ + +static Bigint * +lshift(Bigint *b, int k) +{ + int i, k1, n, n1; + Bigint *b1; + ULong *x, *x1, *xe, z; + + if (!k || (!b->x[0] && b->wds == 1)) /* nothing to do for a zero shift or a zero value; b itself is returned */ + return b; + + n = k >> 5; /* number of whole 32-bit words in the shift */ + k1 = b->k; + n1 = n + b->wds + 1; /* word count of the result, allowing one extra word for bits carried out of the top */ + for(i = b->maxwds; n1 > i; i <<= 1) + k1++; /* raise the size class until n1 words fit */ + b1 = Balloc(k1); + if (b1 == NULL) { + Bfree(b); + return NULL; + } + x1 = b1->x; + for(i = 0; i < n; i++) + *x1++ = 0; + x = b->x; + xe = x + b->wds; + if (k &= 0x1f) { /* remaining shift of 1..31 bits within a word */ + k1 = 32 - k; + z = 0; + do { + *x1++ = *x << k | z; + z = *x++ >> k1; + } + while(x < xe); + if ((*x1 = z)) /* keep the extra top word only if bits were carried into it */ + ++n1; + } + else do + *x1++ = *x++; + while(x < xe); + b1->wds = n1 - 1; + Bfree(b); + return b1; +} + +/* Do a three-way compare of a and b, returning -1 if a < b, 0 if a == b and + 1 if a > b. Ignores signs of a and b. Note that when the word counts differ the value returned is a->wds - b->wds, so only its sign is meaningful; that shortcut relies on both operands being normalized. */ + +static int +cmp(Bigint *a, Bigint *b) +{ + ULong *xa, *xa0, *xb, *xb0; + int i, j; + + i = a->wds; + j = b->wds; +#ifdef DEBUG + if (i > 1 && !a->x[i-1]) + Bug("cmp called with a->x[a->wds-1] == 0"); + if (j > 1 && !b->x[j-1]) + Bug("cmp called with b->x[b->wds-1] == 0"); +#endif + if (i -= j) + return i; + xa0 = a->x; + xa = xa0 + j; + xb0 = b->x; + xb = xb0 + j; + for(;;) { /* same length: compare word by word from the most significant end */ + if (*--xa != *--xb) + return *xa < *xb ? -1 : 1; + if (xa <= xa0) + break; + } + return 0; +} + +/* Take the difference of Bigints a and b, returning a new Bigint. Returns + NULL on failure. The signs of a and b are ignored, but the sign of the + result is set appropriately.
*/ + +static Bigint * +diff(Bigint *a, Bigint *b) +{ + Bigint *c; + int i, wa, wb; + ULong *xa, *xae, *xb, *xbe, *xc; +#ifdef ULLong + ULLong borrow, y; +#else + ULong borrow, y; + ULong z; +#endif + + i = cmp(a,b); + if (!i) { /* equal magnitudes: the result is a fresh zero */ + c = Balloc(0); + if (c == NULL) + return NULL; + c->wds = 1; + c->x[0] = 0; + return c; + } + if (i < 0) { /* a is the smaller magnitude: swap the operands and record a negative result */ + c = a; + a = b; + b = c; + i = 1; + } + else + i = 0; + c = Balloc(a->k); + if (c == NULL) + return NULL; + c->sign = i; + wa = a->wds; + xa = a->x; + xae = xa + wa; + wb = b->wds; + xb = b->x; + xbe = xb + wb; + xc = c->x; + borrow = 0; +#ifdef ULLong + do { + y = (ULLong)*xa++ - *xb++ - borrow; + borrow = y >> 32 & (ULong)1; + *xc++ = (ULong)(y & FFFFFFFF); + } + while(xb < xbe); + while(xa < xae) { /* b is exhausted: propagate the borrow through the remaining words of a */ + y = *xa++ - borrow; + borrow = y >> 32 & (ULong)1; + *xc++ = (ULong)(y & FFFFFFFF); + } +#else + do { + y = (*xa & 0xffff) - (*xb & 0xffff) - borrow; + borrow = (y & 0x10000) >> 16; + z = (*xa++ >> 16) - (*xb++ >> 16) - borrow; + borrow = (z & 0x10000) >> 16; + Storeinc(xc, z, y); + } + while(xb < xbe); + while(xa < xae) { + y = (*xa & 0xffff) - borrow; + borrow = (y & 0x10000) >> 16; + z = (*xa++ >> 16) - borrow; + borrow = (z & 0x10000) >> 16; + Storeinc(xc, z, y); + } +#endif + while(!*--xc) /* drop leading zero words so the result is normalized */ + wa--; + c->wds = wa; + return c; +} + +/* Given a positive normal double x, return the difference between x and the + next double up. Doesn't give correct results for subnormals. */ + +static double +ulp(U *x) +{ + Long L; + U u; + + L = (word0(x) & Exp_mask) - (P-1)*Exp_msk1; + word0(&u) = L; + word1(&u) = 0; + return dval(&u); +} + +/* Convert a Bigint to a double plus an exponent. *e receives 32 minus the number of leading zero bits in the most significant word of a. */ + +static double +b2d(Bigint *a, int *e) +{ + ULong *xa, *xa0, w, y, z; + int k; + U d; + + xa0 = a->x; + xa = xa0 + a->wds; + y = *--xa; +#ifdef DEBUG + if (!y) Bug("zero y in b2d"); +#endif + k = hi0bits(y); + *e = 32 - k; + if (k < Ebits) { + word0(&d) = Exp_1 | y >> (Ebits - k); + w = xa > xa0 ?
*--xa : 0; + word1(&d) = y << ((32-Ebits) + k) | w >> (Ebits - k); + goto ret_d; + } + z = xa > xa0 ? *--xa : 0; /* words below the least significant one are treated as 0 */ + if (k -= Ebits) { + word0(&d) = Exp_1 | y << k | z >> (32 - k); + y = xa > xa0 ? *--xa : 0; + word1(&d) = z << k | y >> (32 - k); + } + else { + word0(&d) = Exp_1 | y; + word1(&d) = z; + } + ret_d: + return dval(&d); +} + +/* Convert a scaled double to a Bigint plus an exponent. Similar to d2b, + except that it accepts the scale parameter used in sb_strtod (which + should be either 0 or 2*P), and the normalization for the return value is + different (see below). On input, d should be finite and nonnegative, and d + / 2**scale should be exactly representable as an IEEE 754 double. + + Returns a Bigint b and an integer e such that + + dval(d) / 2**scale = b * 2**e. + + Unlike d2b, b is not necessarily odd: b and e are normalized so + that either 2**(P-1) <= b < 2**P and e >= Etiny, or b < 2**P + and e == Etiny. This applies equally to an input of 0.0: in that + case the return values are b = 0 and e = Etiny. + + The above normalization ensures that for all possible inputs d, + 2**e gives ulp(d/2**scale). + + Returns NULL on failure. +*/ + +static Bigint * +sd2b(U *d, int scale, int *e) +{ + Bigint *b; + + b = Balloc(1); + if (b == NULL) + return NULL; + + /* First construct b and e assuming that scale == 0. */ + b->wds = 2; + b->x[0] = word1(d); + b->x[1] = word0(d) & Frac_mask; + *e = Etiny - 1 + (int)((word0(d) & Exp_mask) >> Exp_shift); + if (*e < Etiny) /* exponent field is zero: clamp e and do not add the Exp_msk1 bit */ + *e = Etiny; + else + b->x[1] |= Exp_msk1; + + /* Now adjust for scale, provided that b != 0. */ + if (scale && (b->x[0] || b->x[1])) { + *e -= scale; + if (*e < Etiny) { + scale = Etiny - *e; /* from here on scale is reused as the number of bits to shift b right */ + *e = Etiny; + /* We can't shift more than P-1 bits without shifting out a 1. */ + assert(0 < scale && scale <= P - 1); + if (scale >= 32) { + /* The bits shifted out should all be zero.
*/ + assert(b->x[0] == 0); + b->x[0] = b->x[1]; + b->x[1] = 0; + scale -= 32; + } + if (scale) { + /* The bits shifted out should all be zero. */ + assert(b->x[0] << (32 - scale) == 0); + b->x[0] = (b->x[0] >> scale) | (b->x[1] << (32 - scale)); + b->x[1] >>= scale; + } + } + } + /* Ensure b is normalized. */ + if (!b->x[1]) + b->wds = 1; + + return b; +} + +/* Convert a double to a Bigint plus an exponent. Return NULL on failure. + + Given a finite nonzero double d, return an odd Bigint b and exponent *e + such that fabs(d) = b * 2**e. On return, *bits gives the number of + significant bits of b; that is, 2**(*bits-1) <= b < 2**(*bits). + + If d is zero, then b == 0, *e == -1010, *bits = 0. + + Note that the sign bit is cleared in the value that d points to. + */ + +static Bigint * +d2b(U *d, int *e, int *bits) +{ + Bigint *b; + int de, k; + ULong *x, y, z; + int i; + + b = Balloc(1); + if (b == NULL) + return NULL; + x = b->x; + + z = word0(d) & Frac_mask; + word0(d) &= 0x7fffffff; /* clear sign bit, which we ignore */ + if ((de = (int)(word0(d) >> Exp_shift))) /* nonzero exponent field: add the Exp_msk1 bit to the high word */ + z |= Exp_msk1; + if ((y = word1(d))) { + if ((k = lo0bits(&y))) { /* strip trailing zero bits from the low word and pull bits down from z */ + x[0] = y | z << (32 - k); + z >>= k; + } + else + x[0] = y; + i = + b->wds = (x[1] = z) ? 2 : 1; + } + else { /* low word is zero, so only z contributes and 32 more bits count as stripped */ + k = lo0bits(&z); + x[0] = z; + i = + b->wds = 1; + k += 32; + } + if (de) { + *e = de - Bias - (P-1) + k; + *bits = P - k; + } + else { + *e = de - Bias - (P-1) + 1 + k; + *bits = 32*i - hi0bits(x[i-1]); + } + return b; +} + +/* Compute the ratio of two Bigints, as a double. The result may have an + error of up to 2.5 ulps.
*/ + +static double +ratio(Bigint *a, Bigint *b) +{ + U da, db; + int k, ka, kb; + + dval(&da) = b2d(a, &ka); + dval(&db) = b2d(b, &kb); + k = ka - kb + 32*(a->wds - b->wds); /* bit length of a minus bit length of b */ + if (k > 0) + word0(&da) += k*Exp_msk1; /* presumably adds k to the exponent field, i.e. scales da by 2**k -- Exp_msk1 is defined outside this section */ + else { + k = -k; + word0(&db) += k*Exp_msk1; + } + return dval(&da) / dval(&db); +} + +/* tens[i] is 10**i for 0 <= i <= 22 */ +static const double +tens[] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, + 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, + 1e20, 1e21, 1e22 +}; + +/* bigtens[j] is 10**(16 * 2**j); tinytens[j] is the matching negative power, except for the extra factor in the last entry that is explained below */ +static const double +bigtens[] = { 1e16, 1e32, 1e64, 1e128, 1e256 }; +static const double tinytens[] = { 1e-16, 1e-32, 1e-64, 1e-128, + 9007199254740992.*9007199254740992.e-256 + /* = 2^106 * 1e-256 */ +}; +/* The factor of 2^53 in tinytens[4] helps us avoid setting the underflow */ +/* flag unnecessarily. It leads to a song and dance at the end of strtod. */ +#define Scale_Bit 0x10 /* tested in sb_strtod against the exponent shifted right by 4; when set, bc.scale becomes 2*P */ +#define n_bigtens 5 /* number of entries in bigtens and tinytens */ + +#define ULbits 32 +#define kshift 5 +#define kmask 31 + + /* dshift returns a shift count in the range 0..31: the leading zero count of the top word of b, minus 4, minus p2 when p2 is positive, reduced with kmask. bigcomp adds it to both of its operands so that the divisor ends up with 4 leading zero bits. */ +static int +dshift(Bigint *b, int p2) +{ + int rv = hi0bits(b->x[b->wds-1]) - 4; + if (p2 > 0) + rv -= p2; + return rv & kmask; +} + +/* special case of Bigint division. The quotient is always in the range 0 <= + quotient < 10, and on entry the divisor S is normalized so that its top 4 + bits (28--31) are zero and bit 27 is set.
*/ + /* quorem returns the quotient and overwrites b with the remainder; S is not modified. */ +static int +quorem(Bigint *b, Bigint *S) +{ + int n; + ULong *bx, *bxe, q, *sx, *sxe; +#ifdef ULLong + ULLong borrow, carry, y, ys; +#else + ULong borrow, carry, y, ys; + ULong si, z, zs; +#endif + + n = S->wds; +#ifdef DEBUG + /*debug*/ if (b->wds > n) + /*debug*/ Bug("oversize b in quorem"); +#endif + if (b->wds < n) /* b has fewer words than S, so the quotient is 0 and b is already the remainder */ + return 0; + sx = S->x; + sxe = sx + --n; /* from here on n is the index of the top word */ + bx = b->x; + bxe = bx + n; + q = *bxe / (*sxe + 1); /* ensure q <= true quotient */ +#ifdef DEBUG + /*debug*/ if (q > 9) + /*debug*/ Bug("oversized quotient in quorem"); +#endif + if (q) { /* subtract q * S from b in place */ + borrow = 0; + carry = 0; + do { +#ifdef ULLong + ys = *sx++ * (ULLong)q + carry; + carry = ys >> 32; + y = *bx - (ys & FFFFFFFF) - borrow; + borrow = y >> 32 & (ULong)1; + *bx++ = (ULong)(y & FFFFFFFF); +#else + si = *sx++; + ys = (si & 0xffff) * q + carry; + zs = (si >> 16) * q + (ys >> 16); + carry = zs >> 16; + y = (*bx & 0xffff) - (ys & 0xffff) - borrow; + borrow = (y & 0x10000) >> 16; + z = (*bx >> 16) - (zs & 0xffff) - borrow; + borrow = (z & 0x10000) >> 16; + Storeinc(bx, z, y); +#endif + } + while(sx <= sxe); + if (!*bxe) { /* top word became zero: renormalize b */ + bx = b->x; + while(--bxe > bx && !*bxe) + --n; + b->wds = n; + } + } + if (cmp(b, S) >= 0) { /* the estimate was one too small: bump q and subtract S once more */ + q++; + borrow = 0; + carry = 0; + bx = b->x; + sx = S->x; + do { +#ifdef ULLong + ys = *sx++ + carry; + carry = ys >> 32; + y = *bx - (ys & FFFFFFFF) - borrow; + borrow = y >> 32 & (ULong)1; + *bx++ = (ULong)(y & FFFFFFFF); +#else + si = *sx++; + ys = (si & 0xffff) + carry; + zs = (si >> 16) + (ys >> 16); + carry = zs >> 16; + y = (*bx & 0xffff) - (ys & 0xffff) - borrow; + borrow = (y & 0x10000) >> 16; + z = (*bx >> 16) - (zs & 0xffff) - borrow; + borrow = (z & 0x10000) >> 16; + Storeinc(bx, z, y); +#endif + } + while(sx <= sxe); + bx = b->x; + bxe = bx + n; + if (!*bxe) { + while(--bxe > bx && !*bxe) + --n; + b->wds = n; + } + } + return q; +} + +/* sulp(x) is a version of ulp(x) that takes bc.scale into account.
+ + Assuming that x is finite and nonnegative (positive zero is fine + here) and x / 2^bc.scale is exactly representable as a double, + sulp(x) is equivalent to 2^bc.scale * ulp(x / 2^bc.scale). */ + +static double +sulp(U *x, BCinfo *bc) +{ + U u; + + if (bc->scale && 2*P + 1 > (int)((word0(x) & Exp_mask) >> Exp_shift)) { + /* rv/2^bc->scale is subnormal */ + word0(&u) = (P+2)*Exp_msk1; + word1(&u) = 0; + return u.d; /* a fixed value: high word (P+2)*Exp_msk1, low word 0, independent of x */ + } + else { + assert(word0(x) || word1(x)); /* x != 0.0 */ + return ulp(x); + } +} + +/* The bigcomp function handles some hard cases for strtod, for inputs + with more than STRTOD_DIGLIM digits. It's called once an initial + estimate for the double corresponding to the input string has + already been obtained by the code in sb_strtod. + + The bigcomp function is only called after sb_strtod has found a + double value rv such that either rv or rv + 1ulp represents the + correctly rounded value corresponding to the original string. It + determines which of these two values is the correct one by + computing the decimal digits of rv + 0.5ulp and comparing them with + the corresponding digits of s0. + + In the following, write dv for the absolute value of the number represented + by the input string. + + Inputs: + + s0 points to the first significant digit of the input string. + + rv is a (possibly scaled) estimate for the closest double value to the + value represented by the original input to sb_strtod. If + bc->scale is nonzero, then rv/2^(bc->scale) is the approximation to + the input value. + + bc is a struct containing information gathered during the parsing and + estimation steps of sb_strtod. Description of fields follows: + + bc->e0 gives the exponent of the input value, such that dv = (integer + given by the bc->nd digits of s0) * 10**e0 + + bc->nd gives the total number of significant digits of s0. It will + be at least 1. + + bc->nd0 gives the number of significant digits of s0 before the + decimal separator.
If there's no decimal separator, bc->nd0 == + bc->nd. + + bc->scale is the value used to scale rv to avoid doing arithmetic with + subnormal values. It's either 0 or 2*P (=106). + + Outputs: + + On successful exit, rv/2^(bc->scale) is the closest double to dv. + + Returns 0 on success, -1 on failure (e.g., due to a failed malloc call). */ + +static int +bigcomp(U *rv, const char *s0, BCinfo *bc) +{ + Bigint *b, *d; + int b2, d2, dd, i, nd, nd0, odd, p2, p5; + + nd = bc->nd; + nd0 = bc->nd0; + p5 = nd + bc->e0; /* decimal exponent of dv when its nd digits are read as a fraction in [0.1, 1) */ + b = sd2b(rv, bc->scale, &p2); + if (b == NULL) + return -1; + + /* record whether the lsb of rv/2^(bc->scale) is odd: in the exact halfway + case, this is used for round to even. */ + odd = b->x[0] & 1; + + /* left shift b by 1 bit and or a 1 into the least significant bit; + this gives us b * 2**p2 = rv/2^(bc->scale) + 0.5 ulp. */ + b = lshift(b, 1); + if (b == NULL) + return -1; + b->x[0] |= 1; + p2--; + + p2 -= p5; + d = i2b(1); + if (d == NULL) { + Bfree(b); + return -1; + } + /* Arrange for convenient computation of quotients: + * shift left if necessary so divisor has 4 leading 0 bits. + */ + if (p5 > 0) { /* positive power of 5 goes into the divisor d, negative into the dividend b */ + d = pow5mult(d, p5); + if (d == NULL) { + Bfree(b); + return -1; + } + } + else if (p5 < 0) { + b = pow5mult(b, -p5); + if (b == NULL) { + Bfree(d); + return -1; + } + } + if (p2 > 0) { /* likewise split the power of 2 into a shift for b (b2) or for d (d2) */ + b2 = p2; + d2 = 0; + } + else { + b2 = 0; + d2 = -p2; + } + i = dshift(d, d2); /* extra shift applied to both operands, which leaves b/d unchanged */ + if ((b2 += i) > 0) { + b = lshift(b, b2); + if (b == NULL) { + Bfree(d); + return -1; + } + } + if ((d2 += i) > 0) { + d = lshift(d, d2); + if (d == NULL) { + Bfree(b); + return -1; + } + } + + /* Compare s0 with b/d: set dd to -1, 0, or 1 according as s0 < b/d, s0 == + * b/d, or s0 > b/d. Here the digits of s0 are thought of as representing + * a number in the range [0.1, 1). */ + if (cmp(b, d) >= 0) + /* b/d >= 1 */ + dd = -1; + else { + i = 0; + for(;;) { /* generate the decimal digits of b/d one at a time and compare each with the matching digit of s0 */ + b = multadd(b, 10, 0); + if (b == NULL) { + Bfree(d); + return -1; + } + dd = s0[i < nd0 ?
i : i+1] - '0' - quorem(b, d); /* the index skips the separator slot at s0[nd0] */ + i++; + + if (dd) + break; + if (!b->x[0] && b->wds == 1) { + /* b/d == 0 */ + dd = i < nd; /* remainder is zero: s0 is larger exactly when it still has digits left */ + break; + } + if (!(i < nd)) { + /* b/d != 0, but digits of s0 exhausted */ + dd = -1; + break; + } + } + } + Bfree(b); + Bfree(d); + if (dd > 0 || (dd == 0 && odd)) /* step up by one scaled ulp when s0 is above the halfway point, or exactly on it with an odd lsb */ + dval(rv) += sulp(rv, bc); + return 0; +} + +/* Return a 'standard' NaN value. + + There are exactly two quiet NaNs that don't arise by 'quieting' signaling + NaNs (see IEEE 754-2008, section 6.2.1). If sign == 0, return the one whose + sign bit is cleared. Otherwise, return the one whose sign bit is set. +*/ + +double +sb_stdnan(int sign) +{ + U rv; + word0(&rv) = NAN_WORD0; + word1(&rv) = NAN_WORD1; + if (sign) + word0(&rv) |= Sign_bit; + return dval(&rv); +} + +/* Return positive or negative infinity, according to the given sign (0 for + * positive infinity, any nonzero value for negative infinity). */ + +double +sb_infinity(int sign) +{ + U rv; + word0(&rv) = POSINF_WORD0; + word1(&rv) = POSINF_WORD1; + return sign ? -dval(&rv) : dval(&rv); +} + +double +sb_strtod(const char *s00, char **se) +{ + int bb2, bb5, bbe, bd2, bd5, bs2, c, dsign, e, e1, error; + int esign, i, j, k, lz, nd, nd0, odd, sign; + const char *s, *s0, *s1; + double aadj, aadj1; + U aadj2, adj, rv, rv0; + ULong y, z, abs_exp; + Long L; + BCinfo bc; + Bigint *bb, *bb1, *bd, *bd0, *bs, *delta; + size_t ndigits, fraclen; + + dval(&rv) = 0.; + + /* Start parsing. */ + c = *(s = s00); + + /* Parse optional sign, if present. */ + sign = 0; + switch (c) { + case '-': + sign = 1; + /* FALLTHRU */ + case '+': + c = *++s; + } + + /* Skip leading zeros: lz is true iff there were leading zeros. */ + s1 = s; + while (c == '0') + c = *++s; + lz = s != s1; + + /* Point s0 at the first nonzero digit (if any). fraclen will be the + number of digits between the decimal point and the end of the + digit string. ndigits will be the total number of digits ignoring + leading zeros.
*/ + s0 = s1 = s; + while ('0' <= c && c <= '9') + c = *++s; + ndigits = s - s1; + fraclen = 0; + + /* Parse decimal point and following digits. */ + if (c == '.') { + c = *++s; + if (!ndigits) { + s1 = s; + while (c == '0') + c = *++s; + lz = lz || s != s1; + fraclen += (s - s1); + s0 = s; + } + s1 = s; + while ('0' <= c && c <= '9') + c = *++s; + ndigits += s - s1; + fraclen += s - s1; + } + + /* Now lz is true if and only if there were leading zero digits, and + ndigits gives the total number of digits ignoring leading zeros. A + valid input must have at least one digit. */ + if (!ndigits && !lz) { + if (se) + *se = (char *)s00; + goto parse_error; + } + + /* Range check ndigits and fraclen to make sure that they, and values + computed with them, can safely fit in an int. */ + if (ndigits > MAX_DIGITS || fraclen > MAX_DIGITS) { + if (se) + *se = (char *)s00; + goto parse_error; + } + nd = (int)ndigits; + nd0 = (int)ndigits - (int)fraclen; + + /* Parse exponent. */ + e = 0; + if (c == 'e' || c == 'E') { + s00 = s; + c = *++s; + + /* Exponent sign. */ + esign = 0; + switch (c) { + case '-': + esign = 1; + /* FALLTHRU */ + case '+': + c = *++s; + } + + /* Skip zeros. lz is true iff there are leading zeros. */ + s1 = s; + while (c == '0') + c = *++s; + lz = s != s1; + + /* Get absolute value of the exponent. */ + s1 = s; + abs_exp = 0; + while ('0' <= c && c <= '9') { + abs_exp = 10*abs_exp + (c - '0'); + c = *++s; + } + + /* abs_exp will be correct modulo 2**32. But 10**9 < 2**32, so if + there are at most 9 significant exponent digits then overflow is + impossible. */ + if (s - s1 > 9 || abs_exp > MAX_ABS_EXP) + e = (int)MAX_ABS_EXP; + else + e = (int)abs_exp; + if (esign) + e = -e; + + /* A valid exponent must have at least one digit. */ + if (s == s1 && !lz) + s = s00; + } + + /* Adjust exponent to take into account position of the point. */ + e -= nd - nd0; + if (nd0 <= 0) + nd0 = nd; + + /* Finished parsing. 
Set se to indicate how far we parsed */ + if (se) + *se = (char *)s; + + /* If all digits were zero, exit with return value +-0.0. Otherwise, + strip trailing zeros: scan back until we hit a nonzero digit. */ + if (!nd) + goto ret; + for (i = nd; i > 0; ) { + --i; + if (s0[i < nd0 ? i : i+1] != '0') { + ++i; + break; + } + } + e += nd - i; + nd = i; + if (nd0 > nd) + nd0 = nd; + + /* Summary of parsing results. After parsing, and dealing with zero + * inputs, we have values s0, nd0, nd, e, sign, where: + * + * - s0 points to the first significant digit of the input string + * + * - nd is the total number of significant digits (here, and + * below, 'significant digits' means the set of digits of the + * significand of the input that remain after ignoring leading + * and trailing zeros). + * + * - nd0 indicates the position of the decimal point, if present; it + * satisfies 1 <= nd0 <= nd. The nd significant digits are in + * s0[0:nd0] and s0[nd0+1:nd+1] using the usual Python half-open slice + * notation. (If nd0 < nd, then s0[nd0] contains a '.' character; if + * nd0 == nd, then s0[nd0] could be any non-digit character.) + * + * - e is the adjusted exponent: the absolute value of the number + * represented by the original input string is n * 10**e, where + * n is the integer represented by the concatenation of + * s0[0:nd0] and s0[nd0+1:nd+1] + * + * - sign gives the sign of the input: 1 for negative, 0 for positive + * + * - the first and last significant digits are nonzero + */ + + /* put first DBL_DIG+1 digits into integer y and z. + * + * - y contains the value represented by the first min(9, nd) + * significant digits + * + * - if nd > 9, z contains the value represented by significant digits + * with indices in [9, min(16, nd)). So y * 10**(min(16, nd) - 9) + z + * gives the value represented by the first min(16, nd) sig. digits. + */ + + bc.e0 = e1 = e; + y = z = 0; + for (i = 0; i < nd; i++) { + if (i < 9) + y = 10*y + s0[i < nd0 ? 
i : i+1] - '0'; + else if (i < DBL_DIG+1) + z = 10*z + s0[i < nd0 ? i : i+1] - '0'; + else + break; + } + + k = nd < DBL_DIG + 1 ? nd : DBL_DIG + 1; + dval(&rv) = y; + if (k > 9) { + dval(&rv) = tens[k - 9] * dval(&rv) + z; + } + bd0 = 0; + if (nd <= DBL_DIG + && Flt_Rounds == 1 + ) { + if (!e) + goto ret; + if (e > 0) { + if (e <= Ten_pmax) { + dval(&rv) *= tens[e]; + goto ret; + } + i = DBL_DIG - nd; + if (e <= Ten_pmax + i) { + /* A fancier test would sometimes let us do + * this for larger i values. + */ + e -= i; + dval(&rv) *= tens[i]; + dval(&rv) *= tens[e]; + goto ret; + } + } + else if (e >= -Ten_pmax) { + dval(&rv) /= tens[-e]; + goto ret; + } + } + e1 += nd - k; + + bc.scale = 0; + + /* Get starting approximation = rv * 10**e1 */ + + if (e1 > 0) { + if ((i = e1 & 15)) + dval(&rv) *= tens[i]; + if (e1 &= ~15) { + if (e1 > DBL_MAX_10_EXP) + goto ovfl; + e1 >>= 4; + for(j = 0; e1 > 1; j++, e1 >>= 1) + if (e1 & 1) + dval(&rv) *= bigtens[j]; + /* The last multiplication could overflow. */ + word0(&rv) -= P*Exp_msk1; + dval(&rv) *= bigtens[j]; + if ((z = word0(&rv) & Exp_mask) + > Exp_msk1*(DBL_MAX_EXP+Bias-P)) + goto ovfl; + if (z > Exp_msk1*(DBL_MAX_EXP+Bias-1-P)) { + /* set to largest number */ + /* (Can't trust DBL_MAX) */ + word0(&rv) = Big0; + word1(&rv) = Big1; + } + else + word0(&rv) += P*Exp_msk1; + } + } + else if (e1 < 0) { + /* The input decimal value lies in [10**e1, 10**(e1+16)). + + If e1 <= -512, underflow immediately. + If e1 <= -256, set bc.scale to 2*P. + + So for input value < 1e-256, bc.scale is always set; + for input value >= 1e-240, bc.scale is never set. + For input values in [1e-256, 1e-240), bc.scale may or may + not be set. 
*/ + + e1 = -e1; + if ((i = e1 & 15)) + dval(&rv) /= tens[i]; + if (e1 >>= 4) { + if (e1 >= 1 << n_bigtens) + goto undfl; + if (e1 & Scale_Bit) + bc.scale = 2*P; + for(j = 0; e1 > 0; j++, e1 >>= 1) + if (e1 & 1) + dval(&rv) *= tinytens[j]; + if (bc.scale && (j = 2*P + 1 - ((word0(&rv) & Exp_mask) + >> Exp_shift)) > 0) { + /* scaled rv is denormal; clear j low bits */ + if (j >= 32) { + word1(&rv) = 0; + if (j >= 53) + word0(&rv) = (P+2)*Exp_msk1; + else + word0(&rv) &= 0xffffffff << (j-32); + } + else + word1(&rv) &= 0xffffffff << j; + } + if (!dval(&rv)) + goto undfl; + } + } + + /* Now the hard part -- adjusting rv to the correct value.*/ + + /* Put digits into bd: true value = bd * 10^e */ + + bc.nd = nd; + bc.nd0 = nd0; /* Only needed if nd > STRTOD_DIGLIM, but done here */ + /* to silence an erroneous warning about bc.nd0 */ + /* possibly not being initialized. */ + if (nd > STRTOD_DIGLIM) { + /* ASSERT(STRTOD_DIGLIM >= 18); 18 == one more than the */ + /* minimum number of decimal digits to distinguish double values */ + /* in IEEE arithmetic. */ + + /* Truncate input to 18 significant digits, then discard any trailing + zeros on the result by updating nd, nd0, e and y suitably. (There's + no need to update z; it's not reused beyond this point.) */ + for (i = 18; i > 0; ) { + /* scan back until we hit a nonzero digit. significant digit 'i' + is s0[i] if i < nd0, s0[i+1] if i >= nd0. */ + --i; + if (s0[i < nd0 ? i : i+1] != '0') { + ++i; + break; + } + } + e += nd - i; + nd = i; + if (nd0 > nd) + nd0 = nd; + if (nd < 9) { /* must recompute y */ + y = 0; + for(i = 0; i < nd0; ++i) + y = 10*y + s0[i] - '0'; + for(; i < nd; ++i) + y = 10*y + s0[i+1] - '0'; + } + } + bd0 = s2b(s0, nd0, nd, y); + if (bd0 == NULL) + goto failed_malloc; + + /* Notation for the comments below. Write: + + - dv for the absolute value of the number represented by the original + decimal input string. + + - if we've truncated dv, write tdv for the truncated value. 
+ Otherwise, set tdv == dv. + + - srv for the quantity rv/2^bc.scale; so srv is the current binary + approximation to tdv (and dv). It should be exactly representable + in an IEEE 754 double. + */ + + for(;;) { + + /* This is the main correction loop for sb_strtod. + + We've got a decimal value tdv, and a floating-point approximation + srv=rv/2^bc.scale to tdv. The aim is to determine whether srv is + close enough (i.e., within 0.5 ulps) to tdv, and to compute a new + approximation if not. + + To determine whether srv is close enough to tdv, compute integers + bd, bb and bs proportional to tdv, srv and 0.5 ulp(srv) + respectively, and then use integer arithmetic to determine whether + |tdv - srv| is less than, equal to, or greater than 0.5 ulp(srv). + */ + + bd = Balloc(bd0->k); + if (bd == NULL) { + Bfree(bd0); + goto failed_malloc; + } + Bcopy(bd, bd0); + bb = sd2b(&rv, bc.scale, &bbe); /* srv = bb * 2^bbe */ + if (bb == NULL) { + Bfree(bd); + Bfree(bd0); + goto failed_malloc; + } + /* Record whether lsb of bb is odd, in case we need this + for the round-to-even step later. */ + odd = bb->x[0] & 1; + + /* tdv = bd * 10**e; srv = bb * 2**bbe */ + bs = i2b(1); + if (bs == NULL) { + Bfree(bb); + Bfree(bd); + Bfree(bd0); + goto failed_malloc; + } + + if (e >= 0) { + bb2 = bb5 = 0; + bd2 = bd5 = e; + } + else { + bb2 = bb5 = -e; + bd2 = bd5 = 0; + } + if (bbe >= 0) + bb2 += bbe; + else + bd2 -= bbe; + bs2 = bb2; + bb2++; + bd2++; + + /* At this stage bd5 - bb5 == e == bd2 - bb2 + bbe, bb2 - bs2 == 1, + and bs == 1, so: + + tdv == bd * 10**e = bd * 2**(bbe - bb2 + bd2) * 5**(bd5 - bb5) + srv == bb * 2**bbe = bb * 2**(bbe - bb2 + bb2) + 0.5 ulp(srv) == 2**(bbe-1) = bs * 2**(bbe - bb2 + bs2) + + It follows that: + + M * tdv = bd * 2**bd2 * 5**bd5 + M * srv = bb * 2**bb2 * 5**bb5 + M * 0.5 ulp(srv) = bs * 2**bs2 * 5**bb5 + + for some constant M. (Actually, M == 2**(bb2 - bbe) * 5**bb5, but + this fact is not needed below.) 
+ */ + + /* Remove factor of 2**i, where i = min(bb2, bd2, bs2). */ + i = bb2 < bd2 ? bb2 : bd2; + if (i > bs2) + i = bs2; + if (i > 0) { + bb2 -= i; + bd2 -= i; + bs2 -= i; + } + + /* Scale bb, bd, bs by the appropriate powers of 2 and 5. */ + if (bb5 > 0) { + bs = pow5mult(bs, bb5); + if (bs == NULL) { + Bfree(bb); + Bfree(bd); + Bfree(bd0); + goto failed_malloc; + } + bb1 = mult(bs, bb); + Bfree(bb); + bb = bb1; + if (bb == NULL) { + Bfree(bs); + Bfree(bd); + Bfree(bd0); + goto failed_malloc; + } + } + if (bb2 > 0) { + bb = lshift(bb, bb2); + if (bb == NULL) { + Bfree(bs); + Bfree(bd); + Bfree(bd0); + goto failed_malloc; + } + } + if (bd5 > 0) { + bd = pow5mult(bd, bd5); + if (bd == NULL) { + Bfree(bb); + Bfree(bs); + Bfree(bd0); + goto failed_malloc; + } + } + if (bd2 > 0) { + bd = lshift(bd, bd2); + if (bd == NULL) { + Bfree(bb); + Bfree(bs); + Bfree(bd0); + goto failed_malloc; + } + } + if (bs2 > 0) { + bs = lshift(bs, bs2); + if (bs == NULL) { + Bfree(bb); + Bfree(bd); + Bfree(bd0); + goto failed_malloc; + } + } + + /* Now bd, bb and bs are scaled versions of tdv, srv and 0.5 ulp(srv), + respectively. Compute the difference |tdv - srv|, and compare + with 0.5 ulp(srv). */ + + delta = diff(bb, bd); + if (delta == NULL) { + Bfree(bb); + Bfree(bs); + Bfree(bd); + Bfree(bd0); + goto failed_malloc; + } + dsign = delta->sign; + delta->sign = 0; + i = cmp(delta, bs); + if (bc.nd > nd && i <= 0) { + if (dsign) + break; /* Must use bigcomp(). */ + + /* Here rv overestimates the truncated decimal value by at most + 0.5 ulp(rv). Hence rv either overestimates the true decimal + value by <= 0.5 ulp(rv), or underestimates it by some small + amount (< 0.1 ulp(rv)); either way, rv is within 0.5 ulps of + the true decimal value, so it's possible to exit. 
+ + Exception: if scaled rv is a normal exact power of 2, but not + DBL_MIN, then rv - 0.5 ulp(rv) takes us all the way down to the + next double, so the correctly rounded result is either rv - 0.5 + ulp(rv) or rv; in this case, use bigcomp to distinguish. */ + + if (!word1(&rv) && !(word0(&rv) & Bndry_mask)) { + /* rv can't be 0, since it's an overestimate for some + nonzero value. So rv is a normal power of 2. */ + j = (int)(word0(&rv) & Exp_mask) >> Exp_shift; + /* rv / 2^bc.scale = 2^(j - 1023 - bc.scale); use bigcomp if + rv / 2^bc.scale >= 2^-1021. */ + if (j - bc.scale >= 2) { + dval(&rv) -= 0.5 * sulp(&rv, &bc); + break; /* Use bigcomp. */ + } + } + + { + bc.nd = nd; + i = -1; /* Discarded digits make delta smaller. */ + } + } + + if (i < 0) { + /* Error is less than half an ulp -- check for + * special case of mantissa a power of two. + */ + if (dsign || word1(&rv) || word0(&rv) & Bndry_mask + || (word0(&rv) & Exp_mask) <= (2*P+1)*Exp_msk1 + ) { + break; + } + if (!delta->x[0] && delta->wds <= 1) { + /* exact result */ + break; + } + delta = lshift(delta,Log2P); + if (delta == NULL) { + Bfree(bb); + Bfree(bs); + Bfree(bd); + Bfree(bd0); + goto failed_malloc; + } + if (cmp(delta, bs) > 0) + goto drop_down; + break; + } + if (i == 0) { + /* exactly half-way between */ + if (dsign) { + if ((word0(&rv) & Bndry_mask1) == Bndry_mask1 + && word1(&rv) == ( + (bc.scale && + (y = word0(&rv) & Exp_mask) <= 2*P*Exp_msk1) ? 
+ (0xffffffff & (0xffffffff << (2*P+1-(y>>Exp_shift)))) : + 0xffffffff)) { + /*boundary case -- increment exponent*/ + word0(&rv) = (word0(&rv) & Exp_mask) + + Exp_msk1 + ; + word1(&rv) = 0; + /* dsign = 0; */ + break; + } + } + else if (!(word0(&rv) & Bndry_mask) && !word1(&rv)) { + drop_down: + /* boundary case -- decrement exponent */ + if (bc.scale) { + L = word0(&rv) & Exp_mask; + if (L <= (2*P+1)*Exp_msk1) { + if (L > (P+2)*Exp_msk1) + /* round even ==> */ + /* accept rv */ + break; + /* rv = smallest denormal */ + if (bc.nd > nd) + break; + goto undfl; + } + } + L = (word0(&rv) & Exp_mask) - Exp_msk1; + word0(&rv) = L | Bndry_mask1; + word1(&rv) = 0xffffffff; + break; + } + if (!odd) + break; + if (dsign) + dval(&rv) += sulp(&rv, &bc); + else { + dval(&rv) -= sulp(&rv, &bc); + if (!dval(&rv)) { + if (bc.nd >nd) + break; + goto undfl; + } + } + /* dsign = 1 - dsign; */ + break; + } + if ((aadj = ratio(delta, bs)) <= 2.) { + if (dsign) + aadj = aadj1 = 1.; + else if (word1(&rv) || word0(&rv) & Bndry_mask) { + if (word1(&rv) == Tiny1 && !word0(&rv)) { + if (bc.nd >nd) + break; + goto undfl; + } + aadj = 1.; + aadj1 = -1.; + } + else { + /* special case -- power of FLT_RADIX to be */ + /* rounded down... */ + + if (aadj < 2./FLT_RADIX) + aadj = 1./FLT_RADIX; + else + aadj *= 0.5; + aadj1 = -aadj; + } + } + else { + aadj *= 0.5; + aadj1 = dsign ? 
aadj : -aadj; + if (Flt_Rounds == 0) + aadj1 += 0.5; + } + y = word0(&rv) & Exp_mask; + + /* Check for overflow */ + + if (y == Exp_msk1*(DBL_MAX_EXP+Bias-1)) { + dval(&rv0) = dval(&rv); + word0(&rv) -= P*Exp_msk1; + adj.d = aadj1 * ulp(&rv); + dval(&rv) += adj.d; + if ((word0(&rv) & Exp_mask) >= + Exp_msk1*(DBL_MAX_EXP+Bias-P)) { + if (word0(&rv0) == Big0 && word1(&rv0) == Big1) { + Bfree(bb); + Bfree(bd); + Bfree(bs); + Bfree(bd0); + Bfree(delta); + goto ovfl; + } + word0(&rv) = Big0; + word1(&rv) = Big1; + goto cont; + } + else + word0(&rv) += P*Exp_msk1; + } + else { + if (bc.scale && y <= 2*P*Exp_msk1) { + if (aadj <= 0x7fffffff) { + if ((z = (ULong)aadj) <= 0) + z = 1; + aadj = z; + aadj1 = dsign ? aadj : -aadj; + } + dval(&aadj2) = aadj1; + word0(&aadj2) += (2*P+1)*Exp_msk1 - y; + aadj1 = dval(&aadj2); + } + adj.d = aadj1 * ulp(&rv); + dval(&rv) += adj.d; + } + z = word0(&rv) & Exp_mask; + if (bc.nd == nd) { + if (!bc.scale) + if (y == z) { + /* Can we stop now? */ + L = (Long)aadj; + aadj -= L; + /* The tolerances below are conservative. */ + if (dsign || word1(&rv) || word0(&rv) & Bndry_mask) { + if (aadj < .4999999 || aadj > .5000001) + break; + } + else if (aadj < .4999999/FLT_RADIX) + break; + } + } + cont: + Bfree(bb); + Bfree(bd); + Bfree(bs); + Bfree(delta); + } + Bfree(bb); + Bfree(bd); + Bfree(bs); + Bfree(bd0); + Bfree(delta); + if (bc.nd > nd) { + error = bigcomp(&rv, s0, &bc); + if (error) + goto failed_malloc; + } + + if (bc.scale) { + word0(&rv0) = Exp_1 - 2*P*Exp_msk1; + word1(&rv0) = 0; + dval(&rv) *= dval(&rv0); + } + + ret: + return sign ? -dval(&rv) : dval(&rv); + + parse_error: + return 0.0; + + failed_malloc: + errno = ENOMEM; + return -1.0; + + undfl: + return sign ? -0.0 : 0.0; + + ovfl: + errno = ERANGE; + /* Can't trust HUGE_VAL */ + word0(&rv) = Exp_mask; + word1(&rv) = 0; + return sign ? 
-dval(&rv) : dval(&rv); + +} + +static char * +rv_alloc(int i) +{ + int j, k, *r; + + j = sizeof(ULong); + for(k = 0; + sizeof(Bigint) - sizeof(ULong) - sizeof(int) + j <= (unsigned)i; + j <<= 1) + k++; + r = (int*)Balloc(k); + if (r == NULL) + return NULL; + *r = k; + return (char *)(r+1); +} + +static char * +nrv_alloc(char *s, char **rve, int n) +{ + char *rv, *t; + + rv = rv_alloc(n); + if (rv == NULL) + return NULL; + t = rv; + while((*t = *s++)) t++; + if (rve) + *rve = t; + return rv; +} + +/* freedtoa(s) must be used to free values s returned by dtoa + * when MULTIPLE_THREADS is #defined. It should be used in all cases, + * but for consistency with earlier versions of dtoa, it is optional + * when MULTIPLE_THREADS is not defined. + */ + +void +sb_freedtoa(char *s) +{ + Bigint *b = (Bigint *)((int *)s - 1); + b->maxwds = 1 << (b->k = *(int*)b); + Bfree(b); +} + +/* dtoa for IEEE arithmetic (dmg): convert double to ASCII string. + * + * Inspired by "How to Print Floating-Point Numbers Accurately" by + * Guy L. Steele, Jr. and Jon L. White [Proc. ACM SIGPLAN '90, pp. 112-126]. + * + * Modifications: + * 1. Rather than iterating, we use a simple numeric overestimate + * to determine k = floor(log10(d)). We scale relevant + * quantities using O(log2(k)) rather than O(k) multiplications. + * 2. For some modes > 2 (corresponding to ecvt and fcvt), we don't + * try to generate digits strictly left to right. Instead, we + * compute with fewer bits and propagate the carry if necessary + * when rounding the final digit up. This is often faster. + * 3. Under the assumption that input will be rounded nearest, + * mode 0 renders 1e23 as 1e23 rather than 9.999999999999999e22. + * That is, we allow equality in stopping tests when the + * round-nearest rule will give the same floating-point value + * as would satisfaction of the stopping test with strict + * inequality. + * 4. We remove common factors of powers of 2 from relevant + * quantities. + * 5. 
When converting floating-point integers less than 1e16, + * we use floating-point arithmetic rather than resorting + * to multiple-precision integers. + * 6. When asked to produce fewer than 15 digits, we first try + * to get by with floating-point arithmetic; we resort to + * multiple-precision integer arithmetic only if we cannot + * guarantee that the floating-point calculation has given + * the correctly rounded result. For k requested digits and + * "uniformly" distributed input, the probability is + * something like 10^(k-15) that we must resort to the Long + * calculation. + */ + +/* Additional notes (METD): (1) returns NULL on failure. (2) to avoid memory + leakage, a successful call to sb_dtoa should always be matched by a + call to sb_freedtoa. */ + +char * +sb_dtoa(double dd, int mode, int ndigits, + int *decpt, int *sign, char **rve) +{ + /* Arguments ndigits, decpt, sign are similar to those + of ecvt and fcvt; trailing zeros are suppressed from + the returned string. If not null, *rve is set to point + to the end of the return value. If d is +-Infinity or NaN, + then *decpt is set to 9999. + + mode: + 0 ==> shortest string that yields d when read in + and rounded to nearest. + 1 ==> like 0, but with Steele & White stopping rule; + e.g. with IEEE P754 arithmetic , mode 0 gives + 1e23 whereas mode 1 gives 9.999999999999999e22. + 2 ==> max(1,ndigits) significant digits. This gives a + return value similar to that of ecvt, except + that trailing zeros are suppressed. + 3 ==> through ndigits past the decimal point. This + gives a return value similar to that from fcvt, + except that trailing zeros are suppressed, and + ndigits can be negative. + 4,5 ==> similar to 2 and 3, respectively, but (in + round-nearest mode) with the tests of mode 0 to + possibly return a shorter string that rounds to d. + With IEEE arithmetic and compilation with + -DHonor_FLT_ROUNDS, modes 4 and 5 behave the same + as modes 2 and 3 when FLT_ROUNDS != 1. 
+ 6-9 ==> Debugging modes similar to mode - 4: don't try + fast floating-point estimate (if applicable). + + Values of mode other than 0-9 are treated as mode 0. + + Sufficient space is allocated to the return value + to hold the suppressed trailing zeros. + */ + + int bbits, b2, b5, be, dig, i, ieps, ilim, ilim0, ilim1, + j, j1, k, k0, k_check, leftright, m2, m5, s2, s5, + spec_case, try_quick; + Long L; + int denorm; + ULong x; + Bigint *b, *b1, *delta, *mlo, *mhi, *S; + U d2, eps, u; + double ds; + char *s, *s0; + + /* set pointers to NULL, to silence gcc compiler warnings and make + cleanup easier on error */ + mlo = mhi = S = 0; + s0 = 0; + + u.d = dd; + if (word0(&u) & Sign_bit) { + /* set sign for everything, including 0's and NaNs */ + *sign = 1; + word0(&u) &= ~Sign_bit; /* clear sign bit */ + } + else + *sign = 0; + + /* quick return for Infinities, NaNs and zeros */ + if ((word0(&u) & Exp_mask) == Exp_mask) + { + /* Infinity or NaN */ + *decpt = 9999; + if (!word1(&u) && !(word0(&u) & 0xfffff)) + return nrv_alloc("Infinity", rve, 8); + return nrv_alloc("NaN", rve, 3); + } + if (!dval(&u)) { + *decpt = 1; + return nrv_alloc("0", rve, 1); + } + + /* compute k = floor(log10(d)). The computation may leave k + one too large, but should never leave k too small. */ + b = d2b(&u, &be, &bbits); + if (b == NULL) + goto failed_malloc; + if ((i = (int)(word0(&u) >> Exp_shift1 & (Exp_mask>>Exp_shift1)))) { + dval(&d2) = dval(&u); + word0(&d2) &= Frac_mask1; + word0(&d2) |= Exp_11; + + /* log(x) ~=~ log(1.5) + (x-1.5)/1.5 + * log10(x) = log(x) / log(10) + * ~=~ log(1.5)/log(10) + (x-1.5)/(1.5*log(10)) + * log10(d) = (i-Bias)*log(2)/log(10) + log10(d2) + * + * This suggests computing an approximation k to log10(d) by + * + * k = (i - Bias)*0.301029995663981 + * + ( (d2-1.5)*0.289529654602168 + 0.176091259055681 ); + * + * We want k to be too large rather than too small. 
+ * The error in the first-order Taylor series approximation + * is in our favor, so we just round up the constant enough + * to compensate for any error in the multiplication of + * (i - Bias) by 0.301029995663981; since |i - Bias| <= 1077, + * and 1077 * 0.30103 * 2^-52 ~=~ 7.2e-14, + * adding 1e-13 to the constant term more than suffices. + * Hence we adjust the constant term to 0.1760912590558. + * (We could get a more accurate k by invoking log10, + * but this is probably not worthwhile.) + */ + + i -= Bias; + denorm = 0; + } + else { + /* d is denormalized */ + + i = bbits + be + (Bias + (P-1) - 1); + x = i > 32 ? word0(&u) << (64 - i) | word1(&u) >> (i - 32) + : word1(&u) << (32 - i); + dval(&d2) = x; + word0(&d2) -= 31*Exp_msk1; /* adjust exponent */ + i -= (Bias + (P-1) - 1) + 1; + denorm = 1; + } + ds = (dval(&d2)-1.5)*0.289529654602168 + 0.1760912590558 + + i*0.301029995663981; + k = (int)ds; + if (ds < 0. && ds != k) + k--; /* want k = floor(ds) */ + k_check = 1; + if (k >= 0 && k <= Ten_pmax) { + if (dval(&u) < tens[k]) + k--; + k_check = 0; + } + j = bbits - i - 1; + if (j >= 0) { + b2 = 0; + s2 = j; + } + else { + b2 = -j; + s2 = 0; + } + if (k >= 0) { + b5 = 0; + s5 = k; + s2 += k; + } + else { + b2 -= k; + b5 = -k; + s5 = 0; + } + if (mode < 0 || mode > 9) + mode = 0; + + try_quick = 1; + + if (mode > 5) { + mode -= 4; + try_quick = 0; + } + leftright = 1; + ilim = ilim1 = -1; /* Values for cases 0 and 1; done here to */ + /* silence erroneous "gcc -Wall" warning. 
*/ + switch(mode) { + case 0: + case 1: + i = 18; + ndigits = 0; + break; + case 2: + leftright = 0; + /* FALLTHRU */ + case 4: + if (ndigits <= 0) + ndigits = 1; + ilim = ilim1 = i = ndigits; + break; + case 3: + leftright = 0; + /* FALLTHRU */ + case 5: + i = ndigits + k + 1; + ilim = i; + ilim1 = i - 1; + if (i <= 0) + i = 1; + } + s0 = rv_alloc(i); + if (s0 == NULL) + goto failed_malloc; + s = s0; + + + if (ilim >= 0 && ilim <= Quick_max && try_quick) { + + /* Try to get by with floating-point arithmetic. */ + + i = 0; + dval(&d2) = dval(&u); + k0 = k; + ilim0 = ilim; + ieps = 2; /* conservative */ + if (k > 0) { + ds = tens[k&0xf]; + j = k >> 4; + if (j & Bletch) { + /* prevent overflows */ + j &= Bletch - 1; + dval(&u) /= bigtens[n_bigtens-1]; + ieps++; + } + for(; j; j >>= 1, i++) + if (j & 1) { + ieps++; + ds *= bigtens[i]; + } + dval(&u) /= ds; + } + else if ((j1 = -k)) { + dval(&u) *= tens[j1 & 0xf]; + for(j = j1 >> 4; j; j >>= 1, i++) + if (j & 1) { + ieps++; + dval(&u) *= bigtens[i]; + } + } + if (k_check && dval(&u) < 1. && ilim > 0) { + if (ilim1 <= 0) + goto fast_failed; + ilim = ilim1; + k--; + dval(&u) *= 10.; + ieps++; + } + dval(&eps) = ieps*dval(&u) + 7.; + word0(&eps) -= (P-1)*Exp_msk1; + if (ilim == 0) { + S = mhi = 0; + dval(&u) -= 5.; + if (dval(&u) > dval(&eps)) + goto one_digit; + if (dval(&u) < -dval(&eps)) + goto no_digits; + goto fast_failed; + } + if (leftright) { + /* Use Steele & White method of only + * generating digits needed. + */ + dval(&eps) = 0.5/tens[ilim-1] - dval(&eps); + for(i = 0;;) { + L = (Long)dval(&u); + dval(&u) -= L; + *s++ = '0' + (int)L; + if (dval(&u) < dval(&eps)) + goto ret1; + if (1. - dval(&u) < dval(&eps)) + goto bump_up; + if (++i >= ilim) + break; + dval(&eps) *= 10.; + dval(&u) *= 10.; + } + } + else { + /* Generate ilim digits, then fix them up. */ + dval(&eps) *= tens[ilim-1]; + for(i = 1;; i++, dval(&u) *= 10.) 
{ + L = (Long)(dval(&u)); + if (!(dval(&u) -= L)) + ilim = i; + *s++ = '0' + (int)L; + if (i == ilim) { + if (dval(&u) > 0.5 + dval(&eps)) + goto bump_up; + else if (dval(&u) < 0.5 - dval(&eps)) { + while(*--s == '0'); + s++; + goto ret1; + } + break; + } + } + } + fast_failed: + s = s0; + dval(&u) = dval(&d2); + k = k0; + ilim = ilim0; + } + + /* Do we have a "small" integer? */ + + if (be >= 0 && k <= Int_max) { + /* Yes. */ + ds = tens[k]; + if (ndigits < 0 && ilim <= 0) { + S = mhi = 0; + if (ilim < 0 || dval(&u) <= 5*ds) + goto no_digits; + goto one_digit; + } + for(i = 1;; i++, dval(&u) *= 10.) { + L = (Long)(dval(&u) / ds); + dval(&u) -= L*ds; + *s++ = '0' + (int)L; + if (!dval(&u)) { + break; + } + if (i == ilim) { + dval(&u) += dval(&u); + if (dval(&u) > ds || (dval(&u) == ds && L & 1)) { + bump_up: + while(*--s == '9') + if (s == s0) { + k++; + *s = '0'; + break; + } + ++*s++; + } + break; + } + } + goto ret1; + } + + m2 = b2; + m5 = b5; + if (leftright) { + i = + denorm ? be + (Bias + (P-1) - 1 + 1) : + 1 + P - bbits; + b2 += i; + s2 += i; + mhi = i2b(1); + if (mhi == NULL) + goto failed_malloc; + } + if (m2 > 0 && s2 > 0) { + i = m2 < s2 ? m2 : s2; + b2 -= i; + m2 -= i; + s2 -= i; + } + if (b5 > 0) { + if (leftright) { + if (m5 > 0) { + mhi = pow5mult(mhi, m5); + if (mhi == NULL) + goto failed_malloc; + b1 = mult(mhi, b); + Bfree(b); + b = b1; + if (b == NULL) + goto failed_malloc; + } + if ((j = b5 - m5)) { + b = pow5mult(b, j); + if (b == NULL) + goto failed_malloc; + } + } + else { + b = pow5mult(b, b5); + if (b == NULL) + goto failed_malloc; + } + } + S = i2b(1); + if (S == NULL) + goto failed_malloc; + if (s5 > 0) { + S = pow5mult(S, s5); + if (S == NULL) + goto failed_malloc; + } + + /* Check for special case that d is a normalized power of 2. 
*/ + + spec_case = 0; + if ((mode < 2 || leftright) + ) { + if (!word1(&u) && !(word0(&u) & Bndry_mask) + && word0(&u) & (Exp_mask & ~Exp_msk1) + ) { + /* The special case */ + b2 += Log2P; + s2 += Log2P; + spec_case = 1; + } + } + + /* Arrange for convenient computation of quotients: + * shift left if necessary so divisor has 4 leading 0 bits. + * + * Perhaps we should just compute leading 28 bits of S once + * and for all and pass them and a shift to quorem, so it + * can do shifts and ors to compute the numerator for q. + */ +#define iInc 28 + i = dshift(S, s2); + b2 += i; + m2 += i; + s2 += i; + if (b2 > 0) { + b = lshift(b, b2); + if (b == NULL) + goto failed_malloc; + } + if (s2 > 0) { + S = lshift(S, s2); + if (S == NULL) + goto failed_malloc; + } + if (k_check) { + if (cmp(b,S) < 0) { + k--; + b = multadd(b, 10, 0); /* we botched the k estimate */ + if (b == NULL) + goto failed_malloc; + if (leftright) { + mhi = multadd(mhi, 10, 0); + if (mhi == NULL) + goto failed_malloc; + } + ilim = ilim1; + } + } + if (ilim <= 0 && (mode == 3 || mode == 5)) { + if (ilim < 0) { + /* no digits, fcvt style */ + no_digits: + k = -1 - ndigits; + goto ret; + } + else { + S = multadd(S, 5, 0); + if (S == NULL) + goto failed_malloc; + if (cmp(b, S) <= 0) + goto no_digits; + } + one_digit: + *s++ = '1'; + k++; + goto ret; + } + if (leftright) { + if (m2 > 0) { + mhi = lshift(mhi, m2); + if (mhi == NULL) + goto failed_malloc; + } + + /* Compute mlo -- check for special case + * that d is a normalized power of 2. + */ + + mlo = mhi; + if (spec_case) { + mhi = Balloc(mhi->k); + if (mhi == NULL) + goto failed_malloc; + Bcopy(mhi, mlo); + mhi = lshift(mhi, Log2P); + if (mhi == NULL) + goto failed_malloc; + } + + for(i = 1;;i++) { + dig = quorem(b,S) + '0'; + /* Do we yet have the shortest decimal string + * that will round to d? + */ + j = cmp(b, mlo); + delta = diff(S, mhi); + if (delta == NULL) + goto failed_malloc; + j1 = delta->sign ? 
1 : cmp(b, delta); + Bfree(delta); + if (j1 == 0 && mode != 1 && !(word1(&u) & 1) + ) { + if (dig == '9') + goto round_9_up; + if (j > 0) + dig++; + *s++ = dig; + goto ret; + } + if (j < 0 || (j == 0 && mode != 1 + && !(word1(&u) & 1) + )) { + if (!b->x[0] && b->wds <= 1) { + goto accept_dig; + } + if (j1 > 0) { + b = lshift(b, 1); + if (b == NULL) + goto failed_malloc; + j1 = cmp(b, S); + if ((j1 > 0 || (j1 == 0 && dig & 1)) + && dig++ == '9') + goto round_9_up; + } + accept_dig: + *s++ = dig; + goto ret; + } + if (j1 > 0) { + if (dig == '9') { /* possible if i == 1 */ + round_9_up: + *s++ = '9'; + goto roundoff; + } + *s++ = dig + 1; + goto ret; + } + *s++ = dig; + if (i == ilim) + break; + b = multadd(b, 10, 0); + if (b == NULL) + goto failed_malloc; + if (mlo == mhi) { + mlo = mhi = multadd(mhi, 10, 0); + if (mlo == NULL) + goto failed_malloc; + } + else { + mlo = multadd(mlo, 10, 0); + if (mlo == NULL) + goto failed_malloc; + mhi = multadd(mhi, 10, 0); + if (mhi == NULL) + goto failed_malloc; + } + } + } + else + for(i = 1;; i++) { + *s++ = dig = quorem(b,S) + '0'; + if (!b->x[0] && b->wds <= 1) { + goto ret; + } + if (i >= ilim) + break; + b = multadd(b, 10, 0); + if (b == NULL) + goto failed_malloc; + } + + /* Round off last digit */ + + b = lshift(b, 1); + if (b == NULL) + goto failed_malloc; + j = cmp(b, S); + if (j > 0 || (j == 0 && dig & 1)) { + roundoff: + while(*--s == '9') + if (s == s0) { + k++; + *s++ = '1'; + goto ret; + } + ++*s++; + } + else { + while(*--s == '0'); + s++; + } + ret: + Bfree(S); + if (mhi) { + if (mlo && mlo != mhi) + Bfree(mlo); + Bfree(mhi); + } + ret1: + Bfree(b); + *s = 0; + *decpt = k + 1; + if (rve) + *rve = s; + return s0; + failed_malloc: + if (S) + Bfree(S); + if (mlo && mlo != mhi) + Bfree(mlo); + if (mhi) + Bfree(mhi); + if (b) + Bfree(b); + if (s0) + sb_freedtoa(s0); + return NULL; +} +#ifdef __cplusplus +} +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/err.c 
b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/err.c new file mode 100644 index 0000000000000000000000000000000000000000..8eb2c2b0590a0ff82857ece988bc8bada449f7e4 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/err.c @@ -0,0 +1,305 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/** + * @file err.c + * @brief Somewhat antiquated logging and error interface. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include +#include + +#include "sphinxbase/err.h" +#include "sphinxbase/prim_type.h" +#include "sphinxbase/filename.h" +#include "sphinxbase/ckd_alloc.h" + +static FILE* logfp = NULL; +static int logfp_disabled = FALSE; + +#if defined(__ANDROID__) +#include +static void +err_logcat_cb(void* user_data, err_lvl_t level, const char *fmt, ...); +#elif defined(_WIN32_WCE) +#include +#define vsnprintf _vsnprintf +static void +err_wince_cb(void* user_data, err_lvl_t level, const char *fmt, ...); +#endif + +#if defined(__ANDROID__) +static err_cb_f err_cb = err_logcat_cb; +#elif defined(_WIN32_WCE) +static err_cb_f err_cb = err_wince_cb; +#else +static err_cb_f err_cb = err_logfp_cb; +#endif +static void* err_user_data; +static err_lvl_t min_loglevel = ERR_WARN; +static const char *err_level[ERR_MAX] = + { + "DEBUG", "INFO", "WARN", "ERROR", "FATAL" + }; + +int +err_set_loglevel(err_lvl_t lvl) +{ + int rv = min_loglevel; + min_loglevel = lvl; + return rv; +} + +const char * +err_set_loglevel_str(char const *lvl) +{ + const char *rv = err_level[min_loglevel]; + int i; + + if (lvl == NULL) + return NULL; + if (!strncmp(lvl, "ERR_", 4)) + lvl += 4; + for (i = 0; i < ERR_MAX; ++i) { + if 
(!strcmp(lvl, err_level[i])) { + min_loglevel = i; + return rv; + } + } + return NULL; +} + +void +err_msg(err_lvl_t lvl, const char *path, long ln, const char *fmt, ...) +{ + + char msg[1024]; + va_list ap; + + if (!err_cb) + return; + if (lvl < min_loglevel) + return; + + va_start(ap, fmt); + vsnprintf(msg, sizeof(msg), fmt, ap); + va_end(ap); + + if (path) { + const char *fname = path2basename(path); + if (lvl == ERR_INFO) + err_cb(err_user_data, lvl, "%s: %s(%ld): %s", err_level[lvl], fname, ln, msg); + else + err_cb(err_user_data, lvl, "%s: \"%s\", line %ld: %s", err_level[lvl], fname, ln, msg); + } else { + err_cb(err_user_data, lvl, "%s", msg); + } +} + +#ifdef _WIN32_WCE /* No strerror for WinCE, so a separate implementation */ +void +err_msg_system(err_lvl_t lvl, const char *path, long ln, const char *fmt, ...) +{ + va_list ap; + LPVOID error_wstring; + DWORD error; + char msg[1024]; + char error_string[1024]; + + if (!err_cb) + return; + if (lvl < min_loglevel) + return; + + error = GetLastError(); + FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM | + FORMAT_MESSAGE_IGNORE_INSERTS, + NULL, + error, + 0, // Default language + (LPTSTR) &error_wstring, + 0, + NULL); + wcstombs(error_string, error_wstring, 1023); + LocalFree(error_wstring); + + va_start(ap, fmt); + vsnprintf(msg, sizeof(msg), fmt, ap); + va_end(ap); + + if (path) { + const char *fname = path2basename(path); + if (lvl == ERR_INFO) + err_cb(err_user_data, lvl, "%s: %s(%ld): %s: %s\n", err_prefix[lvl], fname, ln, msg, error_string); + else + err_cb(err_user_data, lvl, "%s: \"%s\", line %ld: %s: %s\n", err_prefix[lvl], fname, ln, msg, error_string); + } else { + err_cb(err_user_data, lvl, "%s: %s\n", msg, error_string); + } +} +#else +void +err_msg_system(err_lvl_t lvl, const char *path, long ln, const char *fmt, ...) 
+{ + int local_errno = errno; + + char msg[1024]; + va_list ap; + + if (!err_cb) + return; + if (lvl < min_loglevel) + return; + + va_start(ap, fmt); + vsnprintf(msg, sizeof(msg), fmt, ap); + va_end(ap); + + if (path) { + const char *fname = path2basename(path); + if (lvl == ERR_INFO) + err_cb(err_user_data, lvl, "%s: %s(%ld): %s: %s\n", err_level[lvl], fname, ln, msg, strerror(local_errno)); + else + err_cb(err_user_data, lvl, "%s: \"%s\", line %ld: %s: %s\n", err_level[lvl], fname, ln, msg, strerror(local_errno)); + } else { + err_cb(err_user_data, lvl, "%s: %s\n", msg, strerror(local_errno)); + } +} +#endif + +#if defined(__ANDROID__) +static void +err_logcat_cb(void *user_data, err_lvl_t lvl, const char *fmt, ...) +{ + static const int android_level[ERR_MAX] = {ANDROID_LOG_DEBUG, ANDROID_LOG_INFO, + ANDROID_LOG_INFO, ANDROID_LOG_WARN, ANDROID_LOG_ERROR, ANDROID_LOG_ERROR}; + + va_list ap; + va_start(ap, fmt); + __android_log_vprint(android_level[lvl], "cmusphinx", fmt, ap); + va_end(ap); +} +#elif defined(_WIN32_WCE) +static void +err_wince_cb(void *user_data, err_lvl_t lvl, const char *fmt, ...) +{ + char msg[1024]; + WCHAR *wmsg; + size_t size; + va_list ap; + + va_start(ap, fmt); + _vsnprintf(msg, sizeof(msg), fmt, ap); + va_end(ap); + + size = mbstowcs(NULL, msg, 0) + 1; + wmsg = ckd_calloc(size, sizeof(*wmsg)); + mbstowcs(wmsg, msg, size); + + OutputDebugStringW(wmsg); + ckd_free(wmsg); +} +#else +void +err_logfp_cb(void *user_data, err_lvl_t lvl, const char *fmt, ...) +{ + va_list ap; + FILE *fp = err_get_logfp(); + + (void)user_data; + (void)lvl; /* FIXME?!?! 
*/ + + if (!fp) + return; + + va_start(ap, fmt); + vfprintf(fp, fmt, ap); + va_end(ap); + fflush(fp); +} +#endif + +int +err_set_logfile(const char *path) +{ + FILE *newfp; + + if ((newfp = fopen(path, "a")) == NULL) + return -1; + err_set_logfp(newfp); + return 0; +} + +void +err_set_logfp(FILE *stream) +{ + if (logfp != NULL && logfp != stdout && logfp != stderr) + fclose(logfp); + if (stream == NULL) { + logfp_disabled = TRUE; + logfp = NULL; + return; + } + logfp_disabled = FALSE; + logfp = stream; + return; +} + +FILE * +err_get_logfp(void) +{ + if (logfp_disabled) + return NULL; + if (logfp == NULL) + return stderr; + + return logfp; +} + +void +err_set_callback(err_cb_f cb, void* user_data) +{ + err_cb = cb; + err_user_data= user_data; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/errno.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/errno.c new file mode 100644 index 0000000000000000000000000000000000000000..844b6f53804fe616b1360d93d7cc63a561bc3823 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/errno.c @@ -0,0 +1,51 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + */ +/********************************************************************* + * + * File: errno.c + * + * Description: functions and variables missing from Windows CE standard + * library + * + * Author: Silvio Moioli + * + *********************************************************************/ + +#include + +#if defined(_WIN32_WCE) +int errno; +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/f2c_lite.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/f2c_lite.c new file mode 100644 index 0000000000000000000000000000000000000000..cbbefead3ee22a4b74cfba84e5cbf81f6cdb411a --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/f2c_lite.c @@ -0,0 +1,551 @@ +#include +#include +#include +#include +#include + +#include "sphinxbase/f2c.h" + +#ifdef _MSC_VER +#pragma warning (disable: 4244) +#endif + + +extern void +s_wsfe(cilist * f) +{(void)f; +} +extern void +e_wsfe(void) +{; +} +extern void +do_fio(integer * c, char *s, ftnlen l) +{(void)c;(void)s;(void)l; +} + +/* You'll want this if you redo the *_lite.c files with the -C option + * to f2c for checking array subscripts. (It's not suggested you do that + * for production use, of course.) 
/*
 * s_rnge -- report an out-of-range array subscript and terminate.
 *
 * Called by code that f2c generated with -C (subscript checking).
 *
 *   var      name of the array whose subscript was out of range
 *   index    the offending subscript value
 *   routine  name of the routine in which the access happened
 *   lineno   source line of the access
 *
 * Writes a diagnostic to stderr and aborts the process.  The int return
 * type is kept only for the benefit of callers; the function never returns.
 */
extern int
s_rnge(char *var, int index, char *routine, int lineno)
{
    fprintf(stderr,
            "array index out-of-bounds for %s[%d] in routine %s:%d\n", var,
            index, routine, lineno);
    fflush(stderr);

    /* An always-false assert() is compiled out when NDEBUG is defined,
     * which would let the caller carry on with the bad subscript.
     * abort() terminates unconditionally. */
    abort();

    return 0;                   /* NOT REACHED */
}
floor(*x + .5) : -floor(.5 - *x)); +} + + +#ifdef KR_headers +double pow(); +double +pow_dd(ap, bp) +doublereal *ap, *bp; +#else +#undef abs + +double +pow_dd(doublereal * ap, doublereal * bp) +#endif +{ + return (pow(*ap, *bp)); +} + + +#ifdef KR_headers +float +pow_ri(ap, bp) +real *ap; +integer *bp; +#else +float +pow_ri(real * ap, integer * bp) +#endif +{ + float pow, x; + integer n; + unsigned long u; + + pow = 1; + x = *ap; + n = *bp; + + if (n != 0) { + if (n < 0) { + n = -n; + x = 1 / x; + } + for (u = n;;) { + if (u & 01) + pow *= x; + if (u >>= 1) + x *= x; + else + break; + } + } + return (pow); +} + +/* Unless compiled with -DNO_OVERWRITE, this variant of s_cat allows the + * target of a concatenation to appear on its right-hand side (contrary + * to the Fortran 77 Standard, but in accordance with Fortran 90). + */ +#define NO_OVERWRITE + + +#ifndef NO_OVERWRITE + +#undef abs +#ifdef KR_headers +extern char *F77_aloc(); +extern void free(); +extern void exit_(); +#else + +extern char *F77_aloc(ftnlen, char *); +#endif + +#endif /* NO_OVERWRITE */ + +VOID +#ifdef KR_headers +s_cat(lp, rpp, rnp, np, ll) +char *lp, *rpp[]; +ftnlen rnp[], *np, ll; +#else +s_cat(char *lp, char *rpp[], ftnlen rnp[], ftnlen * np, ftnlen ll) +#endif +{ + ftnlen i, nc; + char *rp; + ftnlen n = *np; +#ifndef NO_OVERWRITE + ftnlen L, m; + char *lp0, *lp1; + + lp0 = 0; + lp1 = lp; + L = ll; + i = 0; + while (i < n) { + rp = rpp[i]; + m = rnp[i++]; + if (rp >= lp1 || rp + m <= lp) { + if ((L -= m) <= 0) { + n = i; + break; + } + lp1 += m; + continue; + } + lp0 = lp; + lp = lp1 = F77_aloc(L = ll, "s_cat"); + break; + } + lp1 = lp; +#endif /* NO_OVERWRITE */ + for (i = 0; i < n; ++i) { + nc = ll; + if (rnp[i] < nc) + nc = rnp[i]; + ll -= nc; + rp = rpp[i]; + while (--nc >= 0) + *lp++ = *rp++; + } + while (--ll >= 0) + *lp++ = ' '; +#ifndef NO_OVERWRITE + if (lp0) { + memmove(lp0, lp1, L); + free(lp1); + } +#endif +} + + +/* compare two strings */ + +#ifdef KR_headers +integer 
/*
 * s_copy -- Fortran character assignment: a = b.
 *
 *   a   destination buffer, la characters long
 *   b   source buffer, lb characters long
 *
 * Copies min(la, lb) characters from b to a.  When the destination is
 * longer than the source (la > lb) the remainder of a is filled with
 * blanks.  No terminating NUL is written.
 *
 * The "#ifndef NO_OVERWRITE" sections add handling for overlapping
 * source and destination by copying backwards; when NO_OVERWRITE is
 * defined only the plain forward copies remain.
 */
#ifdef KR_headers
VOID
s_copy(a, b, la, lb)
register char *a, *b;
ftnlen la, lb;
#else
void
s_copy(register char *a, register char *b, ftnlen la, ftnlen lb)
#endif
{
    register char *aend, *bend;

    aend = a + la;

    /* Destination no longer than source: copy exactly la characters. */
    if (la <= lb)
#ifndef NO_OVERWRITE
        if (a <= b || a >= b + la)
#endif
            while (a < aend)
                *a++ = *b++;
#ifndef NO_OVERWRITE
        else
            /* Overlap with a inside b's range: copy from the end backwards. */
            for (b += la; a < aend;)
                *--aend = *--b;
#endif

    /* Destination longer than source: copy lb characters, then pad. */
    else {
        bend = b + lb;
#ifndef NO_OVERWRITE
        if (a <= b || a >= bend)
#endif
            while (b < bend)
                *a++ = *b++;
#ifndef NO_OVERWRITE
        else {
            /* Backward copy; afterwards a is back at the start, so step
             * it past the copied region again before padding. */
            a += lb;
            while (b < bend)
                *--a = *--bend;
            a += lb;
        }
#endif
        /* Blank-fill whatever is left of the destination. */
        while (a < aend)
            *a++ = ' ';
    }
}
float abr, abi; + + if ((abr = b->r) < 0.) + abr = -abr; + if ((abi = b->i) < 0.) + abi = -abi; + if (abr <= abi) { + /*Let IEEE Infinties handle this ;( */ + /*if(abi == 0) + sig_die("complex division by zero", 1); */ + ratio = b->r / b->i; + den = b->i * (1 + ratio * ratio); + c->r = (a->r * ratio + a->i) / den; + c->i = (a->i * ratio - a->r) / den; + } + + else { + ratio = b->i / b->r; + den = b->r * (1 + ratio * ratio); + c->r = (a->r + a->i * ratio) / den; + c->i = (a->i - a->r * ratio) / den; + } + +} + + +#ifdef KR_headers +double sqrt(); +double f__cabs(); +VOID +z_sqrt(r, z) +complex *r, *z; +#else +#undef abs + +extern float f__cabs(float, float); +void +z_sqrt(complex * r, complex * z) +#endif +{ + float mag; + + if ((mag = f__cabs(z->r, z->i)) == 0.) + r->r = r->i = 0.; + else if (z->r > 0) { + r->r = sqrt(0.5 * (mag + z->r)); + r->i = z->i / r->r / 2; + } + else { + r->i = sqrt(0.5 * (mag - z->r)); + if (z->i < 0) + r->i = -r->i; + r->r = z->i / r->i / 2; + } +} + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef KR_headers + integer pow_ii(ap, bp) integer *ap, *bp; +#else + integer pow_ii(integer * ap, integer * bp) +#endif + { + integer pow, x, n; + unsigned long u; + + x = *ap; + n = *bp; + + if (n <= 0) { + if (n == 0 || x == 1) + return 1; + if (x != -1) + return x != 0 ? 
1 / x : 0; + n = -n; + } u = n; + for (pow = 1;;) { + if (u & 01) + pow *= x; + if (u >>= 1) + x *= x; + else + break; + } + return (pow); + } +#ifdef __cplusplus +} +#endif + +#ifdef KR_headers +extern void f_exit(); +VOID +s_stop(s, n) +char *s; +ftnlen n; +#else +#undef abs +#undef min +#undef max +#ifdef __cplusplus +extern "C" { +#endif +#ifdef __cplusplus + extern "C" { +#endif + void f_exit(void); + + int s_stop(char *s, ftnlen n) +#endif + { + int i; + + if (n > 0) { + fprintf(stderr, "STOP "); + for (i = 0; i < n; ++i) + putc(*s++, stderr); + fprintf(stderr, " statement executed\n"); + } +#ifdef NO_ONEXIT + f_exit(); +#endif + exit(0); + +/* We cannot avoid (useless) compiler diagnostics here: */ +/* some compilers complain if there is no return statement, */ +/* and others complain that this one cannot be reached. */ + + return 0; /* NOT REACHED */ + } +#ifdef __cplusplus + } +#endif +#ifdef __cplusplus +} +#endif diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/filename.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/filename.c new file mode 100644 index 0000000000000000000000000000000000000000..3f4ae4750bf1b44f6336df38974d7a7413dbd243 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/filename.c @@ -0,0 +1,120 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * filename.c -- File and path name operations. + */ + +#include +#include +#include +#include + +#include "sphinxbase/filename.h" + +#ifdef _MSC_VER +#pragma warning (disable: 4996) +#endif + +const char * +path2basename(const char *path) +{ + const char *result; + +#if defined(_WIN32) || defined(__CYGWIN__) + result = strrchr(path, '\\'); +#else + result = strrchr(path, '/'); +#endif + + return (result == NULL ? 
/*
 * path2dirname -- copy the leading directory components of path into dir.
 *
 *   path  NUL-terminated path name
 *   dir   caller-supplied buffer, at least strlen(path) + 1 bytes and at
 *         least 2 bytes
 *
 * The path is scanned backwards for the last separator ('/', and also
 * '\\' on Windows/Cygwin); everything before it is copied to dir.  When
 * no separator is found, "." is stored.  The character at index 0 is
 * never examined, so a path such as "/file" also yields ".".
 */
void
path2dirname(const char *path, char *dir)
{
    size_t i, l;

    l = strlen(path);
    /* An empty path has no last character: starting from l - 1 would wrap
     * around to SIZE_MAX and read far outside the string. */
    i = (l > 0) ? l - 1 : 0;
#if defined(_WIN32) || defined(__CYGWIN__)
    while ((i > 0) && !(path[i] == '/' || path[i] == '\\'))
        --i;
#else
    while ((i > 0) && !(path[i] == '/'))
        --i;
#endif
    if (i == 0) {
        dir[0] = '.';
        dir[1] = '\0';
    } else {
        memcpy(dir, path, i);
        dir[i] = '\0';
    }
}


/*
 * strip_fileext -- strip off the shortest trailing .xyz suffix.
 *
 *   path  NUL-terminated path name
 *   root  caller-supplied buffer, at least strlen(path) + 1 bytes
 *
 * Copies path up to (not including) its last '.' into root and
 * NUL-terminates it.  When no '.' is found after index 0, the whole path
 * is copied unchanged.  memmove is used so that root may be the same
 * buffer as path.
 */
void
strip_fileext(const char *path, char *root)
{
    size_t i, l;

    l = strlen(path);
    /* Same empty-string guard as in path2dirname. */
    i = (l > 0) ? l - 1 : 0;
    while ((i > 0) && (path[i] != '.'))
        --i;
    if (i == 0) {
        memmove(root, path, l + 1);     /* Didn't find a . */
    } else {
        memmove(root, path, i);
        /* strncpy() stops at i bytes without writing a terminator, so the
         * result must be terminated explicitly. */
        root[i] = '\0';
    }
}

/*
 * path_is_absolute -- test if this path is absolute.
 *
 * Returns non-zero for an absolute path: a drive letter followed by ':'
 * and a slash on Windows, a leading slash or backslash on Windows CE,
 * and a leading '/' everywhere else.
 */
int
path_is_absolute(const char *path)
{
#if defined(_WIN32) && !defined(_WIN32_WCE) /* FIXME: Also SymbianOS */
    return /* Starts with drive letter : \ or / */
        (strlen(path) >= 3
         &&
         ((path[0] >= 'A' && path[0] <= 'Z')
          || (path[0] >= 'a' && path[0] <= 'z'))
         && path[1] == ':'
         && (path[2] == '/' || path[2] == '\\'));
#elif defined(_WIN32_WCE)
    return path[0] == '\\' || path[0] == '/';
#else /* Assume Unix */
    return path[0] == '/';
#endif
}
class LineIterator(object):
    """LineIterator(iterable)

    Return rstrip()'d lines from iterable, while keeping a count of the
    line number in the .lineno attribute.

    The counter is advanced before the underlying item is fetched, so it
    also advances on the call that finally raises StopIteration.
    """
    def __init__(self, iterable):
        object.__init__(self)
        self.iterable = iter(iterable)
        self.lineno = 0

    def __iter__(self):
        return self

    def __next__(self):
        self.lineno += 1
        # The built-in next() works with both Python 2 and Python 3
        # iterators, unlike calling the .next() method directly.
        line = next(self.iterable)
        return line.rstrip()

    # Python 2 spells the iterator protocol method "next".
    next = __next__


class PushbackIterator(object):
    """PushbackIterator(iterable)

    Return an iterator for which items can be pushed back into.
    Call the .pushback(item) method to have item returned as the next
    value of the iterator.  Pushed-back items are returned most recently
    pushed first.
    """
    def __init__(self, iterable):
        object.__init__(self)
        self.iterable = iter(iterable)
        self.buffer = []

    def __iter__(self):
        return self

    def __next__(self):
        if self.buffer:
            return self.buffer.pop()
        return next(self.iterable)

    # Python 2 spells the iterator protocol method "next".
    next = __next__

    def pushback(self, item):
        self.buffer.append(item)
def getDependencies(filename):
    """For a Fortran source file, return a list of routines declared as EXTERNAL
    in it.

    filename is opened for reading and its statements are obtained from
    fortranSourceLines().  The names following each EXTERNAL keyword are
    split on commas, stripped, lower-cased, and collected in order of
    appearance; empty names are dropped.
    """
    external_pat = re.compile(r'^\s*EXTERNAL\s', re.I)
    routines = []
    # The with-block closes the file even when fortranSourceLines() raises
    # on a malformed source line.
    with open(filename) as fo:
        for lineno, line in fortranSourceLines(fo):
            m = external_pat.match(line)
            if not m:
                continue
            names = [n.strip().lower() for n in line[m.end():].split(',')]
            routines.extend(n for n in names if n)
    return routines
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + A C-program for MT19937, with initialization improved 2002/1/26. + Coded by Takuji Nishimura and Makoto Matsumoto. + + Before using, initialize the state by using init_genrand(seed) + or init_by_array(init_key, key_length). + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright +` notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + + Any feedback is very welcome. 
/* MT19937 parameters */
#define N 624                   /* number of words in the state vector */
#define M 397                   /* offset of the word mixed in by the twist */
#define MATRIX_A 0x9908b0dfUL   /* constant vector a */
#define UPPER_MASK 0x80000000UL /* most significant w-r bits */
#define LOWER_MASK 0x7fffffffUL /* least significant r bits */

void init_genrand(unsigned long s);

/* Seed the generator; forwards to init_genrand(). */
void
genrand_seed(unsigned long s)
{
    init_genrand(s);
}


static unsigned long mt[N];     /* the state vector */
static int mti = N + 1;         /* index of the next word to hand out;
                                   N+1 means mt[] was never seeded */

/* Fill mt[] from a seed and mark the whole vector as consumed. */
void
init_genrand(unsigned long s)
{
    int i;

    mt[0] = s & 0xffffffffUL;
    for (i = 1; i < N; i++) {
        unsigned long prev = mt[i - 1];

        /* See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. */
        /* The final mask keeps each word to 32 bits on >32 bit machines. */
        mt[i] = (1812433253UL * (prev ^ (prev >> 30)) + i) & 0xffffffffUL;
    }
    mti = N;
}

/* Return a random number on the [0,0xffffffff] interval. */
unsigned long
genrand_int32(void)
{
    unsigned long y;

    if (mti >= N) {             /* state exhausted: regenerate N words */
        int k;

        if (mti == N + 1)       /* never seeded: use the default seed */
            init_genrand(5489UL);

        /* One pass over the vector with wrap-around indices; words are
         * updated in place in ascending order. */
        for (k = 0; k < N; k++) {
            y = (mt[k] & UPPER_MASK) | (mt[(k + 1) % N] & LOWER_MASK);
            mt[k] = mt[(k + M) % N] ^ (y >> 1)
                ^ ((y & 0x1UL) ? MATRIX_A : 0x0UL);
        }

        mti = 0;
    }

    y = mt[mti++];

    /* Tempering */
    y ^= y >> 11;
    y ^= (y << 7) & 0x9d2c5680UL;
    y ^= (y << 15) & 0xefc60000UL;
    y ^= y >> 18;

    return y;
}

/* Return a random number on the [0,0x7fffffff] interval. */
long
genrand_int31(void)
{
    unsigned long word = genrand_int32();

    return (long) (word >> 1);
}

/* Return a random number on the [0,1] real interval. */
double
genrand_real1(void)
{
    /* divided by 2^32-1 */
    return genrand_int32() * (1.0 / 4294967295.0);
}

/* Return a random number on the [0,1) real interval. */
double
genrand_real2(void)
{
    /* divided by 2^32 */
    return genrand_int32() * (1.0 / 4294967296.0);
}

/* Return a random number on the (0,1) real interval. */
double
genrand_real3(void)
{
    double word = (double) genrand_int32();

    /* divided by 2^32 */
    return (word + 0.5) * (1.0 / 4294967296.0);
}

/* Return a random number on [0,1) with 53-bit resolution. */
double
genrand_res53(void)
{
    unsigned long a, b;

    a = genrand_int32() >> 5;
    b = genrand_int32() >> 6;
    return (a * 67108864.0 + b) * (1.0 / 9007199254740992.0);
}

/* These real versions are due to Isaku Wada, 2002/01/09 added */
+ * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * glist.h -- Module for maintaining a generic, linear linked-list structure. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: glist.c,v $ + * Revision 1.8 2005/06/22 03:02:51 arthchan2003 + * 1, Fixed doxygen documentation, 2, add keyword. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 09-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added glist_chkdup_*(). 
+ * + * 13-Feb-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created from earlier version. + */ + + +#include +#include +#include +#include + +#include "sphinxbase/glist.h" +#include "sphinxbase/ckd_alloc.h" + + +glist_t +glist_add_ptr(glist_t g, void *ptr) +{ + gnode_t *gn; + + gn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + gn->data.ptr = ptr; + gn->next = g; + return ((glist_t) gn); /* Return the new head of the list */ +} + + +glist_t +glist_add_int32(glist_t g, int32 val) +{ + gnode_t *gn; + + gn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + gn->data.i = (long)val; + gn->next = g; + return ((glist_t) gn); /* Return the new head of the list */ +} + + +glist_t +glist_add_uint32(glist_t g, uint32 val) +{ + gnode_t *gn; + + gn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + gn->data.ui = (unsigned long)val; + gn->next = g; + return ((glist_t) gn); /* Return the new head of the list */ +} + + +glist_t +glist_add_float32(glist_t g, float32 val) +{ + gnode_t *gn; + + gn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + gn->data.fl = (double)val; + gn->next = g; + return ((glist_t) gn); /* Return the new head of the list */ +} + + +glist_t +glist_add_float64(glist_t g, float64 val) +{ + gnode_t *gn; + + gn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + gn->data.fl = (double)val; + gn->next = g; + return ((glist_t) gn); /* Return the new head of the list */ +} + +void +glist_free(glist_t g) +{ + gnode_t *gn; + + while (g) { + gn = g; + g = gn->next; + ckd_free((void *) gn); + } +} + +int32 +glist_count(glist_t g) +{ + gnode_t *gn; + int32 n; + + for (gn = g, n = 0; gn; gn = gn->next, n++); + return n; +} + + +gnode_t * +glist_tail(glist_t g) +{ + gnode_t *gn; + + if (!g) + return NULL; + + for (gn = g; gn->next; gn = gn->next); + return gn; +} + + +glist_t +glist_reverse(glist_t g) +{ + gnode_t *gn, *nextgn; + gnode_t *rev; + + rev = NULL; + for (gn = g; gn; gn = nextgn) { + nextgn = gn->next; + + gn->next = rev; + rev = gn; + } + + return 
rev; +} + + +gnode_t * +glist_insert_ptr(gnode_t * gn, void *ptr) +{ + gnode_t *newgn; + + newgn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + newgn->data.ptr = ptr; + newgn->next = gn->next; + gn->next = newgn; + + return newgn; +} + + +gnode_t * +glist_insert_int32(gnode_t * gn, int32 val) +{ + gnode_t *newgn; + + newgn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + newgn->data.i = val; + newgn->next = gn->next; + gn->next = newgn; + + return newgn; +} + + +gnode_t * +glist_insert_uint32(gnode_t * gn, uint32 val) +{ + gnode_t *newgn; + + newgn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + newgn->data.ui = val; + newgn->next = gn->next; + + gn->next = newgn; + + return newgn; +} + + +gnode_t * +glist_insert_float32(gnode_t * gn, float32 val) +{ + gnode_t *newgn; + + newgn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + newgn->data.fl = (double)val; + newgn->next = gn->next; + gn->next = newgn; + + return newgn; +} + + +gnode_t * +glist_insert_float64(gnode_t * gn, float64 val) +{ + gnode_t *newgn; + + newgn = (gnode_t *) ckd_calloc(1, sizeof(gnode_t)); + newgn->data.fl = (double)val; + newgn->next = gn->next; + gn->next = newgn; + + return newgn; +} + +gnode_t * +gnode_free(gnode_t * gn, gnode_t * pred) +{ + gnode_t *next; + + next = gn->next; + if (pred) { + assert(pred->next == gn); + + pred->next = next; + } + + ckd_free((char *) gn); + + return next; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/hash_table.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/hash_table.c new file mode 100644 index 0000000000000000000000000000000000000000..6a944069901ecb9c6936f485e788d30dd792e4dd --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/hash_table.c @@ -0,0 +1,706 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 
Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * hash.c -- Hash table module. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. 
+ * ********************************************** + * + * HISTORY + * $Log: hash.c,v $ + * Revision 1.5 2005/06/22 03:04:01 arthchan2003 + * 1, Implemented hash_delete and hash_display, 2, Fixed doxygen documentation, 3, Added keyword. + * + * Revision 1.9 2005/05/25 06:17:53 archan + * Delete the test code in cmd_ln.c and fixed platform specific code of hash.c + * + * Revision 1.8 2005/05/24 01:10:54 archan + * Fix a bug when the value only appear in the hash but there is no chain. Also make sure that prev was initialized to NULL. All success cases were tested, but not tested with the deletion is tested. + * + * Revision 1.6 2005/05/24 00:00:45 archan + * Added basic functionalities to hash_t: 1, display and 2, delete a key from a hash. \n + * + * Revision 1.5 2005/05/11 07:01:38 archan + * Added comments on the usage of the current implementation of hash tables. + * + * Revision 1.4 2005/05/03 04:09:11 archan + * Implemented the heart of word copy search. For every ci-phone, every word end, a tree will be allocated to preserve its pathscore. This is different from 3.5 or below, only the best score for a particular ci-phone, regardless of the word-ends will be preserved at every frame. The graph propagation will not collect unused word tree at this point. srch_WST_propagate_wd_lv2 is also as the most stupid in the century. But well, after all, everything needs a start. I will then really get the results from the search and see how it looks. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 05-May-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Removed hash_key2hash(). Added hash_enter_bkey() and hash_lookup_bkey(), + * and len attribute to hash_entry_t. + * + * 30-Apr-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Added hash_key2hash(). + * + * 18-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Included case sensitive/insensitive option. 
Removed local, static + * maintenance of all hash tables. + * + * 31-Jul-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon + * Created. + */ + + +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning (disable: 4018) +#endif + +#include "sphinxbase/hash_table.h" +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/case.h" + + +#if 0 +static void +prime_sieve(int32 max) +{ + char *notprime; + int32 p, pp; + + notprime = (char *) ckd_calloc(max + 1, 1); + p = 2; + for (;;) { + printf("%d\n", p); + for (pp = p + p; pp <= max; pp += p) + notprime[pp] = 1; + for (++p; (p <= max) && notprime[p]; p++); + if (p > max) + break; + } +} +#endif + + +/* + * HACK!! Initial hash table size is restricted by this set of primes. (Of course, + * collision resolution by chaining will accommodate more entries indefinitely, but + * efficiency will drop.) + */ +const int32 prime[] = { + 101, 211, 307, 401, 503, 601, 701, 809, 907, + 1009, 1201, 1601, 2003, 2411, 3001, 4001, 5003, 6007, 7001, 8009, + 9001, + 10007, 12007, 16001, 20011, 24001, 30011, 40009, 50021, 60013, + 70001, 80021, 90001, + 100003, 120011, 160001, 200003, 240007, 300007, 400009, 500009, + 600011, 700001, 800011, 900001, + -1 +}; + + +/** + * This function returns a very large prime. + */ +static int32 +prime_size(int32 size) +{ + int32 i; + + for (i = 0; (prime[i] > 0) && (prime[i] < size); i++); + if (prime[i] <= 0) { + E_WARN("Very large hash table requested (%d entries)\n", size); + --i; + } + return (prime[i]); +} + + +hash_table_t * +hash_table_new(int32 size, int32 casearg) +{ + hash_table_t *h; + + h = (hash_table_t *) ckd_calloc(1, sizeof(hash_table_t)); + h->size = prime_size(size + (size >> 1)); + h->nocase = (casearg == HASH_CASE_NO); + h->table = (hash_entry_t *) ckd_calloc(h->size, sizeof(hash_entry_t)); + /* The above calloc clears h->table[*].key and .next to NULL, i.e. 
an empty table */ + + return h; +} + + +/* + * Compute hash value for given key string. + * Somewhat tuned for English text word strings. + */ +static uint32 +key2hash(hash_table_t * h, const char *key) +{ + + register const char *cp; + + /* This is a hack because the best way to solve it is to make sure + all character representation is unsigned character in the first place. + (or better unicode.) */ + register unsigned char c; + register int32 s; + register uint32 hash; + + hash = 0; + s = 0; + + if (h->nocase) { + for (cp = key; *cp; cp++) { + c = *cp; + c = UPPER_CASE(c); + hash += c << s; + s += 5; + if (s >= 25) + s -= 24; + } + } + else { + for (cp = key; *cp; cp++) { + hash += (*cp) << s; + s += 5; + if (s >= 25) + s -= 24; + } + } + + return (hash % h->size); +} + + +static char * +makekey(uint8 * data, size_t len, char *key) +{ + size_t i, j; + + if (!key) + key = (char *) ckd_calloc(len * 2 + 1, sizeof(char)); + + for (i = 0, j = 0; i < len; i++, j += 2) { + key[j] = 'A' + (data[i] & 0x000f); + key[j + 1] = 'J' + ((data[i] >> 4) & 0x000f); + } + key[j] = '\0'; + + return key; +} + + +static int32 +keycmp_nocase(hash_entry_t * entry, const char *key) +{ + char c1, c2; + int32 i; + const char *str; + + str = entry->key; + for (i = 0; (uint32)i < entry->len; i++) { + c1 = *(str++); + c1 = UPPER_CASE(c1); + c2 = *(key++); + c2 = UPPER_CASE(c2); + if (c1 != c2) + return (c1 - c2); + } + + return 0; +} + + +static int32 +keycmp_case(hash_entry_t * entry, const char *key) +{ + char c1, c2; + int32 i; + const char *str; + + str = entry->key; + for (i = 0; (uint32)i < entry->len; i++) { + c1 = *(str++); + c2 = *(key++); + if (c1 != c2) + return (c1 - c2); + } + + return 0; +} + + +/* + * Lookup entry with hash-value hash in table h for given key + * Return value: hash_entry_t for key + */ +static hash_entry_t * +lookup(hash_table_t * h, uint32 hash, const char *key, size_t len) +{ + hash_entry_t *entry; + + entry = &(h->table[hash]); + if (entry->key == NULL) + 
return NULL; + + if (h->nocase) { + while (entry && ((entry->len != len) + || (keycmp_nocase(entry, key) != 0))) + entry = entry->next; + } + else { + while (entry && ((entry->len != len) + || (keycmp_case(entry, key) != 0))) + entry = entry->next; + } + + return entry; +} + + +int32 +hash_table_lookup(hash_table_t * h, const char *key, void ** val) +{ + hash_entry_t *entry; + uint32 hash; + size_t len; + + hash = key2hash(h, key); + len = strlen(key); + + entry = lookup(h, hash, key, len); + if (entry) { + if (val) + *val = entry->val; + return 0; + } + else + return -1; +} + +int32 +hash_table_lookup_int32(hash_table_t * h, const char *key, int32 *val) +{ + void *vval; + int32 rv; + + rv = hash_table_lookup(h, key, &vval); + if (rv != 0) + return rv; + if (val) + *val = (int32)(long)vval; + return 0; +} + + +int32 +hash_table_lookup_bkey(hash_table_t * h, const char *key, size_t len, void ** val) +{ + hash_entry_t *entry; + uint32 hash; + char *str; + + str = makekey((uint8 *) key, len, NULL); + hash = key2hash(h, str); + ckd_free(str); + + entry = lookup(h, hash, key, len); + if (entry) { + if (val) + *val = entry->val; + return 0; + } + else + return -1; +} + +int32 +hash_table_lookup_bkey_int32(hash_table_t * h, const char *key, size_t len, int32 *val) +{ + void *vval; + int32 rv; + + rv = hash_table_lookup_bkey(h, key, len, &vval); + if (rv != 0) + return rv; + if (val) + *val = (int32)(long)vval; + return 0; +} + + +static void * +enter(hash_table_t * h, uint32 hash, const char *key, size_t len, void *val, int32 replace) +{ + hash_entry_t *cur, *new; + + if ((cur = lookup(h, hash, key, len)) != NULL) { + void *oldval; + /* Key already exists. 
*/ + oldval = cur->val; + if (replace) { + /* Replace the pointer if replacement is requested, + * because this might be a different instance of the same + * string (this verges on magic, sorry) */ + cur->key = key; + cur->val = val; + } + return oldval; + } + + cur = &(h->table[hash]); + if (cur->key == NULL) { + /* Empty slot at hashed location; add this entry */ + cur->key = key; + cur->len = len; + cur->val = val; + + /* Added by ARCHAN at 20050515. This allows deletion could work. */ + cur->next = NULL; + + } + else { + /* Key collision; create new entry and link to hashed location */ + new = (hash_entry_t *) ckd_calloc(1, sizeof(hash_entry_t)); + new->key = key; + new->len = len; + new->val = val; + new->next = cur->next; + cur->next = new; + } + ++h->inuse; + + return val; +} + +/* 20050523 Added by ARCHAN to delete a key from a hash table */ +static void * +delete(hash_table_t * h, uint32 hash, const char *key, size_t len) +{ + hash_entry_t *entry, *prev; + void *val; + + prev = NULL; + entry = &(h->table[hash]); + if (entry->key == NULL) + return NULL; + + if (h->nocase) { + while (entry && ((entry->len != len) + || (keycmp_nocase(entry, key) != 0))) { + prev = entry; + entry = entry->next; + } + } + else { + while (entry && ((entry->len != len) + || (keycmp_case(entry, key) != 0))) { + prev = entry; + entry = entry->next; + } + } + + if (entry == NULL) + return NULL; + + /* At this point, entry will be the one required to be deleted, prev + will contain the previous entry + */ + val = entry->val; + + if (prev == NULL) { + /* That is to say the entry in the hash table (not the chain) matched the key. */ + /* We will then copy the things from the next entry to the hash table */ + prev = entry; + if (entry->next) { /* There is a next entry, great, copy it. 
*/ + entry = entry->next; + prev->key = entry->key; + prev->len = entry->len; + prev->val = entry->val; + prev->next = entry->next; + ckd_free(entry); + } + else { /* There is not a next entry, just set the key to null */ + prev->key = NULL; + prev->len = 0; + prev->next = NULL; + } + + } + else { /* This case is simple */ + prev->next = entry->next; + ckd_free(entry); + } + + /* Do wiring and free the entry */ + + --h->inuse; + + return val; +} + +void +hash_table_empty(hash_table_t *h) +{ + hash_entry_t *e, *e2; + int32 i; + + for (i = 0; i < h->size; i++) { + /* Free collision lists. */ + for (e = h->table[i].next; e; e = e2) { + e2 = e->next; + ckd_free((void *) e); + } + memset(&h->table[i], 0, sizeof(h->table[i])); + } + h->inuse = 0; +} + + +void * +hash_table_enter(hash_table_t * h, const char *key, void *val) +{ + uint32 hash; + size_t len; + + hash = key2hash(h, key); + len = strlen(key); + return (enter(h, hash, key, len, val, 0)); +} + +void * +hash_table_replace(hash_table_t * h, const char *key, void *val) +{ + uint32 hash; + size_t len; + + hash = key2hash(h, key); + len = strlen(key); + return (enter(h, hash, key, len, val, 1)); +} + +void * +hash_table_delete(hash_table_t * h, const char *key) +{ + uint32 hash; + size_t len; + + hash = key2hash(h, key); + len = strlen(key); + + return (delete(h, hash, key, len)); +} + +void * +hash_table_enter_bkey(hash_table_t * h, const char *key, size_t len, void *val) +{ + uint32 hash; + char *str; + + str = makekey((uint8 *) key, len, NULL); + hash = key2hash(h, str); + ckd_free(str); + + return (enter(h, hash, key, len, val, 0)); +} + +void * +hash_table_replace_bkey(hash_table_t * h, const char *key, size_t len, void *val) +{ + uint32 hash; + char *str; + + str = makekey((uint8 *) key, len, NULL); + hash = key2hash(h, str); + ckd_free(str); + + return (enter(h, hash, key, len, val, 1)); +} + +void * +hash_table_delete_bkey(hash_table_t * h, const char *key, size_t len) +{ + uint32 hash; + char *str; + + str 
= makekey((uint8 *) key, len, NULL); + hash = key2hash(h, str); + ckd_free(str); + + return (delete(h, hash, key, len)); +} + +void +hash_table_display(hash_table_t * h, int32 showdisplay) +{ + hash_entry_t *e; + int i, j; + j = 0; + + printf("Hash with chaining representation of the hash table\n"); + + for (i = 0; i < h->size; i++) { + e = &(h->table[i]); + if (e->key != NULL) { + printf("|key:"); + if (showdisplay) + printf("%s", e->key); + else + printf("%p", e->key); + + printf("|len:%zd|val=%ld|->", e->len, (long)e->val); + if (e->next == NULL) { + printf("NULL\n"); + } + j++; + + for (e = e->next; e; e = e->next) { + printf("|key:"); + if (showdisplay) + printf("%s", e->key); + + printf("|len:%zd|val=%ld|->", e->len, (long)e->val); + if (e->next == NULL) { + printf("NULL\n"); + } + j++; + } + } + } + + printf("The total number of keys =%d\n", j); +} + + +glist_t +hash_table_tolist(hash_table_t * h, int32 * count) +{ + glist_t g; + hash_entry_t *e; + int32 i, j; + + g = NULL; + + j = 0; + for (i = 0; i < h->size; i++) { + e = &(h->table[i]); + + if (e->key != NULL) { + g = glist_add_ptr(g, (void *) e); + j++; + + for (e = e->next; e; e = e->next) { + g = glist_add_ptr(g, (void *) e); + j++; + } + } + } + + if (count) + *count = j; + + return g; +} + +hash_iter_t * +hash_table_iter(hash_table_t *h) +{ + hash_iter_t *itor; + + itor = ckd_calloc(1, sizeof(*itor)); + itor->ht = h; + return hash_table_iter_next(itor); +} + +hash_iter_t * +hash_table_iter_next(hash_iter_t *itor) +{ + /* If there is an entry, walk down its list. */ + if (itor->ent) + itor->ent = itor->ent->next; + /* If we got to the end of the chain, or we had no entry, scan + * forward in the table to find the next non-empty bucket. */ + if (itor->ent == NULL) { + while (itor->idx < (size_t)itor->ht->size + && itor->ht->table[itor->idx].key == NULL) + ++itor->idx; + /* If we did not find one then delete the iterator and + * return NULL. 
*/ + if (itor->idx == (size_t)itor->ht->size) { + hash_table_iter_free(itor); + return NULL; + } + /* Otherwise use this next entry. */ + itor->ent = itor->ht->table + itor->idx; + /* Increase idx for the next time around. */ + ++itor->idx; + } + return itor; +} + +void +hash_table_iter_free(hash_iter_t *itor) +{ + ckd_free(itor); +} + +void +hash_table_free(hash_table_t * h) +{ + hash_entry_t *e, *e2; + int32 i; + + if (h == NULL) + return; + + /* Free additional entries created for key collision cases */ + for (i = 0; i < h->size; i++) { + for (e = h->table[i].next; e; e = e2) { + e2 = e->next; + ckd_free((void *) e); + } + } + + ckd_free((void *) h->table); + ckd_free((void *) h); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/heap.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/heap.c new file mode 100644 index 0000000000000000000000000000000000000000..e3b4421b7f1daa1edce06064438f43d8f00927c8 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/heap.c @@ -0,0 +1,292 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * heap.c -- Generic heap structure for inserting in any and popping in sorted + * order. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: heap.c,v $ + * Revision 1.4 2005/06/22 03:05:49 arthchan2003 + * 1, Fixed doxygen documentation, 2, Add keyword. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 05-Mar-99 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Fixed bug in heap_destroy() (in while loop exit condition). + * + * 23-Dec-96 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Started. 
+ */ + + +#include +#include +#include +#include + +#include "sphinxbase/heap.h" +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" + +/** + * One node on the heap + */ +typedef struct heapnode_s { + void *data; /**< Application data at this node */ + int32 val; /**< Associated with above application data; according to which + heap is sorted (in ascending order) */ + int32 nl, nr; /**< left/right descendants of this node (for balancing heap) */ + struct heapnode_s *l; /**< Root of left descendant heap */ + struct heapnode_s *r; /**< Root of right descendant heap */ +} heapnode_t; + +/** + * Internal heap data structure. + */ +struct heap_s { + heapnode_t *top; +}; + + +#if 0 +static void +heap_dump(heapnode_t * top, int32 level) +{ + int32 i; + + if (!top) + return; + + for (i = 0; i < level; i++) + printf(" "); + /* print top info */ + heap_dump(top->l, level + 1); + heap_dump(top->r, level + 1); +} +#endif + + +heap_t * +heap_new(void) +{ + heap_t *h = ckd_calloc(1, sizeof(*h)); + return h; +} + + +static heapnode_t * +subheap_insert(heapnode_t * root, void *data, int32 val) +{ + heapnode_t *h; + void *tmpdata; + int32 tmpval; + + if (!root) { + h = (heapnode_t *) ckd_calloc(1, sizeof(heapnode_t)); + h->data = data; + h->val = val; + h->l = h->r = NULL; + h->nl = h->nr = 0; + return h; + } + + /* Root already exists; if new value is less, replace root node */ + if (root->val > val) { + tmpdata = root->data; + tmpval = root->val; + root->data = data; + root->val = val; + data = tmpdata; + val = tmpval; + } + + /* Insert new or old (replaced) node in right or left subtree; keep them balanced */ + if (root->nl > root->nr) { + root->r = subheap_insert(root->r, data, val); + root->nr++; + } + else { + root->l = subheap_insert(root->l, data, val); + root->nl++; + } + + return root; +} + + +int +heap_insert(heap_t *heap, void *data, int32 val) +{ + heap->top = subheap_insert(heap->top, data, val); + return 0; +} + + +static heapnode_t * 
+subheap_pop(heapnode_t * root) +{ + heapnode_t *l, *r; + + /* Propagate best value from below into root, if any */ + l = root->l; + r = root->r; + + if (!l) { + if (!r) { + ckd_free((char *) root); + return NULL; + } + else { + root->data = r->data; + root->val = r->val; + root->r = subheap_pop(r); + root->nr--; + } + } + else { + if ((!r) || (l->val < r->val)) { + root->data = l->data; + root->val = l->val; + root->l = subheap_pop(l); + root->nl--; + } + else { + root->data = r->data; + root->val = r->val; + root->r = subheap_pop(r); + root->nr--; + } + } + + return root; +} + + +int +heap_pop(heap_t *heap, void **data, int32 * val) +{ + if (heap->top == NULL) + return 0; + *data = heap->top->data; + *val = heap->top->val; + heap->top = subheap_pop(heap->top); + return 1; +} + + +int +heap_top(heap_t *heap, void **data, int32 * val) +{ + if (heap->top == NULL) + return 0; + *data = heap->top->data; + *val = heap->top->val; + return 1; +} + +static int +heap_remove_one(heap_t *heap, heapnode_t *top, void *data) +{ + if (top == NULL) + return -1; + else if (top->data == data) { + assert(top == heap->top); + heap->top = subheap_pop(heap->top); + return 0; + } + if (top->l) { + if (top->l->data == data) { + top->l = subheap_pop(top->l); + --top->nl; + return 0; + } + else if (heap_remove_one(heap, top->l, data) == 0) { + --top->nl; + return 0; + } + } + if (top->r) { + if (top->r->data == data) { + top->r = subheap_pop(top->r); + --top->nr; + return 0; + } + else if (heap_remove_one(heap, top->r, data) == 0) { + --top->nr; + return 0; + } + } + return -1; +} + +int +heap_remove(heap_t *heap, void *data) +{ + return heap_remove_one(heap, heap->top, data); +} + + +size_t +heap_size(heap_t *heap) +{ + if (heap->top == NULL) + return 0; + return heap->top->nl + heap->top->nr + 1; +} + +int +heap_destroy(heap_t *heap) +{ + void *data; + int32 val; + + /* Empty the heap and free it */ + while (heap_pop(heap, &data, &val) > 0) + ; + ckd_free(heap); + + return 0; +} diff 
--git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/listelem_alloc.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/listelem_alloc.c new file mode 100644 index 0000000000000000000000000000000000000000..4374c84fa023efdd7d82b1094980ce9d0ffac25a --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/listelem_alloc.c @@ -0,0 +1,295 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#include +#include + +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/listelem_alloc.h" +#include "sphinxbase/glist.h" + +/** + * Fast linked list allocator. + * + * We keep a separate linked list for each element-size. Element-size + * must be a multiple of pointer-size. + * + * Initially a block of empty elements is allocated, where the first + * machine word in each element points to the next available element. + * To allocate, we use this pointer to move the freelist to the next + * element, then return the current element. + * + * The last element in the list starts with a NULL pointer, which is + * used as a signal to allocate a new block of elements. + * + * In order to be able to actually release the memory allocated, we + * have to add a linked list of block pointers. This shouldn't create + * much overhead since we never access it except when freeing the + * allocator. + */ +struct listelem_alloc_s { + char **freelist; /**< ptr to first element in freelist */ + glist_t blocks; /**< Linked list of blocks allocated. 
*/ + glist_t blocksize; /**< Number of elements in each block */ + size_t elemsize; /**< Number of (char *) in element */ + size_t blk_alloc; /**< Number of alloc operations before increasing blocksize */ + size_t n_blocks; + size_t n_alloc; + size_t n_freed; +}; + +#define MIN_ALLOC 50 /**< Minimum number of elements to allocate in one block */ +#define BLKID_SHIFT 16 /**< Bit position of block number in element ID */ +#define BLKID_MASK ((1<freelist = NULL; + list->blocks = NULL; + list->elemsize = elemsize; + /* Intent of this is to increase block size once we allocate + * 256KiB (i.e. 1<<18). If somehow the element size is big enough + * to overflow that, just fail, people should use malloc anyway. */ + list->blk_alloc = (1 << 18) / (MIN_ALLOC * elemsize); + if (list->blk_alloc <= 0) { + E_ERROR("Element size * block size exceeds 256k, use malloc instead.\n"); + ckd_free(list); + return NULL; + } + list->n_alloc = 0; + list->n_freed = 0; + + /* Allocate an initial block to minimize latency. */ + listelem_add_block(list, __FILE__, __LINE__); + return list; +} + +void +listelem_alloc_free(listelem_alloc_t *list) +{ + gnode_t *gn; + if (list == NULL) + return; + for (gn = list->blocks; gn; gn = gnode_next(gn)) + ckd_free(gnode_ptr(gn)); + glist_free(list->blocks); + glist_free(list->blocksize); + ckd_free(list); +} + +static void +listelem_add_block(listelem_alloc_t *list, char *caller_file, int caller_line) +{ + char **cpp, *cp; + size_t j; + int32 blocksize; + + blocksize = list->blocksize ? gnode_int32(list->blocksize) : MIN_ALLOC; + /* Check if block size should be increased (if many requests for this size) */ + if (list->blk_alloc == 0) { + /* See above. No sense in allocating blocks bigger than + * 256KiB (well, actually, there might be, but we'll worry + * about that later). 
*/ + blocksize <<= 1; + if (blocksize * list->elemsize > (1 << 18)) + blocksize = (1 << 18) / list->elemsize; + list->blk_alloc = (1 << 18) / (blocksize * list->elemsize); + } + + /* Allocate block */ + cpp = list->freelist = + (char **) __ckd_calloc__(blocksize, list->elemsize, + caller_file, caller_line); + list->blocks = glist_add_ptr(list->blocks, cpp); + list->blocksize = glist_add_int32(list->blocksize, blocksize); + cp = (char *) cpp; + /* Link up the blocks via their first machine word. */ + for (j = blocksize - 1; j > 0; --j) { + cp += list->elemsize; + *cpp = cp; + cpp = (char **) cp; + } + /* Make sure the last element's forward pointer is NULL */ + *cpp = NULL; + --list->blk_alloc; + ++list->n_blocks; +} + + +void * +__listelem_malloc__(listelem_alloc_t *list, char *caller_file, int caller_line) +{ + char **ptr; + + /* Allocate a new block if list empty */ + if (list->freelist == NULL) + listelem_add_block(list, caller_file, caller_line); + + /* Unlink and return first element in freelist */ + ptr = list->freelist; + list->freelist = (char **) (*(list->freelist)); + (list->n_alloc)++; + + return (void *)ptr; +} + +void * +__listelem_malloc_id__(listelem_alloc_t *list, char *caller_file, + int caller_line, int32 *out_id) +{ + char **ptr; + + /* Allocate a new block if list empty */ + if (list->freelist == NULL) + listelem_add_block(list, caller_file, caller_line); + + /* Unlink and return first element in freelist */ + ptr = list->freelist; + list->freelist = (char **) (*(list->freelist)); + (list->n_alloc)++; + + if (out_id) { + int32 blksize, blkidx, ptridx; + gnode_t *gn, *gn2; + char **block; + + gn2 = list->blocksize; + block = NULL; + blkidx = 0; + for (gn = list->blocks; gn; gn = gnode_next(gn)) { + block = gnode_ptr(gn); + blksize = gnode_int32(gn2) * list->elemsize / sizeof(*block); + if (ptr >= block && ptr < block + blksize) + break; + gn2 = gnode_next(gn2); + ++blkidx; + } + if (gn == NULL) { + E_ERROR("Failed to find block index for pointer 
%p!\n", ptr); + } + ptridx = (ptr - block) / (list->elemsize / sizeof(*block)); + E_DEBUG("ptr %p block %p blkidx %d ptridx %d\n", + ptr, block, list->n_blocks - blkidx - 1, ptridx); + *out_id = ((list->n_blocks - blkidx - 1) << BLKID_SHIFT) | ptridx; + } + + return ptr; +} + +void * +listelem_get_item(listelem_alloc_t *list, int32 id) +{ + int32 blkidx, ptridx, i; + gnode_t *gn; + + blkidx = (id >> BLKID_SHIFT) & BLKID_MASK; + ptridx = id & BLKID_MASK; + + i = 0; + blkidx = list->n_blocks - blkidx; + for (gn = list->blocks; gn; gn = gnode_next(gn)) { + if (++i == blkidx) + break; + } + if (gn == NULL) { + E_ERROR("Failed to find block index %d\n", blkidx); + return NULL; + } + + return (void *)((char **)gnode_ptr(gn) + + ptridx * (list->elemsize / sizeof(void *))); +} + +void +__listelem_free__(listelem_alloc_t *list, void *elem, + char *caller_file, int caller_line) +{ + char **cpp; + (void)caller_file; + (void)caller_line; + /* + * Insert freed item at head of list. + */ + cpp = (char **) elem; + *cpp = (char *) list->freelist; + list->freelist = cpp; + (list->n_freed)++; +} + + +void +listelem_stats(listelem_alloc_t *list) +{ + gnode_t *gn, *gn2; + char **cpp; + size_t n; + + E_INFO("Linklist stats:\n"); + for (n = 0, cpp = list->freelist; cpp; + cpp = (char **) (*cpp), n++); + E_INFO + ("elemsize %lu, #alloc %lu, #freed %lu, #freelist %lu\n", + (unsigned long)list->elemsize, + (unsigned long)list->n_alloc, + (unsigned long)list->n_freed, + (unsigned long)n); + E_INFO("Allocated blocks:\n"); + gn2 = list->blocksize; + for (gn = list->blocks; gn; gn = gnode_next(gn)) { + E_INFO("%p (%d * %d bytes)\n", gnode_ptr(gn), gnode_int32(gn2), list->elemsize); + gn2 = gnode_next(gn2); + } +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/logmath.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/logmath.c new file mode 100644 index 
0000000000000000000000000000000000000000..78a042a344f7b8d4c29721d326d0821e2116144d --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/logmath.c @@ -0,0 +1,503 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ + +#include +#include +#include + +#include "sphinxbase/logmath.h" +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/mmio.h" +#include "sphinxbase/bio.h" +#include "sphinxbase/strfuncs.h" + +struct logmath_s { + logadd_t t; + int refcount; + mmio_file_t *filemap; + float64 base; + float64 log_of_base; + float64 log10_of_base; + float64 inv_log_of_base; + float64 inv_log10_of_base; + int32 zero; +}; + +logmath_t * +logmath_init(float64 base, int shift, int use_table) +{ + logmath_t *lmath; + uint32 maxyx, i; + float64 byx; + int width; + + /* Check that the base is correct. */ + if (base <= 1.0) { + E_ERROR("Base must be greater than 1.0\n"); + return NULL; + } + + /* Set up various necessary constants. */ + lmath = ckd_calloc(1, sizeof(*lmath)); + lmath->refcount = 1; + lmath->base = base; + lmath->log_of_base = log(base); + lmath->log10_of_base = log10(base); + lmath->inv_log_of_base = 1.0/lmath->log_of_base; + lmath->inv_log10_of_base = 1.0/lmath->log10_of_base; + lmath->t.shift = shift; + /* Shift this sufficiently that overflows can be avoided. */ + lmath->zero = MAX_NEG_INT32 >> (shift + 2); + + if (!use_table) + return lmath; + + /* Create a logadd table with the appropriate width */ + maxyx = (uint32) (log(2.0) / log(base) + 0.5) >> shift; + /* Poor man's log2 */ + if (maxyx < 256) width = 1; + else if (maxyx < 65536) width = 2; + else width = 4; + + lmath->t.width = width; + /* Figure out size of add table required. */ + byx = 1.0; /* Maximum possible base^{y-x} value - note that this implies that y-x == 0 */ + for (i = 0;; ++i) { + float64 lobyx = log(1.0 + byx) * lmath->inv_log_of_base; /* log_{base}(1 + base^{y-x}); */ + int32 k = (int32) (lobyx + 0.5 * (1<> shift; /* Round to shift */ + + /* base^{y-x} has reached the smallest representable value. 
*/ + if (k <= 0) + break; + + /* This table is indexed by -(y-x), so we multiply byx by + * base^{-1} here which is equivalent to subtracting one from + * (y-x). */ + byx /= base; + } + i >>= shift; + + /* Never produce a table smaller than 256 entries. */ + if (i < 255) i = 255; + + lmath->t.table = ckd_calloc(i+1, width); + lmath->t.table_size = i + 1; + /* Create the add table (see above). */ + byx = 1.0; + for (i = 0;; ++i) { + float64 lobyx = log(1.0 + byx) * lmath->inv_log_of_base; + int32 k = (int32) (lobyx + 0.5 * (1<> shift; /* Round to shift */ + uint32 prev = 0; + + /* Check any previous value - if there is a shift, we want to + * only store the highest one. */ + switch (width) { + case 1: + prev = ((uint8 *)lmath->t.table)[i >> shift]; + break; + case 2: + prev = ((uint16 *)lmath->t.table)[i >> shift]; + break; + case 4: + prev = ((uint32 *)lmath->t.table)[i >> shift]; + break; + } + if (prev == 0) { + switch (width) { + case 1: + ((uint8 *)lmath->t.table)[i >> shift] = (uint8) k; + break; + case 2: + ((uint16 *)lmath->t.table)[i >> shift] = (uint16) k; + break; + case 4: + ((uint32 *)lmath->t.table)[i >> shift] = (uint32) k; + break; + } + } + if (k <= 0) + break; + + /* Decay base^{y-x} exponentially according to base. 
*/ + byx /= base; + } + + return lmath; +} + +logmath_t * +logmath_read(const char *file_name) +{ + logmath_t *lmath; + char **argname, **argval; + int32 byteswap, i; + int chksum_present, do_mmap; + uint32 chksum; + long pos; + FILE *fp; + + E_INFO("Reading log table file '%s'\n", file_name); + if ((fp = fopen(file_name, "rb")) == NULL) { + E_ERROR_SYSTEM("Failed to open log table file '%s' for reading", file_name); + return NULL; + } + + /* Read header, including argument-value info and 32-bit byteorder magic */ + if (bio_readhdr(fp, &argname, &argval, &byteswap) < 0) { + E_ERROR("Failed to read the header from the file '%s'\n", file_name); + fclose(fp); + return NULL; + } + + lmath = ckd_calloc(1, sizeof(*lmath)); + /* Default values. */ + lmath->t.shift = 0; + lmath->t.width = 2; + lmath->base = 1.0001; + + /* Parse argument-value list */ + chksum_present = 0; + for (i = 0; argname[i]; i++) { + if (strcmp(argname[i], "version") == 0) { + } + else if (strcmp(argname[i], "chksum0") == 0) { + if (strcmp(argval[i], "yes") == 0) + chksum_present = 1; + } + else if (strcmp(argname[i], "width") == 0) { + lmath->t.width = atoi(argval[i]); + } + else if (strcmp(argname[i], "shift") == 0) { + lmath->t.shift = atoi(argval[i]); + } + else if (strcmp(argname[i], "logbase") == 0) { + lmath->base = atof_c(argval[i]); + } + } + bio_hdrarg_free(argname, argval); + chksum = 0; + + /* Set up various necessary constants. */ + lmath->log_of_base = log(lmath->base); + lmath->log10_of_base = log10(lmath->base); + lmath->inv_log_of_base = 1.0/lmath->log_of_base; + lmath->inv_log10_of_base = 1.0/lmath->log10_of_base; + /* Shift this sufficiently that overflows can be avoided. 
*/ + lmath->zero = MAX_NEG_INT32 >> (lmath->t.shift + 2); + + /* #Values to follow */ + if (bio_fread(&lmath->t.table_size, sizeof(int32), 1, fp, byteswap, &chksum) != 1) { + E_ERROR("Failed to read values from the file '%s'", file_name); + goto error_out; + } + + /* Check alignment constraints for memory mapping */ + do_mmap = 1; + pos = ftell(fp); + if (pos & ((long)lmath->t.width - 1)) { + E_WARN("%s: Data start %ld is not aligned on %d-byte boundary, will not memory map\n", + file_name, pos, lmath->t.width); + do_mmap = 0; + } + /* Check byte order for memory mapping */ + if (byteswap) { + E_WARN("%s: Data is wrong-endian, will not memory map\n", file_name); + do_mmap = 0; + } + + if (do_mmap) { + lmath->filemap = mmio_file_read(file_name); + lmath->t.table = (char *)mmio_file_ptr(lmath->filemap) + pos; + } + else { + lmath->t.table = ckd_calloc(lmath->t.table_size, lmath->t.width); + if ((uint32)bio_fread(lmath->t.table, lmath->t.width, lmath->t.table_size, + fp, byteswap, &chksum) != lmath->t.table_size) { + E_ERROR("Failed to read data (%d x %d bytes) from the file '%s' failed", + lmath->t.table_size, lmath->t.width, file_name); + goto error_out; + } + if (chksum_present) + bio_verify_chksum(fp, byteswap, chksum); + + if (fread(&i, 1, 1, fp) == 1) { + E_ERROR("%s: More data than expected\n", file_name); + goto error_out; + } + } + fclose(fp); + + return lmath; +error_out: + logmath_free(lmath); + return NULL; +} + +int32 +logmath_write(logmath_t *lmath, const char *file_name) +{ + FILE *fp; + long pos; + uint32 chksum; + + if (lmath->t.table == NULL) { + E_ERROR("No log table to write!\n"); + return -1; + } + + E_INFO("Writing log table file '%s'\n", file_name); + if ((fp = fopen(file_name, "wb")) == NULL) { + E_ERROR_SYSTEM("Failed to open logtable file '%s' for writing", file_name); + return -1; + } + + /* For whatever reason, we have to do this manually at the + * moment. 
*/ + fprintf(fp, "s3\nversion 1.0\nchksum0 yes\n"); + fprintf(fp, "width %d\n", lmath->t.width); + fprintf(fp, "shift %d\n", lmath->t.shift); + fprintf(fp, "logbase %f\n", lmath->base); + /* Pad it out to ensure alignment. */ + pos = ftell(fp) + strlen("endhdr\n"); + if (pos & ((long)lmath->t.width - 1)) { + size_t align = lmath->t.width - (pos & ((long)lmath->t.width - 1)); + assert(lmath->t.width <= 8); + fwrite(" " /* 8 spaces */, 1, align, fp); + } + fprintf(fp, "endhdr\n"); + + /* Now write the binary data. */ + chksum = (uint32)BYTE_ORDER_MAGIC; + fwrite(&chksum, sizeof(uint32), 1, fp); + chksum = 0; + /* #Values to follow */ + if (bio_fwrite(&lmath->t.table_size, sizeof(uint32), + 1, fp, 0, &chksum) != 1) { + E_ERROR("Failed to write data to a file '%s'", file_name); + goto error_out; + } + + if ((uint32)bio_fwrite(lmath->t.table, lmath->t.width, lmath->t.table_size, + fp, 0, &chksum) != lmath->t.table_size) { + E_ERROR("Failed to write data (%d x %d bytes) to the file '%s'", + lmath->t.table_size, lmath->t.width, file_name); + goto error_out; + } + if (bio_fwrite(&chksum, sizeof(uint32), 1, fp, 0, NULL) != 1) { + E_ERROR("Failed to write checksum to the file '%s'", file_name); + goto error_out; + } + + fclose(fp); + return 0; + +error_out: + fclose(fp); + return -1; +} + +logmath_t * +logmath_retain(logmath_t *lmath) +{ + ++lmath->refcount; + return lmath; +} + +int +logmath_free(logmath_t *lmath) +{ + if (lmath == NULL) + return 0; + if (--lmath->refcount > 0) + return lmath->refcount; + if (lmath->filemap) + mmio_file_unmap(lmath->filemap); + else + ckd_free(lmath->t.table); + ckd_free(lmath); + return 0; +} + +int32 +logmath_get_table_shape(logmath_t *lmath, uint32 *out_size, + uint32 *out_width, uint32 *out_shift) +{ + if (out_size) *out_size = lmath->t.table_size; + if (out_width) *out_width = lmath->t.width; + if (out_shift) *out_shift = lmath->t.shift; + + return lmath->t.table_size * lmath->t.width; +} + +float64 +logmath_get_base(logmath_t *lmath) 
+{ + return lmath->base; +} + +int +logmath_get_zero(logmath_t *lmath) +{ + return lmath->zero; +} + +int +logmath_get_width(logmath_t *lmath) +{ + return lmath->t.width; +} + +int +logmath_get_shift(logmath_t *lmath) +{ + return lmath->t.shift; +} + +int +logmath_add(logmath_t *lmath, int logb_x, int logb_y) +{ + logadd_t *t = LOGMATH_TABLE(lmath); + int d, r; + + /* handle 0 + x = x case. */ + if (logb_x <= lmath->zero) + return logb_y; + if (logb_y <= lmath->zero) + return logb_x; + + if (t->table == NULL) + return logmath_add_exact(lmath, logb_x, logb_y); + + /* d must be positive, obviously. */ + if (logb_x > logb_y) { + d = (logb_x - logb_y); + r = logb_x; + } + else { + d = (logb_y - logb_x); + r = logb_y; + } + + if (d < 0) { + /* Some kind of overflow has occurred, fail gracefully. */ + return r; + } + if ((size_t)d >= t->table_size) { + /* If this happens, it's not actually an error, because the + * last entry in the logadd table is guaranteed to be zero. + * Therefore we just return the larger of the two values. 
*/ + return r; + } + + switch (t->width) { + case 1: + return r + (((uint8 *)t->table)[d]); + case 2: + return r + (((uint16 *)t->table)[d]); + case 4: + return r + (((uint32 *)t->table)[d]); + } + return r; +} + +int +logmath_add_exact(logmath_t *lmath, int logb_p, int logb_q) +{ + return logmath_log(lmath, + logmath_exp(lmath, logb_p) + + logmath_exp(lmath, logb_q)); +} + +int +logmath_log(logmath_t *lmath, float64 p) +{ + if (p <= 0) { + return lmath->zero; + } + return (int)(log(p) * lmath->inv_log_of_base) >> lmath->t.shift; +} + +float64 +logmath_exp(logmath_t *lmath, int logb_p) +{ + return pow(lmath->base, (float64)(logb_p << lmath->t.shift)); +} + +int +logmath_ln_to_log(logmath_t *lmath, float64 log_p) +{ + return (int)(log_p * lmath->inv_log_of_base) >> lmath->t.shift; +} + +float64 +logmath_log_to_ln(logmath_t *lmath, int logb_p) +{ + return (float64)(logb_p << lmath->t.shift) * lmath->log_of_base; +} + +int +logmath_log10_to_log(logmath_t *lmath, float64 log_p) +{ + return (int)(log_p * lmath->inv_log10_of_base) >> lmath->t.shift; +} + +float +logmath_log10_to_log_float(logmath_t *lmath, float64 log_p) +{ + int i; + float res = (float)(log_p * lmath->inv_log10_of_base); + for (i = 0; i < lmath->t.shift; i++) + res /= 2.0f; + return res; +} + +float64 +logmath_log_to_log10(logmath_t *lmath, int logb_p) +{ + return (float64)(logb_p << lmath->t.shift) * lmath->log10_of_base; +} + +float64 +logmath_log_float_to_log10(logmath_t *lmath, float log_p) +{ + int i; + for (i = 0; i < lmath->t.shift; i++) { + log_p *= 2; + } + return log_p * lmath->log10_of_base; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/make_lite.py b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/make_lite.py new file mode 100644 index 0000000000000000000000000000000000000000..b0ea2637151ec6061b6f2bd96ba150c3966eb66c --- /dev/null +++ 
b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/make_lite.py @@ -0,0 +1,265 @@ +#!/usr/bin/env python + +import sys, os +import fortran +import clapack_scrub + +try: set +except NameError: + from sets import Set as set + +# Arguments to pass to f2c. You'll always want -A for ANSI C prototypes +# Others of interest: -a to not make variables static by default +# -C to check array subscripts +F2C_ARGS = '-A' + +# The header to add to the top of the *_lite.c file. Note that slamch_() calls +# will be replaced by the macros below by clapack_scrub.scrub_source() +HEADER = '''\ +/* +NOTE: This is generated code. Look in README.python for information on + remaking this file. +*/ +#include "f2c.h" + +#ifdef HAVE_CONFIG +#include "config.h" +#else +extern doublereal slamch_(char *); +#define EPSILON slamch_("Epsilon") +#define SAFEMINIMUM slamch_("Safe minimum") +#define PRECISION slamch_("Precision") +#define BASE slamch_("Base") +#endif + + +extern doublereal slapy2_(real *, real *); + +''' + +class FortranRoutine: + """Wrapper for a Fortran routine in a file. + """ + type = 'generic' + def __init__(self, name=None, filename=None): + self.filename = filename + if name is None: + root, ext = os.path.splitext(filename) + name = root + self.name = name + self._dependencies = None + + def dependencies(self): + if self._dependencies is None: + deps = fortran.getDependencies(self.filename) + self._dependencies = [d.lower() for d in deps] + return self._dependencies + +class UnknownFortranRoutine(FortranRoutine): + """Wrapper for a Fortran routine for which the corresponding file + is not known. + """ + type = 'unknown' + def __init__(self, name): + FortranRoutine.__init__(self, name=name, filename='') + + def dependencies(self): + return [] + +class FortranLibrary: + """Container for a bunch of Fortran routines. 
+ """ + def __init__(self, src_dirs): + self._src_dirs = src_dirs + self.names_to_routines = {} + + def _findRoutine(self, rname): + rname = rname.lower() + for s in self._src_dirs: + ffilename = os.path.join(s, rname + '.f') + if os.path.exists(ffilename): + return self._newFortranRoutine(rname, ffilename) + return UnknownFortranRoutine(rname) + + def _newFortranRoutine(self, rname, filename): + return FortranRoutine(rname, filename) + + def addIgnorableRoutine(self, rname): + """Add a routine that we don't want to consider when looking at + dependencies. + """ + rname = rname.lower() + routine = UnknownFortranRoutine(rname) + self.names_to_routines[rname] = routine + + def addRoutine(self, rname): + """Add a routine to the library. + """ + self.getRoutine(rname) + + def getRoutine(self, rname): + """Get a routine from the library. Will add if it's not found. + """ + unique = [] + rname = rname.lower() + routine = self.names_to_routines.get(rname, unique) + if routine is unique: + routine = self._findRoutine(rname) + self.names_to_routines[rname] = routine + return routine + + def allRoutineNames(self): + """Return the names of all the routines. + """ + return self.names_to_routines.keys() + + def allRoutines(self): + """Return all the routines. + """ + return self.names_to_routines.values() + + def resolveAllDependencies(self): + """Try to add routines to the library to satisfy all the dependencies + for each routine in the library. + + Returns a set of routine names that have the dependencies unresolved. 
+ """ + done_this = set() + last_todo = set() + while 1: + todo = set(self.allRoutineNames()) - done_this + if todo == last_todo: + break + for rn in todo: + r = self.getRoutine(rn) + deps = r.dependencies() + for d in deps: + self.addRoutine(d) + done_this.add(rn) + last_todo = todo + return todo + +class LapackLibrary(FortranLibrary): + def _newFortranRoutine(self, rname, filename): + routine = FortranLibrary._newFortranRoutine(self, rname, filename) + if filename.find('BLAS') != -1: + routine.type = 'blas' + elif rname.startswith('z'): + routine.type = 'zlapack' + else: + routine.type = 'slapack' + return routine + + def allRoutinesByType(self, typename): + routines = [(r.name,r) for r in self.allRoutines() if r.type == typename] + routines.sort() + return [a[1] for a in routines] + +def printRoutineNames(desc, routines): + print desc + for r in routines: + print '\t%s' % r.name + +def getLapackRoutines(wrapped_routines, ignores, lapack_dir): + blas_src_dir = os.path.join(lapack_dir, 'BLAS', 'SRC') + if not os.path.exists(blas_src_dir): + blas_src_dir = os.path.join(lapack_dir, 'blas', 'src') + lapack_src_dir = os.path.join(lapack_dir, 'SRC') + if not os.path.exists(lapack_src_dir): + lapack_src_dir = os.path.join(lapack_dir, 'src') + library = LapackLibrary([blas_src_dir, lapack_src_dir]) + + for r in ignores: + library.addIgnorableRoutine(r) + + for w in wrapped_routines: + library.addRoutine(w) + + library.resolveAllDependencies() + + return library + +def getWrappedRoutineNames(wrapped_routines_file): + fo = open(wrapped_routines_file) + routines = [] + ignores = [] + for line in fo: + line = line.strip() + if not line or line.startswith('#'): + continue + if line.startswith('IGNORE:'): + line = line[7:].strip() + ig = line.split() + ignores.extend(ig) + else: + routines.append(line) + return routines, ignores + +def dumpRoutineNames(library, output_dir): + for typename in ['unknown', 'blas', 'slapack', 'zlapack']: + routines = 
library.allRoutinesByType(typename) + filename = os.path.join(output_dir, typename + '_routines.lst') + fo = open(filename, 'w') + for r in routines: + deps = r.dependencies() + fo.write('%s: %s\n' % (r.name, ' '.join(deps))) + fo.close() + +def concatenateRoutines(routines, output_file): + output_fo = open(output_file, 'w') + for r in routines: + fo = open(r.filename, 'r') + source = fo.read() + fo.close() + output_fo.write(source) + output_fo.close() + +class F2CError(Exception): + pass + +def runF2C(fortran_filename, output_dir): + # we're assuming no funny business that needs to be quoted for the shell + cmd = "f2c %s -d %s %s" % (F2C_ARGS, output_dir, fortran_filename) + rc = os.system(cmd) + if rc != 0: + raise F2CError + +def scrubF2CSource(c_file): + fo = open(c_file, 'r') + source = fo.read() + fo.close() + source = clapack_scrub.scrubSource(source, verbose=True) + fo = open(c_file, 'w') + fo.write(HEADER) + fo.write(source) + fo.close() + +def main(): + if len(sys.argv) != 4: + print 'Usage: %s wrapped_routines_file lapack_dir output_dir' % \ + (sys.argv[0],) + return + wrapped_routines_file = sys.argv[1] + lapack_src_dir = sys.argv[2] + output_dir = sys.argv[3] + + wrapped_routines, ignores = getWrappedRoutineNames(wrapped_routines_file) + library = getLapackRoutines(wrapped_routines, ignores, lapack_src_dir) + + dumpRoutineNames(library, output_dir) + + for typename in ['blas', 'slapack']: + print 'creating %s_lite.c ...' 
% typename + routines = library.allRoutinesByType(typename) + fortran_file = os.path.join(output_dir, typename+'_lite.f') + c_file = fortran_file[:-2] + '.c' + concatenateRoutines(routines, fortran_file) + try: + runF2C(fortran_file, output_dir) + except F2CError: + print 'f2c failed on %s' % fortran_file + break + scrubF2CSource(c_file) + +if __name__ == '__main__': + main() diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/matrix.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/matrix.c new file mode 100644 index 0000000000000000000000000000000000000000..120152b8151810a41beb845465ced28e49bf6795 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/matrix.c @@ -0,0 +1,279 @@ +/* -*- c-basic-offset: 4 -*- */ +/* ==================================================================== + * Copyright (c) 1997-2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +#include +#include + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sphinxbase/clapack_lite.h" +#include "sphinxbase/matrix.h" +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" + +void +norm_3d(float32 ***arr, + uint32 d1, + uint32 d2, + uint32 d3) +{ + uint32 i, j, k; + float64 s; + + for (i = 0; i < d1; i++) { + for (j = 0; j < d2; j++) { + + /* compute sum (i, j) as over all k */ + for (k = 0, s = 0; k < d3; k++) { + s += arr[i][j][k]; + } + + /* do 1 floating point divide */ + s = 1.0 / s; + + /* divide all k by sum over k */ + for (k = 0; k < d3; k++) { + arr[i][j][k] *= s; + } + } + } +} + +void +accum_3d(float32 ***out, + float32 ***in, + uint32 d1, + uint32 d2, + uint32 d3) +{ + uint32 i, j, k; + + for (i = 0; i < d1; i++) { + for (j = 0; j < d2; j++) { + for (k = 0; k < d3; k++) { + out[i][j][k] += in[i][j][k]; + } + } + } +} + +void +floor_nz_3d(float32 ***m, + uint32 d1, + uint32 d2, + uint32 d3, + float32 floor) +{ + uint32 i, j, k; + + for (i = 0; i < d1; i++) { + for (j = 0; j < d2; j++) { + for (k = 0; k < d3; k++) { + if ((m[i][j][k] != 0) && (m[i][j][k] < floor)) + m[i][j][k] = floor; + } + } + } +} +void +floor_nz_1d(float32 *v, + uint32 d1, + float32 floor) +{ + uint32 i; + + for (i = 0; i < d1; i++) { + if ((v[i] != 0) && (v[i] < floor)) + v[i] = floor; + } +} + +void +band_nz_1d(float32 
*v, + uint32 d1, + float32 band) +{ + uint32 i; + + for (i = 0; i < d1; i++) { + if (v[i] != 0) { + if ((v[i] > 0) && (v[i] < band)) { + v[i] = band; + } + else if ((v[i] < 0) && (v[i] > -band)) { + v[i] = -band; + } + } + } +} + +/* Find determinant through LU decomposition. */ +float64 +determinant(float32 ** a, int32 n) +{ + float32 **tmp_a; + float64 det; + char uplo; + int32 info, i; + + /* a is assumed to be symmetric, so we don't need to switch the + * ordering of the data. But we do need to copy it since it is + * overwritten by LAPACK. */ + tmp_a = (float32 **)ckd_calloc_2d(n, n, sizeof(float32)); + memcpy(tmp_a[0], a[0], n*n*sizeof(float32)); + + uplo = 'L'; + spotrf_(&uplo, &n, tmp_a[0], &n, &info); + det = tmp_a[0][0]; + /* det = prod(diag(l))^2 */ + for (i = 1; i < n; ++i) + det *= tmp_a[i][i]; + ckd_free_2d((void **)tmp_a); + if (info > 0) + return -1.0; /* Generic "not positive-definite" answer */ + else + return det * det; +} + +int32 +solve(float32 **a, /*Input : an n*n matrix A */ + float32 *b, /*Input : a n dimesion vector b */ + float32 *out_x, /*Output : a n dimesion vector x */ + int32 n) +{ + char uplo; + float32 **tmp_a; + int32 info, nrhs; + + /* a is assumed to be symmetric, so we don't need to switch the + * ordering of the data. But we do need to copy it since it is + * overwritten by LAPACK. */ + tmp_a = (float32 **)ckd_calloc_2d(n, n, sizeof(float32)); + memcpy(tmp_a[0], a[0], n*n*sizeof(float32)); + memcpy(out_x, b, n*sizeof(float32)); + uplo = 'L'; + nrhs = 1; + sposv_(&uplo, &n, &nrhs, tmp_a[0], &n, out_x, &n, &info); + ckd_free_2d((void **)tmp_a); + + if (info != 0) + return -1; + else + return info; +} + +/* Find inverse by solving AX=I. */ +int32 +invert(float32 ** ainv, float32 ** a, int32 n) +{ + char uplo; + float32 **tmp_a; + int32 info, nrhs, i; + + /* a is assumed to be symmetric, so we don't need to switch the + * ordering of the data. But we do need to copy it since it is + * overwritten by LAPACK. 
*/ + tmp_a = (float32 **)ckd_calloc_2d(n, n, sizeof(float32)); + memcpy(tmp_a[0], a[0], n*n*sizeof(float32)); + + /* Construct an identity matrix. */ + memset(ainv[0], 0, sizeof(float32) * n * n); + for (i = 0; i < n; i++) + ainv[i][i] = 1.0; + + uplo = 'L'; + nrhs = n; + sposv_(&uplo, &n, &nrhs, tmp_a[0], &n, ainv[0], &n, &info); + + ckd_free_2d((void **)tmp_a); + + if (info != 0) + return -1; + else + return info; +} + +void +matrixmultiply(float32 ** c, float32 ** a, float32 ** b, int32 n) +{ + char side, uplo; + float32 alpha; + + side = 'L'; + uplo = 'L'; + alpha = 1.0; + ssymm_(&side, &uplo, &n, &n, &alpha, a[0], &n, b[0], &n, &alpha, c[0], &n); +} + +void +outerproduct(float32 ** a, float32 * x, float32 * y, int32 len) +{ + int32 i, j; + + for (i = 0; i < len; ++i) { + a[i][i] = x[i] * y[i]; + for (j = i + 1; j < len; ++j) { + a[i][j] = x[i] * y[j]; + a[j][i] = x[j] * y[i]; + } + } +} + +void +scalarmultiply(float32 ** a, float32 x, int32 n) +{ + int32 i, j; + + for (i = 0; i < n; ++i) { + a[i][i] *= x; + for (j = i+1; j < n; ++j) { + a[i][j] *= x; + a[j][i] *= x; + } + } +} + +void +matrixadd(float32 ** a, float32 ** b, int32 n) +{ + int32 i, j; + + for (i = 0; i < n; ++i) + for (j = 0; j < n; ++j) + a[i][j] += b[i][j]; +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/mmio.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/mmio.c new file mode 100644 index 0000000000000000000000000000000000000000..dde0d7f191b0cba4bfd8a2ed8788dcd66dc9497f --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/mmio.c @@ -0,0 +1,257 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2005 Carnegie Mellon University. All rights + * reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ +/********************************************************************* + * + * File: mmio.c + * + * Description: mmap() wrappers for Unix/Windows + * + * Author: David Huggins-Daines + * + *********************************************************************/ + +#include +#include + + +#ifdef GNUWINCE +# include +# include +# include +# include +#elif defined(__SYMBIAN32__) /* SYMBIAN32 must be before WIN32 since Symbian SDK defines WIN32 as well */ +# include +# include +# include +# include +#elif defined(_WIN32) +# include +#else +# include +# include +# include +# include +# include +#endif + +#include "sphinxbase/prim_type.h" +#include "sphinxbase/err.h" +#include "sphinxbase/mmio.h" +#include "sphinxbase/ckd_alloc.h" + +#if defined(_WIN32_WCE) || defined(GNUWINCE) +struct mmio_file_s { + int dummy; +}; + +mmio_file_t * +mmio_file_read(const char *filename) +{ + HANDLE ffm, fd; + WCHAR *wfilename; + void *rv; + int len; + + len = mbstowcs(NULL, filename, 0) + 1; + wfilename = malloc(len * sizeof(WCHAR)); + mbstowcs(wfilename, filename, len); + + if ((ffm = + CreateFileForMappingW(wfilename, GENERIC_READ, FILE_SHARE_READ, + NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, + NULL)) == INVALID_HANDLE_VALUE) { + E_ERROR("Failed to create mapping for the file '%s': %08x\n", filename, + GetLastError()); + return NULL; + } + if ((fd = + CreateFileMappingW(ffm, NULL, PAGE_READONLY, 0, 0, NULL)) == NULL) { + E_ERROR("Failed to CreateFileMapping: %08x\n", GetLastError()); + CloseHandle(ffm); + return NULL; + } + rv = MapViewOfFile(fd, FILE_MAP_READ, 0, 0, 0); + free(wfilename); + CloseHandle(ffm); + CloseHandle(fd); + + return (mmio_file_t *) rv; +} + +void +mmio_file_unmap(mmio_file_t *mf) +{ + if (!UnmapViewOfFile((void *)mf)) { + E_ERROR("Failed to UnmapViewOfFile: %08x\n", GetLastError()); + } +} + +void * +mmio_file_ptr(mmio_file_t *mf) +{ + return (void *)mf; +} + +#elif 
defined(_WIN32) && !defined(_WIN32_WP) /* !WINCE */ +struct mmio_file_s { + int dummy; +}; + +mmio_file_t * +mmio_file_read(const char *filename) +{ + HANDLE ffm, fd; + void *rv; + + if ((ffm = CreateFile(filename, GENERIC_READ, FILE_SHARE_READ, + NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, + NULL)) == INVALID_HANDLE_VALUE) { + E_ERROR("Failed to create file '%s': %08x\n", + filename, GetLastError()); + return NULL; + } + if ((fd = CreateFileMapping(ffm, NULL, + PAGE_READONLY, 0, 0, NULL)) == NULL) { + E_ERROR("Failed to CreateFileMapping: %08x\n", GetLastError()); + CloseHandle(ffm); + } + rv = MapViewOfFile(fd, FILE_MAP_READ, 0, 0, 0); + CloseHandle(ffm); + CloseHandle(fd); + + return (mmio_file_t *)rv; +} + +void +mmio_file_unmap(mmio_file_t *mf) +{ + if (!UnmapViewOfFile((void *)mf)) { + E_ERROR("Failed to UnmapViewOfFile: %08x\n", GetLastError()); + } +} + +void * +mmio_file_ptr(mmio_file_t *mf) +{ + return (void *)mf; +} + +#else /* !WIN32, !WINCE */ +#if defined(__ADSPBLACKFIN__) || defined(_WIN32_WP) + /* This is true for both uClinux and VisualDSP++, + but actually we need a better way to detect it. 
*/ +struct mmio_file_s { + int dummy; +}; + +mmio_file_t * +mmio_file_read(const char *filename) +{ + E_ERROR("mmio is not implemented on this platform!"); + return NULL; +} + +void +mmio_file_unmap(mmio_file_t *mf) +{ + E_ERROR("mmio is not implemented on this platform!"); +} + +void * +mmio_file_ptr(mmio_file_t *mf) +{ + E_ERROR("mmio is not implemented on this platform!"); + return NULL; +} +#else /* !__ADSPBLACKFIN__ */ +struct mmio_file_s { + void *ptr; + size_t mapsize; +}; + +mmio_file_t * +mmio_file_read(const char *filename) +{ + mmio_file_t *mf; + struct stat buf; + void *ptr; + int fd; + size_t pagesize; + + if ((fd = open(filename, O_RDONLY)) == -1) { + E_ERROR_SYSTEM("Failed to open %s", filename); + return NULL; + } + if (fstat(fd, &buf) == -1) { + E_ERROR_SYSTEM("Failed to stat %s", filename); + close(fd); + return NULL; + } + ptr = mmap(NULL, buf.st_size, PROT_READ, MAP_SHARED, fd, 0); + if (ptr == (void *)-1) { + E_ERROR_SYSTEM("Failed to mmap %lld bytes", (unsigned long long)buf.st_size); + close(fd); + return NULL; + } + close(fd); + mf = ckd_calloc(1, sizeof(*mf)); + mf->ptr = ptr; + /* Align map size to next page. 
*/ + pagesize = sysconf(_SC_PAGESIZE); + mf->mapsize = (buf.st_size + pagesize - 1) / pagesize * pagesize; + + return mf; +} + +void +mmio_file_unmap(mmio_file_t *mf) +{ + if (mf == NULL) + return; + if (munmap(mf->ptr, mf->mapsize) < 0) { + E_ERROR_SYSTEM("Failed to unmap %ld bytes at %p", mf->mapsize, mf->ptr); + } + ckd_free(mf); +} + +void * +mmio_file_ptr(mmio_file_t *mf) +{ + return mf->ptr; +} +#endif /* !__ADSPBLACKFIN__ */ +#endif /* !(WINCE || WIN32) */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/pio.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/pio.c new file mode 100644 index 0000000000000000000000000000000000000000..72cf8e9c995323a735e78a68bb7dba179670dc57 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/pio.c @@ -0,0 +1,657 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include + +#ifdef HAVE_UNISTD_H +#include +#endif + +#ifdef HAVE_SYS_TYPES_H +#include +#endif + +#ifdef HAVE_SYS_STAT_H +#include +#endif + +#if defined(_WIN32) && !defined(CYGWIN) +#include +#endif + +#include "sphinxbase/pio.h" +#include "sphinxbase/filename.h" +#include "sphinxbase/err.h" +#include "sphinxbase/strfuncs.h" +#include "sphinxbase/ckd_alloc.h" + +#ifndef EXEEXT +#define EXEEXT "" +#endif + +enum { + COMP_NONE, + COMP_COMPRESS, + COMP_GZIP, + COMP_BZIP2 +}; + +static void +guess_comptype(char const *file, int32 *ispipe, int32 *isgz) +{ + size_t k; + + k = strlen(file); + *ispipe = 0; + *isgz = COMP_NONE; + if ((k > 2) + && ((strcmp(file + k - 2, ".Z") == 0) + || (strcmp(file + k - 2, ".z") == 0))) { + *ispipe = 1; + *isgz = COMP_COMPRESS; + } + else if ((k > 3) && ((strcmp(file + k - 3, ".gz") == 0) + || (strcmp(file + k - 3, ".GZ") == 0))) { + *ispipe = 1; + *isgz = COMP_GZIP; + } + else if ((k > 4) && ((strcmp(file + k - 4, ".bz2") == 0) + || (strcmp(file + k - 4, ".BZ2") == 0))) { + *ispipe = 1; + 
*isgz = COMP_BZIP2; + } +} + +FILE * +fopen_comp(const char *file, const char *mode, int32 * ispipe) +{ + FILE *fp; + +#ifndef HAVE_POPEN + *ispipe = 0; /* No popen() on WinCE */ +#else /* HAVE_POPEN */ + int32 isgz; + guess_comptype(file, ispipe, &isgz); +#endif /* HAVE_POPEN */ + + if (*ispipe) { +#ifndef HAVE_POPEN + /* Shouldn't get here, anyway */ + E_FATAL("No popen() on WinCE\n"); +#else + if (strcmp(mode, "r") == 0) { + char *command; + switch (isgz) { + case COMP_GZIP: + command = string_join("gunzip" EXEEXT, " -c ", file, NULL); + break; + case COMP_COMPRESS: + command = string_join("zcat" EXEEXT, " ", file, NULL); + break; + case COMP_BZIP2: + command = string_join("bunzip2" EXEEXT, " -c ", file, NULL); + break; + default: + command = NULL; /* Make compiler happy. */ + E_FATAL("Unknown compression type %d\n", isgz); + } + if ((fp = popen(command, mode)) == NULL) { + E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode); + ckd_free(command); + return NULL; + } + ckd_free(command); + } + else if (strcmp(mode, "w") == 0) { + char *command; + switch (isgz) { + case COMP_GZIP: + command = string_join("gzip" EXEEXT, " > ", file, NULL); + break; + case COMP_COMPRESS: + command = string_join("compress" EXEEXT, " -c > ", file, NULL); + break; + case COMP_BZIP2: + command = string_join("bzip2" EXEEXT, " > ", file, NULL); + break; + default: + command = NULL; /* Make compiler happy. 
*/ + E_FATAL("Unknown compression type %d\n", isgz); + } + if ((fp = popen(command, mode)) == NULL) { + E_ERROR_SYSTEM("Failed to open a pipe for a command '%s' mode '%s'", command, mode); + ckd_free(command); + return NULL; + } + ckd_free(command); + } + else { + E_ERROR("Compressed file operation for mode %s is not supported\n", mode); + return NULL; + } +#endif /* HAVE_POPEN */ + } + else { + fp = fopen(file, mode); + } + + return (fp); +} + + +void +fclose_comp(FILE * fp, int32 ispipe) +{ + if (ispipe) { +#ifdef HAVE_POPEN +#if defined(_WIN32) && (!defined(__SYMBIAN32__)) + _pclose(fp); +#else + pclose(fp); +#endif +#endif + } + else + fclose(fp); +} + + +FILE * +fopen_compchk(const char *file, int32 * ispipe) +{ +#ifndef HAVE_POPEN + *ispipe = 0; /* No popen() on WinCE */ + /* And therefore the rest of this function is useless. */ + return (fopen_comp(file, "r", ispipe)); +#else /* HAVE_POPEN */ + int32 isgz; + FILE *fh; + + /* First just try to fopen_comp() it */ + if ((fh = fopen_comp(file, "r", ispipe)) != NULL) + return fh; + else { + char *tmpfile; + size_t k; + + /* File doesn't exist; try other compressed/uncompressed form, as appropriate */ + guess_comptype(file, ispipe, &isgz); + k = strlen(file); + tmpfile = ckd_calloc(k+5, 1); + strcpy(tmpfile, file); + switch (isgz) { + case COMP_GZIP: + tmpfile[k - 3] = '\0'; + break; + case COMP_BZIP2: + tmpfile[k - 4] = '\0'; + break; + case COMP_COMPRESS: + tmpfile[k - 2] = '\0'; + break; + case COMP_NONE: + strcpy(tmpfile + k, ".gz"); + if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) { + E_WARN("Using %s instead of %s\n", tmpfile, file); + ckd_free(tmpfile); + return fh; + } + strcpy(tmpfile + k, ".bz2"); + if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) { + E_WARN("Using %s instead of %s\n", tmpfile, file); + ckd_free(tmpfile); + return fh; + } + strcpy(tmpfile + k, ".Z"); + if ((fh = fopen_comp(tmpfile, "r", ispipe)) != NULL) { + E_WARN("Using %s instead of %s\n", tmpfile, file); + 
ckd_free(tmpfile); + return fh; + } + ckd_free(tmpfile); + return NULL; + } + E_WARN("Using %s instead of %s\n", tmpfile, file); + fh = fopen_comp(tmpfile, "r", ispipe); + ckd_free(tmpfile); + return NULL; + } +#endif /* HAVE_POPEN */ +} + +lineiter_t * +lineiter_start(FILE *fh) +{ + lineiter_t *li; + + li = (lineiter_t *)ckd_calloc(1, sizeof(*li)); + li->buf = (char *)ckd_malloc(128); + li->buf[0] = '\0'; + li->bsiz = 128; + li->len = 0; + li->fh = fh; + + li = lineiter_next(li); + + /* Strip the UTF-8 BOM */ + + if (li && 0 == strncmp(li->buf, "\xef\xbb\xbf", 3)) { + memmove(li->buf, li->buf + 3, strlen(li->buf + 1)); + li->len -= 3; + } + + return li; +} + +lineiter_t * +lineiter_start_clean(FILE *fh) +{ + lineiter_t *li; + + li = lineiter_start(fh); + + if (li == NULL) + return li; + + li->clean = TRUE; + + if (li->buf && li->buf[0] == '#') { + li = lineiter_next(li); + } else { + string_trim(li->buf, STRING_BOTH); + } + + return li; +} + + +static lineiter_t * +lineiter_next_plain(lineiter_t *li) +{ + /* We are reading the next line */ + li->lineno++; + + /* Read a line and check for EOF. */ + if (fgets(li->buf, li->bsiz, li->fh) == NULL) { + lineiter_free(li); + return NULL; + } + /* If we managed to read the whole thing, then we are done + * (this will be by far the most common result). */ + li->len = (int32)strlen(li->buf); + if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n') + return li; + + /* Otherwise we have to reallocate and keep going. */ + while (1) { + li->bsiz *= 2; + li->buf = (char *)ckd_realloc(li->buf, li->bsiz); + /* If we get an EOF, we are obviously done. */ + if (fgets(li->buf + li->len, li->bsiz - li->len, li->fh) == NULL) { + li->len += strlen(li->buf + li->len); + return li; + } + li->len += strlen(li->buf + li->len); + /* If we managed to read the whole thing, then we are done. */ + if (li->len < li->bsiz - 1 || li->buf[li->len - 1] == '\n') + return li; + } + + /* Shouldn't get here. 
*/ + return li; +} + + +lineiter_t * +lineiter_next(lineiter_t *li) +{ + if (!li->clean) + return lineiter_next_plain(li); + + for (li = lineiter_next_plain(li); li; li = lineiter_next_plain(li)) { + if (li->buf) { + li->buf = string_trim(li->buf, STRING_BOTH); + if (li->buf[0] != 0 && li->buf[0] != '#') + break; + } + } + return li; +} + +int lineiter_lineno(lineiter_t *li) +{ + return li->lineno; +} + +void +lineiter_free(lineiter_t *li) +{ + if (li == NULL) + return; + ckd_free(li->buf); + ckd_free(li); +} + +char * +fread_line(FILE *stream, size_t *out_len) +{ + char *output, *outptr; + char buf[128]; + + output = outptr = NULL; + while (fgets(buf, sizeof(buf), stream)) { + size_t len = strlen(buf); + /* Append this data to the buffer. */ + if (output == NULL) { + output = (char *)ckd_malloc(len + 1); + outptr = output; + } + else { + size_t cur = outptr - output; + output = (char *)ckd_realloc(output, cur + len + 1); + outptr = output + cur; + } + memcpy(outptr, buf, len + 1); + outptr += len; + /* Stop on a short read or end of line. 
*/ + if (len < sizeof(buf)-1 || buf[len-1] == '\n') + break; + } + if (out_len) *out_len = outptr - output; + return output; +} + +#define FREAD_RETRY_COUNT 60 + +int32 +fread_retry(void *pointer, int32 size, int32 num_items, FILE * stream) +{ + char *data; + size_t n_items_read; + size_t n_items_rem; + uint32 n_retry_rem; + int32 loc; + + n_retry_rem = FREAD_RETRY_COUNT; + + data = (char *)pointer; + loc = 0; + n_items_rem = num_items; + + do { + n_items_read = fread(&data[loc], size, n_items_rem, stream); + + n_items_rem -= n_items_read; + + if (n_items_rem > 0) { + /* an incomplete read occurred */ + + if (n_retry_rem == 0) + return -1; + + if (n_retry_rem == FREAD_RETRY_COUNT) { + E_ERROR_SYSTEM("fread() failed; retrying...\n"); + } + + --n_retry_rem; + + loc += n_items_read * size; +#if !defined(_WIN32) && defined(HAVE_UNISTD_H) + sleep(1); +#endif + } + } while (n_items_rem > 0); + + return num_items; +} + + +#ifdef _WIN32_WCE /* No stat() on WinCE */ +int32 +stat_retry(const char *file, struct stat * statbuf) +{ + WIN32_FIND_DATAW file_data; + HANDLE *h; + wchar_t *wfile; + size_t len; + + len = mbstowcs(NULL, file, 0) + 1; + wfile = ckd_calloc(len, sizeof(*wfile)); + mbstowcs(wfile, file, len); + if ((h = FindFirstFileW(wfile, &file_data)) == INVALID_HANDLE_VALUE) { + ckd_free(wfile); + return -1; + } + ckd_free(wfile); + memset(statbuf, 0, sizeof(*statbuf)); + statbuf->st_mtime = file_data.ftLastWriteTime.dwLowDateTime; + statbuf->st_size = file_data.nFileSizeLow; + FindClose(h); + + return 0; +} + + +int32 +stat_mtime(const char *file) +{ + struct stat statbuf; + + if (stat_retry(file, &statbuf) != 0) + return -1; + + return ((int32) statbuf.st_mtime); +} +#else +#define STAT_RETRY_COUNT 10 +int32 +stat_retry(const char *file, struct stat * statbuf) +{ + int32 i; + + for (i = 0; i < STAT_RETRY_COUNT; i++) { +#ifndef HAVE_SYS_STAT_H + FILE *fp; + + if ((fp = (FILE *)fopen(file, "r")) != 0) { + fseek(fp, 0, SEEK_END); + statbuf->st_size = ftell(fp); + 
fclose(fp); + return 0; + } +#else /* HAVE_SYS_STAT_H */ + if (stat(file, statbuf) == 0) + return 0; +#endif + if (i == 0) { + E_ERROR_SYSTEM("Failed to stat file '%s'; retrying...", file); + } +#ifdef HAVE_UNISTD_H + sleep(1); +#endif + } + + return -1; +} + +int32 +stat_mtime(const char *file) +{ + struct stat statbuf; + +#ifdef HAVE_SYS_STAT_H + if (stat(file, &statbuf) != 0) + return -1; +#else /* HAVE_SYS_STAT_H */ + if (stat_retry(file, &statbuf) != 0) + return -1; +#endif /* HAVE_SYS_STAT_H */ + + return ((int32) statbuf.st_mtime); +} +#endif /* !_WIN32_WCE */ + +struct bit_encode_s { + FILE *fh; + unsigned char buf, bbits; + int16 refcount; +}; + +bit_encode_t * +bit_encode_attach(FILE *outfh) +{ + bit_encode_t *be; + + be = (bit_encode_t *)ckd_calloc(1, sizeof(*be)); + be->refcount = 1; + be->fh = outfh; + return be; +} + +bit_encode_t * +bit_encode_retain(bit_encode_t *be) +{ + ++be->refcount; + return be; +} + +int +bit_encode_free(bit_encode_t *be) +{ + if (be == NULL) + return 0; + if (--be->refcount > 0) + return be->refcount; + ckd_free(be); + + return 0; +} + +int +bit_encode_write(bit_encode_t *be, unsigned char const *bits, int nbits) +{ + int tbits; + + tbits = nbits + be->bbits; + if (tbits < 8) { + /* Append to buffer. */ + be->buf |= ((bits[0] >> (8 - nbits)) << (8 - tbits)); + } + else { + int i = 0; + while (tbits >= 8) { + /* Shift bits out of the buffer and splice with high-order bits */ + fputc(be->buf | ((bits[i]) >> be->bbits), be->fh); + /* Put low-order bits back into buffer */ + be->buf = (bits[i] << (8 - be->bbits)) & 0xff; + tbits -= 8; + ++i; + } + } + /* tbits contains remaining number of bits. 
*/ + be->bbits = tbits; + + return nbits; +} + +int +bit_encode_write_cw(bit_encode_t *be, uint32 codeword, int nbits) +{ + unsigned char bits[4]; + codeword <<= (32 - nbits); + bits[0] = (codeword >> 24) & 0xff; + bits[1] = (codeword >> 16) & 0xff; + bits[2] = (codeword >> 8) & 0xff; + bits[3] = codeword & 0xff; + return bit_encode_write(be, bits, nbits); +} + +int +bit_encode_flush(bit_encode_t *be) +{ + if (be->bbits) { + fputc(be->buf, be->fh); + be->bbits = 0; + } + return 0; +} + +int +build_directory(const char *path) +{ + int rv; + + /* Utterly failed... */ + if (strlen(path) == 0) + return -1; + +#if defined(_WIN32) && !defined(CYGWIN) + else if ((rv = _mkdir(path)) == 0) + return 0; +#elif defined(HAVE_SYS_STAT_H) /* Unix, Cygwin, doesn't work on MINGW */ + else if ((rv = mkdir(path, 0777)) == 0) + return 0; +#endif + + /* Or, it already exists... */ + else if (errno == EEXIST) + return 0; + else if (errno != ENOENT) { + E_ERROR_SYSTEM("Failed to create %s", path); + return -1; + } + else { + char *dirname = ckd_salloc(path); + path2dirname(path, dirname); + build_directory(dirname); + ckd_free(dirname); + +#if defined(_WIN32) && !defined(CYGWIN) + return _mkdir(path); +#elif defined(HAVE_SYS_STAT_H) /* Unix, Cygwin, doesn't work on MINGW */ + return mkdir(path, 0777); +#endif + } + return -1; //control should never reach here; fixes some compiler warnings +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/priority_queue.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/priority_queue.c new file mode 100644 index 0000000000000000000000000000000000000000..fdfb5690c26159722b98c0368709ff845ec948cd --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/priority_queue.c @@ -0,0 +1,144 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright 
(c) 2015 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +struct priority_queue_s { + void **pointers; + size_t alloc_size; + size_t size; + void *max_element; + int (*compare)(const void *a, const void *b); +}; + +priority_queue_t* priority_queue_create(size_t len, int (*compare)(const void *a, const void *b)) +{ + priority_queue_t* queue; + + queue = (priority_queue_t *)ckd_calloc(1, sizeof(*queue)); + queue->alloc_size = len; + queue->pointers = (void **)ckd_calloc(len, sizeof(*queue->pointers)); + queue->size = 0; + queue->max_element = NULL; + queue->compare = compare; + + return queue; +} + +void* priority_queue_poll(priority_queue_t *queue) +{ + + size_t i; + void *res; + + if (queue->size == 0) { + E_WARN("Trying to poll from empty queue\n"); + return NULL; + } + if (queue->max_element == NULL) { + E_ERROR("Trying to poll from queue and max element is undefined\n"); + return NULL; + } + res = queue->max_element; + for (i = 0; i < queue->alloc_size; i++) { + if (queue->pointers[i] == queue->max_element) { + queue->pointers[i] = NULL; + break; + } + } + queue->max_element = NULL; + for (i = 0; i < queue->alloc_size; i++) { + if (queue->pointers[i] == 0) + continue; + if (queue->max_element == NULL) { + queue->max_element = queue->pointers[i]; + } else { + if (queue->compare(queue->pointers[i], queue->max_element) < 0) + queue->max_element = queue->pointers[i]; + } + } + queue->size--; + return res; +} + +void priority_queue_add(priority_queue_t *queue, void *element) +{ + size_t i; + if (queue->size == queue->alloc_size) { + E_ERROR("Trying to add element into full queue\n"); + return; + } + for (i = 0; i < queue->alloc_size; i++) { + if (queue->pointers[i] == NULL) { + queue->pointers[i] = element; + break; + } + } + + if (queue->max_element == NULL || queue->compare(element, queue->max_element) < 0) { + queue->max_element = element; + } + queue->size++; 
+} + +size_t priority_queue_size(priority_queue_t *queue) +{ + return queue->size; +} + +void priority_queue_free(priority_queue_t *queue, void (*free_ptr)(void *a)) +{ + size_t i; + + for (i = 0; i < queue->alloc_size; i++) { + if (queue->pointers[i] != NULL) { + if (free_ptr == NULL) { + ckd_free(queue->pointers[i]); + } else { + free_ptr(queue->pointers[i]); + } + } + } + ckd_free(queue->pointers); + ckd_free(queue); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/profile.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/profile.c new file mode 100644 index 0000000000000000000000000000000000000000..c8d84ca983d73a14d678bbdcb3a7b2f12735c6a1 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/profile.c @@ -0,0 +1,345 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2001 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * profile.c -- For timing and event counting. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1999 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + * + * HISTORY + * $Log: profile.c,v $ + * Revision 1.7 2005/06/22 03:10:59 arthchan2003 + * 1, Fixed doxygen documentation, 2, Added keyword. + * + * Revision 1.3 2005/03/30 01:22:48 archan + * Fixed mistakes in last updates. Add + * + * + * 11-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Added ptmr_init(). + * + * 19-Jun-97 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Created. + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include +#include + +#if defined(_WIN32) && !defined(__SYMBIAN32__) +# include +# ifndef _WIN32_WCE +# include +# endif +#elif defined(HAVE_UNISTD_H) /* I know this, this is Unix... 
*/ +# include +# include +# include +#endif + +#ifdef _MSC_VER +#pragma warning (disable: 4996) +#endif + +#include "sphinxbase/profile.h" +#include "sphinxbase/err.h" +#include "sphinxbase/ckd_alloc.h" + +#if defined(_WIN32_WCE) || defined(_WIN32_WP) +DWORD unlink(const char *filename) +{ + WCHAR *wfilename; + DWORD rv; + size_t len; + + len = mbstowcs(NULL, filename, 0); + wfilename = ckd_calloc(len+1, sizeof(*wfilename)); + mbstowcs(wfilename, filename, len); + rv = DeleteFileW(wfilename); + ckd_free(wfilename); + + return rv; +} +#endif + +pctr_t * +pctr_new(char *nm) +{ + pctr_t *pc; + + pc = ckd_calloc(1, sizeof(pctr_t)); + pc->name = ckd_salloc(nm); + pc->count = 0; + + return pc; +} + +void +pctr_reset(pctr_t * ctr) +{ + ctr->count = 0; +} + + +void +pctr_increment(pctr_t * ctr, int32 inc) +{ + ctr->count += inc; + /* E_INFO("Name %s, Count %d, inc %d\n",ctr->name, ctr->count, inc); */ +} + +void +pctr_print(FILE * fp, pctr_t * ctr) +{ + fprintf(fp, "CTR:"); + fprintf(fp, "[%d %s]", ctr->count, ctr->name); +} + +void +pctr_free(pctr_t * pc) +{ + if (pc) { + if (pc->name) + ckd_free(pc->name); + } + ckd_free(pc); +} + + +#if defined(_WIN32) && !defined(GNUWINCE) && !defined(__SYMBIAN32__) + +#define TM_LOWSCALE 1e-7 +#define TM_HIGHSCALE (4294967296.0 * TM_LOWSCALE); + +static float64 +make_sec(FILETIME * tm) +{ + float64 dt; + + dt = tm->dwLowDateTime * TM_LOWSCALE; + dt += tm->dwHighDateTime * TM_HIGHSCALE; + + return (dt); +} + +#else /* NOT WINDOWS */ + +static float64 +make_sec(struct timeval *s) +{ + return (s->tv_sec + s->tv_usec * 0.000001); +} + +#endif + + +void +ptmr_start(ptmr_t * tm) +{ +#if (! defined(_WIN32)) || defined(GNUWINCE) || defined(__SYMBIAN32__) + struct timeval e_start; /* Elapsed time */ + +#if (! defined(_HPUX_SOURCE)) && (! 
defined(__SYMBIAN32__)) + struct rusage start; /* CPU time */ + + /* Unix but not HPUX */ + getrusage(RUSAGE_SELF, &start); + tm->start_cpu = make_sec(&start.ru_utime) + make_sec(&start.ru_stime); +#endif + /* Unix + HP */ + gettimeofday(&e_start, 0); + tm->start_elapsed = make_sec(&e_start); +#elif defined(_WIN32_WP) + tm->start_cpu = GetTickCount64() / 1000; + tm->start_elapsed = GetTickCount64() / 1000; +#elif defined(_WIN32_WCE) + /* No GetProcessTimes() on WinCE. (Note CPU time will be bogus) */ + tm->start_cpu = GetTickCount() / 1000; + tm->start_elapsed = GetTickCount() / 1000; +#else + HANDLE pid; + FILETIME t_create, t_exit, kst, ust; + + /* PC */ + pid = GetCurrentProcess(); + GetProcessTimes(pid, &t_create, &t_exit, &kst, &ust); + tm->start_cpu = make_sec(&ust) + make_sec(&kst); + + tm->start_elapsed = (float64) clock() / CLOCKS_PER_SEC; +#endif +} + + +void +ptmr_stop(ptmr_t * tm) +{ + float64 dt_cpu, dt_elapsed; + +#if (! defined(_WIN32)) || defined(GNUWINCE) || defined(__SYMBIAN32__) + struct timeval e_stop; /* Elapsed time */ + +#if (! defined(_HPUX_SOURCE)) && (! defined(__SYMBIAN32__)) + struct rusage stop; /* CPU time */ + + /* Unix but not HPUX */ + getrusage(RUSAGE_SELF, &stop); + dt_cpu = + make_sec(&stop.ru_utime) + make_sec(&stop.ru_stime) - + tm->start_cpu; +#else + dt_cpu = 0.0; +#endif + /* Unix + HP */ + gettimeofday(&e_stop, 0); + dt_elapsed = (make_sec(&e_stop) - tm->start_elapsed); +#elif defined(_WIN32_WP) + dt_cpu = GetTickCount64() / 1000 - tm->start_cpu; + dt_elapsed = GetTickCount64() / 1000 - tm->start_elapsed; +#elif defined(_WIN32_WCE) + /* No GetProcessTimes() on WinCE. 
(Note CPU time will be bogus) */ + dt_cpu = GetTickCount() / 1000 - tm->start_cpu; + dt_elapsed = GetTickCount() / 1000 - tm->start_elapsed; +#else + HANDLE pid; + FILETIME t_create, t_exit, kst, ust; + + /* PC */ + pid = GetCurrentProcess(); + GetProcessTimes(pid, &t_create, &t_exit, &kst, &ust); + dt_cpu = make_sec(&ust) + make_sec(&kst) - tm->start_cpu; + dt_elapsed = ((float64) clock() / CLOCKS_PER_SEC) - tm->start_elapsed; +#endif + + tm->t_cpu += dt_cpu; + tm->t_elapsed += dt_elapsed; + + tm->t_tot_cpu += dt_cpu; + tm->t_tot_elapsed += dt_elapsed; +} + + +void +ptmr_reset(ptmr_t * tm) +{ + tm->t_cpu = 0.0; + tm->t_elapsed = 0.0; +} + + +void +ptmr_init(ptmr_t * tm) +{ + tm->t_cpu = 0.0; + tm->t_elapsed = 0.0; + tm->t_tot_cpu = 0.0; + tm->t_tot_elapsed = 0.0; +} + + +void +ptmr_reset_all(ptmr_t * tm) +{ + for (; tm->name; tm++) + ptmr_reset(tm); +} + + +void +ptmr_print_all(FILE * fp, ptmr_t * tm, float64 norm) +{ + if (norm != 0.0) { + norm = 1.0 / norm; + for (; tm->name; tm++) + fprintf(fp, " %6.2fx %s", tm->t_cpu * norm, tm->name); + } +} + + +int32 +host_endian(void) +{ + FILE *fp; + int32 BYTE_ORDER_MAGIC; + char *file; + char buf[8]; + int32 k, endian; + + file = "/tmp/__EnDiAn_TeSt__"; + + if ((fp = fopen(file, "wb")) == NULL) { + E_ERROR("Failed to open file '%s' for writing", file); + return -1; + } + + BYTE_ORDER_MAGIC = (int32) 0x11223344; + + k = (int32) BYTE_ORDER_MAGIC; + if (fwrite(&k, sizeof(int32), 1, fp) != 1) { + E_ERROR("Failed to write to file '%s'\n", file); + fclose(fp); + unlink(file); + return -1; + } + + fclose(fp); + if ((fp = fopen(file, "rb")) == NULL) { + E_ERROR_SYSTEM("Failed to open file '%s' for reading", file); + unlink(file); + return -1; + } + if (fread(buf, 1, sizeof(int32), fp) != sizeof(int32)) { + E_ERROR("Failed to read from file '%s'\n", file); + fclose(fp); + unlink(file); + return -1; + } + fclose(fp); + unlink(file); + + /* If buf[0] == lsB of BYTE_ORDER_MAGIC, we are little-endian */ + endian = (buf[0] == 
(BYTE_ORDER_MAGIC & 0x000000ff)) ? 1 : 0; + + return (endian); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/sbthread.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/sbthread.c new file mode 100644 index 0000000000000000000000000000000000000000..7c3bfec19b241e7e4db428c6691d8a5ecb6ac852 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/sbthread.c @@ -0,0 +1,739 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2008 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/** + * @file sbthread.c + * @brief Simple portable thread functions + * @author David Huggins-Daines + */ + +#include + +#include "sphinxbase/sbthread.h" +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/err.h" + +/* + * Platform-specific parts: threads, mutexes, and signals. + */ +#if (defined(_WIN32) || defined(__CYGWIN__)) && !defined(__SYMBIAN32__) +#define _WIN32_WINNT 0x0400 +#include + +struct sbthread_s { + cmd_ln_t *config; + sbmsgq_t *msgq; + sbthread_main func; + void *arg; + HANDLE th; + DWORD tid; +}; + +struct sbmsgq_s { + /* Ringbuffer for passing messages. */ + char *data; + size_t depth; + size_t out; + size_t nbytes; + + /* Current message is stored here. 
*/ + char *msg; + size_t msglen; + CRITICAL_SECTION mtx; + HANDLE evt; +}; + +struct sbevent_s { + HANDLE evt; +}; + +struct sbmtx_s { + CRITICAL_SECTION mtx; +}; + +DWORD WINAPI +sbthread_internal_main(LPVOID arg) +{ + sbthread_t *th = (sbthread_t *)arg; + int rv; + + rv = (*th->func)(th); + return (DWORD)rv; +} + +sbthread_t * +sbthread_start(cmd_ln_t *config, sbthread_main func, void *arg) +{ + sbthread_t *th; + + th = ckd_calloc(1, sizeof(*th)); + th->config = config; + th->func = func; + th->arg = arg; + th->msgq = sbmsgq_init(256); + th->th = CreateThread(NULL, 0, sbthread_internal_main, th, 0, &th->tid); + if (th->th == NULL) { + sbthread_free(th); + return NULL; + } + return th; +} + +int +sbthread_wait(sbthread_t *th) +{ + DWORD rv, exit; + + /* It has already been joined. */ + if (th->th == NULL) + return -1; + + rv = WaitForSingleObject(th->th, INFINITE); + if (rv == WAIT_FAILED) { + E_ERROR("Failed to join thread: WAIT_FAILED\n"); + return -1; + } + GetExitCodeThread(th->th, &exit); + CloseHandle(th->th); + th->th = NULL; + return (int)exit; +} + +static DWORD +cond_timed_wait(HANDLE cond, int sec, int nsec) +{ + DWORD rv; + if (sec == -1) { + rv = WaitForSingleObject(cond, INFINITE); + } + else { + DWORD ms; + + ms = sec * 1000 + nsec / (1000*1000); + rv = WaitForSingleObject(cond, ms); + } + return rv; +} + +/* Updated to use Unicode */ +sbevent_t * +sbevent_init(void) +{ + sbevent_t *evt; + + evt = ckd_calloc(1, sizeof(*evt)); + evt->evt = CreateEventW(NULL, FALSE, FALSE, NULL); + if (evt->evt == NULL) { + ckd_free(evt); + return NULL; + } + return evt; +} + +void +sbevent_free(sbevent_t *evt) +{ + CloseHandle(evt->evt); + ckd_free(evt); +} + +int +sbevent_signal(sbevent_t *evt) +{ + return SetEvent(evt->evt) ? 
0 : -1; +} + +int +sbevent_wait(sbevent_t *evt, int sec, int nsec) +{ + DWORD rv; + + rv = cond_timed_wait(evt->evt, sec, nsec); + return rv; +} + +sbmtx_t * +sbmtx_init(void) +{ + sbmtx_t *mtx; + + mtx = ckd_calloc(1, sizeof(*mtx)); + InitializeCriticalSection(&mtx->mtx); + return mtx; +} + +int +sbmtx_trylock(sbmtx_t *mtx) +{ + return TryEnterCriticalSection(&mtx->mtx) ? 0 : -1; +} + +int +sbmtx_lock(sbmtx_t *mtx) +{ + EnterCriticalSection(&mtx->mtx); + return 0; +} + +int +sbmtx_unlock(sbmtx_t *mtx) +{ + LeaveCriticalSection(&mtx->mtx); + return 0; +} + +void +sbmtx_free(sbmtx_t *mtx) +{ + DeleteCriticalSection(&mtx->mtx); + ckd_free(mtx); +} + +sbmsgq_t * +sbmsgq_init(size_t depth) +{ + sbmsgq_t *msgq; + + msgq = ckd_calloc(1, sizeof(*msgq)); + msgq->depth = depth; + msgq->evt = CreateEventW(NULL, FALSE, FALSE, NULL); + if (msgq->evt == NULL) { + ckd_free(msgq); + return NULL; + } + InitializeCriticalSection(&msgq->mtx); + msgq->data = ckd_calloc(depth, 1); + msgq->msg = ckd_calloc(depth, 1); + return msgq; +} + +void +sbmsgq_free(sbmsgq_t *msgq) +{ + CloseHandle(msgq->evt); + ckd_free(msgq->data); + ckd_free(msgq->msg); + ckd_free(msgq); +} + +int +sbmsgq_send(sbmsgq_t *q, size_t len, void const *data) +{ + char const *cdata = (char const *)data; + size_t in; + + /* Don't allow things bigger than depth to be sent! */ + if (len + sizeof(len) > q->depth) + return -1; + + if (q->nbytes + len + sizeof(len) > q->depth) + WaitForSingleObject(q->evt, INFINITE); + + /* Lock things while we manipulate the buffer (FIXME: this + actually should have been atomic with the wait above ...) */ + EnterCriticalSection(&q->mtx); + in = (q->out + q->nbytes) % q->depth; + /* First write the size of the message. */ + if (in + sizeof(len) > q->depth) { + /* Handle the annoying case where the size field gets wrapped around. 
*/ + size_t len1 = q->depth - in; + memcpy(q->data + in, &len, len1); + memcpy(q->data, ((char *)&len) + len1, sizeof(len) - len1); + q->nbytes += sizeof(len); + in = sizeof(len) - len1; + } + else { + memcpy(q->data + in, &len, sizeof(len)); + q->nbytes += sizeof(len); + in += sizeof(len); + } + + /* Now write the message body. */ + if (in + len > q->depth) { + /* Handle wraparound. */ + size_t len1 = q->depth - in; + memcpy(q->data + in, cdata, len1); + q->nbytes += len1; + cdata += len1; + len -= len1; + in = 0; + } + memcpy(q->data + in, cdata, len); + q->nbytes += len; + + /* Signal the condition variable. */ + SetEvent(q->evt); + /* Unlock. */ + LeaveCriticalSection(&q->mtx); + + return 0; +} + +/* Wait up to sec seconds plus nsec nanoseconds (sec == -1: forever) for a message, copy it out of the ring buffer into q->msg and return it; returns NULL when the wait does not succeed. */ +void * +sbmsgq_wait(sbmsgq_t *q, size_t *out_len, int sec, int nsec) +{ + char *outptr; + size_t len; + + /* Wait for data to be available. */ + if (q->nbytes == 0) { + if (cond_timed_wait(q->evt, sec, nsec) != WAIT_OBJECT_0) + /* Timed out (WAIT_TIMEOUT) or failed: only WAIT_OBJECT_0 means the event fired, so never read from an empty queue. */ + return NULL; + } + /* Lock to manipulate the queue (FIXME) */ + EnterCriticalSection(&q->mtx); + /* Get the message size. */ + if (q->out + sizeof(q->msglen) > q->depth) { + /* Handle annoying wraparound case. */ + size_t len1 = q->depth - q->out; + memcpy(&q->msglen, q->data + q->out, len1); + memcpy(((char *)&q->msglen) + len1, q->data, + sizeof(q->msglen) - len1); + q->out = sizeof(q->msglen) - len1; + } + else { + memcpy(&q->msglen, q->data + q->out, sizeof(q->msglen)); + q->out += sizeof(q->msglen); + } + q->nbytes -= sizeof(q->msglen); + /* Get the message body. */ + outptr = q->msg; + len = q->msglen; + if (q->out + q->msglen > q->depth) { + /* Handle wraparound. */ + size_t len1 = q->depth - q->out; + memcpy(outptr, q->data + q->out, len1); + outptr += len1; + len -= len1; + q->nbytes -= len1; + q->out = 0; + } + memcpy(outptr, q->data + q->out, len); + q->nbytes -= len; + q->out += len; + + /* Signal the condition variable. */ + SetEvent(q->evt); + /* Unlock. 
*/ + LeaveCriticalSection(&q->mtx); + if (out_len) + *out_len = q->msglen; + return q->msg; +} + +#else /* POSIX */ +#include <pthread.h> +#include <sys/time.h> + +struct sbthread_s { + cmd_ln_t *config; + sbmsgq_t *msgq; + sbthread_main func; + void *arg; + pthread_t th; +}; + +struct sbmsgq_s { + /* Ringbuffer for passing messages. */ + char *data; + size_t depth; + size_t out; + size_t nbytes; + + /* Current message is stored here. */ + char *msg; + size_t msglen; + pthread_mutex_t mtx; + pthread_cond_t cond; +}; + +struct sbevent_s { + pthread_mutex_t mtx; + pthread_cond_t cond; + int signalled; +}; + +struct sbmtx_s { + pthread_mutex_t mtx; +}; + +static void * +sbthread_internal_main(void *arg) +{ + sbthread_t *th = (sbthread_t *)arg; + int rv; + + rv = (*th->func)(th); + return (void *)(long)rv; +} + +/* Allocate a thread object with a 1024-byte message queue and start func in a new thread, passing it the thread object; returns NULL if the thread cannot be created. */ +sbthread_t * +sbthread_start(cmd_ln_t *config, sbthread_main func, void *arg) +{ + sbthread_t *th; + int rv; + + th = ckd_calloc(1, sizeof(*th)); + th->config = config; + th->func = func; + th->arg = arg; + th->msgq = sbmsgq_init(1024); + if ((rv = pthread_create(&th->th, NULL, &sbthread_internal_main, th)) != 0) { + E_ERROR("Failed to create thread: %d\n", rv); + th->th = (pthread_t)-1; /* never started: mark as already joined so sbthread_free() does not pthread_join() it */ + sbthread_free(th); + return NULL; + } + return th; +} + +int +sbthread_wait(sbthread_t *th) +{ + void *exit; + int rv; + + /* It has already been joined. 
*/ + if (th->th == (pthread_t)-1) + return -1; + + rv = pthread_join(th->th, &exit); + if (rv != 0) { + E_ERROR("Failed to join thread: %d\n", rv); + return -1; + } + th->th = (pthread_t)-1; + return (int)(long)exit; +} + +sbmsgq_t * +sbmsgq_init(size_t depth) +{ + sbmsgq_t *msgq; + + msgq = ckd_calloc(1, sizeof(*msgq)); + msgq->depth = depth; + if (pthread_cond_init(&msgq->cond, NULL) != 0) { + ckd_free(msgq); + return NULL; + } + if (pthread_mutex_init(&msgq->mtx, NULL) != 0) { + pthread_cond_destroy(&msgq->cond); + ckd_free(msgq); + return NULL; + } + msgq->data = ckd_calloc(depth, 1); + msgq->msg = ckd_calloc(depth, 1); + return msgq; +} + +void +sbmsgq_free(sbmsgq_t *msgq) +{ + pthread_mutex_destroy(&msgq->mtx); + pthread_cond_destroy(&msgq->cond); + ckd_free(msgq->data); + ckd_free(msgq->msg); + ckd_free(msgq); +} + +/* Append a length-prefixed message to the ring buffer, blocking while there is not enough free space; returns 0 on success, -1 if the message can never fit or the wait fails. */ +int +sbmsgq_send(sbmsgq_t *q, size_t len, void const *data) +{ + size_t in; + + /* Don't allow things bigger than depth to be sent! */ + if (len + sizeof(len) > q->depth) + return -1; + + /* Lock the condition variable while we manipulate the buffer. */ + pthread_mutex_lock(&q->mtx); + while (q->nbytes + len + sizeof(len) > q->depth) { + /* Unlock and wait for space; loop because a wakeup may be spurious or may not have freed enough room. */ + if (pthread_cond_wait(&q->cond, &q->mtx) != 0) { + /* The wait itself failed, don't send anything. */ + pthread_mutex_unlock(&q->mtx); + return -1; + } + /* Mutex is now locked again. */ + } + in = (q->out + q->nbytes) % q->depth; + + /* First write the size of the message. */ + if (in + sizeof(len) > q->depth) { + /* Handle the annoying case where the size field gets wrapped around. */ + size_t len1 = q->depth - in; + memcpy(q->data + in, &len, len1); + memcpy(q->data, ((char *)&len) + len1, sizeof(len) - len1); + q->nbytes += sizeof(len); + in = sizeof(len) - len1; + } + else { + memcpy(q->data + in, &len, sizeof(len)); + q->nbytes += sizeof(len); + in += sizeof(len); + } + + /* Now write the message body. */ + if (in + len > q->depth) { + /* Handle wraparound. 
*/ + size_t len1 = q->depth - in; + memcpy(q->data + in, data, len1); + q->nbytes += len1; + data = (char const *)data + len1; + len -= len1; + in = 0; + } + memcpy(q->data + in, data, len); + q->nbytes += len; + + /* Signal the condition variable. */ + pthread_cond_signal(&q->cond); + /* Unlock it, we have nothing else to do. */ + pthread_mutex_unlock(&q->mtx); + return 0; +} + +/* Wait on cond with mtx held: forever when sec == -1, otherwise until sec seconds plus nsec nanoseconds from now. Returns the pthread_cond_wait()/pthread_cond_timedwait() result. */ +static int +cond_timed_wait(pthread_cond_t *cond, pthread_mutex_t *mtx, int sec, int nsec) +{ + int rv; + if (sec == -1) { + rv = pthread_cond_wait(cond, mtx); + } + else { + struct timeval now; + struct timespec end; + + gettimeofday(&now, NULL); + end.tv_sec = now.tv_sec + sec; + end.tv_nsec = now.tv_usec * 1000 + nsec; + if (end.tv_nsec >= (1000*1000*1000)) { + end.tv_sec += end.tv_nsec / (1000*1000*1000); /* carry whole seconds into the deadline itself, not into the unused sec argument */ + end.tv_nsec = end.tv_nsec % (1000*1000*1000); + } + rv = pthread_cond_timedwait(cond, mtx, &end); + } + return rv; +} + +void * +sbmsgq_wait(sbmsgq_t *q, size_t *out_len, int sec, int nsec) +{ + char *outptr; + size_t len; + + /* Lock the condition variable while we manipulate nmsg. */ + pthread_mutex_lock(&q->mtx); + if (q->nbytes == 0) { + /* Unlock the condition variable and wait for a signal. */ + if (cond_timed_wait(&q->cond, &q->mtx, sec, nsec) != 0) { + /* Timed out or something... */ + pthread_mutex_unlock(&q->mtx); + return NULL; + } + /* Condition variable is now locked again. */ + } + /* Get the message size. */ + if (q->out + sizeof(q->msglen) > q->depth) { + /* Handle annoying wraparound case. */ + size_t len1 = q->depth - q->out; + memcpy(&q->msglen, q->data + q->out, len1); + memcpy(((char *)&q->msglen) + len1, q->data, + sizeof(q->msglen) - len1); + q->out = sizeof(q->msglen) - len1; + } + else { + memcpy(&q->msglen, q->data + q->out, sizeof(q->msglen)); + q->out += sizeof(q->msglen); + } + q->nbytes -= sizeof(q->msglen); + /* Get the message body. */ + outptr = q->msg; + len = q->msglen; + if (q->out + q->msglen > q->depth) { + /* Handle wraparound. 
*/ + size_t len1 = q->depth - q->out; + memcpy(outptr, q->data + q->out, len1); + outptr += len1; + len -= len1; + q->nbytes -= len1; + q->out = 0; + } + memcpy(outptr, q->data + q->out, len); + q->nbytes -= len; + q->out += len; + + /* Signal the condition variable. */ + pthread_cond_signal(&q->cond); + /* Unlock the condition variable, we are done. */ + pthread_mutex_unlock(&q->mtx); + if (out_len) + *out_len = q->msglen; + return q->msg; +} + +sbevent_t * +sbevent_init(void) +{ + sbevent_t *evt; + int rv; + + evt = ckd_calloc(1, sizeof(*evt)); + if ((rv = pthread_mutex_init(&evt->mtx, NULL)) != 0) { + E_ERROR("Failed to initialize mutex: %d\n", rv); + ckd_free(evt); + return NULL; + } + if ((rv = pthread_cond_init(&evt->cond, NULL)) != 0) { + E_ERROR_SYSTEM("Failed to initialize mutex: %d\n", rv); + pthread_mutex_destroy(&evt->mtx); + ckd_free(evt); + return NULL; + } + return evt; +} + +void +sbevent_free(sbevent_t *evt) +{ + pthread_mutex_destroy(&evt->mtx); + pthread_cond_destroy(&evt->cond); + ckd_free(evt); +} + +int +sbevent_signal(sbevent_t *evt) +{ + int rv; + + pthread_mutex_lock(&evt->mtx); + evt->signalled = TRUE; + rv = pthread_cond_signal(&evt->cond); + pthread_mutex_unlock(&evt->mtx); + return rv; +} + +int +sbevent_wait(sbevent_t *evt, int sec, int nsec) +{ + int rv = 0; + + /* Lock the mutex before we check its signalled state. */ + pthread_mutex_lock(&evt->mtx); + /* If it's not signalled, then wait until it is. */ + if (!evt->signalled) + rv = cond_timed_wait(&evt->cond, &evt->mtx, sec, nsec); + /* Set its state to unsignalled if we were successful. */ + if (rv == 0) + evt->signalled = FALSE; + /* And unlock its mutex. 
*/ + pthread_mutex_unlock(&evt->mtx); + + return rv; +} + +sbmtx_t * +sbmtx_init(void) +{ + sbmtx_t *mtx; + + mtx = ckd_calloc(1, sizeof(*mtx)); + if (pthread_mutex_init(&mtx->mtx, NULL) != 0) { + ckd_free(mtx); + return NULL; + } + return mtx; +} + +int +sbmtx_trylock(sbmtx_t *mtx) +{ + return pthread_mutex_trylock(&mtx->mtx); +} + +int +sbmtx_lock(sbmtx_t *mtx) +{ + return pthread_mutex_lock(&mtx->mtx); +} + +int +sbmtx_unlock(sbmtx_t *mtx) +{ + return pthread_mutex_unlock(&mtx->mtx); +} + +void +sbmtx_free(sbmtx_t *mtx) +{ + pthread_mutex_destroy(&mtx->mtx); + ckd_free(mtx); +} +#endif /* not WIN32 */ + +cmd_ln_t * +sbthread_config(sbthread_t *th) +{ + return th->config; +} + +void * +sbthread_arg(sbthread_t *th) +{ + return th->arg; +} + +sbmsgq_t * +sbthread_msgq(sbthread_t *th) +{ + return th->msgq; +} + +int +sbthread_send(sbthread_t *th, size_t len, void const *data) +{ + return sbmsgq_send(th->msgq, len, data); +} + +void +sbthread_free(sbthread_t *th) +{ + sbthread_wait(th); + sbmsgq_free(th->msgq); + ckd_free(th); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/slamch.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/slamch.c new file mode 100644 index 0000000000000000000000000000000000000000..260628dd3ff6fecc9a4753e7ca3532cdb7691c73 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/slamch.c @@ -0,0 +1,1029 @@ +/* src/slamch.f -- translated by f2c (version 20050501). 
+ You must link the resulting object file with libf2c: + on Microsoft Windows system, link with libf2c.lib; + on Linux or Unix systems, link with .../path/to/libf2c.a -lm + or, if you install libf2c.a in a standard place, with -lf2c -lm + -- in that order, at the end of the command line, as in + cc *.o -lf2c -lm + Source for libf2c is in /netlib/f2c/libf2c.zip, e.g., + + http://www.netlib.org/f2c/libf2c.zip +*/ + +#include "sphinxbase/f2c.h" + +#ifdef _MSC_VER +#pragma warning (disable: 4244) +#endif + +/* Table of constant values */ + +static integer c__1 = 1; +static real c_b32 = 0.f; + +doublereal +slamch_(char *cmach, ftnlen cmach_len) +{ + /* Initialized data */ + (void)cmach_len; + static logical first = TRUE_; + + /* System generated locals */ + integer i__1; + real ret_val; + + /* Builtin functions */ + double pow_ri(real *, integer *); + + /* Local variables */ + static real t; + static integer it; + static real rnd, eps, base; + static integer beta; + static real emin, prec, emax; + static integer imin, imax; + static logical lrnd; + static real rmin, rmax, rmach; + extern logical lsame_(char *, char *, ftnlen, ftnlen); + static real small, sfmin; + extern /* Subroutine */ int slamc2_(integer *, integer *, logical *, real + *, integer *, real *, integer *, + real *); + + +/* -- LAPACK auxiliary routine (version 3.0) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ +/* Courant Institute, Argonne National Lab, and Rice University */ +/* October 31, 1992 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* SLAMCH determines single precision machine parameters. 
*/ + +/* Arguments */ +/* ========= */ + +/* CMACH (input) CHARACTER*1 */ +/* Specifies the value to be returned by SLAMCH: */ +/* = 'E' or 'e', SLAMCH := eps */ +/* = 'S' or 's , SLAMCH := sfmin */ +/* = 'B' or 'b', SLAMCH := base */ +/* = 'P' or 'p', SLAMCH := eps*base */ +/* = 'N' or 'n', SLAMCH := t */ +/* = 'R' or 'r', SLAMCH := rnd */ +/* = 'M' or 'm', SLAMCH := emin */ +/* = 'U' or 'u', SLAMCH := rmin */ +/* = 'L' or 'l', SLAMCH := emax */ +/* = 'O' or 'o', SLAMCH := rmax */ + +/* where */ + +/* eps = relative machine precision */ +/* sfmin = safe minimum, such that 1/sfmin does not overflow */ +/* base = base of the machine */ +/* prec = eps*base */ +/* t = number of (base) digits in the mantissa */ +/* rnd = 1.0 when rounding occurs in addition, 0.0 otherwise */ +/* emin = minimum exponent before (gradual) underflow */ +/* rmin = underflow threshold - base**(emin-1) */ +/* emax = largest exponent before overflow */ +/* rmax = overflow threshold - (base**emax)*(1-eps) */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Save statement .. */ +/* .. */ +/* .. Data statements .. */ +/* .. */ +/* .. Executable Statements .. */ + + if (first) { + first = FALSE_; + slamc2_(&beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax); + base = (real) beta; + t = (real) it; + if (lrnd) { + rnd = 1.f; + i__1 = 1 - it; + eps = pow_ri(&base, &i__1) / 2; + } + else { + rnd = 0.f; + i__1 = 1 - it; + eps = pow_ri(&base, &i__1); + } + prec = eps * base; + emin = (real) imin; + emax = (real) imax; + sfmin = rmin; + small = 1.f / rmax; + if (small >= sfmin) { + +/* Use SMALL plus a bit, to avoid the possibility of rounding */ +/* causing overflow when computing 1/sfmin. 
*/ + + sfmin = small * (eps + 1.f); + } + } + + if (lsame_(cmach, "E", (ftnlen) 1, (ftnlen) 1)) { + rmach = eps; + } + else if (lsame_(cmach, "S", (ftnlen) 1, (ftnlen) 1)) { + rmach = sfmin; + } + else if (lsame_(cmach, "B", (ftnlen) 1, (ftnlen) 1)) { + rmach = base; + } + else if (lsame_(cmach, "P", (ftnlen) 1, (ftnlen) 1)) { + rmach = prec; + } + else if (lsame_(cmach, "N", (ftnlen) 1, (ftnlen) 1)) { + rmach = t; + } + else if (lsame_(cmach, "R", (ftnlen) 1, (ftnlen) 1)) { + rmach = rnd; + } + else if (lsame_(cmach, "M", (ftnlen) 1, (ftnlen) 1)) { + rmach = emin; + } + else if (lsame_(cmach, "U", (ftnlen) 1, (ftnlen) 1)) { + rmach = rmin; + } + else if (lsame_(cmach, "L", (ftnlen) 1, (ftnlen) 1)) { + rmach = emax; + } + else if (lsame_(cmach, "O", (ftnlen) 1, (ftnlen) 1)) { + rmach = rmax; + } + + ret_val = rmach; + return ret_val; + +/* End of SLAMCH */ + +} /* slamch_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int +slamc1_(integer * beta, integer * t, logical * rnd, logical * ieee1) +{ + /* Initialized data */ + + static logical first = TRUE_; + + /* System generated locals */ + real r__1, r__2; + + /* Local variables */ + static real a, b, c__, f, t1, t2; + static integer lt; + static real one, qtr; + static logical lrnd; + static integer lbeta; + static real savec; + static logical lieee1; + extern doublereal slamc3_(real *, real *); + + +/* -- LAPACK auxiliary routine (version 3.0) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ +/* Courant Institute, Argonne National Lab, and Rice University */ +/* October 31, 1992 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* SLAMC1 determines the machine parameters given by BETA, T, RND, and */ +/* IEEE1. */ + +/* Arguments */ +/* ========= */ + +/* BETA (output) INTEGER */ +/* The base of the machine. */ + +/* T (output) INTEGER */ +/* The number of ( BETA ) digits in the mantissa. 
*/ + +/* RND (output) LOGICAL */ +/* Specifies whether proper rounding ( RND = .TRUE. ) or */ +/* chopping ( RND = .FALSE. ) occurs in addition. This may not */ +/* be a reliable guide to the way in which the machine performs */ +/* its arithmetic. */ + +/* IEEE1 (output) LOGICAL */ +/* Specifies whether rounding appears to be done in the IEEE */ +/* 'round to nearest' style. */ + +/* Further Details */ +/* =============== */ + +/* The routine is based on the routine ENVRON by Malcolm and */ +/* incorporates suggestions by Gentleman and Marovich. See */ + +/* Malcolm M. A. (1972) Algorithms to reveal properties of */ +/* floating-point arithmetic. Comms. of the ACM, 15, 949-951. */ + +/* Gentleman W. M. and Marovich S. B. (1974) More on algorithms */ +/* that reveal properties of floating point arithmetic units. */ +/* Comms. of the ACM, 17, 276-277. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Save statement .. */ +/* .. */ +/* .. Data statements .. */ +/* .. */ +/* .. Executable Statements .. */ + + if (first) { + first = FALSE_; + one = 1.f; + +/* LBETA, LIEEE1, LT and LRND are the local values of BETA, */ +/* IEEE1, T and RND. */ + +/* Throughout this routine we use the function SLAMC3 to ensure */ +/* that relevant values are stored and not held in registers, or */ +/* are not affected by optimizers. */ + +/* Compute a = 2.0**m with the smallest positive integer m such */ +/* that */ + +/* fl( a + 1.0 ) = a. */ + + a = 1.f; + c__ = 1.f; + +/* + WHILE( C.EQ.ONE )LOOP */ + L10: + if (c__ == one) { + a *= 2; + c__ = slamc3_(&a, &one); + r__1 = -a; + c__ = slamc3_(&c__, &r__1); + goto L10; + } +/* + END WHILE */ + +/* Now compute b = 2.0**m with the smallest positive integer m */ +/* such that */ + +/* fl( a + b ) .gt. a. 
*/ + + b = 1.f; + c__ = slamc3_(&a, &b); + +/* + WHILE( C.EQ.A )LOOP */ + L20: + if (c__ == a) { + b *= 2; + c__ = slamc3_(&a, &b); + goto L20; + } +/* + END WHILE */ + +/* Now compute the base. a and c are neighbouring floating point */ +/* numbers in the interval ( beta**t, beta**( t + 1 ) ) and so */ +/* their difference is beta. Adding 0.25 to c is to ensure that it */ +/* is truncated to beta and not ( beta - 1 ). */ + + qtr = one / 4; + savec = c__; + r__1 = -a; + c__ = slamc3_(&c__, &r__1); + lbeta = c__ + qtr; + +/* Now determine whether rounding or chopping occurs, by adding a */ +/* bit less than beta/2 and a bit more than beta/2 to a. */ + + b = (real) lbeta; + r__1 = b / 2; + r__2 = -b / 100; + f = slamc3_(&r__1, &r__2); + c__ = slamc3_(&f, &a); + if (c__ == a) { + lrnd = TRUE_; + } + else { + lrnd = FALSE_; + } + r__1 = b / 2; + r__2 = b / 100; + f = slamc3_(&r__1, &r__2); + c__ = slamc3_(&f, &a); + if (lrnd && c__ == a) { + lrnd = FALSE_; + } + +/* Try and decide whether rounding is done in the IEEE 'round to */ +/* nearest' style. B/2 is half a unit in the last place of the two */ +/* numbers A and SAVEC. Furthermore, A is even, i.e. has last bit */ +/* zero, and SAVEC is odd. Thus adding B/2 to A should not change */ +/* A, but adding B/2 to SAVEC should change SAVEC. */ + + r__1 = b / 2; + t1 = slamc3_(&r__1, &a); + r__1 = b / 2; + t2 = slamc3_(&r__1, &savec); + lieee1 = t1 == a && t2 > savec && lrnd; + +/* Now find the mantissa, t. It should be the integer part of */ +/* log to the base beta of a, however it is safer to determine t */ +/* by powering. So we find t as the smallest positive integer for */ +/* which */ + +/* fl( beta**t + 1.0 ) = 1.0. 
*/ + + lt = 0; + a = 1.f; + c__ = 1.f; + +/* + WHILE( C.EQ.ONE )LOOP */ + L30: + if (c__ == one) { + ++lt; + a *= lbeta; + c__ = slamc3_(&a, &one); + r__1 = -a; + c__ = slamc3_(&c__, &r__1); + goto L30; + } +/* + END WHILE */ + + } + + *beta = lbeta; + *t = lt; + *rnd = lrnd; + *ieee1 = lieee1; + return 0; + +/* End of SLAMC1 */ + +} /* slamc1_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int +slamc2_(integer * beta, integer * t, logical * rnd, real * + eps, integer * emin, real * rmin, integer * emax, real * rmax) +{ + /* Initialized data */ + + static logical first = TRUE_; + static logical iwarn = FALSE_; + + /* Format strings */ + static char fmt_9999[] = + "(//\002 WARNING. The value EMIN may be incorre" + "ct:-\002,\002 EMIN = \002,i8,/\002 If, after inspection, the va" + "lue EMIN looks\002,\002 acceptable please comment out \002,/\002" + " the IF block as marked within the code of routine\002,\002 SLAM" + "C2,\002,/\002 otherwise supply EMIN explicitly.\002,/)"; + + /* System generated locals */ + integer i__1; + real r__1, r__2, r__3, r__4, r__5; + + /* Builtin functions */ + double pow_ri(real *, integer *); + integer s_wsfe(cilist *), do_fio(integer *, char *, ftnlen), + e_wsfe(void); + + /* Local variables */ + static real a, b, c__; + static integer i__, lt; + static real one, two; + static logical ieee; + static real half; + static logical lrnd; + static real leps, zero; + static integer lbeta; + static real rbase; + static integer lemin, lemax, gnmin; + static real small; + static integer gpmin; + static real third, lrmin, lrmax, sixth; + static logical lieee1; + extern /* Subroutine */ int slamc1_(integer *, integer *, logical *, + logical *); + extern doublereal slamc3_(real *, real *); + extern /* Subroutine */ int slamc4_(integer *, real *, integer *), + slamc5_(integer *, integer *, integer *, logical *, integer *, + real *); + static integer ngnmin, ngpmin; + + /* Fortran I/O blocks 
*/ + static cilist io___58 = { 0, 6, 0, fmt_9999, 0 }; + + + +/* -- LAPACK auxiliary routine (version 3.0) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ +/* Courant Institute, Argonne National Lab, and Rice University */ +/* October 31, 1992 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* SLAMC2 determines the machine parameters specified in its argument */ +/* list. */ + +/* Arguments */ +/* ========= */ + +/* BETA (output) INTEGER */ +/* The base of the machine. */ + +/* T (output) INTEGER */ +/* The number of ( BETA ) digits in the mantissa. */ + +/* RND (output) LOGICAL */ +/* Specifies whether proper rounding ( RND = .TRUE. ) or */ +/* chopping ( RND = .FALSE. ) occurs in addition. This may not */ +/* be a reliable guide to the way in which the machine performs */ +/* its arithmetic. */ + +/* EPS (output) REAL */ +/* The smallest positive number such that */ + +/* fl( 1.0 - EPS ) .LT. 1.0, */ + +/* where fl denotes the computed value. */ + +/* EMIN (output) INTEGER */ +/* The minimum exponent before (gradual) underflow occurs. */ + +/* RMIN (output) REAL */ +/* The smallest normalized number for the machine, given by */ +/* BASE**( EMIN - 1 ), where BASE is the floating point value */ +/* of BETA. */ + +/* EMAX (output) INTEGER */ +/* The maximum exponent before overflow occurs. */ + +/* RMAX (output) REAL */ +/* The largest positive number for the machine, given by */ +/* BASE**EMAX * ( 1 - EPS ), where BASE is the floating point */ +/* value of BETA. */ + +/* Further Details */ +/* =============== */ + +/* The computation of EPS is based on a routine PARANOIA by */ +/* W. Kahan of the University of California at Berkeley. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. External Subroutines .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Save statement .. 
*/ +/* .. */ +/* .. Data statements .. */ +/* .. */ +/* .. Executable Statements .. */ + + if (first) { + first = FALSE_; + zero = 0.f; + one = 1.f; + two = 2.f; + +/* LBETA, LT, LRND, LEPS, LEMIN and LRMIN are the local values of */ +/* BETA, T, RND, EPS, EMIN and RMIN. */ + +/* Throughout this routine we use the function SLAMC3 to ensure */ +/* that relevant values are stored and not held in registers, or */ +/* are not affected by optimizers. */ + +/* SLAMC1 returns the parameters LBETA, LT, LRND and LIEEE1. */ + + slamc1_(&lbeta, &lt, &lrnd, &lieee1); + +/* Start to find EPS. */ + + b = (real) lbeta; + i__1 = -lt; + a = pow_ri(&b, &i__1); + leps = a; + +/* Try some tricks to see whether or not this is the correct EPS. */ + + b = two / 3; + half = one / 2; + r__1 = -half; + sixth = slamc3_(&b, &r__1); + third = slamc3_(&sixth, &sixth); + r__1 = -half; + b = slamc3_(&third, &r__1); + b = slamc3_(&b, &sixth); + b = dabs(b); + if (b < leps) { + b = leps; + } + + leps = 1.f; + +/* + WHILE( ( LEPS.GT.B ).AND.( B.GT.ZERO ) )LOOP */ + L10: + if (leps > b && b > zero) { + leps = b; + r__1 = half * leps; +/* Computing 5th power */ + r__3 = two, r__4 = r__3, r__3 *= r__3; +/* Computing 2nd power */ + r__5 = leps; + r__2 = r__4 * (r__3 * r__3) * (r__5 * r__5); + c__ = slamc3_(&r__1, &r__2); + r__1 = -c__; + c__ = slamc3_(&half, &r__1); + b = slamc3_(&half, &c__); + r__1 = -b; + c__ = slamc3_(&half, &r__1); + b = slamc3_(&half, &c__); + goto L10; + } +/* + END WHILE */ + + if (a < leps) { + leps = a; + } + +/* Computation of EPS complete. */ + +/* Now find EMIN. Let A = + or - 1, and + or - (1 + BASE**(-3)). */ +/* Keep dividing A by BETA until (gradual) underflow occurs. This */ +/* is detected when we cannot recover the previous A. 
*/ + + rbase = one / lbeta; + small = one; + for (i__ = 1; i__ <= 3; ++i__) { + r__1 = small * rbase; + small = slamc3_(&r__1, &zero); +/* L20: */ + } + a = slamc3_(&one, &small); + slamc4_(&ngpmin, &one, &lbeta); + r__1 = -one; + slamc4_(&ngnmin, &r__1, &lbeta); + slamc4_(&gpmin, &a, &lbeta); + r__1 = -a; + slamc4_(&gnmin, &r__1, &lbeta); + ieee = FALSE_; + + if (ngpmin == ngnmin && gpmin == gnmin) { + if (ngpmin == gpmin) { + lemin = ngpmin; +/* ( Non twos-complement machines, no gradual underflow; */ +/* e.g., VAX ) */ + } + else if (gpmin - ngpmin == 3) { + lemin = ngpmin - 1 + lt; + ieee = TRUE_; +/* ( Non twos-complement machines, with gradual underflow; */ +/* e.g., IEEE standard followers ) */ + } + else { + lemin = min(ngpmin, gpmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } + else if (ngpmin == gpmin && ngnmin == gnmin) { + if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1) { + lemin = max(ngpmin, ngnmin); +/* ( Twos-complement machines, no gradual underflow; */ +/* e.g., CYBER 205 ) */ + } + else { + lemin = min(ngpmin, ngnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } + else if ((i__1 = ngpmin - ngnmin, abs(i__1)) == 1 + && gpmin == gnmin) { + if (gpmin - min(ngpmin, ngnmin) == 3) { + lemin = max(ngpmin, ngnmin) - 1 + lt; +/* ( Twos-complement machines with gradual underflow; */ +/* no known machine ) */ + } + else { + lemin = min(ngpmin, ngnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } + + } + else { +/* Computing MIN */ + i__1 = min(ngpmin, ngnmin), i__1 = min(i__1, gpmin); + lemin = min(i__1, gnmin); +/* ( A guess; no known machine ) */ + iwarn = TRUE_; + } +/* ** */ +/* Comment out this if block if EMIN is ok */ + if (iwarn) { + first = TRUE_; + s_wsfe(&io___58); + do_fio(&c__1, (char *) &lemin, (ftnlen) sizeof(integer)); + e_wsfe(); + } +/* ** */ + +/* Assume IEEE arithmetic if we found denormalised numbers above, */ +/* or if arithmetic seems to round in the IEEE style, determined */ +/* 
in routine SLAMC1. A true IEEE machine should have both things */ +/* true; however, faulty machines may have one or the other. */ + + ieee = ieee || lieee1; + +/* Compute RMIN by successive division by BETA. We could compute */ +/* RMIN as BASE**( EMIN - 1 ), but some machines underflow during */ +/* this computation. */ + + lrmin = 1.f; + i__1 = 1 - lemin; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = lrmin * rbase; + lrmin = slamc3_(&r__1, &zero); +/* L30: */ + } + +/* Finally, call SLAMC5 to compute EMAX and RMAX. */ + + slamc5_(&lbeta, &lt, &lemin, &ieee, &lemax, &lrmax); + } + + *beta = lbeta; + *t = lt; + *rnd = lrnd; + *eps = leps; + *emin = lemin; + *rmin = lrmin; + *emax = lemax; + *rmax = lrmax; + + return 0; + + +/* End of SLAMC2 */ + +} /* slamc2_ */ + + +/* *********************************************************************** */ + +doublereal +slamc3_(real * a, real * b) +{ + /* System generated locals */ + real ret_val; + + +/* -- LAPACK auxiliary routine (version 3.0) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ +/* Courant Institute, Argonne National Lab, and Rice University */ +/* October 31, 1992 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* SLAMC3 is intended to force A and B to be stored prior to doing */ +/* the addition of A and B , for use in situations where optimizers */ +/* might hold one of these in a register. */ + +/* Arguments */ +/* ========= */ + +/* A, B (input) REAL */ +/* The values A and B. */ + +/* ===================================================================== */ + +/* .. Executable Statements .. 
*/ + + ret_val = *a + *b; + + return ret_val; + +/* End of SLAMC3 */ + +} /* slamc3_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int +slamc4_(integer * emin, real * start, integer * base) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Local variables */ + static real a; + static integer i__; + static real b1, b2, c1, c2, d1, d2, one, zero, rbase; + extern doublereal slamc3_(real *, real *); + + +/* -- LAPACK auxiliary routine (version 3.0) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ +/* Courant Institute, Argonne National Lab, and Rice University */ +/* October 31, 1992 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* SLAMC4 is a service routine for SLAMC2. */ + +/* Arguments */ +/* ========= */ + +/* EMIN (output) EMIN */ +/* The minimum exponent before (gradual) underflow, computed by */ +/* setting A = START and dividing by BASE until the previous A */ +/* can not be recovered. */ + +/* START (input) REAL */ +/* The starting point for determining EMIN. */ + +/* BASE (input) INTEGER */ +/* The base of the machine. */ + +/* ===================================================================== */ + +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + + a = *start; + one = 1.f; + rbase = one / *base; + zero = 0.f; + *emin = 1; + r__1 = a * rbase; + b1 = slamc3_(&r__1, &zero); + c1 = a; + c2 = a; + d1 = a; + d2 = a; +/* + WHILE( ( C1.EQ.A ).AND.( C2.EQ.A ).AND. 
*/ +/* $ ( D1.EQ.A ).AND.( D2.EQ.A ) )LOOP */ + L10: + if (c1 == a && c2 == a && d1 == a && d2 == a) { + --(*emin); + a = b1; + r__1 = a / *base; + b1 = slamc3_(&r__1, &zero); + r__1 = b1 * *base; + c1 = slamc3_(&r__1, &zero); + d1 = zero; + i__1 = *base; + for (i__ = 1; i__ <= i__1; ++i__) { + d1 += b1; +/* L20: */ + } + r__1 = a * rbase; + b2 = slamc3_(&r__1, &zero); + r__1 = b2 / rbase; + c2 = slamc3_(&r__1, &zero); + d2 = zero; + i__1 = *base; + for (i__ = 1; i__ <= i__1; ++i__) { + d2 += b2; +/* L30: */ + } + goto L10; + } +/* + END WHILE */ + + return 0; + +/* End of SLAMC4 */ + +} /* slamc4_ */ + + +/* *********************************************************************** */ + +/* Subroutine */ int +slamc5_(integer * beta, integer * p, integer * emin, + logical * ieee, integer * emax, real * rmax) +{ + /* System generated locals */ + integer i__1; + real r__1; + + /* Local variables */ + static integer i__; + static real y, z__; + static integer try__, lexp; + static real oldy; + static integer uexp, nbits; + extern doublereal slamc3_(real *, real *); + static real recbas; + static integer exbits, expsum; + + +/* -- LAPACK auxiliary routine (version 3.0) -- */ +/* Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., */ +/* Courant Institute, Argonne National Lab, and Rice University */ +/* October 31, 1992 */ + +/* .. Scalar Arguments .. */ +/* .. */ + +/* Purpose */ +/* ======= */ + +/* SLAMC5 attempts to compute RMAX, the largest machine floating-point */ +/* number, without overflow. It assumes that EMAX + abs(EMIN) sum */ +/* approximately to a power of 2. It will fail on machines where this */ +/* assumption does not hold, for example, the Cyber 205 (EMIN = -28625, */ +/* EMAX = 28718). It will also fail if the value supplied for EMIN is */ +/* too large (i.e. too close to zero), probably with overflow. */ + +/* Arguments */ +/* ========= */ + +/* BETA (input) INTEGER */ +/* The base of floating-point arithmetic. 
*/ + +/* P (input) INTEGER */ +/* The number of base BETA digits in the mantissa of a */ +/* floating-point value. */ + +/* EMIN (input) INTEGER */ +/* The minimum exponent before (gradual) underflow. */ + +/* IEEE (input) LOGICAL */ +/* A logical flag specifying whether or not the arithmetic */ +/* system is thought to comply with the IEEE standard. */ + +/* EMAX (output) INTEGER */ +/* The largest exponent before overflow */ + +/* RMAX (output) REAL */ +/* The largest machine floating-point number. */ + +/* ===================================================================== */ + +/* .. Parameters .. */ +/* .. */ +/* .. Local Scalars .. */ +/* .. */ +/* .. External Functions .. */ +/* .. */ +/* .. Intrinsic Functions .. */ +/* .. */ +/* .. Executable Statements .. */ + +/* First compute LEXP and UEXP, two powers of 2 that bound */ +/* abs(EMIN). We then assume that EMAX + abs(EMIN) will sum */ +/* approximately to the bound that is closest to abs(EMIN). */ +/* (EMAX is the exponent of the required number RMAX). */ + + lexp = 1; + exbits = 1; + L10: + try__ = lexp << 1; + if (try__ <= -(*emin)) { + lexp = try__; + ++exbits; + goto L10; + } + if (lexp == -(*emin)) { + uexp = lexp; + } + else { + uexp = try__; + ++exbits; + } + +/* Now -LEXP is less than or equal to EMIN, and -UEXP is greater */ +/* than or equal to EMIN. EXBITS is the number of bits needed to */ +/* store the exponent. */ + + if (uexp + *emin > -lexp - *emin) { + expsum = lexp << 1; + } + else { + expsum = uexp << 1; + } + +/* EXPSUM is the exponent range, approximately equal to */ +/* EMAX - EMIN + 1 . */ + + *emax = expsum + *emin - 1; + nbits = exbits + 1 + *p; + +/* NBITS is the total number of bits needed to store a */ +/* floating-point number. */ + + if (nbits % 2 == 1 && *beta == 2) { + +/* Either there are an odd number of bits used to store a */ +/* floating-point number, which is unlikely, or some bits are */ +/* not used in the representation of numbers, which is possible, */ +/* (e.g. 
Cray machines) or the mantissa has an implicit bit, */ +/* (e.g. IEEE machines, Dec Vax machines), which is perhaps the */ +/* most likely. We have to assume the last alternative. */ +/* If this is true, then we need to reduce EMAX by one because */ +/* there must be some way of representing zero in an implicit-bit */ +/* system. On machines like Cray, we are reducing EMAX by one */ +/* unnecessarily. */ + + --(*emax); + } + + if (*ieee) { + +/* Assume we are on an IEEE machine which reserves one exponent */ +/* for infinity and NaN. */ + + --(*emax); + } + +/* Now create RMAX, the largest machine number, which should */ +/* be equal to (1.0 - BETA**(-P)) * BETA**EMAX . */ + +/* First compute 1.0 - BETA**(-P), being careful that the */ +/* result is less than 1.0 . */ + + recbas = 1.f / *beta; + z__ = *beta - 1.f; + y = 0.f; + i__1 = *p; + for (i__ = 1; i__ <= i__1; ++i__) { + z__ *= recbas; + if (y < 1.f) { + oldy = y; + } + y = slamc3_(&y, &z__); +/* L20: */ + } + if (y >= 1.f) { + y = oldy; + } + +/* Now multiply by BETA**EMAX to get RMAX. */ + + i__1 = *emax; + for (i__ = 1; i__ <= i__1; ++i__) { + r__1 = y * *beta; + y = slamc3_(&r__1, &c_b32); +/* L30: */ + } + + *rmax = y; + return 0; + +/* End of SLAMC5 */ + +} /* slamc5_ */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/slapack_lite.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/slapack_lite.c new file mode 100644 index 0000000000000000000000000000000000000000..a81f27fe360a59760b9b6a00d70a4c14880e6de3 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/slapack_lite.c @@ -0,0 +1,1463 @@ +/* +NOTE: This is generated code. Look in README.python for information on + remaking this file. 
+*/ +#include "sphinxbase/f2c.h" + +#ifdef HAVE_CONFIG +#include "config.h" +#else +extern doublereal slamch_(char *); +#define EPSILON slamch_("Epsilon") +#define SAFEMINIMUM slamch_("Safe minimum") +#define PRECISION slamch_("Precision") +#define BASE slamch_("Base") +#endif + + +extern doublereal slapy2_(real *, real *); + + + +/* Table of constant values */ + +static integer c__0 = 0; +static real c_b163 = 0.f; +static real c_b164 = 1.f; +static integer c__1 = 1; +static real c_b181 = -1.f; +static integer c_n1 = -1; + +integer ieeeck_(integer *ispec, real *zero, real *one) +{ + /* System generated locals */ + integer ret_val; + + /* Local variables */ + static real nan1, nan2, nan3, nan4, nan5, nan6, neginf, posinf, negzro, + newzro; + + +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1998 + + + Purpose + ======= + + IEEECK is called from ILAENV to verify that Infinity and + possibly NaN arithmetic is safe (i.e. will not trap). + + Arguments + ========= + + ISPEC (input) INTEGER + Specifies whether to test just for infinity arithmetic + or whether to test for infinity and NaN arithmetic. + = 0: Verify infinity arithmetic only. + = 1: Verify infinity and NaN arithmetic. + + ZERO (input) REAL + Must contain the value 0.0 + This is passed to prevent the compiler from optimizing + away this code. + + ONE (input) REAL + Must contain the value 1.0 + This is passed to prevent the compiler from optimizing + away this code. 
+ + RETURN VALUE: INTEGER + = 0: Arithmetic failed to produce the correct answers + = 1: Arithmetic produced the correct answers +*/ + + ret_val = 1; + + posinf = *one / *zero; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf = -(*one) / *zero; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + negzro = *one / (neginf + *one); + if (negzro != *zero) { + ret_val = 0; + return ret_val; + } + + neginf = *one / negzro; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + newzro = negzro + *zero; + if (newzro != *zero) { + ret_val = 0; + return ret_val; + } + + posinf = *one / newzro; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + neginf *= posinf; + if (neginf >= *zero) { + ret_val = 0; + return ret_val; + } + + posinf *= posinf; + if (posinf <= *one) { + ret_val = 0; + return ret_val; + } + + +/* Return if we were only asked to check infinity arithmetic */ + + if (*ispec == 0) { + return ret_val; + } + + nan1 = posinf + neginf; + + nan2 = posinf / neginf; + + nan3 = posinf / posinf; + + nan4 = posinf * *zero; + + nan5 = neginf * negzro; + + nan6 = nan5 * 0.f; + + if (nan1 == nan1) { + ret_val = 0; + return ret_val; + } + + if (nan2 == nan2) { + ret_val = 0; + return ret_val; + } + + if (nan3 == nan3) { + ret_val = 0; + return ret_val; + } + + if (nan4 == nan4) { + ret_val = 0; + return ret_val; + } + + if (nan5 == nan5) { + ret_val = 0; + return ret_val; + } + + if (nan6 == nan6) { + ret_val = 0; + return ret_val; + } + + return ret_val; +} /* ieeeck_ */ + +integer ilaenv_(integer *ispec, char *name__, char *opts, integer *n1, + integer *n2, integer *n3, integer *n4, ftnlen name_len, ftnlen + opts_len) +{ + /* System generated locals */ + integer ret_val; + + /* Builtin functions */ + /* Subroutine */ int s_copy(char *, char *, ftnlen, ftnlen); + integer s_cmp(char *, char *, ftnlen, ftnlen); + + /* Local variables */ + static integer i__; + static char c1[1], c2[2], c3[3], c4[2]; + static integer 
ic, nb, iz, nx; + static logical cname, sname; + static integer nbmin; + extern integer ieeeck_(integer *, real *, real *); + static char subnam[6]; + + (void)opts; + (void)n3; + (void)opts_len; +/* + -- LAPACK auxiliary routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + June 30, 1999 + + + Purpose + ======= + + ILAENV is called from the LAPACK routines to choose problem-dependent + parameters for the local environment. See ISPEC for a description of + the parameters. + + This version provides a set of parameters which should give good, + but not optimal, performance on many of the currently available + computers. Users are encouraged to modify this subroutine to set + the tuning parameters for their particular machine using the option + and problem size information in the arguments. + + This routine will not function correctly if it is converted to all + lower case. Converting it to all upper case is allowed. + + Arguments + ========= + + ISPEC (input) INTEGER + Specifies the parameter to be returned as the value of + ILAENV. + = 1: the optimal blocksize; if this value is 1, an unblocked + algorithm will give the best performance. + = 2: the minimum block size for which the block routine + should be used; if the usable block size is less than + this value, an unblocked routine should be used. + = 3: the crossover point (in a block routine, for N less + than this value, an unblocked routine should be used) + = 4: the number of shifts, used in the nonsymmetric + eigenvalue routines + = 5: the minimum column dimension for blocking to be used; + rectangular blocks must have dimension at least k by m, + where k is given by ILAENV(2,...) and m by ILAENV(5,...) + = 6: the crossover point for the SVD (when reducing an m by n + matrix to bidiagonal form, if max(m,n)/min(m,n) exceeds + this value, a QR factorization is used first to reduce + the matrix to a triangular form.) 
+ = 7: the number of processors + = 8: the crossover point for the multishift QR and QZ methods + for nonsymmetric eigenvalue problems. + = 9: maximum size of the subproblems at the bottom of the + computation tree in the divide-and-conquer algorithm + (used by xGELSD and xGESDD) + =10: ieee NaN arithmetic can be trusted not to trap + =11: infinity arithmetic can be trusted not to trap + + NAME (input) CHARACTER*(*) + The name of the calling subroutine, in either upper case or + lower case. + + OPTS (input) CHARACTER*(*) + The character options to the subroutine NAME, concatenated + into a single character string. For example, UPLO = 'U', + TRANS = 'T', and DIAG = 'N' for a triangular routine would + be specified as OPTS = 'UTN'. + + N1 (input) INTEGER + N2 (input) INTEGER + N3 (input) INTEGER + N4 (input) INTEGER + Problem dimensions for the subroutine NAME; these may not all + be required. + + (ILAENV) (output) INTEGER + >= 0: the value of the parameter specified by ISPEC + < 0: if ILAENV = -k, the k-th argument had an illegal value. + + Further Details + =============== + + The following conventions have been used when calling ILAENV from the + LAPACK routines: + 1) OPTS is a concatenation of all of the character options to + subroutine NAME, in the same order that they appear in the + argument list for NAME, even if they are not used in determining + the value of the parameter specified by ISPEC. + 2) The problem dimensions N1, N2, N3, N4 are specified in the order + that they appear in the argument list for NAME. N1 is used + first, N2 second, and so on, and unused problem dimensions are + passed a value of -1. + 3) The parameter value returned by ILAENV is checked for validity in + the calling subroutine. 
For example, ILAENV is used to retrieve + the optimal blocksize for STRTRI as follows: + + NB = ILAENV( 1, 'STRTRI', UPLO // DIAG, N, -1, -1, -1 ) + IF( NB.LE.1 ) NB = MAX( 1, N ) + + ===================================================================== +*/ + + + switch (*ispec) { + case 1: goto L100; + case 2: goto L100; + case 3: goto L100; + case 4: goto L400; + case 5: goto L500; + case 6: goto L600; + case 7: goto L700; + case 8: goto L800; + case 9: goto L900; + case 10: goto L1000; + case 11: goto L1100; + } + +/* Invalid value for ISPEC */ + + ret_val = -1; + return ret_val; + +L100: + +/* Convert NAME to upper case if the first character is lower case. */ + + ret_val = 1; + s_copy(subnam, name__, (ftnlen)6, name_len); + ic = *(unsigned char *)subnam; + iz = 'Z'; + if (iz == 90 || iz == 122) { + +/* ASCII character set */ + + if (ic >= 97 && ic <= 122) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 97 && ic <= 122) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L10: */ + } + } + + } else if (iz == 233 || iz == 169) { + +/* EBCDIC character set */ + + if ((ic >= 129 && ic <= 137) || (ic >= 145 && ic <= 153) || (ic >= 162 && + ic <= 169)) { + *(unsigned char *)subnam = (char) (ic + 64); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if ((ic >= 129 && ic <= 137) || (ic >= 145 && ic <= 153) || (ic >= + 162 && ic <= 169)) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic + 64); + } +/* L20: */ + } + } + + } else if (iz == 218 || iz == 250) { + +/* Prime machines: ASCII+128 */ + + if (ic >= 225 && ic <= 250) { + *(unsigned char *)subnam = (char) (ic - 32); + for (i__ = 2; i__ <= 6; ++i__) { + ic = *(unsigned char *)&subnam[i__ - 1]; + if (ic >= 225 && ic <= 250) { + *(unsigned char *)&subnam[i__ - 1] = (char) (ic - 32); + } +/* L30: */ + } + } + } + + *(unsigned char *)c1 = *(unsigned char *)subnam; + sname = 
*(unsigned char *)c1 == 'S' || *(unsigned char *)c1 == 'D'; + cname = *(unsigned char *)c1 == 'C' || *(unsigned char *)c1 == 'Z'; + if (! (cname || sname)) { + return ret_val; + } + s_copy(c2, subnam + 1, (ftnlen)2, (ftnlen)2); + s_copy(c3, subnam + 3, (ftnlen)3, (ftnlen)3); + s_copy(c4, c3 + 1, (ftnlen)2, (ftnlen)2); + + switch (*ispec) { + case 1: goto L110; + case 2: goto L200; + case 3: goto L300; + } + +L110: + +/* + ISPEC = 1: block size + + In these examples, separate code is provided for setting NB for + real and complex. We assume that NB will take the same value in + single or double precision. +*/ + + nb = 1; + + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, + "RQF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen) + 3, (ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) + == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 32; + } else { + nb = 32; + } + } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "PO", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nb = 32; + } else if (sname && s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 
0) { + nb = 64; + } else if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nb = 32; + } else if (s_cmp(c3, "GST", (ftnlen)3, (ftnlen)3) == 0) { + nb = 64; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nb = 32; + } + } + } else if (s_cmp(c2, "GB", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 
0) { + if (sname) { + if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n4 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if (s_cmp(c2, "PB", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } else { + if (*n2 <= 64) { + nb = 1; + } else { + nb = 32; + } + } + } + } else if (s_cmp(c2, "TR", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (s_cmp(c2, "LA", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "UUM", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nb = 64; + } else { + nb = 64; + } + } + } else if (sname && s_cmp(c2, "ST", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "EBZ", (ftnlen)3, (ftnlen)3) == 0) { + nb = 1; + } + } + ret_val = nb; + return ret_val; + +L200: + +/* ISPEC = 2: minimum block size */ + + nbmin = 2; + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( + ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, ( + ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) + { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } else if (s_cmp(c3, "TRI", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 2; + } else { + nbmin = 2; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRF", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nbmin = 8; + } else { + nbmin = 8; + } + } else if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nbmin = 2; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRD", (ftnlen)3, 
(ftnlen)3) == 0) { + nbmin = 2; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } else if (*(unsigned char *)c3 == 'M') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nbmin = 2; + } + } + } + ret_val = nbmin; + return ret_val; + +L300: + +/* ISPEC = 3: crossover point */ + + nx = 0; + if (s_cmp(c2, "GE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "QRF", (ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "RQF", ( + 
ftnlen)3, (ftnlen)3) == 0 || s_cmp(c3, "LQF", (ftnlen)3, ( + ftnlen)3) == 0 || s_cmp(c3, "QLF", (ftnlen)3, (ftnlen)3) == 0) + { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, "HRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } else if (s_cmp(c3, "BRD", (ftnlen)3, (ftnlen)3) == 0) { + if (sname) { + nx = 128; + } else { + nx = 128; + } + } + } else if (s_cmp(c2, "SY", (ftnlen)2, (ftnlen)2) == 0) { + if (sname && s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nx = 32; + } + } else if (cname && s_cmp(c2, "HE", (ftnlen)2, (ftnlen)2) == 0) { + if (s_cmp(c3, "TRD", (ftnlen)3, (ftnlen)3) == 0) { + nx = 32; + } + } else if (sname && s_cmp(c2, "OR", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nx = 128; + } + } + } else if (cname && s_cmp(c2, "UN", (ftnlen)2, (ftnlen)2) == 0) { + if (*(unsigned char *)c3 == 'G') { + if (s_cmp(c4, "QR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "RQ", + (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "LQ", (ftnlen)2, ( + ftnlen)2) == 0 || s_cmp(c4, "QL", (ftnlen)2, (ftnlen)2) == + 0 || s_cmp(c4, "HR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp( + c4, "TR", (ftnlen)2, (ftnlen)2) == 0 || s_cmp(c4, "BR", ( + ftnlen)2, (ftnlen)2) == 0) { + nx = 128; + } + } + } + ret_val = nx; + return ret_val; + +L400: + +/* ISPEC = 4: number of shifts (used by xHSEQR) */ + + ret_val = 6; + return ret_val; + +L500: + +/* ISPEC = 5: minimum column dimension (not used) */ + + ret_val = 2; + return ret_val; + +L600: + +/* ISPEC = 6: crossover point for SVD (used by xGELSS and xGESVD) */ + + ret_val = (integer) ((real) min(*n1,*n2) * 1.6f); + return 
ret_val; + +L700: + +/* ISPEC = 7: number of processors (not used) */ + + ret_val = 1; + return ret_val; + +L800: + +/* ISPEC = 8: crossover point for multishift (used by xHSEQR) */ + + ret_val = 50; + return ret_val; + +L900: + +/* + ISPEC = 9: maximum size of the subproblems at the bottom of the + computation tree in the divide-and-conquer algorithm + (used by xGELSD and xGESDD) +*/ + + ret_val = 25; + return ret_val; + +L1000: + +/* + ISPEC = 10: ieee NaN arithmetic can be trusted not to trap + + IEEECK( 1, ... ) verifies both infinity and NaN arithmetic +*/ + ret_val = 1; + if (ret_val == 1) { + ret_val = ieeeck_(&c__1, &c_b163, &c_b164); + } + return ret_val; + +L1100: + +/* + ISPEC = 11: infinity arithmetic can be trusted not to trap + + IEEECK( 0, ... ) verifies infinity arithmetic only +*/ + ret_val = 1; + if (ret_val == 1) { + ret_val = ieeeck_(&c__0, &c_b163, &c_b164); + } + return ret_val; + +/* End of ILAENV */ + +} /* ilaenv_ */ + +/* Subroutine */ int sposv_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + extern /* Subroutine */ int xerbla_(char *, integer *), spotrf_( + char *, integer *, real *, integer *, integer *), spotrs_( + char *, integer *, integer *, real *, integer *, real *, integer * + , integer *); + + +/* + -- LAPACK driver routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SPOSV computes the solution to a real system of linear equations + A * X = B, + where A is an N-by-N symmetric positive definite matrix and X and B + are N-by-NRHS matrices. + + The Cholesky decomposition is used to factor A as + A = U**T* U, if UPLO = 'U', or + A = L * L**T, if UPLO = 'L', + where U is an upper triangular matrix and L is a lower triangular + matrix. 
The factored form of A is then used to solve the system of + equations A * X = B. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The number of linear equations, i.e., the order of the + matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U**T*U or A = L*L**T. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + B (input/output) REAL array, dimension (LDB,NRHS) + On entry, the N-by-NRHS right hand side matrix B. + On exit, if INFO = 0, the N-by-NRHS solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the leading minor of order i of A is not + positive definite, so the factorization could not be + completed, and the solution has not been computed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + if (! lsame_(uplo, "U") && ! 
lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOSV ", &i__1); + return 0; + } + +/* Compute the Cholesky factorization A = U'*U or A = L*L'. */ + + spotrf_(uplo, n, &a[a_offset], lda, info); + if (*info == 0) { + +/* Solve the system A*X = B, overwriting B with X. */ + + spotrs_(uplo, n, nrhs, &a[a_offset], lda, &b[b_offset], ldb, info); + + } + return 0; + +/* End of SPOSV */ + +} /* sposv_ */ + +/* Subroutine */ int spotf2_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3; + real r__1; + + /* Builtin functions */ + double sqrt(doublereal); + + /* Local variables */ + static integer j; + static real ajj; + extern doublereal sdot_(integer *, real *, integer *, real *, integer *); + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sscal_(integer *, real *, real *, integer *), + sgemv_(char *, integer *, integer *, real *, real *, integer *, + real *, integer *, real *, real *, integer *); + static logical upper; + extern /* Subroutine */ int xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + February 29, 1992 + + + Purpose + ======= + + SPOTF2 computes the Cholesky factorization of a real symmetric + positive definite matrix A. + + The factorization has the form + A = U' * U , if UPLO = 'U', or + A = L * L', if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the unblocked version of the algorithm, calling Level 2 BLAS. 
+ + Arguments + ========= + + UPLO (input) CHARACTER*1 + Specifies whether the upper or lower triangular part of the + symmetric matrix A is stored. + = 'U': Upper triangular + = 'L': Lower triangular + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + n by n upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading n by n lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U'*U or A = L*L'. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -k, the k-th argument had an illegal value + > 0: if INFO = k, the leading minor of order k is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOTF2", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute U(J,J) and test for non-positive-definiteness. 
*/ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j * a_dim1 + 1], &c__1, + &a[j * a_dim1 + 1], &c__1); + if (ajj <= 0.f) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of row J. */ + + if (j < *n) { + i__2 = j - 1; + i__3 = *n - j; + sgemv_("Transpose", &i__2, &i__3, &c_b181, &a[(j + 1) * + a_dim1 + 1], lda, &a[j * a_dim1 + 1], &c__1, &c_b164, + &a[j + (j + 1) * a_dim1], lda); + i__2 = *n - j; + r__1 = 1.f / ajj; + sscal_(&i__2, &r__1, &a[j + (j + 1) * a_dim1], lda); + } +/* L10: */ + } + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__1 = *n; + for (j = 1; j <= i__1; ++j) { + +/* Compute L(J,J) and test for non-positive-definiteness. */ + + i__2 = j - 1; + ajj = a[j + j * a_dim1] - sdot_(&i__2, &a[j + a_dim1], lda, &a[j + + a_dim1], lda); + if (ajj <= 0.f) { + a[j + j * a_dim1] = ajj; + goto L30; + } + ajj = sqrt(ajj); + a[j + j * a_dim1] = ajj; + +/* Compute elements J+1:N of column J. 
*/ + + if (j < *n) { + i__2 = *n - j; + i__3 = j - 1; + sgemv_("No transpose", &i__2, &i__3, &c_b181, &a[j + 1 + + a_dim1], lda, &a[j + a_dim1], lda, &c_b164, &a[j + 1 + + j * a_dim1], &c__1); + i__2 = *n - j; + r__1 = 1.f / ajj; + sscal_(&i__2, &r__1, &a[j + 1 + j * a_dim1], &c__1); + } +/* L20: */ + } + } + goto L40; + +L30: + *info = j; + +L40: + return 0; + +/* End of SPOTF2 */ + +} /* spotf2_ */ + +/* Subroutine */ int spotrf_(char *uplo, integer *n, real *a, integer *lda, + integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, i__1, i__2, i__3, i__4; + + /* Local variables */ + static integer j, jb, nb; + extern logical lsame_(char *, char *); + extern /* Subroutine */ int sgemm_(char *, char *, integer *, integer *, + integer *, real *, real *, integer *, real *, integer *, real *, + real *, integer *); + static logical upper; + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), ssyrk_(char *, char *, integer + *, integer *, real *, real *, integer *, real *, real *, integer * + ), spotf2_(char *, integer *, real *, integer *, + integer *), xerbla_(char *, integer *); + extern integer ilaenv_(integer *, char *, char *, integer *, integer *, + integer *, integer *, ftnlen, ftnlen); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SPOTRF computes the Cholesky factorization of a real symmetric + positive definite matrix A. + + The factorization has the form + A = U**T * U, if UPLO = 'U', or + A = L * L**T, if UPLO = 'L', + where U is an upper triangular matrix and L is lower triangular. + + This is the block version of the algorithm, calling Level 3 BLAS. 
+ + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + A (input/output) REAL array, dimension (LDA,N) + On entry, the symmetric matrix A. If UPLO = 'U', the leading + N-by-N upper triangular part of A contains the upper + triangular part of the matrix A, and the strictly lower + triangular part of A is not referenced. If UPLO = 'L', the + leading N-by-N lower triangular part of A contains the lower + triangular part of the matrix A, and the strictly upper + triangular part of A is not referenced. + + On exit, if INFO = 0, the factor U or L from the Cholesky + factorization A = U**T*U or A = L*L**T. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + > 0: if INFO = i, the leading minor of order i is not + positive definite, and the factorization could not be + completed. + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*lda < max(1,*n)) { + *info = -4; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOTRF", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0) { + return 0; + } + +/* Determine the block size for this environment. */ + + nb = ilaenv_(&c__1, "SPOTRF", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6, ( + ftnlen)1); + if (nb <= 1 || nb >= *n) { + +/* Use unblocked code. */ + + spotf2_(uplo, n, &a[a_offset], lda, info); + } else { + +/* Use blocked code. */ + + if (upper) { + +/* Compute the Cholesky factorization A = U'*U. 
*/ + + i__1 = *n; + i__2 = nb; + for (j = 1; i__2 < 0 ? j >= i__1 : j <= i__1; j += i__2) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + ssyrk_("Upper", "Transpose", &jb, &i__3, &c_b181, &a[j * + a_dim1 + 1], lda, &c_b164, &a[j + j * a_dim1], lda); + spotf2_("Upper", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block row. */ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + sgemm_("Transpose", "No transpose", &jb, &i__3, &i__4, & + c_b181, &a[j * a_dim1 + 1], lda, &a[(j + jb) * + a_dim1 + 1], lda, &c_b164, &a[j + (j + jb) * + a_dim1], lda); + i__3 = *n - j - jb + 1; + strsm_("Left", "Upper", "Transpose", "Non-unit", &jb, & + i__3, &c_b164, &a[j + j * a_dim1], lda, &a[j + (j + + jb) * a_dim1], lda); + } +/* L10: */ + } + + } else { + +/* Compute the Cholesky factorization A = L*L'. */ + + i__2 = *n; + i__1 = nb; + for (j = 1; i__1 < 0 ? j >= i__2 : j <= i__2; j += i__1) { + +/* + Update and factorize the current diagonal block and test + for non-positive-definiteness. + + Computing MIN +*/ + i__3 = nb, i__4 = *n - j + 1; + jb = min(i__3,i__4); + i__3 = j - 1; + ssyrk_("Lower", "No transpose", &jb, &i__3, &c_b181, &a[j + + a_dim1], lda, &c_b164, &a[j + j * a_dim1], lda); + spotf2_("Lower", &jb, &a[j + j * a_dim1], lda, info); + if (*info != 0) { + goto L30; + } + if (j + jb <= *n) { + +/* Compute the current block column. 
*/ + + i__3 = *n - j - jb + 1; + i__4 = j - 1; + sgemm_("No transpose", "Transpose", &i__3, &jb, &i__4, & + c_b181, &a[j + jb + a_dim1], lda, &a[j + a_dim1], + lda, &c_b164, &a[j + jb + j * a_dim1], lda); + i__3 = *n - j - jb + 1; + strsm_("Right", "Lower", "Transpose", "Non-unit", &i__3, & + jb, &c_b164, &a[j + j * a_dim1], lda, &a[j + jb + + j * a_dim1], lda); + } +/* L20: */ + } + } + } + goto L40; + +L30: + *info = *info + j - 1; + +L40: + return 0; + +/* End of SPOTRF */ + +} /* spotrf_ */ + +/* Subroutine */ int spotrs_(char *uplo, integer *n, integer *nrhs, real *a, + integer *lda, real *b, integer *ldb, integer *info) +{ + /* System generated locals */ + integer a_dim1, a_offset, b_dim1, b_offset, i__1; + + /* Local variables */ + extern logical lsame_(char *, char *); + static logical upper; + extern /* Subroutine */ int strsm_(char *, char *, char *, char *, + integer *, integer *, real *, real *, integer *, real *, integer * + ), xerbla_(char *, integer *); + + +/* + -- LAPACK routine (version 3.0) -- + Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd., + Courant Institute, Argonne National Lab, and Rice University + March 31, 1993 + + + Purpose + ======= + + SPOTRS solves a system of linear equations A*X = B with a symmetric + positive definite matrix A using the Cholesky factorization + A = U**T*U or A = L*L**T computed by SPOTRF. + + Arguments + ========= + + UPLO (input) CHARACTER*1 + = 'U': Upper triangle of A is stored; + = 'L': Lower triangle of A is stored. + + N (input) INTEGER + The order of the matrix A. N >= 0. + + NRHS (input) INTEGER + The number of right hand sides, i.e., the number of columns + of the matrix B. NRHS >= 0. + + A (input) REAL array, dimension (LDA,N) + The triangular factor U or L from the Cholesky factorization + A = U**T*U or A = L*L**T, as computed by SPOTRF. + + LDA (input) INTEGER + The leading dimension of the array A. LDA >= max(1,N). 
+ + B (input/output) REAL array, dimension (LDB,NRHS) + On entry, the right hand side matrix B. + On exit, the solution matrix X. + + LDB (input) INTEGER + The leading dimension of the array B. LDB >= max(1,N). + + INFO (output) INTEGER + = 0: successful exit + < 0: if INFO = -i, the i-th argument had an illegal value + + ===================================================================== + + + Test the input parameters. +*/ + + /* Parameter adjustments */ + a_dim1 = *lda; + a_offset = 1 + a_dim1; + a -= a_offset; + b_dim1 = *ldb; + b_offset = 1 + b_dim1; + b -= b_offset; + + /* Function Body */ + *info = 0; + upper = lsame_(uplo, "U"); + if (! upper && ! lsame_(uplo, "L")) { + *info = -1; + } else if (*n < 0) { + *info = -2; + } else if (*nrhs < 0) { + *info = -3; + } else if (*lda < max(1,*n)) { + *info = -5; + } else if (*ldb < max(1,*n)) { + *info = -7; + } + if (*info != 0) { + i__1 = -(*info); + xerbla_("SPOTRS", &i__1); + return 0; + } + +/* Quick return if possible */ + + if (*n == 0 || *nrhs == 0) { + return 0; + } + + if (upper) { + +/* + Solve A*X = B where A = U'*U. + + Solve U'*X = B, overwriting B with X. +*/ + + strsm_("Left", "Upper", "Transpose", "Non-unit", n, nrhs, &c_b164, &a[ + a_offset], lda, &b[b_offset], ldb); + +/* Solve U*X = B, overwriting B with X. */ + + strsm_("Left", "Upper", "No transpose", "Non-unit", n, nrhs, &c_b164, + &a[a_offset], lda, &b[b_offset], ldb); + } else { + +/* + Solve A*X = B where A = L*L'. + + Solve L*X = B, overwriting B with X. +*/ + + strsm_("Left", "Lower", "No transpose", "Non-unit", n, nrhs, &c_b164, + &a[a_offset], lda, &b[b_offset], ldb); + +/* Solve L'*X = B, overwriting B with X. 
*/ + + strsm_("Left", "Lower", "Transpose", "Non-unit", n, nrhs, &c_b164, &a[ + a_offset], lda, &b[b_offset], ldb); + } + + return 0; + +/* End of SPOTRS */ + +} /* spotrs_ */ + diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/strfuncs.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/strfuncs.c new file mode 100644 index 0000000000000000000000000000000000000000..4d2d72ffc99fb286b7d03e2edfdca1f4bfe620c8 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/strfuncs.c @@ -0,0 +1,194 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2006 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ +/* + * strfuncs.c -- String functions + */ + + +#include +#include +#include +#include +#include + +#include "sphinxbase/ckd_alloc.h" +#include "sphinxbase/strfuncs.h" + +/* Defined in dtoa.c */ +double sb_strtod(const char *s00, char **se); + +double +atof_c(char const *str) +{ + return sb_strtod(str, NULL); +} + +/* Locale-independent isspace to avoid different incompatibilities */ +static int +isspace_c(char ch) +{ + if (ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r') + return 1; + return 0; +} + +char * +string_join(const char *base, ...) 
+{ + va_list args; + size_t len; + const char *c; + char *out; + + va_start(args, base); + len = strlen(base); + while ((c = va_arg(args, const char *)) != NULL) { + len += strlen(c); + } + len++; + va_end(args); + + out = ckd_calloc(len, 1); + va_start(args, base); + strcpy(out, base); + while ((c = va_arg(args, const char *)) != NULL) { + strcat(out, c); + } + va_end(args); + + return out; +} + +char * +string_trim(char *string, enum string_edge_e which) +{ + size_t len; + + len = strlen(string); + if (which == STRING_START || which == STRING_BOTH) { + size_t sub = strspn(string, " \t\n\r\f"); + if (sub > 0) { + memmove(string, string + sub, len + 1 - sub); + len -= sub; + } + } + if (which == STRING_END || which == STRING_BOTH) { + long sub = len; + while (--sub >= 0) + if (strchr(" \t\n\r\f", string[sub]) == NULL) + break; + if (sub == -1) + string[0] = '\0'; + else + string[sub+1] = '\0'; + } + return string; +} + +int32 +str2words(char *line, char **ptr, int32 max_ptr) +{ + int32 i, n; + + n = 0; /* #words found so far */ + i = 0; /* For scanning through the input string */ + while (1) { + /* Skip whitespace before next word */ + while (line[i] && isspace_c(line[i])) + ++i; + if (!line[i]) + break; + + if (ptr != NULL && n >= max_ptr) { + /* + * Pointer array size insufficient. Restore NULL chars inserted so far + * to space chars. Not a perfect restoration, but better than nothing. 
+ */ + for (; i >= 0; --i) + if (line[i] == '\0') + line[i] = ' '; + + return -1; + } + + /* Scan to end of word */ + if (ptr != NULL) + ptr[n] = line + i; + ++n; + while (line[i] && !isspace_c(line[i])) + ++i; + if (!line[i]) + break; + if (ptr != NULL) + line[i] = '\0'; + ++i; + } + + return n; +} + + +int32 +nextword(char *line, const char *delim, char **word, char *delimfound) +{ + const char *d; + char *w; + + /* Skip past any preceding delimiters */ + for (w = line; *w; w++) { + for (d = delim; *d && (*d != *w); d++); + if (!*d) + break; + } + if (!*w) + return -1; + + *word = w; /* Beginning of word */ + + /* Skip until first delimiter char */ + for (w++; *w; w++) { + for (d = delim; *d && (*d != *w); d++); + if (*d) + break; + } + + /* Replace delimiter with NULL char, but return the original first */ + *delimfound = *w; + *w = '\0'; + + return (w - *word); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/wrapped_routines b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/wrapped_routines new file mode 100644 index 0000000000000000000000000000000000000000..d78c8db2c7834d9e4d92a8eabb0d782396c7f1cf --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/util/wrapped_routines @@ -0,0 +1,4 @@ +ssymm +sposv +spotrf +IGNORE: slamch diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/vector.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/vector.c new file mode 100644 index 0000000000000000000000000000000000000000..edb8699666b36ad1892d498bea4695d9475ec5c9 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/vector.c @@ -0,0 +1,150 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. 
All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * vector.c + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1997 Carnegie Mellon University. + * ALL RIGHTS RESERVED. 
+ * ********************************************** + * + * HISTORY + * + * 22-Nov-2004 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University + * Imported from s3.2, for supporting s3 format continuous + * acoustic models. + * + * 10-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. + * Added vector_accum(), vector_vqlabel(), and vector_vqgen(). + * + * 09-Mar-1999 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. + * Added vector_is_zero(), vector_cmp(), and vector_dist_eucl(). + * Changed the name vector_dist_eval to vector_dist_maha. + * + * 07-Oct-98 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. + * Added distance computation related functions. + * + * 12-Nov-95 M K Ravishankar (rkm@cs.cmu.edu) at Carnegie Mellon University. + * Copied from Eric Thayer. + */ + +/* System headers. */ +#include +#include +#include +#include +#include + +/* SphinxBase headers. */ +#include +#include +#include + +/* Local headers. */ +#include "vector.h" + +#if defined(_WIN32) +#define srandom srand +#define random rand +#endif + + +float64 +vector_sum_norm(float32 * vec, int32 len) +{ + float64 sum, f; + int32 i; + + sum = 0.0; + for (i = 0; i < len; i++) + sum += vec[i]; + + if (sum != 0.0) { + f = 1.0 / sum; + for (i = 0; i < len; i++) + vec[i] *= f; + } + + return sum; +} + + +void +vector_floor(float32 * vec, int32 len, float64 flr) +{ + int32 i; + + for (i = 0; i < len; i++) + if (vec[i] < flr) + vec[i] = (float32) flr; +} + + +void +vector_nz_floor(float32 * vec, int32 len, float64 flr) +{ + int32 i; + + for (i = 0; i < len; i++) + if ((vec[i] != 0.0) && (vec[i] < flr)) + vec[i] = (float32) flr; +} + + +void +vector_print(FILE * fp, vector_t v, int32 dim) +{ + int32 i; + + for (i = 0; i < dim; i++) + fprintf(fp, " %11.4e", v[i]); + fprintf(fp, "\n"); + fflush(fp); +} + + +int32 +vector_is_zero(float32 * vec, int32 len) +{ + int32 i; + + for (i = 0; (i < len) && (vec[i] == 0.0); i++); + return (i == len); /* TRUE 
iff all mean values are 0.0 */ +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/vector.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/vector.h new file mode 100644 index 0000000000000000000000000000000000000000..ee8fd52c849b07218f4949deb1341beb31561db3 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/pocketsphinx/src/vector.h @@ -0,0 +1,100 @@ +/* ==================================================================== + * Copyright (c) 1999-2004 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + */ + +/* + * vector.h -- vector routines. + * + * ********************************************** + * CMU ARPA Speech Project + * + * Copyright (c) 1997 Carnegie Mellon University. + * ALL RIGHTS RESERVED. + * ********************************************** + */ + + +#ifndef __VECTOR_H__ +#define __VECTOR_H__ + +/* System headers. */ +#include + +/* SphinxBase headers. */ +#include + +#ifdef __cplusplus +extern "C" { +#endif +#if 0 +} +#endif + +typedef float32 *vector_t; + +/* + * The reason for some of the "trivial" routines below is that they could be OPTIMIZED for SPEED + * at some point. + */ + + +/* Floor all elements of v[0..dim-1] to min value of f */ +void vector_floor(vector_t v, int32 dim, float64 f); + + +/* Floor all non-0 elements of v[0..dim-1] to min value of f */ +void vector_nz_floor(vector_t v, int32 dim, float64 f); + + +/* + * Normalize the elements of the given vector so that they sum to 1.0. If the sum is 0.0 + * to begin with, the vector is left untouched. Return value: the sum of the elements before normalization (0.0 if untouched).
+ */ +float64 vector_sum_norm(vector_t v, int32 dim); + + +/* Print vector in one line, in %11.4e format, terminated by newline */ +void vector_print(FILE *fp, vector_t v, int32 dim); + + +/* Return TRUE iff given vector is all 0.0 */ +int32 vector_is_zero (float32 *vec, /* In: Vector to be checked */ + int32 len); /* In: Length of above vector */ + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* VECTOR_H */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/utils/include/common_utils.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/utils/include/common_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..e89597e40250b06ed717a33a8adc2770ce1eb3bb --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/utils/include/common_utils.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef CAR_VOICE_ASSISTANT_COMMON_UTILS_H +#define CAR_VOICE_ASSISTANT_COMMON_UTILS_H + +#include +#include +#include +#include +#include +#include + +#define CAR_VOICE_ASSISTANT_SERVICE_SA_ID 5102 + +namespace OHOS::CarVoiceAssistant::CommonUtils { +enum VoiceAssistantErrorCode { + VOICE_ASSISTANT_OK = 0, + VOICE_ASSISTANT_ERR = 8001, + VOICE_ASSISTANT_START_RECORD_FAILED, // 8002: failed to start audio recording + VOICE_ASSISTANT_START_WEBSOCKET_CONNECT_FAILED, // 8003: websocket connection failed +}; + +} // namespace OHOS::CarVoiceAssistant::CommonUtils + +#endif /* CAR_VOICE_ASSISTANT_COMMON_UTILS_H */ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/utils/include/voice_assistant_log.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/utils/include/voice_assistant_log.h new file mode 100644 index 0000000000000000000000000000000000000000..1128ef2fa18fc87ec19dbc364334757f1899f40e --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/utils/include/voice_assistant_log.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef CAR_VOICE_ASSISTANT_LOG +#define CAR_VOICE_ASSISTANT_LOG + +#include +#include + +#include "hilog/log.h" + +#define MAKE_FILE_NAME (strrchr(__FILE__, '/') + 1) + +#define VOICE_ASSISTANT_LOG_TAG "CarVoiceAssistant" + +#define VOICE_ASSISTANT_LOG_DOMAIN 0xD001C00 + +static constexpr OHOS::HiviewDFX::HiLogLabel NETSTACK_LOG_LABEL = {LOG_CORE, VOICE_ASSISTANT_LOG_DOMAIN, VOICE_ASSISTANT_LOG_TAG}; + +#define VOICE_ASSISTANT_HILOG_PRINT(Level, fmt, ...) \ + (void)OHOS::HiviewDFX::HiLog::Level(NETSTACK_LOG_LABEL, "CarVoiceAssistant [%{public}s %{public}d] " fmt, MAKE_FILE_NAME, \ + __LINE__, ##__VA_ARGS__) + + +#define VOICE_ASSISTANT_LOGE(fmt, ...) VOICE_ASSISTANT_HILOG_PRINT(Error, fmt, ##__VA_ARGS__) + +#define VOICE_ASSISTANT_LOGI(fmt, ...) VOICE_ASSISTANT_HILOG_PRINT(Info, fmt, ##__VA_ARGS__) + +#endif /* CAR_VOICE_ASSISTANT_COMMON_UTILS_H */ \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/BUILD.gn b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..4a00afe2e558722b44b370ac49473d4f23fd6d7f --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/BUILD.gn @@ -0,0 +1,42 @@ +# Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import("//build/ohos.gni") + +config("ps_vad_config") { + visibility = [ ":*" ] + include_dirs = [ + "include", + "./" + ] + + cflags = ["-Wno-unused-variable", "-Wno-unused-function", "-Wno-implicit-function-declaration", "-Wno-unused-private-field"] + cflags_cc = [] +} + +ohos_shared_library("ps_vad") { + sources = [ + "src/signal_processing_library.c", + "src/vad.cc", + "src/vad_core.c", + "src/vad_filterbank.c", + "src/vad_gmm.c", + "src/vad_sp.c", + "src/webrtc_vad.c" + ] + + configs = [ ":ps_vad_config" ] + + subsystem_name = "miscservices" + part_name = "voiceassistant" +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/signal_processing_library.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/signal_processing_library.h new file mode 100644 index 0000000000000000000000000000000000000000..12bb3e0591522aea9debe9e3d5c29745727aea47 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/signal_processing_library.h @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This header file includes all of the fixed-point signal processing library + * (SPL) function descriptions and declarations. For specific function calls, + * see bottom of file. + */ + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SIGNAL_PROCESSING_LIBRARY_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SIGNAL_PROCESSING_LIBRARY_H_ + +#include +#include + +#include + +#include +// If you for some reason need to know if DCHECKs are on, test the value of +// RTC_DCHECK_IS_ON. 
(Test its value, not if it's defined; it'll always be +// defined, to either a true or a false value.) +#if !defined(NDEBUG) || defined(DCHECK_ALWAYS_ON) +#define RTC_DCHECK_IS_ON 1 +#else +#define RTC_DCHECK_IS_ON 0 +#endif + + +#define RTC_DCHECK(condition) \ + do { \ + if (RTC_DCHECK_IS_ON) { \ + assert(condition); \ + } \ + } while (0) + +#define RTC_DCHECK_EQ(a, b) RTC_DCHECK((a) == (b)) +#define RTC_DCHECK_NE(a, b) RTC_DCHECK((a) != (b)) +#define RTC_DCHECK_LE(a, b) RTC_DCHECK((a) <= (b)) +#define RTC_DCHECK_LT(a, b) RTC_DCHECK((a) < (b)) +#define RTC_DCHECK_GE(a, b) RTC_DCHECK((a) >= (b)) +#define RTC_DCHECK_GT(a, b) RTC_DCHECK((a) > (b)) + +// Processor architecture detection. For more info on what's defined, see: +// http://msdn.microsoft.com/en-us/library/b0084kay.aspx +// http://www.agner.org/optimize/calling_conventions.pdf +// or with gcc, run: "echo | gcc -E -dM -" +#if defined(_M_X64) || defined(__x86_64__) +#define WEBRTC_ARCH_X86_FAMILY +#define WEBRTC_ARCH_X86_64 +#define WEBRTC_ARCH_64_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(_M_ARM64) || defined(__aarch64__) +#define WEBRTC_ARCH_ARM_FAMILY +#define WEBRTC_ARCH_64_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(_M_IX86) || defined(__i386__) +#define WEBRTC_ARCH_X86_FAMILY +#define WEBRTC_ARCH_X86 +#define WEBRTC_ARCH_32_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__ARMEL__) +#define WEBRTC_ARCH_ARM_FAMILY +#define WEBRTC_ARCH_32_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__MIPSEL__) +#define WEBRTC_ARCH_MIPS_FAMILY +#if defined(__LP64__) +#define WEBRTC_ARCH_64_BITS +#else +#define WEBRTC_ARCH_32_BITS +#endif +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__pnacl__) +#define WEBRTC_ARCH_32_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#elif defined(__EMSCRIPTEN__) +#define WEBRTC_ARCH_32_BITS +#define WEBRTC_ARCH_LITTLE_ENDIAN +#else +#error Please add support for your architecture in rtc_base/system/arch.h +#endif + +// Macros specific for the fixed 
point implementation +#define WEBRTC_SPL_WORD16_MAX 32767 +#define WEBRTC_SPL_WORD16_MIN -32768 +#define WEBRTC_SPL_WORD32_MAX (int32_t)0x7fffffff +#define WEBRTC_SPL_WORD32_MIN (int32_t)0x80000000 +#define WEBRTC_SPL_MIN(A, B) (A < B ? A : B) // Get min value +// TODO(kma/bjorn): For the next two macros, investigate how to correct the code +// for inputs of a = WEBRTC_SPL_WORD16_MIN or WEBRTC_SPL_WORD32_MIN. + +#define WEBRTC_SPL_MUL(a, b) ((int32_t)((int32_t)(a) * (int32_t)(b))) +#define WEBRTC_SPL_MUL_16_U16(a, b) ((int32_t)(int16_t)(a) * (uint16_t)(b)) + +// clang-format off +// clang-format would choose some identation +// leading to presubmit error (cpplint.py) +#ifndef WEBRTC_ARCH_ARM_V7 +// For ARMv7 platforms, these are inline functions in spl_inl_armv7.h +#ifndef MIPS32_LE +// For MIPS platforms, these are inline functions in spl_inl_mips.h +#define WEBRTC_SPL_MUL_16_16(a, b) ((int32_t)(((int16_t)(a)) * ((int16_t)(b)))) +#endif +#endif + +// clang-format on + +// C + the 32 most significant bits of A * B +#define WEBRTC_SPL_SCALEDIFF32(A, B, C) \ + (C + (B >> 16) * A + (((uint32_t)(B & 0x0000FFFF) * A) >> 16)) + +// Shifting with negative numbers allowed +// Positive means left shift + +// Shifting with negative numbers not allowed +// We cannot do casting here due to signed/unsigned problem + +#ifdef __cplusplus +extern "C" { +#endif + +// inline functions: +#include "spl_inl.h" + + +int16_t WebRtcSpl_GetScalingSquare(int16_t *in_vector, + size_t in_vector_length, + size_t times); + + +// Minimum and maximum operation functions and their pointers. +// Implementation in min_max_operations.c. + +// Returns the largest absolute value in a signed 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum absolute value in vector. 
+typedef int16_t (*MaxAbsValueW16)(const int16_t *vector, size_t length); + +extern const MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16; + +int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t *vector, size_t length); + +#if defined(WEBRTC_HAS_NEON) +int16_t WebRtcSpl_MaxAbsValueW16Neon(const int16_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int16_t WebRtcSpl_MaxAbsValueW16_mips(const int16_t* vector, size_t length); +#endif + +// Returns the largest absolute value in a signed 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum absolute value in vector. +typedef int32_t (*MaxAbsValueW32)(const int32_t *vector, size_t length); + +extern const MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32; + +int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t *vector, size_t length); + +#if defined(WEBRTC_HAS_NEON) +int32_t WebRtcSpl_MaxAbsValueW32Neon(const int32_t* vector, size_t length); +#endif +#if defined(MIPS_DSP_R1_LE) +int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length); +#endif + +// Returns the maximum value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum sample value in |vector|. +typedef int16_t (*MaxValueW16)(const int16_t *vector, size_t length); + +extern const MaxValueW16 WebRtcSpl_MaxValueW16; + +int16_t WebRtcSpl_MaxValueW16C(const int16_t *vector, size_t length); + +#if defined(WEBRTC_HAS_NEON) +int16_t WebRtcSpl_MaxValueW16Neon(const int16_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length); +#endif + +// Returns the maximum value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Maximum sample value in |vector|. 
+typedef int32_t (*MaxValueW32)(const int32_t *vector, size_t length); + +extern const MaxValueW32 WebRtcSpl_MaxValueW32; + +int32_t WebRtcSpl_MaxValueW32C(const int32_t *vector, size_t length); + +#if defined(WEBRTC_HAS_NEON) +int32_t WebRtcSpl_MaxValueW32Neon(const int32_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length); +#endif + +// Returns the minimum value of a 16-bit vector. +// +// Input: +// - vector : 16-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Minimum sample value in |vector|. +typedef int16_t (*MinValueW16)(const int16_t *vector, size_t length); + +extern const MinValueW16 WebRtcSpl_MinValueW16; + +int16_t WebRtcSpl_MinValueW16C(const int16_t *vector, size_t length); + +#if defined(WEBRTC_HAS_NEON) +int16_t WebRtcSpl_MinValueW16Neon(const int16_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length); +#endif + +// Returns the minimum value of a 32-bit vector. +// +// Input: +// - vector : 32-bit input vector. +// - length : Number of samples in vector. +// +// Return value : Minimum sample value in |vector|. +typedef int32_t (*MinValueW32)(const int32_t *vector, size_t length); + +extern const MinValueW32 WebRtcSpl_MinValueW32; + +int32_t WebRtcSpl_MinValueW32C(const int32_t *vector, size_t length); + +#if defined(WEBRTC_HAS_NEON) +int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length); +#endif +#if defined(MIPS32_LE) +int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length); +#endif + +// Divisions. Implementations collected in division_operations.c and +// descriptions at bottom of this file. +int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den); +// End: Divisions. 
+ +int32_t WebRtcSpl_Energy(int16_t *vector, + size_t vector_length, + int *scale_factor); + + +void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K); + +typedef struct { + int32_t S_48_24[8]; + int32_t S_24_24[16]; + int32_t S_24_16[8]; + int32_t S_16_8[8]; +} WebRtcSpl_State48khzTo8khz; + +void WebRtcSpl_Resample48khzTo8khz(const int16_t *in, + int16_t *out, + WebRtcSpl_State48khzTo8khz *state, + int32_t *tmpmem); + +void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz *state); + +#ifdef __cplusplus +} +#endif // __cplusplus +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SIGNAL_PROCESSING_LIBRARY_H_ \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/spl_inl.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/spl_inl.h new file mode 100644 index 0000000000000000000000000000000000000000..f54ea81acaa04ec057c62d5b4f8280195af22d4e --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/spl_inl.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This header file includes the inline functions in +// the fix point signal processing library. + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ + +#include "signal_processing_library.h" + +extern const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64]; + +// Don't call this directly except in tests! 
+static __inline int WebRtcSpl_CountLeadingZeros32_NotBuiltin(uint32_t n) { + // Normalize n by rounding up to the nearest number that is a sequence of 0 + // bits followed by a sequence of 1 bits. This number has the same number of + // leading zeros as the original n. There are exactly 33 such values. + n |= n >> 1; + n |= n >> 2; + n |= n >> 4; + n |= n >> 8; + n |= n >> 16; + + // Multiply the modified n with a constant selected (by exhaustive search) + // such that each of the 33 possible values of n give a product whose 6 most + // significant bits are unique. Then look up the answer in the table. + return kWebRtcSpl_CountLeadingZeros32_Table[(n * 0x8c0b2891) >> 26]; +} + +// Returns the number of leading zero bits in the argument. +static __inline int WebRtcSpl_CountLeadingZeros32(uint32_t n) { +#ifdef __GNUC__ + assert(sizeof(unsigned int) == sizeof(uint32_t)); + return n == 0 ? 32 : __builtin_clz(n); +#else + return WebRtcSpl_CountLeadingZeros32_NotBuiltin(n); +#endif +} + + +#ifdef WEBRTC_ARCH_ARM_V7 +#include "spl_inl_armv7.h" +#else + +#if defined(MIPS32_LE) +#include "spl_inl_mips.h" +#endif + +#if !defined(MIPS32_LE) + +static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) { + return 32 - WebRtcSpl_CountLeadingZeros32(n); +} + +// Return the number of steps a can be left-shifted without overflow, +// or 0 if a == 0. +static __inline int16_t WebRtcSpl_NormW32(int32_t a) { + return a == 0 ? 0 : WebRtcSpl_CountLeadingZeros32(a < 0 ? ~a : a) - 1; +} + +// Return the number of steps a can be left-shifted without overflow, +// or 0 if a == 0. +static __inline int16_t WebRtcSpl_NormU32(uint32_t a) { + return a == 0 ? 
0 : WebRtcSpl_CountLeadingZeros32(a); +} + +#endif // #if !defined(MIPS32_LE) + +#endif // WEBRTC_ARCH_ARM_V7 + +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/spl_inl_armv7.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/spl_inl_armv7.h new file mode 100644 index 0000000000000000000000000000000000000000..6e9843b193c480bf427d933740413981cb5e4ba9 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/spl_inl_armv7.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* This header file includes the inline functions for ARM processors in + * the fix point signal processing library. + */ + +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_ARMV7_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_ARMV7_H_ + +/* TODO(kma): Replace some assembly code with GCC intrinsics + * (e.g. __builtin_clz). + */ + +/* This function produces result that is not bit exact with that by the generic + * C version in some cases, although the former is at least as accurate as the + * later. + */ +static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a, int32_t b) { + int32_t tmp = 0; + __asm __volatile("smulwb %0, %1, %2" : "=r"(tmp) : "r"(b), "r"(a)); + return tmp; +} + +static __inline int32_t WEBRTC_SPL_MUL_16_16(int16_t a, int16_t b) { + int32_t tmp = 0; + __asm __volatile("smulbb %0, %1, %2" : "=r"(tmp) : "r"(a), "r"(b)); + return tmp; +} + +// TODO(kma): add unit test. 
+static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) { + int32_t tmp = 0; + __asm __volatile("smlabb %0, %1, %2, %3" + : "=r"(tmp) + : "r"(a), "r"(b), "r"(c)); + return tmp; +} + +static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) { + int32_t s_sum = 0; + + __asm __volatile("qadd16 %0, %1, %2" : "=r"(s_sum) : "r"(a), "r"(b)); + + return (int16_t) s_sum; +} + +static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) { + int32_t l_sum = 0; + + __asm __volatile("qadd %0, %1, %2" : "=r"(l_sum) : "r"(l_var1), "r"(l_var2)); + + return l_sum; +} + +static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) { + int32_t l_sub = 0; + + __asm __volatile("qsub %0, %1, %2" : "=r"(l_sub) : "r"(l_var1), "r"(l_var2)); + + return l_sub; +} + +static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) { + int32_t s_sub = 0; + + __asm __volatile("qsub16 %0, %1, %2" : "=r"(s_sub) : "r"(var1), "r"(var2)); + + return (int16_t) s_sub; +} + +static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) { + int32_t tmp = 0; + + __asm __volatile("clz %0, %1" : "=r"(tmp) : "r"(n)); + + return (int16_t)(32 - tmp); +} + +static __inline int16_t WebRtcSpl_NormW32(int32_t a) { + int32_t tmp = 0; + + if (a == 0) { + return 0; + } else if (a < 0) { + a ^= 0xFFFFFFFF; + } + + __asm __volatile("clz %0, %1" : "=r"(tmp) : "r"(a)); + + return (int16_t)(tmp - 1); +} + +static __inline int16_t WebRtcSpl_NormU32(uint32_t a) { + int tmp = 0; + + if (a == 0) + return 0; + + __asm __volatile("clz %0, %1" : "=r"(tmp) : "r"(a)); + + return (int16_t) tmp; +} + +static __inline int16_t WebRtcSpl_NormW16(int16_t a) { + int32_t tmp = 0; + int32_t a_32 = a; + + if (a_32 == 0) { + return 0; + } else if (a_32 < 0) { + a_32 ^= 0xFFFFFFFF; + } + + __asm __volatile("clz %0, %1" : "=r"(tmp) : "r"(a_32)); + + return (int16_t)(tmp - 17); +} + +// TODO(kma): add unit test. 
+static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) { + int32_t out = 0; + + __asm __volatile("ssat %0, #16, %1" : "=r"(out) : "r"(value32)); + + return (int16_t) out; +} + +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_ARMV7_H_ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/spl_inl_mips.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/spl_inl_mips.h new file mode 100644 index 0000000000000000000000000000000000000000..f787a1bc13f609866a60a7454e550b33ef147585 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/spl_inl_mips.h @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This header file includes the inline functions in +// the fix point signal processing library. 
+ +#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_MIPS_H_ +#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_MIPS_H_ + +static __inline int32_t WEBRTC_SPL_MUL_16_16(int32_t a, int32_t b) { + int32_t value32 = 0; + int32_t a1 = 0, b1 = 0; + + __asm __volatile( +#if defined(MIPS32_R2_LE) + "seh %[a1], %[a] \n\t" + "seh %[b1], %[b] \n\t" +#else + "sll %[a1], %[a], 16 \n\t" + "sll %[b1], %[b], 16 \n\t" + "sra %[a1], %[a1], 16 \n\t" + "sra %[b1], %[b1], 16 \n\t" + #endif + "mul %[value32], %[a1], %[b1] \n\t" + : [value32] "=r"(value32), [a1] "=&r"(a1), [b1] "=&r"(b1) + : [a] "r"(a), [b] "r"(b) + : "hi", "lo"); + return value32; +} + +static __inline int32_t WEBRTC_SPL_MUL_16_32_RSFT16(int16_t a, int32_t b) { + int32_t value32 = 0, b1 = 0, b2 = 0; + int32_t a1 = 0; + + __asm __volatile( +#if defined(MIPS32_R2_LE) + "seh %[a1], %[a] \n\t" +#else + "sll %[a1], %[a], 16 \n\t" + "sra %[a1], %[a1], 16 \n\t" + #endif + "andi %[b2], %[b], 0xFFFF \n\t" + "sra %[b1], %[b], 16 \n\t" + "sra %[b2], %[b2], 1 \n\t" + "mul %[value32], %[a1], %[b1] \n\t" + "mul %[b2], %[a1], %[b2] \n\t" + "addiu %[b2], %[b2], 0x4000 \n\t" + "sra %[b2], %[b2], 15 \n\t" + "addu %[value32], %[value32], %[b2] \n\t" + : [value32] "=&r"(value32), [b1] "=&r"(b1), [b2] "=&r"(b2), [a1] "=&r"(a1) + : [a] "r"(a), [b] "r"(b) + : "hi", "lo"); + return value32; +} + +#if defined(MIPS_DSP_R1_LE) +static __inline int16_t WebRtcSpl_SatW32ToW16(int32_t value32) { + __asm __volatile( + "shll_s.w %[value32], %[value32], 16 \n\t" + "sra %[value32], %[value32], 16 \n\t" + : [value32] "+r"(value32) + :); + int16_t out16 = (int16_t)value32; + return out16; +} + +static __inline int16_t WebRtcSpl_AddSatW16(int16_t a, int16_t b) { + int32_t value32 = 0; + + __asm __volatile("addq_s.ph %[value32], %[a], %[b] \n\t" + : [value32] "=r"(value32) + : [a] "r"(a), [b] "r"(b)); + return (int16_t)value32; +} + +static __inline int32_t WebRtcSpl_AddSatW32(int32_t l_var1, int32_t l_var2) { + int32_t l_sum; + + __asm __volatile( 
+ "addq_s.w %[l_sum], %[l_var1], %[l_var2] \n\t" + : [l_sum] "=r"(l_sum) + : [l_var1] "r"(l_var1), [l_var2] "r"(l_var2)); + + return l_sum; +} + +static __inline int16_t WebRtcSpl_SubSatW16(int16_t var1, int16_t var2) { + int32_t value32; + + __asm __volatile("subq_s.ph %[value32], %[var1], %[var2] \n\t" + : [value32] "=r"(value32) + : [var1] "r"(var1), [var2] "r"(var2)); + + return (int16_t)value32; +} + +static __inline int32_t WebRtcSpl_SubSatW32(int32_t l_var1, int32_t l_var2) { + int32_t l_diff; + + __asm __volatile( + "subq_s.w %[l_diff], %[l_var1], %[l_var2] \n\t" + : [l_diff] "=r"(l_diff) + : [l_var1] "r"(l_var1), [l_var2] "r"(l_var2)); + + return l_diff; +} +#endif + +static __inline int16_t WebRtcSpl_GetSizeInBits(uint32_t n) { + int bits = 0; + int i32 = 32; + + __asm __volatile( + "clz %[bits], %[n] \n\t" + "subu %[bits], %[i32], %[bits] \n\t" + : [bits] "=&r"(bits) + : [n] "r"(n), [i32] "r"(i32)); + + return (int16_t) bits; +} + +static __inline int16_t WebRtcSpl_NormW32(int32_t a) { + int zeros = 0; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "bnez %[a], 1f \n\t" + " sra %[zeros], %[a], 31 \n\t" + "b 2f \n\t" + " move %[zeros], $zero \n\t" + "1: \n\t" + "xor %[zeros], %[a], %[zeros] \n\t" + "clz %[zeros], %[zeros] \n\t" + "addiu %[zeros], %[zeros], -1 \n\t" + "2: \n\t" + ".set pop \n\t" + : [zeros] "=&r"(zeros) + : [a] "r"(a)); + + return (int16_t) zeros; +} + +static __inline int16_t WebRtcSpl_NormU32(uint32_t a) { + int zeros = 0; + + __asm __volatile("clz %[zeros], %[a] \n\t" + : [zeros] "=r"(zeros) + : [a] "r"(a)); + + return (int16_t)(zeros & 0x1f); +} + +static __inline int16_t WebRtcSpl_NormW16(int16_t a) { + int zeros = 0; + int a0 = a << 16; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "bnez %[a0], 1f \n\t" + " sra %[zeros], %[a0], 31 \n\t" + "b 2f \n\t" + " move %[zeros], $zero \n\t" + "1: \n\t" + "xor %[zeros], %[a0], %[zeros] \n\t" + "clz %[zeros], %[zeros] \n\t" + "addiu %[zeros], %[zeros], -1 
\n\t" + "2: \n\t" + ".set pop \n\t" + : [zeros] "=&r"(zeros) + : [a0] "r"(a0)); + + return (int16_t) zeros; +} + +static __inline int32_t WebRtc_MulAccumW16(int16_t a, int16_t b, int32_t c) { + int32_t res = 0, c1 = 0; + __asm __volatile( +#if defined(MIPS32_R2_LE) + "seh %[a], %[a] \n\t" + "seh %[b], %[b] \n\t" +#else + "sll %[a], %[a], 16 \n\t" + "sll %[b], %[b], 16 \n\t" + "sra %[a], %[a], 16 \n\t" + "sra %[b], %[b], 16 \n\t" + #endif + "mul %[res], %[a], %[b] \n\t" + "addu %[c1], %[c], %[res] \n\t" + : [c1] "=r"(c1), [res] "=&r"(res) + : [a] "r"(a), [b] "r"(b), [c] "r"(c) + : "hi", "lo"); + return (c1); +} + +#endif // COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_MIPS_H_ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/vad.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/vad.h new file mode 100644 index 0000000000000000000000000000000000000000..b918a06c9e6dd394b54d4fb3c4746cc2862fd68e --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/vad.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef COMMON_AUDIO_VAD_INCLUDE_VAD_H_ +#define COMMON_AUDIO_VAD_INCLUDE_VAD_H_ + +#include + +#include "webrtc_vad.h" +#include "signal_processing_library.h" + + +class Vad { +public: + enum Aggressiveness { + kVadNormal = 0, + kVadLowBitrate = 1, + kVadAggressive = 2, + kVadVeryAggressive = 3 + }; + + enum Activity { + kPassive = 0, kActive = 1, kError = -1 + }; + + virtual ~Vad() = default; + + // Calculates a VAD decision for the given audio frame. 
Valid sample rates + // are 8000, 16000, and 32000 Hz; the number of samples must be such that the + // frame is 10, 20, or 30 ms long. + virtual Activity VoiceActivity(const int16_t *audio, + size_t num_samples, + int sample_rate_hz) = 0; + + // Resets VAD state. + virtual void Reset() = 0; +}; + +// Returns a Vad instance that's implemented on top of WebRtcVad. +std::unique_ptr CreateVad(Vad::Aggressiveness aggressiveness); + + +#endif // COMMON_AUDIO_VAD_INCLUDE_VAD_H_ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/vad_core.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/vad_core.h new file mode 100644 index 0000000000000000000000000000000000000000..da537cbcb7df83e7cab4f3dba045bf9ffe1a5d7c --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/vad_core.h @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This header file includes the descriptions of the core VAD calls. + */ + +#ifndef COMMON_AUDIO_VAD_VAD_CORE_H_ +#define COMMON_AUDIO_VAD_VAD_CORE_H_ + +#include "signal_processing_library.h" + +enum { + kNumChannels = 6 +}; // Number of frequency bands (named channels). +enum { + kNumGaussians = 2 +}; // Number of Gaussians per channel in the GMM. +enum { + kTableSize = kNumChannels * kNumGaussians +}; +enum { + kMinEnergy = 10 +}; // Minimum energy required to trigger audio signal. 
+ +typedef struct VadInstT_ { + int vad; + int32_t downsampling_filter_states[4]; + WebRtcSpl_State48khzTo8khz state_48_to_8; + int16_t noise_means[kTableSize]; + int16_t speech_means[kTableSize]; + int16_t noise_stds[kTableSize]; + int16_t speech_stds[kTableSize]; + // TODO(bjornv): Change to |frame_count|. + int32_t frame_counter; + int16_t over_hang; // Over Hang + int16_t num_of_speech; + // TODO(bjornv): Change to |age_vector|. + int16_t index_vector[16 * kNumChannels]; + int16_t low_value_vector[16 * kNumChannels]; + // TODO(bjornv): Change to |median|. + int16_t mean_value[kNumChannels]; + int16_t upper_state[5]; + int16_t lower_state[5]; + int16_t hp_filter_state[4]; + int16_t over_hang_max_1[3]; + int16_t over_hang_max_2[3]; + int16_t individual[3]; + int16_t total[3]; + + int init_flag; +} VadInstT; + +// Initializes the core VAD component. The default aggressiveness mode is +// controlled by |kDefaultMode| in vad_core.c. +// +// - self [i/o] : Instance that should be initialized +// +// returns : 0 (OK), -1 (null pointer in or if the default mode can't be +// set) +int WebRtcVad_InitCore(VadInstT *self); + +/**************************************************************************** + * WebRtcVad_set_mode_core(...) + * + * This function changes the VAD settings + * + * Input: + * - inst : VAD instance + * - mode : Aggressiveness degree + * 0 (High quality) - 3 (Highly aggressive) + * + * Output: + * - inst : Changed instance + * + * Return value : 0 - Ok + * -1 - Error + */ + +int WebRtcVad_set_mode_core(VadInstT *self, int mode); + +/**************************************************************************** + * WebRtcVad_CalcVad48khz(...) + * WebRtcVad_CalcVad32khz(...) + * WebRtcVad_CalcVad16khz(...) + * WebRtcVad_CalcVad8khz(...) + * + * Calculate probability for active speech and make VAD decision. 
+ * + * Input: + * - inst : Instance that should be initialized + * - speech_frame : Input speech frame + * - frame_length : Number of input samples + * + * Output: + * - inst : Updated filter states etc. + * + * Return value : VAD decision + * 0 - No active speech + * 1-6 - Active speech + */ +int WebRtcVad_CalcVad48khz(VadInstT *inst, + const int16_t *speech_frame, + size_t frame_length); + +int WebRtcVad_CalcVad32khz(VadInstT *inst, + const int16_t *speech_frame, + size_t frame_length); + +int WebRtcVad_CalcVad16khz(VadInstT *inst, + const int16_t *speech_frame, + size_t frame_length); + +int WebRtcVad_CalcVad8khz(VadInstT *inst, + const int16_t *speech_frame, + size_t frame_length); + +#endif // COMMON_AUDIO_VAD_VAD_CORE_H_ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/vad_filterbank.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/vad_filterbank.h new file mode 100644 index 0000000000000000000000000000000000000000..6d73236589111f9f5cbdefa63e6478e2cae8abb2 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/vad_filterbank.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * This file includes feature calculating functionality used in vad_core.c. 
+ */ + +#ifndef COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ +#define COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ + +#include "vad_core.h" + +// Takes |data_length| samples of |data_in| and calculates the logarithm of the +// energy of each of the |kNumChannels| = 6 frequency bands used by the VAD: +// 80 Hz - 250 Hz +// 250 Hz - 500 Hz +// 500 Hz - 1000 Hz +// 1000 Hz - 2000 Hz +// 2000 Hz - 3000 Hz +// 3000 Hz - 4000 Hz +// +// The values are given in Q4 and written to |features|. Further, an approximate +// overall energy is returned. The return value is used in +// WebRtcVad_GmmProbability() as a signal indicator, hence it is arbitrary above +// the threshold |kMinEnergy|. +// +// - self [i/o] : State information of the VAD. +// - data_in [i] : Input audio data, for feature extraction. +// - data_length [i] : Audio data size, in number of samples. +// - features [o] : 10 * log10(energy in each frequency band), Q4. +// - returns : Total energy of the signal (NOTE! This value is not +// exact. It is only used in a comparison.) +int16_t WebRtcVad_CalculateFeatures(VadInstT *self, + const int16_t *data_in, + size_t data_length, + int16_t *features); + +#endif // COMMON_AUDIO_VAD_VAD_FILTERBANK_H_ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/vad_gmm.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/vad_gmm.h new file mode 100644 index 0000000000000000000000000000000000000000..d61a09ded57fee2cc2beabefe3640e9c2a883f07 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/vad_gmm.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Gaussian probability calculations internally used in vad_core.c.
+
+#ifndef COMMON_AUDIO_VAD_VAD_GMM_H_
+#define COMMON_AUDIO_VAD_VAD_GMM_H_
+
+#include <stdint.h>
+
+// Calculates the probability for |input|, given that |input| comes from a
+// normal distribution with mean and standard deviation (|mean|, |std|).
+//
+// Inputs:
+//      - input         : input sample in Q4.
+//      - mean          : mean input in the statistical model, Q7.
+//      - std           : standard deviation, Q7.
+//
+// Output:
+//
+//      - delta         : input used when updating the model, Q11.
+//                        |delta| = (|input| - |mean|) / |std|^2.
+//
+// Return:
+//   (probability for |input|) =
+//    1 / |std| * exp(-(|input| - |mean|)^2 / (2 * |std|^2));
+int32_t WebRtcVad_GaussianProbability(int16_t input,
+                                      int16_t mean,
+                                      int16_t std,
+                                      int16_t *delta);
+
+#endif  // COMMON_AUDIO_VAD_VAD_GMM_H_
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/vad_sp.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/vad_sp.h
new file mode 100644
index 0000000000000000000000000000000000000000..fdb1dc7827ae61279035b66e6749c894cd11cf79
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/vad_sp.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This file includes specific signal processing tools used in vad_core.c.
+
+#ifndef COMMON_AUDIO_VAD_VAD_SP_H_
+#define COMMON_AUDIO_VAD_VAD_SP_H_
+
+#include "vad_core.h"
+
+// Downsamples the signal by a factor 2, eg. 32->16 or 16->8.
+// +// Inputs: +// - signal_in : Input signal. +// - in_length : Length of input signal in samples. +// +// Input & Output: +// - filter_state : Current filter states of the two all-pass filters. The +// |filter_state| is updated after all samples have been +// processed. +// +// Output: +// - signal_out : Downsampled signal (of length |in_length| / 2). +void WebRtcVad_Downsampling(const int16_t *signal_in, + int16_t *signal_out, + int32_t *filter_state, + size_t in_length); + +// Updates and returns the smoothed feature minimum. As minimum we use the +// median of the five smallest feature values in a 100 frames long window. +// As long as |handle->frame_counter| is zero, that is, we haven't received any +// "valid" data, FindMinimum() outputs the default value of 1600. +// +// Inputs: +// - feature_value : New feature value to update with. +// - channel : Channel number. +// +// Input & Output: +// - handle : State information of the VAD. +// +// Returns: +// : Smoothed minimum value for a moving window. +int16_t WebRtcVad_FindMinimum(VadInstT *handle, + int16_t feature_value, + int channel); + +#endif // COMMON_AUDIO_VAD_VAD_SP_H_ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/webrtc_vad.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/webrtc_vad.h new file mode 100644 index 0000000000000000000000000000000000000000..a71631b2b22b525d27dc2ffeb903c5b312c3df29 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/include/webrtc_vad.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+/*
+ * This header file includes the VAD API calls. Specific function calls are
+ * given below.
+ */
+
+#ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_  // NOLINT
+#define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+typedef struct WebRtcVadInst VadInst;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Creates an instance to the VAD structure.
+VadInst *WebRtcVad_Create(void);
+
+// Frees the dynamic memory of a specified VAD instance.
+//
+// - handle [i] : Pointer to VAD instance that should be freed.
+void WebRtcVad_Free(VadInst *handle);
+
+// Initializes a VAD instance.
+//
+// - handle [i/o] : Instance that should be initialized.
+//
+// returns        : 0 - (OK),
+//                 -1 - (null pointer or Default mode could not be set).
+int WebRtcVad_Init(VadInst *handle);
+
+// Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
+// restrictive in reporting speech. Put in other words the probability of being
+// speech when the VAD returns 1 is increased with increasing mode. As a
+// consequence also the missed detection rate goes up.
+//
+// - handle [i/o] : VAD instance.
+// - mode   [i]   : Aggressiveness mode (0, 1, 2, or 3).
+//
+// returns        : 0 - (OK),
+//                 -1 - (null pointer, mode could not be set or the VAD instance
+//                       has not been initialized).
+int WebRtcVad_set_mode(VadInst *handle, int mode);
+
+// Calculates a VAD decision for the |audio_frame|. For valid sampling rates
+// and frame lengths, see the description of WebRtcVad_ValidRateAndFrameLength().
+//
+// - handle       [i/o] : VAD Instance. Needs to be initialized by
+//                        WebRtcVad_Init() before call.
+// - fs           [i]   : Sampling frequency (Hz): 8000, 16000, or 32000
+// - audio_frame  [i]   : Audio frame buffer.
+// - frame_length [i]   : Length of audio frame buffer in number of samples.
+// +// returns : 1 - (Active Voice), +// 0 - (Non-active Voice), +// -1 - (Error) +int WebRtcVad_Process(VadInst *handle, + int fs, + const int16_t *audio_frame, + size_t frame_length); + +// Checks for valid combinations of |rate| and |frame_length|. We support 10, +// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz. +// +// - rate [i] : Sampling frequency (Hz). +// - frame_length [i] : Speech frame buffer length in number of samples. +// +// returns : 0 - (valid combination), -1 - (invalid combination) +int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length); + +#ifdef __cplusplus +} +#endif + +#endif // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/signal_processing_library.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/signal_processing_library.c new file mode 100644 index 0000000000000000000000000000000000000000..a675384b69ef37f1f91c88073d6ed98e99529210 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/signal_processing_library.c @@ -0,0 +1,668 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#include "include/signal_processing_library.h" + +// TODO(bugs.webrtc.org/9553): These function pointers are useless. Refactor +// things so that we simply have a bunch of regular functions with different +// implementations for different platforms. 
+ +#if defined(WEBRTC_HAS_NEON) + +const MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16Neon; +const MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32Neon; +const MaxValueW16 WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16Neon; +const MaxValueW32 WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32Neon; +const MinValueW16 WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16Neon; +const MinValueW32 WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32Neon; + + +#elif defined(MIPS32_LE) + +const MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16_mips; +const MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32 = +#ifdef MIPS_DSP_R1_LE + WebRtcSpl_MaxAbsValueW32_mips; +#else + WebRtcSpl_MaxAbsValueW32C; +#endif +const MaxValueW16 WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16_mips; +const MaxValueW32 WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32_mips; +const MinValueW16 WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16_mips; +const MinValueW32 WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32_mips; + + +#else + +const MaxAbsValueW16 WebRtcSpl_MaxAbsValueW16 = WebRtcSpl_MaxAbsValueW16C; +const MaxAbsValueW32 WebRtcSpl_MaxAbsValueW32 = WebRtcSpl_MaxAbsValueW32C; +const MaxValueW16 WebRtcSpl_MaxValueW16 = WebRtcSpl_MaxValueW16C; +const MaxValueW32 WebRtcSpl_MaxValueW32 = WebRtcSpl_MaxValueW32C; +const MinValueW16 WebRtcSpl_MinValueW16 = WebRtcSpl_MinValueW16C; +const MinValueW32 WebRtcSpl_MinValueW32 = WebRtcSpl_MinValueW32C; + +#endif + +// Table used by WebRtcSpl_CountLeadingZeros32_NotBuiltin. For each uint32_t n +// that's a sequence of 0 bits followed by a sequence of 1 bits, the entry at +// index (n * 0x8c0b2891) >> 26 in this table gives the number of zero bits in +// n. 
+const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64] = { + 32, 8, 17, -1, -1, 14, -1, -1, -1, 20, -1, -1, -1, 28, -1, 18, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 26, 25, 24, + 4, 11, 23, 31, 3, 7, 10, 16, 22, 30, -1, -1, 2, 6, 13, 9, + -1, 15, -1, 21, -1, 29, 19, -1, -1, -1, -1, -1, 1, 27, 5, 12, +}; + +int32_t WebRtcSpl_DivW32W16(int32_t num, int16_t den) { + // Guard against division with 0 + if (den != 0) { + return (int32_t) (num / den); + } else { + return (int32_t) 0x7FFFFFFF; + } +} + + + +// TODO(bjorn/kma): Consolidate function pairs (e.g. combine +// WebRtcSpl_MaxAbsValueW16C and WebRtcSpl_MaxAbsIndexW16 into a single one.) +// TODO(kma): Move the next six functions into min_max_operations_c.c. + +// Maximum absolute value of word16 vector. C version for generic platforms. +int16_t WebRtcSpl_MaxAbsValueW16C(const int16_t *vector, size_t length) { + size_t i = 0; + int absolute = 0, maximum = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + absolute = abs((int) vector[i]); + + if (absolute > maximum) { + maximum = absolute; + } + } + + // Guard the case for abs(-32768). + if (maximum > WEBRTC_SPL_WORD16_MAX) { + maximum = WEBRTC_SPL_WORD16_MAX; + } + + return (int16_t) maximum; +} + +// Maximum absolute value of word32 vector. C version for generic platforms. +int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t *vector, size_t length) { + // Use uint32_t for the local variables, to accommodate the return value + // of abs(0x80000000), which is 0x80000000. + + uint32_t absolute = 0, maximum = 0; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + absolute = abs((int) vector[i]); + if (absolute > maximum) { + maximum = absolute; + } + } + + maximum = WEBRTC_SPL_MIN(maximum, WEBRTC_SPL_WORD32_MAX); + + return (int32_t) maximum; +} + +// Maximum value of word16 vector. C version for generic platforms. 
+int16_t WebRtcSpl_MaxValueW16C(const int16_t *vector, size_t length) { + int16_t maximum = WEBRTC_SPL_WORD16_MIN; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) + maximum = vector[i]; + } + return maximum; +} + +// Maximum value of word32 vector. C version for generic platforms. +int32_t WebRtcSpl_MaxValueW32C(const int32_t *vector, size_t length) { + int32_t maximum = WEBRTC_SPL_WORD32_MIN; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] > maximum) + maximum = vector[i]; + } + return maximum; +} + +// Minimum value of word16 vector. C version for generic platforms. +int16_t WebRtcSpl_MinValueW16C(const int16_t *vector, size_t length) { + int16_t minimum = WEBRTC_SPL_WORD16_MAX; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) + minimum = vector[i]; + } + return minimum; +} + +// Minimum value of word32 vector. C version for generic platforms. +int32_t WebRtcSpl_MinValueW32C(const int32_t *vector, size_t length) { + int32_t minimum = WEBRTC_SPL_WORD32_MAX; + size_t i = 0; + + RTC_DCHECK_GT(length, 0); + + for (i = 0; i < length; i++) { + if (vector[i] < minimum) + minimum = vector[i]; + } + return minimum; +} + +// allpass filter coefficients. +static const int16_t kResampleAllpass[2][3] = { + {821, 6110, 12382}, + {3050, 9368, 15063} +}; + +// +// decimator +// input: int32_t (shifted 15 positions to the left, + offset 16384) OVERWRITTEN! 
// output: int16_t (saturated) (of length len/2)
// state: filter state array; length = 8
//
// Both polyphase branches are filtered in place in |in| (even samples
// through kResampleAllpass[1], odd samples through kResampleAllpass[0]);
// the branch outputs are then summed pairwise and clamped to the int16_t
// range. The combine loop handles two outputs per iteration, so it touches
// in[0 .. 2*len-1] only when len/2 is even -- NOTE(review): callers
// presumably guarantee that; verify.
// NOTE(review): the "bugs.webrtc.org/5486" tag presumably tracks the signed
// overflow that the UBSan remarks below record -- confirm upstream.

void  // bugs.webrtc.org/5486
WebRtcSpl_DownBy2IntToShort(int32_t *in, int32_t len, int16_t *out,
                            int32_t *state) {
    int32_t tmp0, tmp1, diff;
    int32_t i;

    len >>= 1;

    // lower allpass filter (operates on even input samples)
    for (i = 0; i < len; i++) {
        tmp0 = in[i << 1];
        diff = tmp0 - state[1];
        // UBSan: -1771017321 - 999586185 cannot be represented in type 'int'

        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[0] + diff * kResampleAllpass[1][0];
        state[0] = tmp0;
        diff = tmp1 - state[2];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[1] + diff * kResampleAllpass[1][1];
        state[1] = tmp1;
        diff = tmp0 - state[3];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[3] = state[2] + diff * kResampleAllpass[1][2];
        state[2] = tmp0;

        // divide by two and store temporarily
        in[i << 1] = (state[3] >> 1);
    }

    in++;

    // upper allpass filter (operates on odd input samples)
    for (i = 0; i < len; i++) {
        tmp0 = in[i << 1];
        diff = tmp0 - state[5];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[4] + diff * kResampleAllpass[0][0];
        state[4] = tmp0;
        diff = tmp1 - state[6];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[5] + diff * kResampleAllpass[0][1];
        state[5] = tmp1;
        diff = tmp0 - state[7];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[7] = state[6] + diff * kResampleAllpass[0][2];
        state[6] = tmp0;

        // divide by two and store temporarily
        in[i << 1] = (state[7] >> 1);
    }

    in--;

    // combine allpass outputs
    for (i = 0; i < len; i += 2) {
        // divide by two, add both allpass outputs and round
        tmp0 = (in[i << 1] + in[(i << 1) + 1]) >> 15;
        tmp1 = (in[(i << 1) + 2] + in[(i << 1) + 3]) >> 15;
        // saturate both results to [-32768, 32767] before narrowing
        if (tmp0 > (int32_t) 0x00007FFF)
            tmp0 = 0x00007FFF;
        if (tmp0 < (int32_t) 0xFFFF8000)
            tmp0 = 0xFFFF8000;
        out[i] = (int16_t) tmp0;
        if (tmp1 > (int32_t) 0x00007FFF)
            tmp1 = 0x00007FFF;
        if (tmp1 < (int32_t) 0xFFFF8000)
            tmp1 = 0xFFFF8000;
        out[i + 1] = (int16_t) tmp1;
    }
}

//
// decimator
// input: int16_t
// output: int32_t (shifted 15 positions to the left, + offset 16384) (of length len/2)
// state: filter state array; length = 8
//
// The first loop writes the lower-branch result to out[i]; the second loop
// adds the upper-branch result to the same element, so |out| needs no prior
// initialization.

void  // bugs.webrtc.org/5486
WebRtcSpl_DownBy2ShortToInt(const int16_t *in,
                            int32_t len,
                            int32_t *out,
                            int32_t *state) {
    int32_t tmp0, tmp1, diff;
    int32_t i;

    len >>= 1;

    // lower allpass filter (operates on even input samples)
    for (i = 0; i < len; i++) {
        // promote the sample to the Q15 + 16384 working format
        tmp0 = ((int32_t) in[i << 1] << 15) + (1 << 14);
        diff = tmp0 - state[1];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[0] + diff * kResampleAllpass[1][0];
        state[0] = tmp0;
        diff = tmp1 - state[2];
        // UBSan: -1379909682 - 834099714 cannot be represented in type 'int'

        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[1] + diff * kResampleAllpass[1][1];
        state[1] = tmp1;
        diff = tmp0 - state[3];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[3] = state[2] + diff * kResampleAllpass[1][2];
        state[2] = tmp0;

        // divide by two and store temporarily
        out[i] = (state[3] >> 1);
    }

    in++;

    // upper allpass filter (operates on odd input samples)
    for (i = 0; i < len; i++) {
        tmp0 = ((int32_t) in[i << 1] << 15) + (1 << 14);
        diff = tmp0 - state[5];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[4] + diff * kResampleAllpass[0][0];
        state[4] = tmp0;
        diff = tmp1 - state[6];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[5] + diff * kResampleAllpass[0][1];
        state[5] = tmp1;
        diff = tmp0 - state[7];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[7] = state[6] + diff * kResampleAllpass[0][2];
        state[6] = tmp0;

        // divide by two and add to the lower-branch result stored above
        out[i] += (state[7] >> 1);
    }

    in--;
}

//      lowpass filter
// input:  int32_t (shifted 15 positions to the left, + offset 16384)
// output: int32_t (normalized, not saturated)
// state:  filter state array; length = 16 (indices 0..15 are used below;
//         state[12] doubles as the one-sample delay of the first branch)
void  // bugs.webrtc.org/5486
WebRtcSpl_LPBy2IntToInt(const int32_t *in, int32_t len, int32_t *out,
                        int32_t *state) {
    int32_t tmp0, tmp1, diff;
    int32_t i;

    len >>= 1;

    // lower allpass filter: odd input -> even output samples
    in++;
    // initial state of polyphase delay element
    tmp0 = state[12];
    for (i = 0; i < len; i++) {
        diff = tmp0 - state[1];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[0] + diff * kResampleAllpass[1][0];
        state[0] = tmp0;
        diff = tmp1 - state[2];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[1] + diff * kResampleAllpass[1][1];
        state[1] = tmp1;
        diff = tmp0 - state[3];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[3] = state[2] + diff * kResampleAllpass[1][2];
        state[2] = tmp0;

        // scale down, round and store
        out[i << 1] = state[3] >> 1;
        // fetch the odd sample that feeds the NEXT iteration (one-sample delay)
        tmp0 = in[i << 1];
    }
    in--;

    // upper allpass filter: even input -> even output samples
    for (i = 0; i < len; i++) {
        tmp0 = in[i << 1];
        diff = tmp0 - state[5];
        // UBSan: -794814117 - 1566149201 cannot be represented in type 'int'

        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[4] + diff * kResampleAllpass[0][0];
        state[4] = tmp0;
        diff = tmp1 - state[6];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[5] + diff * kResampleAllpass[0][1];
        state[5] = tmp1;
        diff = tmp0 - state[7];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[7] = state[6] + diff * kResampleAllpass[0][2];
        state[6] = tmp0;

        // average the two allpass outputs, scale down and store
        out[i << 1] = (out[i << 1] + (state[7] >> 1)) >> 15;
    }

    // switch to odd output samples
    out++;

    // lower allpass filter: even input -> odd output samples
    for (i = 0; i < len; i++) {
        tmp0 = in[i << 1];
        diff = tmp0 - state[9];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[8] + diff * kResampleAllpass[1][0];
        state[8] = tmp0;
        diff = tmp1 - state[10];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[9] + diff * kResampleAllpass[1][1];
        state[9] = tmp1;
        diff = tmp0 - state[11];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[11] = state[10] + diff * kResampleAllpass[1][2];
        state[10] = tmp0;

        // scale down, round and store
        out[i << 1] = state[11] >> 1;
    }

    // upper allpass filter: odd input -> odd output samples
    in++;
    for (i = 0; i < len; i++) {
        tmp0 = in[i << 1];
        diff = tmp0 - state[13];
        // scale down and round
        diff = (diff + (1 << 13)) >> 14;
        tmp1 = state[12] + diff * kResampleAllpass[0][0];
        state[12] = tmp0;
        diff = tmp1 - state[14];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        tmp0 = state[13] + diff * kResampleAllpass[0][1];
        state[13] = tmp1;
        diff = tmp0 - state[15];
        // scale down and truncate
        diff = diff >> 14;
        if (diff < 0)
            diff += 1;
        state[15] = state[14] + diff * kResampleAllpass[0][2];
        state[14] = tmp0;

        // average the two allpass outputs, scale down and store
        out[i << 1] = (out[i << 1] + (state[15] >> 1)) >> 15;
    }
}

// interpolation coefficients
// (row 1 is row 0 reversed: one 8-tap filter per output phase)
static const int16_t kCoefficients48To32[2][8] = {
        {778, -2050, 1087,  23285, 12903, -3783, 441,   222},
        {222, 441,   -3783, 12903, 23285, 1087,  -2050, 778}
};


//   Resampling ratio: 2/3
// input:  int32_t (normalized, not saturated) :: size 3 * K
// output: int32_t (shifted 15 positions to the left, + offset 16384) :: size 2 * K
//      K: number of blocks
//
// Each block reads In[0..8], i.e. six samples beyond the three it consumes,
// so |In| must provide 3 * K + 6 readable samples in total.

void WebRtcSpl_Resample48khzTo32khz(const int32_t *In, int32_t *Out, size_t K) {
    /////////////////////////////////////////////////////////////
    // Filter operation:
    //
    // Perform resampling (3 input samples -> 2 output samples);
    // process in sub blocks of size 3 samples.
    int32_t tmp;
    size_t m;

    for (m = 0; m < K; m++) {
        // first output phase: taps over In[0..7]; 1 << 14 is the offset
        tmp = 1 << 14;
        tmp += kCoefficients48To32[0][0] * In[0];
        tmp += kCoefficients48To32[0][1] * In[1];
        tmp += kCoefficients48To32[0][2] * In[2];
        tmp += kCoefficients48To32[0][3] * In[3];
        tmp += kCoefficients48To32[0][4] * In[4];
        tmp += kCoefficients48To32[0][5] * In[5];
        tmp += kCoefficients48To32[0][6] * In[6];
        tmp += kCoefficients48To32[0][7] * In[7];
        Out[0] = tmp;

        // second output phase: taps over In[1..8]
        tmp = 1 << 14;
        tmp += kCoefficients48To32[1][0] * In[1];
        tmp += kCoefficients48To32[1][1] * In[2];
        tmp += kCoefficients48To32[1][2] * In[3];
        tmp += kCoefficients48To32[1][3] * In[4];
        tmp += kCoefficients48To32[1][4] * In[5];
        tmp += kCoefficients48To32[1][5] * In[6];
        tmp += kCoefficients48To32[1][6] * In[7];
        tmp += kCoefficients48To32[1][7] * In[8];
        Out[1] = tmp;

        // update pointers
        In += 3;
        Out += 2;
    }
}


#ifdef WEBRTC_ARCH_ARM_V7

// allpass filter coefficients.
static const uint32_t kResampleAllpass1[3] = {3284, 24441, 49528 << 15};
static const uint32_t kResampleAllpass2[3] =
        {12199, 37471 << 15, 60255 << 15};

// Multiply two 32-bit values and accumulate to another input value.
// Return: state + ((diff * tbl_value) >> 16)

static __inline int32_t MUL_ACCUM_1(int32_t tbl_value,
                                    int32_t diff,
                                    int32_t state) {
    int32_t result;
    __asm __volatile ("smlawb %0, %1, %2, %3": "=r"(result): "r"(diff),
    "r"(tbl_value), "r"(state));
    return result;
}

// Multiply two 32-bit values and accumulate to another input value.
// Return: state + (((diff << 1) * tbl_value) >> 32)
//
// The reason to introduce this function is that, in case we can't use smlawb
// instruction (in MUL_ACCUM_1) due to input value range, we can still use
// smmla to save some cycles.

static __inline int32_t MUL_ACCUM_2(int32_t tbl_value,
                                    int32_t diff,
                                    int32_t state) {
    int32_t result;
    __asm __volatile ("smmla %0, %1, %2, %3": "=r"(result): "r"(diff << 1),
    "r"(tbl_value), "r"(state));
    return result;
}

#else

// Multiply a 32-bit value with a 16-bit value and accumulate to another input:
#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)

#endif  // WEBRTC_ARCH_ARM_V7


////////////////////////////
///// 48 kHz -> 8 kHz /////
////////////////////////////

// 48 -> 8 resampler
//
// Converts one fixed-size frame of 480 input samples into 80 output samples
// through the chain 48 -> 24 -> 24 (lowpass) -> 16 -> 8 kHz.
//
// - in      [i]   : 480 input samples.
// - out     [o]   : 80 output samples.
// - state   [i/o] : Filter memories carried between calls.
// - tmpmem  [-]   : Scratch buffer; the highest element written is
//                   tmpmem[256 + 239], so it must hold >= 496 int32_t.
void WebRtcSpl_Resample48khzTo8khz(const int16_t *in, int16_t *out,
                                   WebRtcSpl_State48khzTo8khz *state, int32_t *tmpmem) {
    ///// 48 --> 24 /////
    // int16_t  in[480]
    // int32_t out[240]
    /////
    WebRtcSpl_DownBy2ShortToInt(in, 480, tmpmem + 256, state->S_48_24);

    ///// 24 --> 24(LP) /////
    // int32_t  in[240]
    // int32_t out[240]
    /////
    WebRtcSpl_LPBy2IntToInt(tmpmem + 256, 240, tmpmem + 16, state->S_24_24);

    ///// 24 --> 16 /////
    // int32_t  in[240]
    // int32_t out[160]
    /////
    // copy state to and from input array
    // (the 8 saved samples are placed directly in front of the 240 new ones,
    // and the last 8 samples of this frame are saved for the next call)
    memcpy(tmpmem + 8, state->S_24_16, 8 * sizeof(int32_t));
    memcpy(state->S_24_16, tmpmem + 248, 8 * sizeof(int32_t));
    WebRtcSpl_Resample48khzTo32khz(tmpmem + 8, tmpmem, 80);

    ///// 16 --> 8 /////
    // int32_t  in[160]
    // int16_t out[80]
    /////
    WebRtcSpl_DownBy2IntToShort(tmpmem, 160, out, state->S_16_8);
}

// initialize state of 48 -> 8 resampler
// (zeroes all four filter memories: 8, 16, 8 and 8 int32_t elements)
void WebRtcSpl_ResetResample48khzTo8khz(WebRtcSpl_State48khzTo8khz *state) {
    memset(state->S_48_24, 0, 8 * sizeof(int32_t));
    memset(state->S_24_24, 0, 16 * sizeof(int32_t));
    memset(state->S_24_16, 0, 8 * sizeof(int32_t));
    memset(state->S_16_8, 0, 8 * sizeof(int32_t));
}

////////////////////////////
///// 8 kHz -> 48 kHz /////
////////////////////////////

// Returns the right-shift to apply to each squared sample so that the sum of
// |times| such squares should fit in 32 bits (used by WebRtcSpl_Energy).
//
// - in_vector         [i] : Input samples.
// - in_vector_length  [i] : Number of samples scanned for the peak.
// - times             [i] : Number of squares that will be accumulated.
//
// returns                 : Shift count; 0 when no scaling is needed.
int16_t WebRtcSpl_GetScalingSquare(int16_t *in_vector,
                                   size_t in_vector_length,
                                   size_t times) {
    int16_t nbits = WebRtcSpl_GetSizeInBits((uint32_t) times);
    size_t i;
    int16_t smax = -1;
    int16_t sabs;
    int16_t *sptr = in_vector;
    int16_t t;
    size_t looptimes = in_vector_length;

    // Find the largest magnitude in the vector.
    for (i = looptimes; i > 0; i--) {
        sabs = (*sptr > 0 ? *sptr++ : -*sptr++);
        smax = (sabs > smax ? sabs : smax);
    }
    // Headroom (in bits) of the largest possible square.
    t = WebRtcSpl_NormW32(WEBRTC_SPL_MUL(smax, smax));

    if (smax == 0) {
        return 0;  // Since norm(0) returns 0
    } else {
        return (t > nbits) ? 0 : nbits - t;
    }
}

// Computes the energy (sum of squares) of |vector|, scaled down to avoid
// overflow.
//
// - vector         [i] : Input samples.
// - vector_length  [i] : Number of samples.
// - scale_factor   [o] : Right-shift that was applied to every square.
//
// returns              : Sum over i of (vector[i]^2 >> *scale_factor).
int32_t WebRtcSpl_Energy(int16_t *vector,
                         size_t vector_length,
                         int *scale_factor) {
    int32_t en = 0;
    size_t i;
    int scaling =
            WebRtcSpl_GetScalingSquare(vector, vector_length, vector_length);
    size_t looptimes = vector_length;
    int16_t *vectorptr = vector;

    for (i = 0; i < looptimes; i++) {
        en += (*vectorptr * *vectorptr) >> scaling;
        vectorptr++;
    }
    *scale_factor = scaling;

    return en;
}
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/vad.cc b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/vad.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a2ef810a0948b9bcfc4e245dc675d924584f4019
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/vad.cc
@@ -0,0 +1,55 @@
/*
 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "include/vad.h" + + +class VadImpl final : public Vad { +public: + explicit VadImpl(Aggressiveness aggressiveness) + : handle_(nullptr), aggressiveness_(aggressiveness) { + Reset(); + } + + ~VadImpl() override { WebRtcVad_Free(handle_); } + + Activity VoiceActivity(const int16_t *audio, + size_t num_samples, + int sample_rate_hz) override { + int ret = WebRtcVad_Process(handle_, sample_rate_hz, audio, num_samples); + switch (ret) { + case 0: + return kPassive; + case 1: + return kActive; + default: +// RTC_NOTREACHED() << "WebRtcVad_Process returned an error."; + return kError; + } + } + + void Reset() override { + if (handle_) + WebRtcVad_Free(handle_); + handle_ = WebRtcVad_Create(); + RTC_DCHECK(handle_); + RTC_DCHECK_EQ(WebRtcVad_Init(handle_), 0); + RTC_DCHECK_EQ(WebRtcVad_set_mode(handle_, aggressiveness_), 0); + } + +private: + VadInst *handle_; + Aggressiveness aggressiveness_; +}; + + +std::unique_ptr CreateVad(Vad::Aggressiveness aggressiveness) { + return std::unique_ptr(new VadImpl(aggressiveness)); +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/vad_core.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/vad_core.c new file mode 100644 index 0000000000000000000000000000000000000000..5d3d0ab890c28dc37c1578779a0c55b83764eda0 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/vad_core.c @@ -0,0 +1,680 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
 */

#include "include/vad_core.h"

#include "include/vad_filterbank.h"
#include "include/vad_gmm.h"
#include "include/vad_sp.h"

// Spectrum Weighting
// (per-channel weight applied to the log likelihood ratio in GmmProbability)
static const int16_t kSpectrumWeight[kNumChannels] = {6, 8, 10, 12, 14, 16};
static const int16_t kNoiseUpdateConst = 655;  // Q15
static const int16_t kSpeechUpdateConst = 6554;  // Q15
static const int16_t kBackEta = 154;  // Q8
// Minimum difference between the two models, Q5
static const int16_t kMinimumDifference[kNumChannels] = {
        544, 544, 576, 576, 576, 576};
// Upper limit of mean value for speech model, Q7
static const int16_t kMaximumSpeech[kNumChannels] = {
        11392, 11392, 11520, 11520, 11520, 11520};
// Minimum value for mean value
static const int16_t kMinimumMean[kNumGaussians] = {640, 768};
// Upper limit of mean value for noise model, Q7
static const int16_t kMaximumNoise[kNumChannels] = {
        9216, 9088, 8960, 8832, 8704, 8576};
// Start values for the Gaussian models, Q7
// The kTableSize tables below are indexed as channel + k * kNumChannels,
// where k is the Gaussian index (see GmmProbability).
// Weights for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataWeights[kTableSize] = {
        34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103};
// Weights for the two Gaussians for the six channels (speech)
static const int16_t kSpeechDataWeights[kTableSize] = {
        48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81};
// Means for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataMeans[kTableSize] = {
        6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863, 7820, 7266, 5020, 4362};
// Means for the two Gaussians for the six channels (speech)
static const int16_t kSpeechDataMeans[kTableSize] = {
        8306, 10085, 10078, 11823, 11843, 6309, 9473, 9571, 10879, 7581, 8180, 7483
};
// Stds for the two Gaussians for the six channels (noise)
static const int16_t kNoiseDataStds[kTableSize] = {
        378, 1064, 493, 582, 688, 593, 474, 697, 475, 688, 421, 455};
// Stds for the two Gaussians for the six channels (speech)
static const int16_t kSpeechDataStds[kTableSize] = {
        555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540, 1079, 850};

// Constants used in GmmProbability().
//
// Maximum number of counted speech (VAD = 1) frames in a row.
static const int16_t kMaxSpeechFrames = 6;
// Minimum standard deviation for both speech and noise.
static const int16_t kMinStd = 384;

// Constants in WebRtcVad_InitCore().
// Default aggressiveness mode.
static const short kDefaultMode = 0;
static const int kInitCheck = 42;

// Constants used in WebRtcVad_set_mode_core().
//
// Thresholds for different frame lengths (10 ms, 20 ms and 30 ms).
// Each array holds one value per frame length, in that order.
//
// Mode 0, Quality.
static const int16_t kOverHangMax1Q[3] = {8, 4, 3};
static const int16_t kOverHangMax2Q[3] = {14, 7, 5};
static const int16_t kLocalThresholdQ[3] = {24, 21, 24};
static const int16_t kGlobalThresholdQ[3] = {57, 48, 57};
// Mode 1, Low bitrate.
static const int16_t kOverHangMax1LBR[3] = {8, 4, 3};
static const int16_t kOverHangMax2LBR[3] = {14, 7, 5};
static const int16_t kLocalThresholdLBR[3] = {37, 32, 37};
static const int16_t kGlobalThresholdLBR[3] = {100, 80, 100};
// Mode 2, Aggressive.
static const int16_t kOverHangMax1AGG[3] = {6, 3, 2};
static const int16_t kOverHangMax2AGG[3] = {9, 5, 3};
static const int16_t kLocalThresholdAGG[3] = {82, 78, 82};
static const int16_t kGlobalThresholdAGG[3] = {285, 260, 285};
// Mode 3, Very aggressive.
static const int16_t kOverHangMax1VAG[3] = {6, 3, 2};
static const int16_t kOverHangMax2VAG[3] = {9, 5, 3};
static const int16_t kLocalThresholdVAG[3] = {94, 94, 94};
static const int16_t kGlobalThresholdVAG[3] = {1100, 1050, 1100};

// Calculates the weighted average w.r.t. number of Gaussians. The |data| are
// updated with an |offset| before averaging.
//
// - data     [i/o] : Data to average.
// - offset   [i]   : An offset added to |data|.
// - weights  [i]   : Weights used for averaging.
//
// returns          : The weighted average.
+static int32_t WeightedAverage(int16_t *data, int16_t offset, + const int16_t *weights) { + int k; + int32_t weighted_average = 0; + + for (k = 0; k < kNumGaussians; k++) { + data[k * kNumChannels] += offset; + weighted_average += data[k * kNumChannels] * weights[k * kNumChannels]; + } + return weighted_average; +} + +// An s16 x s32 -> s32 multiplication that's allowed to overflow. (It's still +// undefined behavior, so not a good idea; this just makes UBSan ignore the +// violation, so that our old code can continue to do what it's always been +// doing.) +static inline int32_t +OverflowingMulS16ByS32ToS32(int16_t a, int32_t b) { + return a * b; +} + +// Calculates the probabilities for both speech and background noise using +// Gaussian Mixture Models (GMM). A hypothesis-test is performed to decide which +// type of signal is most probable. +// +// - self [i/o] : Pointer to VAD instance +// - features [i] : Feature vector of length |kNumChannels| +// = log10(energy in frequency band) +// - total_power [i] : Total power in audio frame. +// - frame_length [i] : Number of input samples +// +// - returns : the VAD decision (0 - noise, 1 - speech). +static int16_t GmmProbability(VadInstT *self, int16_t *features, + int16_t total_power, size_t frame_length) { + int channel, k; + int16_t feature_minimum; + int16_t h0, h1; + int16_t log_likelihood_ratio; + int16_t vadflag = 0; + int16_t shifts_h0, shifts_h1; + int16_t tmp_s16, tmp1_s16, tmp2_s16; + int16_t diff; + int gaussian; + int16_t nmk, nmk2, nmk3, smk, smk2, nsk, ssk; + int16_t delt, ndelt; + int16_t maxspe, maxmu; + int16_t deltaN[kTableSize], deltaS[kTableSize]; + int16_t ngprvec[kTableSize] = {0}; // Conditional probability = 0. + int16_t sgprvec[kTableSize] = {0}; // Conditional probability = 0. 
+ int32_t h0_test, h1_test; + int32_t tmp1_s32, tmp2_s32; + int32_t sum_log_likelihood_ratios = 0; + int32_t noise_global_mean, speech_global_mean; + int32_t noise_probability[kNumGaussians], speech_probability[kNumGaussians]; + int16_t overhead1, overhead2, individualTest, totalTest; + + // Set various thresholds based on frame lengths (80, 160 or 240 samples). + if (frame_length == 80) { + overhead1 = self->over_hang_max_1[0]; + overhead2 = self->over_hang_max_2[0]; + individualTest = self->individual[0]; + totalTest = self->total[0]; + } else if (frame_length == 160) { + overhead1 = self->over_hang_max_1[1]; + overhead2 = self->over_hang_max_2[1]; + individualTest = self->individual[1]; + totalTest = self->total[1]; + } else { + overhead1 = self->over_hang_max_1[2]; + overhead2 = self->over_hang_max_2[2]; + individualTest = self->individual[2]; + totalTest = self->total[2]; + } + + if (total_power > kMinEnergy) { + // The signal power of current frame is large enough for processing. The + // processing consists of two parts: + // 1) Calculating the likelihood of speech and thereby a VAD decision. + // 2) Updating the underlying model, w.r.t., the decision made. + + // The detection scheme is an LRT with hypothesis + // H0: Noise + // H1: Speech + // + // We combine a global LRT with local tests, for each frequency sub-band, + // here defined as |channel|. + for (channel = 0; channel < kNumChannels; channel++) { + // For each channel we model the probability with a GMM consisting of + // |kNumGaussians|, with different means and standard deviations depending + // on H0 or H1. + h0_test = 0; + h1_test = 0; + for (k = 0; k < kNumGaussians; k++) { + gaussian = channel + k * kNumChannels; + // Probability under H0, that is, probability of frame being noise. + // Value given in Q27 = Q7 * Q20. 
+ tmp1_s32 = WebRtcVad_GaussianProbability(features[channel], + self->noise_means[gaussian], + self->noise_stds[gaussian], + &deltaN[gaussian]); + noise_probability[k] = kNoiseDataWeights[gaussian] * tmp1_s32; + h0_test += noise_probability[k]; // Q27 + + // Probability under H1, that is, probability of frame being speech. + // Value given in Q27 = Q7 * Q20. + tmp1_s32 = WebRtcVad_GaussianProbability(features[channel], + self->speech_means[gaussian], + self->speech_stds[gaussian], + &deltaS[gaussian]); + speech_probability[k] = kSpeechDataWeights[gaussian] * tmp1_s32; + h1_test += speech_probability[k]; // Q27 + } + + // Calculate the log likelihood ratio: log2(Pr{X|H1} / Pr{X|H1}). + // Approximation: + // log2(Pr{X|H1} / Pr{X|H1}) = log2(Pr{X|H1}*2^Q) - log2(Pr{X|H1}*2^Q) + // = log2(h1_test) - log2(h0_test) + // = log2(2^(31-shifts_h1)*(1+b1)) + // - log2(2^(31-shifts_h0)*(1+b0)) + // = shifts_h0 - shifts_h1 + // + log2(1+b1) - log2(1+b0) + // ~= shifts_h0 - shifts_h1 + // + // Note that b0 and b1 are values less than 1, hence, 0 <= log2(1+b0) < 1. + // Further, b0 and b1 are independent and on the average the two terms + // cancel. + shifts_h0 = WebRtcSpl_NormW32(h0_test); + shifts_h1 = WebRtcSpl_NormW32(h1_test); + if (h0_test == 0) { + shifts_h0 = 31; + } + if (h1_test == 0) { + shifts_h1 = 31; + } + log_likelihood_ratio = shifts_h0 - shifts_h1; + + // Update |sum_log_likelihood_ratios| with spectrum weighting. This is + // used for the global VAD decision. + sum_log_likelihood_ratios += + (int32_t) (log_likelihood_ratio * kSpectrumWeight[channel]); + + // Local VAD decision. + if ((log_likelihood_ratio * 4) > individualTest) { + vadflag = 1; + } + + // TODO(bjornv): The conditional probabilities below are applied on the + // hard coded number of Gaussians set to two. Find a way to generalize. + // Calculate local noise probabilities used later when updating the GMM. + h0 = (int16_t) (h0_test >> 12); // Q15 + if (h0 > 0) { + // High probability of noise. 
Assign conditional probabilities for each + // Gaussian in the GMM. + tmp1_s32 = (noise_probability[0] & 0xFFFFF000) << 2; // Q29 + ngprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h0); // Q14 + ngprvec[channel + kNumChannels] = 16384 - ngprvec[channel]; + } else { + // Low noise probability. Assign conditional probability 1 to the first + // Gaussian and 0 to the rest (which is already set at initialization). + ngprvec[channel] = 16384; + } + + // Calculate local speech probabilities used later when updating the GMM. + h1 = (int16_t) (h1_test >> 12); // Q15 + if (h1 > 0) { + // High probability of speech. Assign conditional probabilities for each + // Gaussian in the GMM. Otherwise use the initialized values, i.e., 0. + tmp1_s32 = (speech_probability[0] & 0xFFFFF000) << 2; // Q29 + sgprvec[channel] = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, h1); // Q14 + sgprvec[channel + kNumChannels] = 16384 - sgprvec[channel]; + } + } + + // Make a global VAD decision. + vadflag |= (sum_log_likelihood_ratios >= totalTest); + + // Update the model parameters. + maxspe = 12800; + for (channel = 0; channel < kNumChannels; channel++) { + + // Get minimum value in past which is used for long term correction in Q4. + feature_minimum = WebRtcVad_FindMinimum(self, features[channel], channel); + + // Compute the "global" mean, that is the sum of the two means weighted. + noise_global_mean = WeightedAverage(&self->noise_means[channel], 0, + &kNoiseDataWeights[channel]); + tmp1_s16 = (int16_t) (noise_global_mean >> 6); // Q8 + + for (k = 0; k < kNumGaussians; k++) { + gaussian = channel + k * kNumChannels; + + nmk = self->noise_means[gaussian]; + smk = self->speech_means[gaussian]; + nsk = self->noise_stds[gaussian]; + ssk = self->speech_stds[gaussian]; + + // Update noise mean vector if the frame consists of noise only. 
+ nmk2 = nmk; + if (!vadflag) { + // deltaN = (x-mu)/sigma^2 + // ngprvec[k] = |noise_probability[k]| / + // (|noise_probability[0]| + |noise_probability[1]|) + + // (Q14 * Q11 >> 11) = Q14. + delt = (int16_t) ((ngprvec[gaussian] * deltaN[gaussian]) >> 11); + // Q7 + (Q14 * Q15 >> 22) = Q7. + nmk2 = nmk + (int16_t) ((delt * kNoiseUpdateConst) >> 22); + } + + // Long term correction of the noise mean. + // Q8 - Q8 = Q8. + ndelt = (feature_minimum << 4) - tmp1_s16; + // Q7 + (Q8 * Q8) >> 9 = Q7. + nmk3 = nmk2 + (int16_t) ((ndelt * kBackEta) >> 9); + + // Control that the noise mean does not drift to much. + tmp_s16 = (int16_t) ((k + 5) << 7); + if (nmk3 < tmp_s16) { + nmk3 = tmp_s16; + } + tmp_s16 = (int16_t) ((72 + k - channel) << 7); + if (nmk3 > tmp_s16) { + nmk3 = tmp_s16; + } + self->noise_means[gaussian] = nmk3; + + if (vadflag) { + // Update speech mean vector: + // |deltaS| = (x-mu)/sigma^2 + // sgprvec[k] = |speech_probability[k]| / + // (|speech_probability[0]| + |speech_probability[1]|) + + // (Q14 * Q11) >> 11 = Q14. + delt = (int16_t) ((sgprvec[gaussian] * deltaS[gaussian]) >> 11); + // Q14 * Q15 >> 21 = Q8. + tmp_s16 = (int16_t) ((delt * kSpeechUpdateConst) >> 21); + // Q7 + (Q8 >> 1) = Q7. With rounding. + smk2 = smk + ((tmp_s16 + 1) >> 1); + + // Control that the speech mean does not drift to much. + maxmu = maxspe + 640; + if (smk2 < kMinimumMean[k]) { + smk2 = kMinimumMean[k]; + } + if (smk2 > maxmu) { + smk2 = maxmu; + } + self->speech_means[gaussian] = smk2; // Q7. + + // (Q7 >> 3) = Q4. With rounding. + tmp_s16 = ((smk + 4) >> 3); + + tmp_s16 = features[channel] - tmp_s16; // Q4 + // (Q11 * Q4 >> 3) = Q12. + tmp1_s32 = (deltaS[gaussian] * tmp_s16) >> 3; + tmp2_s32 = tmp1_s32 - 4096; + tmp_s16 = sgprvec[gaussian] >> 2; + // (Q14 >> 2) * Q12 = Q24. + tmp1_s32 = tmp_s16 * tmp2_s32; + + tmp2_s32 = tmp1_s32 >> 4; // Q20 + + // 0.1 * Q20 / Q7 = Q13. 
+ if (tmp2_s32 > 0) { + tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp2_s32, ssk * 10); + } else { + tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp2_s32, ssk * 10); + tmp_s16 = -tmp_s16; + } + // Divide by 4 giving an update factor of 0.025 (= 0.1 / 4). + // Note that division by 4 equals shift by 2, hence, + // (Q13 >> 8) = (Q13 >> 6) / 4 = Q7. + tmp_s16 += 128; // Rounding. + ssk += (tmp_s16 >> 8); + if (ssk < kMinStd) { + ssk = kMinStd; + } + self->speech_stds[gaussian] = ssk; + } else { + // Update GMM variance vectors. + // deltaN * (features[channel] - nmk) - 1 + // Q4 - (Q7 >> 3) = Q4. + tmp_s16 = features[channel] - (nmk >> 3); + // (Q11 * Q4 >> 3) = Q12. + tmp1_s32 = (deltaN[gaussian] * tmp_s16) >> 3; + tmp1_s32 -= 4096; + + // (Q14 >> 2) * Q12 = Q24. + tmp_s16 = (ngprvec[gaussian] + 2) >> 2; + tmp2_s32 = OverflowingMulS16ByS32ToS32(tmp_s16, tmp1_s32); + // Q20 * approx 0.001 (2^-10=0.0009766), hence, + // (Q24 >> 14) = (Q24 >> 4) / 2^10 = Q20. + tmp1_s32 = tmp2_s32 >> 14; + + // Q20 / Q7 = Q13. + if (tmp1_s32 > 0) { + tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(tmp1_s32, nsk); + } else { + tmp_s16 = (int16_t) WebRtcSpl_DivW32W16(-tmp1_s32, nsk); + tmp_s16 = -tmp_s16; + } + tmp_s16 += 32; // Rounding + nsk += tmp_s16 >> 6; // Q13 >> 6 = Q7. + if (nsk < kMinStd) { + nsk = kMinStd; + } + self->noise_stds[gaussian] = nsk; + } + } + + // Separate models if they are too close. + // |noise_global_mean| in Q14 (= Q7 * Q7). + noise_global_mean = WeightedAverage(&self->noise_means[channel], 0, + &kNoiseDataWeights[channel]); + + // |speech_global_mean| in Q14 (= Q7 * Q7). + speech_global_mean = WeightedAverage(&self->speech_means[channel], 0, + &kSpeechDataWeights[channel]); + + // |diff| = "global" speech mean - "global" noise mean. + // (Q14 >> 9) - (Q14 >> 9) = Q5. 
+ diff = (int16_t) (speech_global_mean >> 9) - + (int16_t) (noise_global_mean >> 9); + if (diff < kMinimumDifference[channel]) { + tmp_s16 = kMinimumDifference[channel] - diff; + + // |tmp1_s16| = ~0.8 * (kMinimumDifference - diff) in Q7. + // |tmp2_s16| = ~0.2 * (kMinimumDifference - diff) in Q7. + tmp1_s16 = (int16_t) ((13 * tmp_s16) >> 2); + tmp2_s16 = (int16_t) ((3 * tmp_s16) >> 2); + + // Move Gaussian means for speech model by |tmp1_s16| and update + // |speech_global_mean|. Note that |self->speech_means[channel]| is + // changed after the call. + speech_global_mean = WeightedAverage(&self->speech_means[channel], + tmp1_s16, + &kSpeechDataWeights[channel]); + + // Move Gaussian means for noise model by -|tmp2_s16| and update + // |noise_global_mean|. Note that |self->noise_means[channel]| is + // changed after the call. + noise_global_mean = WeightedAverage(&self->noise_means[channel], + -tmp2_s16, + &kNoiseDataWeights[channel]); + } + + // Control that the speech & noise means do not drift to much. + maxspe = kMaximumSpeech[channel]; + tmp2_s16 = (int16_t) (speech_global_mean >> 7); + if (tmp2_s16 > maxspe) { + // Upper limit of speech model. + tmp2_s16 -= maxspe; + + for (k = 0; k < kNumGaussians; k++) { + self->speech_means[channel + k * kNumChannels] -= tmp2_s16; + } + } + + tmp2_s16 = (int16_t) (noise_global_mean >> 7); + if (tmp2_s16 > kMaximumNoise[channel]) { + tmp2_s16 -= kMaximumNoise[channel]; + + for (k = 0; k < kNumGaussians; k++) { + self->noise_means[channel + k * kNumChannels] -= tmp2_s16; + } + } + } + self->frame_counter++; + } + + // Smooth with respect to transition hysteresis. 
+    if (!vadflag) {
+        if (self->over_hang > 0) {
+            vadflag = 2 + self->over_hang;
+            self->over_hang--;
+        }
+        self->num_of_speech = 0;
+    } else {
+        self->num_of_speech++;
+        if (self->num_of_speech > kMaxSpeechFrames) {
+            self->num_of_speech = kMaxSpeechFrames;
+            self->over_hang = overhead2;
+        } else {
+            self->over_hang = overhead1;
+        }
+    }
+    return vadflag;
+}
+
+// Initialize the VAD state in |self| and select |kDefaultMode|. Returns 0 on success, -1 on error.
+int WebRtcVad_InitCore(VadInstT *self) {
+    int i;
+
+    if (self == NULL) {
+        return -1;
+    }
+
+    // Initialization of general struct variables.
+    self->vad = 1; // Speech active (=1).
+    self->frame_counter = 0;
+    self->over_hang = 0;
+    self->num_of_speech = 0;
+
+    // Initialization of downsampling filter state.
+    memset(self->downsampling_filter_states, 0,
+           sizeof(self->downsampling_filter_states));
+
+    // Initialization of 48 to 8 kHz downsampling.
+    WebRtcSpl_ResetResample48khzTo8khz(&self->state_48_to_8);
+
+    // Read initial PDF parameters (means and standard deviations of both models).
+    for (i = 0; i < kTableSize; i++) {
+        self->noise_means[i] = kNoiseDataMeans[i];
+        self->speech_means[i] = kSpeechDataMeans[i];
+        self->noise_stds[i] = kNoiseDataStds[i];
+        self->speech_stds[i] = kSpeechDataStds[i];
+    }
+
+    // Initialize Index and Minimum value vectors (16 entries per channel).
+    for (i = 0; i < 16 * kNumChannels; i++) {
+        self->low_value_vector[i] = 10000;
+        self->index_vector[i] = 0;
+    }
+
+    // Initialize splitting filter states.
+    memset(self->upper_state, 0, sizeof(self->upper_state));
+    memset(self->lower_state, 0, sizeof(self->lower_state));
+
+    // Initialize high pass filter states.
+    memset(self->hp_filter_state, 0, sizeof(self->hp_filter_state));
+
+    // Initialize mean value memory, for WebRtcVad_FindMinimum().
+    for (i = 0; i < kNumChannels; i++) {
+        self->mean_value[i] = 1600;
+    }
+
+    // Set aggressiveness mode to default (=|kDefaultMode|).
+    if (WebRtcVad_set_mode_core(self, kDefaultMode) != 0) {
+        return -1;
+    }
+
+    self->init_flag = kInitCheck; // Marks the instance as initialized.
+
+    return 0;
+}
+
+// Set aggressiveness mode (0-3) by copying that mode's tables into |self|. Returns -1 for any other mode.
+int WebRtcVad_set_mode_core(VadInstT *self, int mode) {
+    int return_value = 0;
+
+    switch (mode) {
+        case 0:
+            // Quality mode.
+            memcpy(self->over_hang_max_1, kOverHangMax1Q,
+                   sizeof(self->over_hang_max_1));
+            memcpy(self->over_hang_max_2, kOverHangMax2Q,
+                   sizeof(self->over_hang_max_2));
+            memcpy(self->individual, kLocalThresholdQ,
+                   sizeof(self->individual));
+            memcpy(self->total, kGlobalThresholdQ,
+                   sizeof(self->total));
+            break;
+        case 1:
+            // Low bitrate mode.
+            memcpy(self->over_hang_max_1, kOverHangMax1LBR,
+                   sizeof(self->over_hang_max_1));
+            memcpy(self->over_hang_max_2, kOverHangMax2LBR,
+                   sizeof(self->over_hang_max_2));
+            memcpy(self->individual, kLocalThresholdLBR,
+                   sizeof(self->individual));
+            memcpy(self->total, kGlobalThresholdLBR,
+                   sizeof(self->total));
+            break;
+        case 2:
+            // Aggressive mode.
+            memcpy(self->over_hang_max_1, kOverHangMax1AGG,
+                   sizeof(self->over_hang_max_1));
+            memcpy(self->over_hang_max_2, kOverHangMax2AGG,
+                   sizeof(self->over_hang_max_2));
+            memcpy(self->individual, kLocalThresholdAGG,
+                   sizeof(self->individual));
+            memcpy(self->total, kGlobalThresholdAGG,
+                   sizeof(self->total));
+            break;
+        case 3:
+            // Very aggressive mode.
+            memcpy(self->over_hang_max_1, kOverHangMax1VAG,
+                   sizeof(self->over_hang_max_1));
+            memcpy(self->over_hang_max_2, kOverHangMax2VAG,
+                   sizeof(self->over_hang_max_2));
+            memcpy(self->individual, kLocalThresholdVAG,
+                   sizeof(self->individual));
+            memcpy(self->total, kGlobalThresholdVAG,
+                   sizeof(self->total));
+            break;
+        default:
+            return_value = -1; // Unknown mode: |self| is left unchanged.
+            break;
+    }
+
+    return return_value;
+}
+
+// Calculate VAD decision by first extracting feature values and then calculate
+// probability for both speech and background noise.
+
+int WebRtcVad_CalcVad48khz(VadInstT *inst, const int16_t *speech_frame,
+                           size_t frame_length) {
+    int vad;
+    size_t i;
+    int16_t speech_nb[240]; // 30 ms in 8 kHz.
+    // |tmp_mem| is a temporary memory used by resample function, length is
+    // frame length in 10 ms (480 samples) + 256 extra.
+    int32_t tmp_mem[480 + 256] = {0};
+    const size_t kFrameLen10ms48khz = 480;
+    const size_t kFrameLen10ms8khz = 80;
+    size_t num_10ms_frames = frame_length / kFrameLen10ms48khz;
+
+    for (i = 0; i < num_10ms_frames; i++) { // Resample each 10 ms block in turn (480 in -> 80 out).
+        WebRtcSpl_Resample48khzTo8khz(&speech_frame[i * kFrameLen10ms48khz],
+                                      &speech_nb[i * kFrameLen10ms8khz],
+                                      &inst->state_48_to_8,
+                                      tmp_mem);
+    }
+
+    // Do VAD on an 8 kHz signal
+    vad = WebRtcVad_CalcVad8khz(inst, speech_nb, frame_length / 6);
+
+    return vad;
+}
+
+int WebRtcVad_CalcVad32khz(VadInstT *inst, const int16_t *speech_frame,
+                           size_t frame_length) {
+    size_t len;
+    int vad;
+    int16_t speechWB[480]; // 16 kHz intermediate: a 960-sample (30 ms) SWB frame halves to 480 samples.
+    int16_t speechNB[240]; // 8 kHz signal: 480 WB samples (30 ms) halve to 240 samples.
+
+
+    // Downsample signal 32->16->8 before doing VAD
+    WebRtcVad_Downsampling(speech_frame, speechWB, &(inst->downsampling_filter_states[2]),
+                           frame_length);
+    len = frame_length / 2;
+
+    WebRtcVad_Downsampling(speechWB, speechNB, inst->downsampling_filter_states, len);
+    len /= 2;
+
+    // Do VAD on an 8 kHz signal
+    vad = WebRtcVad_CalcVad8khz(inst, speechNB, len);
+
+    return vad;
+}
+
+int WebRtcVad_CalcVad16khz(VadInstT *inst, const int16_t *speech_frame,
+                           size_t frame_length) {
+    size_t len;
+    int vad;
+    int16_t speechNB[240]; // 8 kHz signal: 480 WB samples (30 ms) halve to 240 samples.
+
+    // Wideband: Downsample signal before doing VAD
+    WebRtcVad_Downsampling(speech_frame, speechNB, inst->downsampling_filter_states,
+                           frame_length);
+
+    len = frame_length / 2;
+    vad = WebRtcVad_CalcVad8khz(inst, speechNB, len);
+
+    return vad;
+}
+
+int WebRtcVad_CalcVad8khz(VadInstT *inst, const int16_t *speech_frame,
+                          size_t frame_length) {
+    int16_t feature_vector[kNumChannels], total_power;
+
+    // Get power in the bands
+    total_power = WebRtcVad_CalculateFeatures(inst, speech_frame, frame_length,
+                                              feature_vector);
+
+    // Make a VAD decision and remember it in the instance.
+    inst->vad = GmmProbability(inst, feature_vector, total_power, frame_length);
+
+    return inst->vad;
+}
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/vad_filterbank.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/vad_filterbank.c
new file mode 100644
index 0000000000000000000000000000000000000000..5a49cb4d4b6da0d428514eacede41c871cf2110b
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/vad_filterbank.c
@@ -0,0 +1,326 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "include/vad_filterbank.h"
+
+// Constants used in LogOfEnergy().
+static const int16_t kLogConst = 24660; // 160*log10(2) in Q9.
+static const int16_t kLogEnergyIntPart = 14336; // 14 in Q10
+
+// Coefficients used by HighPassFilter, Q14.
+static const int16_t kHpZeroCoefs[3] = {6631, -13262, 6631};
+static const int16_t kHpPoleCoefs[3] = {16384, -7756, 5620};
+
+// Allpass filter coefficients, upper and lower, in Q15.
+// Upper: 0.64, Lower: 0.17
+static const int16_t kAllPassCoefsQ15[2] = {20972, 5571};
+
+// Adjustment for division with two in SplitFilter.
+static const int16_t kOffsetVector[6] = {368, 368, 272, 176, 176, 176};
+
+// High pass filtering, with a cut-off frequency at 80 Hz, if the |data_in| is
+// sampled at 500 Hz.
+//
+// - data_in [i] : Input audio data sampled at 500 Hz.
+// - data_length [i] : Length of input and output data.
+// - filter_state [i/o] : State of the filter.
+// - data_out [o] : Output audio data in the frequency interval
+// 80 - 250 Hz.
+static void HighPassFilter(const int16_t *data_in, size_t data_length,
+                           int16_t *filter_state, int16_t *data_out) {
+    size_t i;
+    const int16_t *in_ptr = data_in;
+    int16_t *out_ptr = data_out;
+    int32_t tmp32 = 0;
+
+
+    // The sum of the absolute values of the impulse response:
+    // The zero/pole-filter has a max amplification of a single sample of: 1.4546
+    // Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194
+    // The all-zero section has a max amplification of a single sample of: 1.6189
+    // Impulse response: 0.4047 -0.8094 0.4047 0 0 0
+    // The all-pole section has a max amplification of a single sample of: 1.9931
+    // Impulse response: 1.0000 0.4734 -0.1189 -0.2187 -0.0627 0.04532
+
+    for (i = 0; i < data_length; i++) {
+        // All-zero section (filter coefficients in Q14).
+        tmp32 = kHpZeroCoefs[0] * *in_ptr;
+        tmp32 += kHpZeroCoefs[1] * filter_state[0];
+        tmp32 += kHpZeroCoefs[2] * filter_state[1];
+        filter_state[1] = filter_state[0];
+        filter_state[0] = *in_ptr++;
+
+        // All-pole section (filter coefficients in Q14).
+        tmp32 -= kHpPoleCoefs[1] * filter_state[2];
+        tmp32 -= kHpPoleCoefs[2] * filter_state[3];
+        filter_state[3] = filter_state[2];
+        filter_state[2] = (int16_t) (tmp32 >> 14);
+        *out_ptr++ = filter_state[2];
+    }
+}
+
+// All pass filtering of |data_in|, used before splitting the signal into two
+// frequency bands (low pass vs high pass).
+// Note that |data_in| and |data_out| can NOT correspond to the same address.
+//
+// - data_in [i] : Input audio signal given in Q0.
+// - data_length [i] : Length of input and output data.
+// - filter_coefficient [i] : Given in Q15.
+// - filter_state [i/o] : State of the filter given in Q(-1).
+// - data_out [o] : Output audio signal given in Q(-1).
+static void AllPassFilter(const int16_t *data_in, size_t data_length,
+                          int16_t filter_coefficient, int16_t *filter_state,
+                          int16_t *data_out) {
+    // The filter can only cause overflow (in the w16 output variable)
+    // if more than 4 consecutive input numbers are of maximum value and
+    // have the same sign as the impulse response's first taps.
+    // First 6 taps of the impulse response:
+    // 0.6399 0.5905 -0.3779 0.2418 -0.1547 0.0990
+
+    size_t i;
+    int16_t tmp16 = 0;
+    int32_t tmp32 = 0;
+    int32_t state32 = ((int32_t) (*filter_state) * (1 << 16)); // Q15
+
+    for (i = 0; i < data_length; i++) {
+        tmp32 = state32 + filter_coefficient * *data_in;
+        tmp16 = (int16_t) (tmp32 >> 16); // Q(-1)
+        *data_out++ = tmp16;
+        state32 = (*data_in * (1 << 14)) - filter_coefficient * tmp16; // Q14
+        state32 *= 2; // Q15.
+        data_in += 2; // Only every second input sample is consumed (downsampling by 2).
+    }
+
+    *filter_state = (int16_t) (state32 >> 16); // Q(-1)
+}
+
+// Splits |data_in| into |hp_data_out| and |lp_data_out| corresponding to
+// an upper (high pass) part and a lower (low pass) part respectively.
+//
+// - data_in [i] : Input audio data to be split into two frequency bands.
+// - data_length [i] : Length of |data_in|.
+// - upper_state [i/o] : State of the upper filter, given in Q(-1).
+// - lower_state [i/o] : State of the lower filter, given in Q(-1).
+// - hp_data_out [o] : Output audio data of the upper half of the spectrum.
+// The length is |data_length| / 2.
+// - lp_data_out [o] : Output audio data of the lower half of the spectrum.
+// The length is |data_length| / 2.
+static void SplitFilter(const int16_t *data_in, size_t data_length,
+                        int16_t *upper_state, int16_t *lower_state,
+                        int16_t *hp_data_out, int16_t *lp_data_out) {
+    size_t i;
+    size_t half_length = data_length >> 1; // Downsampling by 2.
+    int16_t tmp_out;
+
+    // All-pass filtering upper branch (even-indexed input samples).
+    AllPassFilter(&data_in[0], half_length, kAllPassCoefsQ15[0], upper_state,
+                  hp_data_out);
+
+    // All-pass filtering lower branch (odd-indexed input samples).
+    AllPassFilter(&data_in[1], half_length, kAllPassCoefsQ15[1], lower_state,
+                  lp_data_out);
+
+    // Make LP and HP signals: HP = upper - lower, LP = upper + lower.
+    for (i = 0; i < half_length; i++) {
+        tmp_out = *hp_data_out;
+        *hp_data_out++ -= *lp_data_out;
+        *lp_data_out++ += tmp_out;
+    }
+}
+
+// Calculates the energy of |data_in| in dB, and also updates an overall
+// |total_energy| if necessary.
+//
+// - data_in [i] : Input audio data for energy calculation.
+// - data_length [i] : Length of input data.
+// - offset [i] : Offset value added to |log_energy|.
+// - total_energy [i/o] : An external energy updated with the energy of
+// |data_in|.
+// NOTE: |total_energy| is only updated if
+// |total_energy| <= |kMinEnergy|.
+// - log_energy [o] : 10 * log10("energy of |data_in|") given in Q4.
+static void LogOfEnergy(const int16_t *data_in, size_t data_length,
+                        int16_t offset, int16_t *total_energy,
+                        int16_t *log_energy) {
+    // |tot_rshifts| accumulates the number of right shifts performed on |energy|.
+    int tot_rshifts = 0;
+    // The |energy| will be normalized to 15 bits. We use unsigned integer because
+    // we eventually will mask out the fractional part.
+    uint32_t energy = 0;
+
+    RTC_DCHECK(data_in);
+    RTC_DCHECK_GT(data_length, 0);
+
+    energy = (uint32_t) WebRtcSpl_Energy((int16_t *) data_in, data_length,
+                                         &tot_rshifts);
+
+    if (energy != 0) {
+        // By construction, normalizing to 15 bits is equivalent with 17 leading
+        // zeros of an unsigned 32 bit value.
+        int normalizing_rshifts = 17 - WebRtcSpl_NormU32(energy);
+        // In a 15 bit representation the leading bit is 2^14. log2(2^14) in Q10 is
+        // (14 << 10), which is what we initialize |log2_energy| with. For a more
+        // detailed derivation, see below.
+        int16_t log2_energy = kLogEnergyIntPart;
+
+        tot_rshifts += normalizing_rshifts;
+        // Normalize |energy| to 15 bits.
+        // |tot_rshifts| is now the total number of right shifts performed on
+        // |energy| after normalization. This means that |energy| is in
+        // Q(-tot_rshifts).
+        if (normalizing_rshifts < 0) {
+            energy <<= -normalizing_rshifts;
+        } else {
+            energy >>= normalizing_rshifts;
+        }
+
+        // Calculate the energy of |data_in| in dB, in Q4.
+        //
+        // 10 * log10("true energy") in Q4 = 2^4 * 10 * log10("true energy") =
+        // 160 * log10(|energy| * 2^|tot_rshifts|) =
+        // 160 * log10(2) * log2(|energy| * 2^|tot_rshifts|) =
+        // 160 * log10(2) * (log2(|energy|) + log2(2^|tot_rshifts|)) =
+        // (160 * log10(2)) * (log2(|energy|) + |tot_rshifts|) =
+        // |kLogConst| * (|log2_energy| + |tot_rshifts|)
+        //
+        // We know by construction that |energy| is normalized to 15 bits. Hence,
+        // |energy| = 2^14 + frac_Q15, where frac_Q15 is a fractional part in Q15.
+        // Further, we'd like |log2_energy| in Q10
+        // log2(|energy|) in Q10 = 2^10 * log2(2^14 + frac_Q15) =
+        // 2^10 * log2(2^14 * (1 + frac_Q15 * 2^-14)) =
+        // 2^10 * (14 + log2(1 + frac_Q15 * 2^-14)) ~=
+        // (14 << 10) + 2^10 * (frac_Q15 * 2^-14) =
+        // (14 << 10) + (frac_Q15 * 2^-4) = (14 << 10) + (frac_Q15 >> 4)
+        //
+        // Note that frac_Q15 = (|energy| & 0x00003FFF)
+
+        // Calculate and add the fractional part to |log2_energy|.
+        log2_energy += (int16_t) ((energy & 0x00003FFF) >> 4);
+
+        // |kLogConst| is in Q9, |log2_energy| in Q10 and |tot_rshifts| in Q0.
+        // Note that our derivation above has already accounted for an output in Q4.
+        *log_energy = (int16_t) (((kLogConst * log2_energy) >> 19) +
+                                 ((tot_rshifts * kLogConst) >> 9));
+
+        if (*log_energy < 0) {
+            *log_energy = 0;
+        }
+    } else {
+        *log_energy = offset; // Zero energy: report only the offset and leave |total_energy| untouched.
+        return;
+    }
+
+    *log_energy += offset;
+
+    // Update the approximate |total_energy| with the energy of |data_in|, if
+    // |total_energy| has not exceeded |kMinEnergy|. |total_energy| is used as an
+    // energy indicator in GmmProbability() in vad_core.c.
+    if (*total_energy <= kMinEnergy) {
+        if (tot_rshifts >= 0) {
+            // We know by construction that the |energy| > |kMinEnergy| in Q0, so add
+            // an arbitrary value such that |total_energy| exceeds |kMinEnergy|.
+            *total_energy += kMinEnergy + 1;
+        } else {
+            // By construction |energy| is represented by 15 bits, hence any number of
+            // right shifted |energy| will fit in an int16_t. In addition, adding the
+            // value to |total_energy| is wrap around safe as long as
+            // |kMinEnergy| < 8192.
+            *total_energy += (int16_t) (energy >> -tot_rshifts); // Q0.
+        }
+    }
+}
+
+int16_t WebRtcVad_CalculateFeatures(VadInstT *self, const int16_t *data_in,
+                                    size_t data_length, int16_t *features) {
+    int16_t total_energy = 0; // Accumulated by the LogOfEnergy() calls below and returned.
+    // We expect |data_length| to be 80, 160 or 240 samples, which corresponds to
+    // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will
+    // have at most 120 samples after the first split and at most 60 samples after
+    // the second split.
+    int16_t hp_120[120], lp_120[120];
+    int16_t hp_60[60], lp_60[60];
+    const size_t half_data_length = data_length >> 1;
+    size_t length = half_data_length; // |data_length| / 2, corresponds to
+    // bandwidth = 2000 Hz after downsampling.
+
+    // Initialize variables for the first SplitFilter().
+    int frequency_band = 0;
+    const int16_t *in_ptr = data_in; // [0 - 4000] Hz.
+    int16_t *hp_out_ptr = hp_120; // [2000 - 4000] Hz.
+    int16_t *lp_out_ptr = lp_120; // [0 - 2000] Hz.
+
+    RTC_DCHECK_LE(data_length, 240);
+    RTC_DCHECK_LT(4, kNumChannels - 1); // Checking maximum |frequency_band|.
+
+    // Split at 2000 Hz and downsample.
+    SplitFilter(in_ptr, data_length, &self->upper_state[frequency_band],
+                &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
+
+    // For the upper band (2000 Hz - 4000 Hz) split at 3000 Hz and downsample.
+    frequency_band = 1;
+    in_ptr = hp_120; // [2000 - 4000] Hz.
+    hp_out_ptr = hp_60; // [3000 - 4000] Hz.
+    lp_out_ptr = lp_60; // [2000 - 3000] Hz.
+    SplitFilter(in_ptr, length, &self->upper_state[frequency_band],
+                &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
+
+    // Energy in 3000 Hz - 4000 Hz.
+    length >>= 1; // |data_length| / 4 <=> bandwidth = 1000 Hz.
+
+    LogOfEnergy(hp_60, length, kOffsetVector[5], &total_energy, &features[5]);
+
+    // Energy in 2000 Hz - 3000 Hz.
+    LogOfEnergy(lp_60, length, kOffsetVector[4], &total_energy, &features[4]);
+
+    // For the lower band (0 Hz - 2000 Hz) split at 1000 Hz and downsample.
+    frequency_band = 2;
+    in_ptr = lp_120; // [0 - 2000] Hz.
+    hp_out_ptr = hp_60; // [1000 - 2000] Hz.
+    lp_out_ptr = lp_60; // [0 - 1000] Hz.
+    length = half_data_length; // |data_length| / 2 <=> bandwidth = 2000 Hz.
+    SplitFilter(in_ptr, length, &self->upper_state[frequency_band],
+                &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
+
+    // Energy in 1000 Hz - 2000 Hz.
+    length >>= 1; // |data_length| / 4 <=> bandwidth = 1000 Hz.
+    LogOfEnergy(hp_60, length, kOffsetVector[3], &total_energy, &features[3]);
+
+    // For the lower band (0 Hz - 1000 Hz) split at 500 Hz and downsample.
+    frequency_band = 3;
+    in_ptr = lp_60; // [0 - 1000] Hz.
+    hp_out_ptr = hp_120; // [500 - 1000] Hz.
+    lp_out_ptr = lp_120; // [0 - 500] Hz.
+    SplitFilter(in_ptr, length, &self->upper_state[frequency_band],
+                &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
+
+    // Energy in 500 Hz - 1000 Hz.
+    length >>= 1; // |data_length| / 8 <=> bandwidth = 500 Hz.
+    LogOfEnergy(hp_120, length, kOffsetVector[2], &total_energy, &features[2]);
+
+    // For the lower band (0 Hz - 500 Hz) split at 250 Hz and downsample.
+    frequency_band = 4;
+    in_ptr = lp_120; // [0 - 500] Hz.
+    hp_out_ptr = hp_60; // [250 - 500] Hz.
+    lp_out_ptr = lp_60; // [0 - 250] Hz.
+    SplitFilter(in_ptr, length, &self->upper_state[frequency_band],
+                &self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
+
+    // Energy in 250 Hz - 500 Hz.
+    length >>= 1; // |data_length| / 16 <=> bandwidth = 250 Hz.
+    LogOfEnergy(hp_60, length, kOffsetVector[1], &total_energy, &features[1]);
+
+    // Remove 0 Hz - 80 Hz, by high pass filtering the lower band.
+    HighPassFilter(lp_60, length, self->hp_filter_state, hp_120);
+
+    // Energy in 80 Hz - 250 Hz.
+    LogOfEnergy(hp_120, length, kOffsetVector[0], &total_energy, &features[0]);
+
+    return total_energy;
+}
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/vad_gmm.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/vad_gmm.c
new file mode 100644
index 0000000000000000000000000000000000000000..b1ab67a9770c98f0098a6cceab6d080fbdd8c4b7
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/vad_gmm.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "include/vad_gmm.h"
+
+#include "include/signal_processing_library.h"
+
+static const int32_t kCompVar = 22005; // Limit for the Q10 exponent; at or above it the probability is returned as 0.
+static const int16_t kLog2Exp = 5909; // log2(exp(1)) in Q12.
+
+// For a normal distribution, the probability of |input| is calculated and
+// returned (in Q20). The formula for normal distributed probability is
+//
+// 1 / s * exp(-(x - m)^2 / (2 * s^2))
+//
+// where the parameters are given in the following Q domains:
+// m = |mean| (Q7)
+// s = |std| (Q7)
+// x = |input| (Q4)
+// in addition to the probability we output |delta| (in Q11) used when updating
+// the noise/speech model.
+int32_t WebRtcVad_GaussianProbability(int16_t input,
+                                      int16_t mean,
+                                      int16_t std,
+                                      int16_t *delta) {
+    int16_t tmp16, inv_std, inv_std2, exp_value = 0;
+    int32_t tmp32;
+
+    // Calculate |inv_std| = 1 / s, in Q10.
+    // 131072 = 1 in Q17, and (|std| >> 1) is for rounding instead of truncation.
+    // Q-domain: Q17 / Q7 = Q10.
+    tmp32 = (int32_t) 131072 + (int32_t) (std >> 1);
+    inv_std = (int16_t) WebRtcSpl_DivW32W16(tmp32, std);
+
+    // Calculate |inv_std2| = 1 / s^2, in Q14.
+    tmp16 = (inv_std >> 2); // Q10 -> Q8.
+    // Q-domain: (Q8 * Q8) >> 2 = Q14.
+    inv_std2 = (int16_t) ((tmp16 * tmp16) >> 2);
+    // TODO(bjornv): Investigate if changing to
+    // inv_std2 = (int16_t)((inv_std * inv_std) >> 6);
+    // gives better accuracy.
+
+    tmp16 = (input << 3); // Q4 -> Q7
+    tmp16 = tmp16 - mean; // Q7 - Q7 = Q7
+
+    // To be used later, when updating noise/speech model.
+    // |delta| = (x - m) / s^2, in Q11.
+    // Q-domain: (Q14 * Q7) >> 10 = Q11.
+    *delta = (int16_t) ((inv_std2 * tmp16) >> 10);
+
+    // Calculate the exponent |tmp32| = (x - m)^2 / (2 * s^2), in Q10. Replacing
+    // division by two with one shift.
+    // Q-domain: (Q11 * Q7) >> 8 = Q10; the code shifts by 9 to include the division by two.
+    tmp32 = (*delta * tmp16) >> 9;
+
+    // If the exponent is small enough to give a non-zero probability we calculate
+    // |exp_value| ~= exp(-(x - m)^2 / (2 * s^2))
+    // ~= exp2(-log2(exp(1)) * |tmp32|).
+    if (tmp32 < kCompVar) {
+        // Calculate |tmp16| = log2(exp(1)) * |tmp32|, in Q10.
+        // Q-domain: (Q12 * Q10) >> 12 = Q10.
+        tmp16 = (int16_t) ((kLog2Exp * tmp32) >> 12);
+        tmp16 = -tmp16;
+        exp_value = (0x0400 | (tmp16 & 0x03FF));
+        tmp16 ^= 0xFFFF;
+        tmp16 >>= 10;
+        tmp16 += 1;
+        // Get |exp_value| = exp(-|tmp32|) in Q10.
+        exp_value >>= tmp16;
+    }
+
+    // Calculate and return (1 / s) * exp(-(x - m)^2 / (2 * s^2)), in Q20.
+    // Q-domain: Q10 * Q10 = Q20.
+    return inv_std * exp_value;
+}
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/vad_sp.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/vad_sp.c
new file mode 100644
index 0000000000000000000000000000000000000000..73b2a5b1f9d193f494c82667d8aca35409d3e835
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/vad_sp.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "include/vad_sp.h"
+
+// Allpass filter coefficients, upper and lower, in Q13.
+// Upper: 0.64, Lower: 0.17.
+static const int16_t kAllPassCoefsQ13[2] = {5243, 1392}; // Q13.
+static const int16_t kSmoothingDown = 6553; // 0.2 in Q15.
+static const int16_t kSmoothingUp = 32439; // 0.99 in Q15.
+
+// TODO(bjornv): Move this function to vad_filterbank.c.
+// Downsampling filter based on splitting filter and allpass functions.
+void WebRtcVad_Downsampling(const int16_t *signal_in,
+                            int16_t *signal_out,
+                            int32_t *filter_state,
+                            size_t in_length) {
+    int16_t tmp16_1 = 0, tmp16_2 = 0;
+    int32_t tmp32_1 = filter_state[0];
+    int32_t tmp32_2 = filter_state[1];
+    size_t n = 0;
+    // Downsampling by 2 gives half length.
+    size_t half_length = (in_length >> 1);
+
+    // Filter coefficients in Q13, filter state in Q0.
+    for (n = 0; n < half_length; n++) {
+        // All-pass filtering upper branch.
+        tmp16_1 = (int16_t) ((tmp32_1 >> 1) +
+                             ((kAllPassCoefsQ13[0] * *signal_in) >> 14));
+        *signal_out = tmp16_1;
+        tmp32_1 = (int32_t) (*signal_in++) - ((kAllPassCoefsQ13[0] * tmp16_1) >> 12);
+
+        // All-pass filtering lower branch.
+        tmp16_2 = (int16_t) ((tmp32_2 >> 1) +
+                             ((kAllPassCoefsQ13[1] * *signal_in) >> 14));
+        *signal_out++ += tmp16_2; // Each output sample is the sum of both branches.
+        tmp32_2 = (int32_t) (*signal_in++) - ((kAllPassCoefsQ13[1] * tmp16_2) >> 12);
+    }
+    // Store the filter states.
+    filter_state[0] = tmp32_1;
+    filter_state[1] = tmp32_2;
+}
+
+// Inserts |feature_value| into |low_value_vector|, if it is one of the 16
+// smallest values of the last 100 frames. Then calculates and returns the median
+// of the five smallest values.
+int16_t WebRtcVad_FindMinimum(VadInstT *self,
+                              int16_t feature_value,
+                              int channel) {
+    int i = 0, j = 0;
+    int position = -1;
+    // Offset to beginning of the 16 minimum values in memory.
+    const int offset = (channel << 4);
+    int16_t current_median = 1600;
+    int16_t alpha = 0;
+    int32_t tmp32 = 0;
+    // Pointer to memory for the 16 minimum values and the age of each value of
+    // the |channel|.
+    int16_t *age = &self->index_vector[offset];
+    int16_t *smallest_values = &self->low_value_vector[offset];
+
+    RTC_DCHECK_LT(channel, kNumChannels);
+
+    // Each value in |smallest_values| is getting 1 loop older. Update |age|, and
+    // remove old values.
+    for (i = 0; i < 16; i++) {
+        if (age[i] != 100) {
+            age[i]++;
+        } else {
+            // Too old value. Remove from memory and shift larger values downwards.
+            for (j = i; j < 15; j++) {
+                smallest_values[j] = smallest_values[j + 1];
+                age[j] = age[j + 1];
+            }
+            age[15] = 101;
+            smallest_values[15] = 10000;
+        }
+    }
+
+    // Check if |feature_value| is smaller than any of the values in
+    // |smallest_values|. If so, find the |position| where to insert the new value
+    // (|feature_value|).
+    if (feature_value < smallest_values[7]) {
+        if (feature_value < smallest_values[3]) {
+            if (feature_value < smallest_values[1]) {
+                if (feature_value < smallest_values[0]) {
+                    position = 0;
+                } else {
+                    position = 1;
+                }
+            } else if (feature_value < smallest_values[2]) {
+                position = 2;
+            } else {
+                position = 3;
+            }
+        } else if (feature_value < smallest_values[5]) {
+            if (feature_value < smallest_values[4]) {
+                position = 4;
+            } else {
+                position = 5;
+            }
+        } else if (feature_value < smallest_values[6]) {
+            position = 6;
+        } else {
+            position = 7;
+        }
+    } else if (feature_value < smallest_values[15]) {
+        if (feature_value < smallest_values[11]) {
+            if (feature_value < smallest_values[9]) {
+                if (feature_value < smallest_values[8]) {
+                    position = 8;
+                } else {
+                    position = 9;
+                }
+            } else if (feature_value < smallest_values[10]) {
+                position = 10;
+            } else {
+                position = 11;
+            }
+        } else if (feature_value < smallest_values[13]) {
+            if (feature_value < smallest_values[12]) {
+                position = 12;
+            } else {
+                position = 13;
+            }
+        } else if (feature_value < smallest_values[14]) {
+            position = 14;
+        } else {
+            position = 15;
+        }
+    }
+
+    // If we have detected a new small value, insert it at the correct position
+    // and shift larger values up.
+    if (position > -1) {
+        for (i = 15; i > position; i--) {
+            smallest_values[i] = smallest_values[i - 1];
+            age[i] = age[i - 1];
+        }
+        smallest_values[position] = feature_value;
+        age[position] = 1;
+    }
+
+    // Get |current_median|: the third smallest value after more than two frames, else the smallest.
+    if (self->frame_counter > 2) {
+        current_median = smallest_values[2];
+    } else if (self->frame_counter > 0) {
+        current_median = smallest_values[0];
+    }
+
+    // Smooth the median value.
+    if (self->frame_counter > 0) {
+        if (current_median < self->mean_value[channel]) {
+            alpha = kSmoothingDown; // 0.2 in Q15.
+        } else {
+            alpha = kSmoothingUp; // 0.99 in Q15.
+        }
+    }
+    tmp32 = (alpha + 1) * self->mean_value[channel];
+    tmp32 += (WEBRTC_SPL_WORD16_MAX - alpha) * current_median;
+    tmp32 += 16384; // Rounding before the shift back from Q15.
+    self->mean_value[channel] = (int16_t) (tmp32 >> 15);
+
+    return self->mean_value[channel];
+}
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/webrtc_vad.c b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/webrtc_vad.c
new file mode 100644
index 0000000000000000000000000000000000000000..225355a1a61eb8734f6cabda1a528b8e36385b58
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/vad/src/webrtc_vad.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "include/webrtc_vad.h"
+
+#include <stdlib.h>
+
+#include "include/vad_core.h"
+
+static const int kInitCheck = 42;
+static const int kValidRates[] = {8000, 16000, 32000, 48000};
+static const size_t kRatesSize = sizeof(kValidRates) / sizeof(*kValidRates);
+static const int kMaxFrameLengthMs = 30;
+
+VadInst *WebRtcVad_Create() { // Returns an uninitialized instance, or NULL if allocation fails.
+    VadInstT *self = (VadInstT *) malloc(sizeof(VadInstT));
+    if (self != NULL) { // malloc() may fail; never write through a NULL pointer.
+        self->init_flag = 0;
+    }
+    return (VadInst *) self;
+}
+
+void WebRtcVad_Free(VadInst *handle) {
+    free(handle);
+}
+
+// TODO(bjornv): Move WebRtcVad_InitCore() code here.
+int WebRtcVad_Init(VadInst *handle) {
+    // Initialize the core VAD component.
+    return WebRtcVad_InitCore((VadInstT *) handle);
+}
+
+// TODO(bjornv): Move WebRtcVad_set_mode_core() code here.
+// Sets the aggressiveness |mode| of the VAD instance behind |handle|.
+// Returns -1 for a NULL handle or one whose init_flag is not kInitCheck;
+// otherwise returns whatever WebRtcVad_set_mode_core() returns.
+int WebRtcVad_set_mode(VadInst *handle, int mode) {
+    VadInstT *vad_inst = (VadInstT *) handle;
+
+    if (handle == NULL || vad_inst->init_flag != kInitCheck) {
+        return -1;
+    }
+
+    return WebRtcVad_set_mode_core(vad_inst, mode);
+}
+
+// Runs voice-activity detection on one frame of |frame_length| samples
+// captured at |fs| Hz.
+// Returns -1 on invalid arguments, 1 when the rate-specific routine reports
+// a positive value, and that routine's own value (0 or negative) otherwise.
+int WebRtcVad_Process(VadInst *handle, int fs, const int16_t *audio_frame,
+                      size_t frame_length) {
+    VadInstT *vad_inst = (VadInstT *) handle;
+    int decision = -1;
+
+    if (handle == NULL || vad_inst->init_flag != kInitCheck ||
+        audio_frame == NULL) {
+        return -1;
+    }
+    if (WebRtcVad_ValidRateAndFrameLength(fs, frame_length) != 0) {
+        return -1;
+    }
+
+    // Dispatch to the routine matching the sample rate.
+    switch (fs) {
+        case 48000:
+            decision = WebRtcVad_CalcVad48khz(vad_inst, audio_frame, frame_length);
+            break;
+        case 32000:
+            decision = WebRtcVad_CalcVad32khz(vad_inst, audio_frame, frame_length);
+            break;
+        case 16000:
+            decision = WebRtcVad_CalcVad16khz(vad_inst, audio_frame, frame_length);
+            break;
+        case 8000:
+            decision = WebRtcVad_CalcVad8khz(vad_inst, audio_frame, frame_length);
+            break;
+        default:
+            break;
+    }
+
+    // Collapse every positive result to 1.
+    return (decision > 0) ? 1 : decision;
+}
+
+// Returns 0 when |rate| is one of kValidRates and |frame_length| equals the
+// number of samples in a 10, 20 or 30 ms frame at that rate; -1 otherwise.
+int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length) {
+    size_t rate_index;
+    int length_ms;
+
+    // Locate the requested rate among the supported ones.
+    for (rate_index = 0; rate_index < kRatesSize; rate_index++) {
+        if (kValidRates[rate_index] == rate) {
+            break;
+        }
+    }
+    if (rate_index == kRatesSize) {
+        return -1;
+    }
+
+    // We only allow 10, 20 or 30 ms frames.
+    for (length_ms = 10; length_ms <= kMaxFrameLengthMs; length_ms += 10) {
+        if (frame_length == (size_t) (kValidRates[rate_index] / 1000 * length_ms)) {
+            return 0;
+        }
+    }
+
+    return -1;
+}
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/voiceclouddll/BUILD.gn b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/voiceclouddll/BUILD.gn
new file mode 100644
index 0000000000000000000000000000000000000000..050e38cd3f725ed2fb6ac03f1012981267461ff5
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/voiceclouddll/BUILD.gn
@@ -0,0 +1,21 @@
+# Copyright (c) 2022 Huawei Device Co., Ltd.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import("//build/ohos.gni")
+
+# Declares the prebuilt shared library ./libvoicecloud.z.so as target
+# "voicecloud_dll", installed as part of the "voiceassistant" part.
+ohos_prebuilt_shared_library("voicecloud_dll") {
+  source = "./libvoicecloud.z.so"
+  install_enable = true
+  subsystem_name = "miscservices"
+  part_name = "voiceassistant"
+}
\ No newline at end of file
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/voiceclouddll/i_voice_cloud_manager.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/voiceclouddll/i_voice_cloud_manager.h
new file mode 100644
index 0000000000000000000000000000000000000000..a64838c44337059a69a4c35adbf045d150c9817d
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/frameworks/voiceclouddll/i_voice_cloud_manager.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2022 Huawei Device Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CAR_VOICE_ASSISTANT_I_VOICE_CLOUD_H
+#define CAR_VOICE_ASSISTANT_I_VOICE_CLOUD_H
+
+#include "refbase.h"
+// NOTE(review): the header names of the next five #include lines are missing
+// in this text (angle-bracket names appear to have been stripped) - restore
+// them from the original file.
+#include
+#include
+#include
+#include
+#include
+
+namespace OHOS {
+namespace CarVoiceAssistant {
+    // Connection state of the voice cloud link.
+    enum VoiceCloudStatus {
+        VoiceCloudStatusNone, // not connected
+        VoiceCloudStatusConnecting, // connecting
+        VoiceCloudStatusConnected, // connected
+        VoiceCloudStatusClosing // closing
+    };
+
+    // Raw byte buffer plus its size; returned by RequestTTS().
+    // NOTE(review): ownership of |memory| is not visible here - confirm who frees it.
+    struct MemoryStruct {
+        char* memory;
+        size_t size;
+    };
+
+    // Callback interface registered through IVoiceCloudManager::SetCallback().
+    class IVoiceCloudManagerCallback : public virtual RefBase {
+    public:
+        virtual void VoiceCloudStatusChanged(VoiceCloudStatus status) = 0;
+        virtual void ReveiceVoiceCloudMessage(void* data, size_t length, bool isBinary) = 0;
+    };
+
+    // Abstract interface of the voice cloud client.
+    class IVoiceCloudManager : public virtual RefBase {
+    public:
+        IVoiceCloudManager() = default;
+        virtual ~IVoiceCloudManager() = default;
+        virtual bool Connect() = 0; // connect
+        virtual bool Close() = 0; // close the connection
+        virtual VoiceCloudStatus GetStatus() = 0; // get the current connection status
+        virtual bool IsSendingAudioStream() = 0; // whether a PCM stream is currently being sent
+        // NOTE(review): the template argument of wptr is missing in this text
+        // (presumably IVoiceCloudManagerCallback) - confirm.
+        virtual void SetCallback(wptr callback) = 0; // set the callback
+        virtual void SendNLUText(std::string text) = 0; // send a text message
+        virtual void SendStartAudioStream() = 0; // send the PCM stream start message
+        virtual void SendEndAudioStream() = 0; // send the PCM stream end message
+        virtual void SendBinary(void* data, size_t length) = 0; // send PCM stream data
+        virtual void SendTrackStat(double latitude, double longitude, std::string& items) = 0; // send latitude/longitude and hot words
+        virtual MemoryStruct RequestTTS(std::string text, std::string speakerType) = 0; // text to speech, result as a PCM stream
+    };
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/declaration/@ohos.carvoiceassistant.d.ts b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/declaration/@ohos.carvoiceassistant.d.ts
new file mode 100644
index 0000000000000000000000000000000000000000..3d0bf61f927d2dd6ce3de2f76d1548d51be1e170
--- /dev/null
+++
b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/declaration/@ohos.carvoiceassistant.d.ts
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+import { AsyncCallback } from './basic';
+
+declare namespace carVoiceAssistant {
+    class CarVoiceAssistantManager {
+
+        /**
+         * Whether the wake-up feature is enabled.
+         * @returns true if enabled, false otherwise
+         */
+        isEnableWakeUp(): boolean;
+
+        /**
+         * Enables the wake-up feature.
+         * @returns ErrorCode result
+         */
+        enableWakeUp(): ErrorCode;
+
+        /**
+         * Disables the wake-up feature.
+         * @returns ErrorCode result
+         */
+        disableWakeUp(): ErrorCode;
+
+        /**
+         * Whether recognition is in progress.
+         * @returns true while recognizing
+         */
+        isRecognizing(): boolean;
+
+        /**
+         * Starts recognition.
+         * @returns ErrorCode result
+         */
+        startRecognize(): ErrorCode;
+
+        /**
+         * Stops recognition.
+         * @returns ErrorCode result
+         */
+        stopRecognize(): ErrorCode;
+
+        /**
+         * Plays the given text as TTS.
+         * @param tts text to play
+         * @returns ErrorCode result
+         */
+        playTTS(tts: string): ErrorCode;
+
+        /**
+         * Stops TTS playback.
+         * @returns ErrorCode result
+         */
+        stopPlayTTS(): ErrorCode;
+
+        /**
+         * Registers hot words.
+         * NOTE(review): the native binding appears to return an integer result
+         * for this method, setCoord and changeSpeakerType - confirm whether
+         * `void` is intended.
+         * @param hotwords hot words as a JSON string
+         */
+        registerHotwords(hotwords: string): void;
+
+        /**
+         * Sets the current coordinates.
+         * @param latitude latitude
+         * @param longitude longitude
+         */
+        setCoord(latitude: number, longitude: number): void;
+
+        /**
+         * Changes the speaker type.
+         * @param speaker common,zhilingfa,qianranfa,tzruim,gqlanf,
+         * jlshim,madoufp_wenrou,gdfanf_boy,gdfanfp,mandarin,
+         * hchunf_ctn,wqingf_csn,aningf,yukaim_all
+         */
+        changeSpeakerType(speaker: string): void;
+
+        /**
+         * Subscribes to an event.
+         * @param event event type
+         * @param callback invoked with the event payload
+         */
+        on(event: EventType.VoiceAssistantEventTypeOnWakeUp, callback: AsyncCallback<{}>): void;
+        on(event: EventType.VoiceAssistantEventTypeRecognizeStateChanged, callback: AsyncCallback<{ isRecognizing: boolean }>): void;
+        on(event: EventType.VoiceAssistantEventTypeAsrResult, callback: AsyncCallback<{ result: string }>): void;
+        on(event: EventType.VoiceAssistantEventTypeTTSPlayStateChanged, callback: AsyncCallback<{ isPlaying: boolean }>): void;
+
+        /**
+         * Unsubscribes from an event.
+         * @param event event type
+         */
+        off(event: EventType): void;
+
+    }
+
+    enum ErrorCode {
+        VOICE_ASSISTANT_OK = 0, // success
+        VOICE_ASSISTANT_ERR = 8001, // error
+        VOICE_ASSISTANT_START_RECORD_FAILED = 8002, // failed to start recording
+        VOICE_ASSISTANT_START_WEBSOCKET_CONNECT_FAILED = 8003, // websocket connection failed
+    }
+
+    enum EventType {
+        VoiceAssistantEventTypeOnWakeUp = 0, // woken up
+        VoiceAssistantEventTypeRecognizeStateChanged = 1, // recognition state changed
+        VoiceAssistantEventTypeAsrResult = 2, // speech recognition result returned
+        VoiceAssistantEventTypeTTSPlayStateChanged = 3, // TTS playback state
+    }
+
+    interface AsrResult {
+        op: 'realTimeASRResult' | 'nluResult',
+        intentName: string, // intent
+        text: string, // for realTimeASRResult: the speech-to-text content; for nluResult: the semantic parsing content
+        isFinish: boolean, // for realTimeASRResult: whether the user has finished speaking
+        url: string, // for nluResult: the url of the hot word being responded to
+        needDeclare: boolean, // whether this is a multi-turn dialogue
+    }
+
+    function getManager(): CarVoiceAssistantManager;
+}
+
+export default carVoiceAssistant;
\ No newline at end of file
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/declaration/BUILD.gn b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/declaration/BUILD.gn
new file mode 100644
index 0000000000000000000000000000000000000000..10cd6cecc18461c3ee4d078e25a9f1ac7ca942d3
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/declaration/BUILD.gn
@@ -0,0 +1,27 @@
+# Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import("//build/ohos.gni")
+import("//build/ohos/ace/ace.gni")
+
+# Registers the TypeScript declaration file with the "voiceassistant" part.
+js_declaration("voiceassistant_js") {
+  part_name = "voiceassistant"
+  sources = [ "./@ohos.carvoiceassistant.d.ts" ]
+}
+
+# Copies the same declaration file into this target's output directory.
+ohos_copy("voiceassistant_declaration") {
+  sources = [ "./@ohos.carvoiceassistant.d.ts" ]
+  outputs = [ target_out_dir + "/$target_name/" ]
+  module_source_dir = target_out_dir + "/$target_name"
+  module_install_name = ""
+}
\ No newline at end of file
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/napi/include/voice_assistant_event_target.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/napi/include/voice_assistant_event_target.h
new file mode 100644
index 0000000000000000000000000000000000000000..4069cb6a018406406dd06e798f83f9a57b9e72b9
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/napi/include/voice_assistant_event_target.h
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef VOICE_ASSISTANT_EVENT_TARGET_H
+#define VOICE_ASSISTANT_EVENT_TARGET_H
+
+#include "napi/native_api.h"
+#include "napi/native_node_api.h"
+#include "refbase.h"
+#include "voice_assistant_log.h"
+#include <list>
+#include <string>
+
+namespace OHOS {
+namespace CarVoiceAssistant {
+
+    // Event kinds that JS code can subscribe to; the numeric values are the
+    // ones exported to JS as EventType.
+    enum VoiceAssistantEventType {
+        VoiceAssistantEventTypeOnWakeUp,
+        VoiceAssistantEventTypeRecognizeStateChanged,
+        VoiceAssistantEventTypeAsrResult,
+        VoiceAssistantEventTypeTTSPlayStateChanged
+    };
+
+    // One registered JS listener: the callback and its `this` object are
+    // held through N-API references.
+    struct VoiceAssistantEventListener {
+        napi_env env_;
+        VoiceAssistantEventType eventType_;
+        napi_ref callbackRef_;
+        napi_ref thisVarRef_;
+        bool isOnce_;
+    };
+
+    // Base class of event payloads that can be converted to a JS object.
+    class BaseEvent {
+    public:
+        virtual ~BaseEvent() = default;
+        // Builds the JS object handed to listeners as the second argument.
+        virtual napi_value ToJsObject(napi_env env) = 0;
+    };
+
+    // Payload { isRecognizing: boolean }.
+    class RecognizeStateEvent : public BaseEvent {
+    public:
+        RecognizeStateEvent(bool isRecognizing);
+        ~RecognizeStateEvent() override = default;
+        napi_value ToJsObject(napi_env env) override;
+
+    private:
+        bool isRecognizing_;
+    };
+
+    // Payload { result: string }.
+    class ArsResultEvent : public BaseEvent {
+    public:
+        ArsResultEvent(std::string text);
+        ~ArsResultEvent() override = default;
+        napi_value ToJsObject(napi_env env) override;
+
+    private:
+        std::string text_;
+    };
+
+    // Payload { isPlaying: boolean }.
+    class TTSPlayStateEvent : public BaseEvent {
+    public:
+        TTSPlayStateEvent(bool isPlaying);
+        ~TTSPlayStateEvent() override = default;
+        napi_value ToJsObject(napi_env env) override;
+
+    private:
+        bool isPlaying_;
+    };
+
+    // Keeps the list of JS listeners and dispatches native events to them.
+    class VoiceAssistantEventTarget : public RefBase {
+    public:
+        VoiceAssistantEventTarget(napi_env env);
+        virtual ~VoiceAssistantEventTarget();
+        // Registers a listener that stays registered until Off() removes it.
+        virtual void On(napi_env env, VoiceAssistantEventType type, napi_value callbackRef, napi_value thisVar);
+        // Registers a listener flagged as one-shot (isOnce_ == true).
+        virtual void Once(napi_env env, VoiceAssistantEventType type, napi_value callbackRef, napi_value thisVar);
+        // Removes every listener of |type| whose `this` strictly equals |thisVar|.
+        virtual void Off(napi_env env, VoiceAssistantEventType type, napi_value thisVar);
+
+        void EmitOnWakeUp();
+        void EmitRecognizeStateChanged(bool isRecognizing);
+        void EmitAsrResult(std::string text);
+        void EmitTTSPlayStateChanged(bool isPlaying);
+
+    private:
+        std::list<VoiceAssistantEventListener> eventListenerList_;
+        napi_env env_;
+
+        // Queues delivery of |event| (may be nullptr) to the listeners of |type|.
+        virtual void Emit(VoiceAssistantEventType type, BaseEvent* event);
+    };
+
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/napi/include/voice_assistant_napi_tools.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/napi/include/voice_assistant_napi_tools.h
new file mode 100644
index 0000000000000000000000000000000000000000..618b55d43f456ec2e1417151bf883a4a342b37de
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/napi/include/voice_assistant_napi_tools.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef VOICE_ASSISTANT_NAPI_TOOLS_H
+#define VOICE_ASSISTANT_NAPI_TOOLS_H
+
+#include "napi/native_api.h"
+#include "napi/native_node_api.h"
+
+// std::string and int64_t are used in the declarations below, so include
+// their headers here instead of relying on the including file.
+#include <cstdint>
+#include <string>
+
+#define MAX_VALUE_LEN 4096
+
+// Conversion helpers between N-API values and C++ values.
+// NOTE(review): the implementations are not part of this header; the
+// summaries below are inferred from the names and signatures - confirm
+// against the source file.
+namespace OHOS {
+namespace CarVoiceAssistant {
+    napi_value WrapVoidToJS(napi_env env);
+
+    // Presumably returns the JS `undefined` value.
+    napi_value GetUndefinedToJS(napi_env env);
+
+    // Presumably wraps |number| as a JS number.
+    napi_value GetIntToJs(napi_env env, int number);
+
+    // Presumably wraps |value| as a JS boolean.
+    napi_value GetBoolToJs(napi_env env, bool value);
+
+    // The Get*Property helpers presumably read the JS value |obj| as the
+    // named C++ type.
+    int GetIntProperty(napi_env env, napi_value obj);
+
+    bool GetBoolProperty(napi_env env, napi_value obj);
+
+    double GetDoubleProperty(napi_env env, napi_value obj);
+
+    int64_t GetLongIntProperty(napi_env env, napi_value obj);
+
+    std::string GetStringProperty(napi_env env, napi_value obj);
+
+    napi_value GetGlobal(napi_env env);
+}
+}
+
+#endif
\ No newline at end of file
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/napi/src/voice_assistant_event_target.cpp b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/napi/src/voice_assistant_event_target.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..b6744d4589bab128f1a34e8e86b7b4a49db717ee
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/napi/src/voice_assistant_event_target.cpp
@@ -0,0 +1,238 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "voice_assistant_event_target.h"
+#include "securec.h"
+#include "voice_assistant_napi_tools.h"
+#include <unistd.h>
+#include <uv.h>
+#include <new>
+#include <vector>
+
+namespace OHOS {
+namespace CarVoiceAssistant {
+    // Context handed to the libuv work item queued by Emit().
+    struct EventTargetCB {
+        napi_env env_;
+        sptr<VoiceAssistantEventTarget> eventTarget_;
+        VoiceAssistantEventType type_;
+        BaseEvent* event_; // owned by the work item; may be nullptr
+    };
+
+    namespace {
+        // A listener whose references were already resolved to values, so
+        // that removing listeners during dispatch cannot invalidate it.
+        struct PendingCall {
+            napi_env env;
+            napi_value thisVar;
+            napi_value callback;
+            bool isOnce;
+        };
+
+        // Builds a listener record holding references to |callback| and |thisVar|.
+        VoiceAssistantEventListener MakeListener(napi_env env, VoiceAssistantEventType type, napi_value callback,
+            napi_value thisVar, bool isOnce)
+        {
+            VoiceAssistantEventListener listener;
+            listener.env_ = env;
+            listener.eventType_ = type;
+            listener.isOnce_ = isOnce;
+            listener.callbackRef_ = nullptr;
+            listener.thisVarRef_ = nullptr;
+            napi_create_reference(env, callback, 1, &listener.callbackRef_);
+            napi_create_reference(env, thisVar, 1, &listener.thisVarRef_);
+            return listener;
+        }
+    }
+
+    RecognizeStateEvent::RecognizeStateEvent(bool isRecognizing)
+        : isRecognizing_(isRecognizing)
+    {
+    }
+
+    // Returns { isRecognizing: <bool> }.
+    napi_value RecognizeStateEvent::ToJsObject(napi_env env)
+    {
+        napi_value object = nullptr;
+        napi_create_object(env, &object);
+
+        napi_value isRecognizingToJs = nullptr;
+        napi_get_boolean(env, isRecognizing_, &isRecognizingToJs);
+        napi_set_named_property(env, object, "isRecognizing", isRecognizingToJs);
+
+        return object;
+    }
+
+    ArsResultEvent::ArsResultEvent(std::string text)
+        : text_(text)
+    {
+    }
+
+    // Returns { result: <string> }.
+    napi_value ArsResultEvent::ToJsObject(napi_env env)
+    {
+        napi_value object = nullptr;
+        napi_create_object(env, &object);
+
+        napi_value textToJs = nullptr;
+        napi_create_string_utf8(env, text_.c_str(), text_.length(), &textToJs);
+        napi_set_named_property(env, object, "result", textToJs);
+
+        return object;
+    }
+
+    TTSPlayStateEvent::TTSPlayStateEvent(bool isPlaying)
+        : isPlaying_(isPlaying)
+    {
+    }
+
+    // Returns { isPlaying: <bool> }.
+    napi_value TTSPlayStateEvent::ToJsObject(napi_env env)
+    {
+        napi_value object = nullptr;
+        napi_create_object(env, &object);
+
+        napi_value isPlayingToJs = nullptr;
+        napi_get_boolean(env, isPlaying_, &isPlayingToJs);
+        napi_set_named_property(env, object, "isPlaying", isPlayingToJs);
+
+        return object;
+    }
+
+    VoiceAssistantEventTarget::VoiceAssistantEventTarget(napi_env env) { env_ = env; }
+
+    VoiceAssistantEventTarget::~VoiceAssistantEventTarget() { }
+
+    // Registers a persistent listener for |type|.
+    void VoiceAssistantEventTarget::On(napi_env env, VoiceAssistantEventType type, napi_value callbackRef, napi_value thisVar)
+    {
+        eventListenerList_.push_back(MakeListener(env, type, callbackRef, thisVar, false));
+        VOICE_ASSISTANT_LOGI("VoiceAssistantEventTarget:On:size:%{public}d", static_cast<int>(eventListenerList_.size()));
+    }
+
+    // Registers a one-shot listener for |type|.
+    void VoiceAssistantEventTarget::Once(napi_env env, VoiceAssistantEventType type, napi_value callbackRef, napi_value thisVar)
+    {
+        eventListenerList_.push_back(MakeListener(env, type, callbackRef, thisVar, true));
+        VOICE_ASSISTANT_LOGI("VoiceAssistantEventTarget:Once:size:%{public}d", static_cast<int>(eventListenerList_.size()));
+    }
+
+    // Removes every listener of |type| whose `this` object strictly equals
+    // |thisVar| and releases the references it held.
+    void VoiceAssistantEventTarget::Off(napi_env env, VoiceAssistantEventType type, napi_value thisVar)
+    {
+        eventListenerList_.remove_if([env, thisVar, type](const VoiceAssistantEventListener& listener) -> bool {
+            bool isEqualsThisVar = false;
+            napi_value thisVarTemp = nullptr;
+            napi_get_reference_value(env, listener.thisVarRef_, &thisVarTemp);
+            napi_strict_equals(env, thisVar, thisVarTemp, &isEqualsThisVar);
+            bool isMatch = (isEqualsThisVar && listener.eventType_ == type);
+            if (isMatch) {
+                napi_delete_reference(env, listener.thisVarRef_);
+                napi_delete_reference(env, listener.callbackRef_);
+            }
+            return isMatch;
+        });
+        VOICE_ASSISTANT_LOGI("VoiceAssistantEventTargets:Off:size:%{public}d", static_cast<int>(eventListenerList_.size()));
+    }
+
+    void VoiceAssistantEventTarget::EmitOnWakeUp()
+    {
+        VOICE_ASSISTANT_LOGI("EmitOnWakeUp");
+        Emit(VoiceAssistantEventTypeOnWakeUp, nullptr);
+    }
+
+    void VoiceAssistantEventTarget::EmitRecognizeStateChanged(bool isRecognizing)
+    {
+        VOICE_ASSISTANT_LOGI("EmitRecognizeStateChanged:%{public}s", isRecognizing ? "true" : "false");
+        RecognizeStateEvent* event = new RecognizeStateEvent(isRecognizing);
+        Emit(VoiceAssistantEventTypeRecognizeStateChanged, (BaseEvent*)event);
+    }
+
+    void VoiceAssistantEventTarget::EmitAsrResult(std::string text)
+    {
+        VOICE_ASSISTANT_LOGI("EmitAsrResult:%{public}s", text.c_str());
+        ArsResultEvent* event = new ArsResultEvent(text);
+        Emit(VoiceAssistantEventTypeAsrResult, (BaseEvent*)event);
+    }
+
+    void VoiceAssistantEventTarget::EmitTTSPlayStateChanged(bool isPlaying)
+    {
+        VOICE_ASSISTANT_LOGI("EmitTTSPlayStateChanged:%{public}s", isPlaying ? "true" : "false");
+        TTSPlayStateEvent* event = new TTSPlayStateEvent(isPlaying);
+        Emit(VoiceAssistantEventTypeTTSPlayStateChanged, (BaseEvent*)event);
+    }
+
+    // Queues a libuv work item whose completion callback calls every
+    // listener of |type| with (undefined, payload).
+    // Takes ownership of |event| (may be nullptr): it is deleted after
+    // dispatch and on every failure path.
+    void VoiceAssistantEventTarget::Emit(VoiceAssistantEventType type, BaseEvent* event)
+    {
+        VOICE_ASSISTANT_LOGI("VoiceAssistantEventTarget::Emit:%{public}d", (int)getpid());
+        uv_loop_s* loop = nullptr;
+        napi_get_uv_event_loop(env_, &loop);
+        if (loop == nullptr) {
+            VOICE_ASSISTANT_LOGI("VoiceAssistantEventTarget::Emit loop == nullptr");
+            delete event;
+            return;
+        }
+
+        uv_work_t* work = new (std::nothrow) uv_work_t;
+        if (work == nullptr) {
+            VOICE_ASSISTANT_LOGI("VoiceAssistantEventTarget::Emit No memory work == nullptr");
+            delete event;
+            return;
+        }
+
+        EventTargetCB* eventTargetCB = new (std::nothrow) EventTargetCB { .env_ = env_, .eventTarget_ = this, .type_ = type, .event_ = event };
+        if (eventTargetCB == nullptr) {
+            VOICE_ASSISTANT_LOGI("VoiceAssistantEventTarget::Emit No memory eventTargetCB == nullptr");
+            delete work;
+            delete event;
+            return;
+        }
+
+        work->data = (void*)eventTargetCB;
+
+        int ret = uv_queue_work(
+            loop, work, [](uv_work_t* work) {},
+            [](uv_work_t* work, int status) {
+                VOICE_ASSISTANT_LOGI("VoiceAssistantEventTarget::Emit start work");
+
+                // Js Thread
+                if (work == nullptr) {
+                    VOICE_ASSISTANT_LOGI("VoiceAssistantEventTarget::Emit work == nullptr");
+                    return;
+                }
+                EventTargetCB* eventTargetCB = reinterpret_cast<EventTargetCB*>(work->data);
+                napi_handle_scope scope = nullptr;
+                napi_open_handle_scope(eventTargetCB->env_, &scope);
+
+                sptr<VoiceAssistantEventTarget> eventTarget = eventTargetCB->eventTarget_;
+
+                // Resolve all matching listeners up front. Calling Off()
+                // below edits eventListenerList_, which must not happen
+                // while that list is being iterated.
+                std::vector<PendingCall> pendingCalls;
+                for (const VoiceAssistantEventListener& listener : eventTarget->eventListenerList_) {
+                    if (listener.eventType_ != eventTargetCB->type_) {
+                        continue;
+                    }
+                    PendingCall call = { listener.env_, nullptr, nullptr, listener.isOnce_ };
+                    napi_get_reference_value(listener.env_, listener.thisVarRef_, &call.thisVar);
+                    napi_get_reference_value(listener.env_, listener.callbackRef_, &call.callback);
+                    pendingCalls.push_back(call);
+                }
+
+                for (const PendingCall& call : pendingCalls) {
+                    napi_value callbackValues[2] = { nullptr, nullptr };
+                    callbackValues[0] = GetUndefinedToJS(call.env);
+                    if (eventTargetCB->event_) {
+                        callbackValues[1] = eventTargetCB->event_->ToJsObject(call.env);
+                    } else {
+                        callbackValues[1] = GetUndefinedToJS(call.env);
+                    }
+
+                    napi_value returnVal = nullptr;
+                    napi_call_function(call.env, call.thisVar, call.callback, 2, callbackValues, &returnVal);
+                    if (call.isOnce) {
+                        eventTarget->Off(call.env, eventTargetCB->type_, call.thisVar);
+                    }
+                }
+
+                napi_close_handle_scope(eventTargetCB->env_, scope);
+
+                delete eventTargetCB->event_;
+                delete eventTargetCB;
+                delete work;
+            });
+
+        if (ret != 0) {
+            VOICE_ASSISTANT_LOGI("VoiceAssistantEventTarget::Emit failed to execute libuv work queue");
+            // The completion callback will never run, so release everything here.
+            delete event;
+            delete eventTargetCB;
+            delete work;
+        }
+    }
+
+}
+}
\ No newline at end of file
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/napi/src/voice_assistant_napi.cpp b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/napi/src/voice_assistant_napi.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..0719a23d076e8d89740ecf3a754b6860bb0f3449
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/napi/src/voice_assistant_napi.cpp
@@ -0,0 +1,395 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "common_utils.h"
+#include "napi/native_api.h"
+#include "napi/native_node_api.h"
+#include "refbase.h"
+#include "voice_assistant_client_manager.h"
+#include "voice_assistant_event_target.h"
+#include "voice_assistant_log.h"
+#include "voice_assistant_napi_tools.h"
+#include <cassert>
+#include <string>
+
+namespace OHOS {
+namespace CarVoiceAssistant {
+
+    static napi_ref g_voiceassistantManagerConstructorJS = nullptr;
+    static napi_ref g_voiceassistantManagerRef = nullptr;
+
+    // Reads the first call argument into |out|.
+    // Returns false when it is missing or not a JS string.
+    static bool GetStringArg(napi_env env, napi_callback_info info, std::string& out)
+    {
+        size_t argc = 1;
+        napi_value argv[1] = { nullptr };
+        if (napi_get_cb_info(env, info, &argc, argv, nullptr, nullptr) != napi_ok || argc < 1) {
+            return false;
+        }
+        napi_valuetype valueType = napi_undefined;
+        if (napi_typeof(env, argv[0], &valueType) != napi_ok || valueType != napi_string) {
+            return false;
+        }
+        out = GetStringProperty(env, argv[0]);
+        return true;
+    }
+
+    // Converts a JS number into an event type.
+    // Returns false when |value| is not a number or is outside the enum range.
+    static bool ParseEventType(napi_env env, napi_value value, VoiceAssistantEventType& type)
+    {
+        napi_valuetype valueType = napi_undefined;
+        if (napi_typeof(env, value, &valueType) != napi_ok || valueType != napi_number) {
+            return false;
+        }
+        int raw = GetIntProperty(env, value);
+        if (raw < VoiceAssistantEventTypeOnWakeUp || raw > VoiceAssistantEventTypeTTSPlayStateChanged) {
+            return false;
+        }
+        type = static_cast<VoiceAssistantEventType>(raw);
+        return true;
+    }
+
+    // Sets object[name] = value as a JS int32.
+    static void SetInt32Property(napi_env env, napi_value object, const char* name, int32_t value)
+    {
+        napi_value valueJs = nullptr;
+        napi_create_int32(env, value, &valueJs);
+        napi_set_named_property(env, object, name, valueJs);
+    }
+
+    // JS constructor of the manager class: creates the event target and
+    // hands it to the client manager singleton.
+    napi_value VoiceAssistantManagerConstructor(napi_env env, napi_callback_info info)
+    {
+        VOICE_ASSISTANT_LOGI("VoiceAssistantManagerConstructor");
+        std::size_t argc = 1;
+        napi_value argv[1];
+
+        napi_value thisVar = nullptr;
+        NAPI_CALL(env, napi_get_cb_info(env, info, &argc, argv, &thisVar, nullptr));
+
+        sptr<VoiceAssistantEventTarget> target = new VoiceAssistantEventTarget(env);
+        VoiceAssistantClientManager::GetInstance()->SetEventTarget(target);
+
+        return thisVar;
+    }
+
+    // getManager(): returns the cached manager object, creating it from the
+    // stored constructor on first use.
+    napi_value GetManager(napi_env env, napi_callback_info info)
+    {
+        VOICE_ASSISTANT_LOGI("GetManager");
+
+        if (g_voiceassistantManagerRef != nullptr) {
+            VOICE_ASSISTANT_LOGI("GetManager isExist");
+            napi_value result = nullptr;
+            napi_get_reference_value(env, g_voiceassistantManagerRef, &result);
+            return result;
+        }
+
+        napi_value thisVar = nullptr;
+        NAPI_CALL(env,
+            napi_get_cb_info(env, info, nullptr, nullptr, &thisVar, nullptr));
+
+        // Stop at the first failing step instead of continuing with null values.
+        napi_value cons = nullptr;
+        NAPI_CALL(env, napi_get_reference_value(env, g_voiceassistantManagerConstructorJS, &cons));
+
+        napi_value result = nullptr;
+        NAPI_CALL(env, napi_new_instance(env, cons, 0, nullptr, &result));
+
+        NAPI_CALL(env, napi_create_reference(env, result, 1, &g_voiceassistantManagerRef));
+
+        return result;
+    }
+
+    // isEnableWakeUp(): boolean
+    napi_value IsEnableWakeUp(napi_env env, napi_callback_info info)
+    {
+        VOICE_ASSISTANT_LOGI("IsEnableWakeUp");
+        bool isEnabled = false;
+        VoiceAssistantClientManager::GetInstance()->IsEnableWakeUp(isEnabled);
+        return GetBoolToJs(env, isEnabled);
+    }
+
+    // enableWakeUp(): ErrorCode
+    napi_value EnableWakeUp(napi_env env, napi_callback_info info)
+    {
+        VOICE_ASSISTANT_LOGI("EnableWakeUp");
+        int32_t rst = VoiceAssistantClientManager::GetInstance()->EnableWakeUp();
+        return GetIntToJs(env, rst);
+    }
+
+    // disableWakeUp(): ErrorCode
+    napi_value DisableWakeUp(napi_env env, napi_callback_info info)
+    {
+        VOICE_ASSISTANT_LOGI("DisableWakeUp");
+        int32_t rst = VoiceAssistantClientManager::GetInstance()->DisableWakeUp();
+        return GetIntToJs(env, rst);
+    }
+
+    // isRecognizing(): boolean
+    napi_value IsRecognizing(napi_env env, napi_callback_info info)
+    {
+        VOICE_ASSISTANT_LOGI("IsRecognizing");
+        bool isRecognizing = false;
+        VoiceAssistantClientManager::GetInstance()->IsRecognizing(isRecognizing);
+        return GetBoolToJs(env, isRecognizing);
+    }
+
+    // startRecognize(): ErrorCode
+    napi_value StartRecognize(napi_env env, napi_callback_info info)
+    {
+        VOICE_ASSISTANT_LOGI("StartRecognize");
+        CommonUtils::VoiceAssistantErrorCode result = CommonUtils::VOICE_ASSISTANT_ERR;
+        VoiceAssistantClientManager::GetInstance()->StartRecognize(result);
+        return GetIntToJs(env, result);
+    }
+
+    // stopRecognize(): ErrorCode
+    napi_value StopRecognize(napi_env env, napi_callback_info info)
+    {
+        VOICE_ASSISTANT_LOGI("StopRecognize");
+        int32_t rst = VoiceAssistantClientManager::GetInstance()->StopRecognize();
+        return GetIntToJs(env, rst);
+    }
+
+    // playTTS(tts: string): ErrorCode; VOICE_ASSISTANT_ERR for a non-string argument.
+    napi_value PlayTTS(napi_env env, napi_callback_info info)
+    {
+        VOICE_ASSISTANT_LOGI("PlayTTS");
+        std::string tts;
+        if (!GetStringArg(env, info, tts)) {
+            VOICE_ASSISTANT_LOGI("PlayTTS params is not string");
+            return GetIntToJs(env, CommonUtils::VOICE_ASSISTANT_ERR);
+        }
+
+        CommonUtils::VoiceAssistantErrorCode result = CommonUtils::VOICE_ASSISTANT_ERR;
+        VoiceAssistantClientManager::GetInstance()->PlayTTS(result, tts);
+        return GetIntToJs(env, result);
+    }
+
+    // stopPlayTTS(): ErrorCode
+    napi_value StopPlayTTS(napi_env env, napi_callback_info info)
+    {
+        VOICE_ASSISTANT_LOGI("StopPlayTTS");
+        int32_t rst = VoiceAssistantClientManager::GetInstance()->StopPlayTTS();
+        return GetIntToJs(env, rst);
+    }
+
+    // registerHotwords(hotwords: string); VOICE_ASSISTANT_ERR for a non-string argument.
+    napi_value RegisterHotwords(napi_env env, napi_callback_info info)
+    {
+        VOICE_ASSISTANT_LOGI("RegisterHotwords");
+        std::string hotwords;
+        if (!GetStringArg(env, info, hotwords)) {
+            VOICE_ASSISTANT_LOGI("RegisterHotwords params is not string");
+            return GetIntToJs(env, CommonUtils::VOICE_ASSISTANT_ERR);
+        }
+
+        int32_t rst = VoiceAssistantClientManager::GetInstance()->RegisterHotwords(hotwords);
+        return GetIntToJs(env, rst);
+    }
+
+    // setCoord(latitude: number, longitude: number); VOICE_ASSISTANT_ERR
+    // when either argument is not a number.
+    napi_value SetCoord(napi_env env, napi_callback_info info)
+    {
+        VOICE_ASSISTANT_LOGI("SetCoord-napi");
+        size_t argc = 2;
+        napi_value argv[2] = { nullptr, nullptr };
+        napi_get_cb_info(env, info, &argc, argv, nullptr, nullptr);
+
+        double coords[2] = { 0, 0 }; // latitude, longitude
+        for (size_t i = 0; i < 2; i++) {
+            napi_valuetype valueType = napi_undefined;
+            napi_typeof(env, argv[i], &valueType);
+            if (valueType != napi_number) {
+                VOICE_ASSISTANT_LOGI("SetCoord params is not number");
+                return GetIntToJs(env, CommonUtils::VOICE_ASSISTANT_ERR);
+            }
+            coords[i] = GetDoubleProperty(env, argv[i]);
+        }
+
+        VOICE_ASSISTANT_LOGI("SetCoord-napi:%{public}f,%{public}f", coords[0], coords[1]);
+        int32_t rst = VoiceAssistantClientManager::GetInstance()->SetCoord(coords[0], coords[1]);
+        return GetIntToJs(env, rst);
+    }
+
+    // changeSpeakerType(speaker: string); VOICE_ASSISTANT_ERR for a non-string argument.
+    napi_value ChangeSpeakerType(napi_env env, napi_callback_info info)
+    {
+        VOICE_ASSISTANT_LOGI("ChangeSpeakerType");
+        std::string speaker;
+        if (!GetStringArg(env, info, speaker)) {
+            VOICE_ASSISTANT_LOGI("ChangeSpeakerType params is not string");
+            return GetIntToJs(env, CommonUtils::VOICE_ASSISTANT_ERR);
+        }
+
+        int32_t rst = VoiceAssistantClientManager::GetInstance()->ChangeSpeakerType(speaker);
+        return GetIntToJs(env, rst);
+    }
+
+    // on(event, callback): registers |callback| for |event|. Calls with an
+    // invalid event type or a non-function callback are ignored.
+    napi_value On(napi_env env, napi_callback_info info)
+    {
+        VOICE_ASSISTANT_LOGI("On");
+        size_t argc = 2;
+        napi_value argv[2] = { nullptr, nullptr };
+        napi_value thisVar = nullptr; // JS `this` of the call
+        void* data = nullptr;
+
+        napi_get_cb_info(env, info, &argc, argv, &thisVar, &data);
+
+        VoiceAssistantEventType type = VoiceAssistantEventTypeOnWakeUp;
+        if (!ParseEventType(env, argv[0], type)) {
+            VOICE_ASSISTANT_LOGI("On event type is invalid");
+            return GetUndefinedToJS(env);
+        }
+
+        napi_valuetype callbackType = napi_undefined;
+        napi_typeof(env, argv[1], &callbackType);
+        if (callbackType != napi_function) {
+            VOICE_ASSISTANT_LOGI("On callback is not function");
+            return GetUndefinedToJS(env);
+        }
+
+        VoiceAssistantClientManager::GetInstance()->GetEventTarget()->On(env, type, argv[1], thisVar);
+
+        return GetUndefinedToJS(env);
+    }
+
+    // off(event): removes the listeners of |event| registered on this object.
+    // Calls with an invalid event type are ignored.
+    napi_value Off(napi_env env, napi_callback_info info)
+    {
+        VOICE_ASSISTANT_LOGI("Off");
+        size_t argc = 1;
+        napi_value argv[1] = { nullptr }; // call arguments
+        napi_value thisVar = nullptr; // JS `this` of the call
+        void* data = nullptr;
+
+        napi_get_cb_info(env, info, &argc, argv, &thisVar, &data);
+
+        VoiceAssistantEventType type = VoiceAssistantEventTypeOnWakeUp;
+        if (!ParseEventType(env, argv[0], type)) {
+            VOICE_ASSISTANT_LOGI("Off event type is invalid");
+            return GetUndefinedToJS(env);
+        }
+
+        VoiceAssistantClientManager::GetInstance()->GetEventTarget()->Off(env, type, thisVar);
+
+        return GetUndefinedToJS(env);
+    }
+
+    // Fills |object| with the ErrorCode members under the names used in the
+    // TypeScript declaration.
+    void CreateErrorCodeEnum(napi_env env, napi_value object)
+    {
+        VOICE_ASSISTANT_LOGI("CreateErrorCodeEnum");
+        SetInt32Property(env, object, "VOICE_ASSISTANT_OK", CommonUtils::VOICE_ASSISTANT_OK);
+        SetInt32Property(env, object, "VOICE_ASSISTANT_ERR", CommonUtils::VOICE_ASSISTANT_ERR);
+        // Was exported as "RADIO_STATUS_SCANNING", which left
+        // ErrorCode.VOICE_ASSISTANT_START_RECORD_FAILED undefined in JS.
+        SetInt32Property(env, object, "VOICE_ASSISTANT_START_RECORD_FAILED",
+            CommonUtils::VOICE_ASSISTANT_START_RECORD_FAILED);
+        SetInt32Property(env, object, "VOICE_ASSISTANT_START_WEBSOCKET_CONNECT_FAILED",
+            CommonUtils::VOICE_ASSISTANT_START_WEBSOCKET_CONNECT_FAILED);
+    }
+
+    // Fills |object| with the EventType members.
+    void CreateEventTypeEnum(napi_env env, napi_value object)
+    {
+        VOICE_ASSISTANT_LOGI("CreateEventTypeEnum");
+        SetInt32Property(env, object, "VoiceAssistantEventTypeOnWakeUp", VoiceAssistantEventTypeOnWakeUp);
+        SetInt32Property(env, object, "VoiceAssistantEventTypeRecognizeStateChanged",
+            VoiceAssistantEventTypeRecognizeStateChanged);
+        SetInt32Property(env, object, "VoiceAssistantEventTypeAsrResult", VoiceAssistantEventTypeAsrResult);
+        SetInt32Property(env, object, "VoiceAssistantEventTypeTTSPlayStateChanged",
+            VoiceAssistantEventTypeTTSPlayStateChanged);
+    }
+
+    // Defines getManager plus the ErrorCode and EventType objects on |exports|.
+    static napi_value GetManagerFuction(napi_env env, napi_value exports)
+    {
+
+        VOICE_ASSISTANT_LOGI("GetManagerFuction");
+        // ErrorCode enum
+        napi_value errorCodeJs = nullptr;
+        napi_create_object(env, &errorCodeJs);
+        CreateErrorCodeEnum(env, errorCodeJs);
+
+        // EventType enum
+        napi_value eventTypeJs = nullptr;
+        napi_create_object(env, &eventTypeJs);
+        CreateEventTypeEnum(env, eventTypeJs);
+
+        napi_status status;
+        napi_property_descriptor desc[] = {
+            DECLARE_NAPI_FUNCTION("getManager", GetManager),
+            DECLARE_NAPI_PROPERTY("ErrorCode", errorCodeJs),
+            DECLARE_NAPI_PROPERTY("EventType", eventTypeJs)
+        };
+        status = napi_define_properties(env, exports, sizeof(desc) / sizeof(desc[0]),
+            desc);
+        assert(status == napi_ok);
+
+        return (exports);
+    }
+
+    static napi_value RegisterManagerFuction(napi_env env, napi_value exports)
+    {
+        VOICE_ASSISTANT_LOGI("RegisterManagerFuction");
+
+        napi_status status;
+        napi_property_descriptor desc[] = {
+            DECLARE_NAPI_FUNCTION("isEnableWakeUp", IsEnableWakeUp),
+            DECLARE_NAPI_FUNCTION("enableWakeUp", EnableWakeUp),
+            DECLARE_NAPI_FUNCTION("disableWakeUp", DisableWakeUp),
+            DECLARE_NAPI_FUNCTION("isRecognizing", IsRecognizing),
+            DECLARE_NAPI_FUNCTION("startRecognize", StartRecognize),
+            DECLARE_NAPI_FUNCTION("stopRecognize", StopRecognize),
+            DECLARE_NAPI_FUNCTION("playTTS", PlayTTS),
+            DECLARE_NAPI_FUNCTION("stopPlayTTS", StopPlayTTS),
+            DECLARE_NAPI_FUNCTION("registerHotwords", RegisterHotwords),
+            DECLARE_NAPI_FUNCTION("setCoord", SetCoord),
+            DECLARE_NAPI_FUNCTION("changeSpeakerType", ChangeSpeakerType),
+
DECLARE_NAPI_FUNCTION("on", On), + DECLARE_NAPI_FUNCTION("off", Off) + }; + + napi_value cons = nullptr; + status = napi_define_class(env, "CarVoiceAssistantManager", NAPI_AUTO_LENGTH, + VoiceAssistantManagerConstructor, nullptr, + sizeof(desc) / sizeof(desc[0]), desc, &cons); + assert(status == napi_ok); + + napi_create_reference(env, cons, 1, &g_voiceassistantManagerConstructorJS); + + return (exports); + } + + static napi_value Init(napi_env env, napi_value exports) + { + RegisterManagerFuction(env, exports); + GetManagerFuction(env, exports); + + return (exports); + } + + /* NAPI_MODULE(js_radio, Init) */ + + static napi_module sampleModule = { + .nm_version = 1, + .nm_flags = 0, + .nm_filename = nullptr, + .nm_register_func = Init, + .nm_modname = "carvoiceassistant", + .nm_priv = ((void*)0), + .reserved = { 0 }, + }; + + extern "C" __attribute__((constructor)) void Register() + { + napi_module_register(&sampleModule); + } +} // namespace TUNER +} // namespace OHOS \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/napi/src/voice_assistant_napi_tools.cpp b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/napi/src/voice_assistant_napi_tools.cpp new file mode 100644 index 0000000000000000000000000000000000000000..cbcf966bd2f2ca01c6d4ff4e16ef1083b1611dfd --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/napi/src/voice_assistant_napi_tools.cpp @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "voice_assistant_napi_tools.h" +#include "voice_assistant_log.h" + +namespace OHOS { +namespace CarVoiceAssistant { + /* Returns the JS null value. */ + napi_value WrapVoidToJS(napi_env env) + { + napi_value result = nullptr; + NAPI_CALL(env, napi_get_null(env, &result)); + return (result); + } + + /* Returns the JS undefined value. */ + napi_value GetUndefinedToJS(napi_env env) + { + napi_value result = nullptr; + NAPI_CALL(env, napi_get_undefined(env, &result)); + return (result); + } + + /* Wraps `number` in a JS int32 value. */ + napi_value GetIntToJs(napi_env env, int number) + { + napi_value intToJs = nullptr; + napi_create_int32(env, number, &intToJs); + return intToJs; + } + + /* Wraps `value` in a JS boolean. */ + napi_value GetBoolToJs(napi_env env, bool value) + { + napi_value boolToJs = nullptr; + napi_get_boolean(env, value, &boolToJs); + return boolToJs; + } + + /* Reads `obj` as an int32; returns 0 (and logs) when the conversion fails. */ + int GetIntProperty(napi_env env, napi_value obj) + { + int intTypeToJs = 0; + if (napi_get_value_int32(env, obj, &intTypeToJs) != napi_ok) { + VOICE_ASSISTANT_LOGI("Can not get int32 param from argv"); + } + + return (intTypeToJs); + } + + /* Reads `obj` as a bool; returns false (and logs) when the conversion fails. */ + bool GetBoolProperty(napi_env env, napi_value obj) + { + bool boolTypeToJs = false; + if (napi_get_value_bool(env, obj, &boolTypeToJs) != napi_ok) { + VOICE_ASSISTANT_LOGI("Can not get bool param from argv"); + } + + return (boolTypeToJs); + } + + /* Reads `obj` as a double; returns 0 (and logs) when the conversion fails. */ + double GetDoubleProperty(napi_env env, napi_value obj) + { + double doubleTypeToJs = 0; + if (napi_get_value_double(env, obj, &doubleTypeToJs) != napi_ok) { + VOICE_ASSISTANT_LOGI("Can not get double param from argv"); + } + + return doubleTypeToJs; + } + + /* Reads `obj` as an int64; returns 0 (and logs) when the conversion fails. */ + int64_t GetLongIntProperty(napi_env env, napi_value obj) + { + int64_t intTypeToJs = 0; + if (napi_get_value_int64(env, obj, &intTypeToJs) != napi_ok) { + VOICE_ASSISTANT_LOGI("Can not get int64 param from argv"); + } + + return (intTypeToJs); + } + + /* Reads `obj` as a UTF-8 std::string; returns an empty string (and logs) on failure. The length is queried first so strings longer than MAX_VALUE_LEN are no longer truncated. */ + std::string GetStringProperty(napi_env env, napi_value obj) + { +
size_t propLen = 0; + /* A null buffer makes the call report the required length (excluding the terminator) without copying. */ + if (napi_get_value_string_utf8(env, obj, nullptr, 0, &propLen) != napi_ok) { + VOICE_ASSISTANT_LOGI("Can not get string param from argv"); + return (std::string()); + } + + /* One extra byte for the terminating NUL that the second call writes. */ + std::string propValue(propLen + 1, '\0'); + size_t copied = 0; + if (napi_get_value_string_utf8(env, obj, &propValue[0], propValue.size(), &copied) != napi_ok) { + VOICE_ASSISTANT_LOGI("Can not get string param from argv"); + return (std::string()); + } + + propValue.resize(copied); + return (propValue); + } + + /* Returns the JS global object, or nullptr when it cannot be obtained. */ + napi_value GetGlobal(napi_env env) + { + napi_value global = nullptr; + napi_get_global(env, &global); + return global; + } +} +} \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/ohos.build b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/ohos.build new file mode 100644 index 0000000000000000000000000000000000000000..c8889bc6676f90bb3299819bb9ad779c32b850d0 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/ohos.build @@ -0,0 +1,56 @@ +{ + "subsystem": "miscservices", + "parts": { + "voiceassistant": { + "variants": [ + "phone", + "wearable", + "ivi" + ], + "module_list": [ + "//base/miscservices/voiceassistant/frameworks/pocketsphinx:pocketsphinx", + "//base/miscservices/voiceassistant/frameworks/vad:ps_vad", + "//base/miscservices/voiceassistant/frameworks/voiceclouddll:voicecloud_dll", + "//base/miscservices/voiceassistant/etc/init:voice_assistant_service.rc", + "//base/miscservices/voiceassistant/profile:voice_assistant_sa_profiles", + "//base/miscservices/voiceassistant/resources:pocketsphinx_all_source", + "//base/miscservices/voiceassistant/services:voiceassistant_service_group", + "//base/miscservices/voiceassistant/interfaces/kits/js/declaration:voiceassistant_js", + "//base/miscservices/voiceassistant/test/client_test:client_test" + ], + "inner_kits": [ + { + "name": "//base/miscservices/voiceassistant/services:carvoiceassistant", + "header": { + "header_files": [ + "voice_assistant_ability_agent.h", + "voice_assistant_ability_proxy.h", + "voice_assistant_client_callback_stub.h", + "voice_assistant_client_callback.h", + "voice_assistant_client_manager.h" + ], + "header_base": "//base/miscservices/voiceassistant/services/include/client" + } + }, +
{ + "name": "//base/miscservices/voiceassistant/frameworks/pocketsphinx:pocketsphinx", + "header": { + "header_files": [ + "pocketsphinx.h" + ], + "header_base": "//base/miscservices/voiceassistant/frameworks/pocketsphinx/include" + } + }, + { + "name": "//base/miscservices/voiceassistant/frameworks/vad:ps_vad", + "header": { + "header_files": [ + "webrtc_vad.h" + ], + "header_base": "//base/miscservices/voiceassistant/frameworks/vad/include" + } + } + ] + } + } +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/prebuild.sh b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/prebuild.sh new file mode 100755 index 0000000000000000000000000000000000000000..4fe678b660aec1753ef89342f7b625f5c2d236af --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/prebuild.sh @@ -0,0 +1,26 @@ +# Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +shpath=$(dirname $0) + +cd ${shpath} +#解压data.zip +unzip ../data.zip -d ../ +#拷贝libvoicecloud.z.so到frameworks/voiceclouddll/目录下 +cp ../data/libvoicecloud.z.so ${shpath}/frameworks/voiceclouddll/ + +#解压zh.tar到resources/目录下 +tar xvf ../data/zh.tar -C ${shpath}/resources/ + +#拷贝voice_tip.mp3到resources/目录下并解压 +cp ../data/voice_tip.mp3 ${shpath}/resources/ diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/profile/5102.xml b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/profile/5102.xml new file mode 100644 index 0000000000000000000000000000000000000000..3c93510d78813ca042902685fcbbb03703b9ec50 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/profile/5102.xml @@ -0,0 +1,24 @@ + + + + voice_assistant_service + + 5102 + libvoiceassistant_service.z.so + true + false + 1 + + diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/profile/BUILD.gn b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/profile/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..42a9c8c4c6fd3bbd4807441ec27bc665acb5ce11 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/profile/BUILD.gn @@ -0,0 +1,19 @@ +# Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import("//build/ohos/sa_profile/sa_profile.gni") + +ohos_sa_profile("voice_assistant_sa_profiles") { + sources = [ "5102.xml" ] + part_name = "voiceassistant" +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/resources/BUILD.gn b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/resources/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..616eb914985943ab64f6d09eff1bb8c248bfee60 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/resources/BUILD.gn @@ -0,0 +1,118 @@ +# Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import("//build/ohos.gni") + +ohos_prebuilt_etc("pocketsphinx_source1") { + source = "zh/zh_cn.dic" + deps = [] + relative_install_dir = "pocketsphinx/zh" + part_name = "voiceassistant" + subsystem_name = "miscservices" +} + +ohos_prebuilt_etc("pocketsphinx_source2") { + source = "zh/zh_cn.lm.bin" + deps = [] + relative_install_dir = "pocketsphinx/zh" + part_name = "voiceassistant" + subsystem_name = "miscservices" +} + +ohos_prebuilt_etc("pocketsphinx_source3") { + source = "zh/zh/feat.params" + deps = [] + relative_install_dir = "pocketsphinx/zh/zh" + part_name = "voiceassistant" + subsystem_name = "miscservices" +} + +ohos_prebuilt_etc("pocketsphinx_source4") { + source = "zh/zh/feature_transform" + deps = [] + relative_install_dir = "pocketsphinx/zh/zh" + part_name = "voiceassistant" + subsystem_name = "miscservices" +} + +ohos_prebuilt_etc("pocketsphinx_source5") { + source = "zh/zh/mdef" + deps = [] + relative_install_dir = "pocketsphinx/zh/zh" + part_name = "voiceassistant" + subsystem_name = "miscservices" +} + +ohos_prebuilt_etc("pocketsphinx_source6") { + source = "zh/zh/means" + deps = [] + relative_install_dir = "pocketsphinx/zh/zh" + part_name = "voiceassistant" + subsystem_name = "miscservices" +} + +ohos_prebuilt_etc("pocketsphinx_source7") { + source = "zh/zh/mixture_weights" + deps = [] + relative_install_dir = "pocketsphinx/zh/zh" + part_name = "voiceassistant" + subsystem_name = "miscservices" +} + +ohos_prebuilt_etc("pocketsphinx_source8") { + source = "zh/zh/noisedict" + deps = [] + relative_install_dir = "pocketsphinx/zh/zh" + part_name = "voiceassistant" + subsystem_name = "miscservices" +} + +ohos_prebuilt_etc("pocketsphinx_source9") { + source = "zh/zh/transition_matrices" + deps = [] + relative_install_dir = "pocketsphinx/zh/zh" + part_name = "voiceassistant" + subsystem_name = "miscservices" +} + +ohos_prebuilt_etc("pocketsphinx_source10") { + source = "zh/zh/variances" + deps = [] + relative_install_dir = "pocketsphinx/zh/zh" + 
part_name = "voiceassistant" + subsystem_name = "miscservices" +} + +ohos_prebuilt_etc("voice_tip_source") { + source = "voice_tip.mp3" + deps = [] + relative_install_dir = "pocketsphinx/" + part_name = "voiceassistant" + subsystem_name = "miscservices" +} + +group("pocketsphinx_all_source") { + deps = [ + ":pocketsphinx_source1", + ":pocketsphinx_source2", + ":pocketsphinx_source3", + ":pocketsphinx_source4", + ":pocketsphinx_source5", + ":pocketsphinx_source6", + ":pocketsphinx_source7", + ":pocketsphinx_source8", + ":pocketsphinx_source9", + ":pocketsphinx_source10", + ":voice_tip_source" + ] +} \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/BUILD.gn b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/BUILD.gn new file mode 100644 index 0000000000000000000000000000000000000000..00c7c5a7015f766f0c66a1d54cca522288ad45bf --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/BUILD.gn @@ -0,0 +1,141 @@ +# Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at + +# http://www.apache.org/licenses/LICENSE-2.0 + +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import("//build/ohos.gni") + +# Client-side shared library: IPC proxy/callback sources plus the JS NAPI binding sources. +ohos_shared_library("carvoiceassistant") { + cflags = ["-Wno-unused-variable", "-Wno-unused-function", "-Wno-implicit-function-declaration", "-Wno-unused-private-field"] + cflags_cc = ["-fexceptions"] + + include_dirs = [ + "//base/miscservices/voiceassistant/frameworks/utils/include", + "//base/miscservices/voiceassistant/interfaces/kits/js/napi/include", + "//base/miscservices/voiceassistant/services/include/client", + "//foundation/ace/napi/interfaces/kits", + "//utils/native/base/include", + "//utils/system/safwk/native/include", + "//third_party/openssl/include", + "//third_party/json/single_include", + "//third_party" + ] + + sources = [ + "src/client/voice_assistant_ability_proxy.cpp", + "src/client/voice_assistant_client_callback_stub.cpp", + "src/client/voice_assistant_client_manager.cpp", + "../interfaces/kits/js/napi/src/voice_assistant_event_target.cpp", + "../interfaces/kits/js/napi/src/voice_assistant_napi_tools.cpp", + "../interfaces/kits/js/napi/src/voice_assistant_napi.cpp" + ] + + deps = [ + "//foundation/ace/napi/:ace_napi", + "//foundation/appexecfwk/standard/interfaces/innerkits/appexecfwk_base:appexecfwk_base", + "//foundation/appexecfwk/standard/interfaces/innerkits/appexecfwk_core:appexecfwk_core", + "//foundation/appexecfwk/standard/interfaces/innerkits/libeventhandler:libeventhandler", + "//foundation/communication/ipc/interfaces/innerkits/ipc_core:ipc_core", + "//foundation/communication/ipc/interfaces/innerkits/ipc_single:ipc_single", + "//foundation/distributedschedule/dmsfwk/interfaces/innerkits/uri:zuri", + "//foundation/distributedschedule/samgr/interfaces/innerkits/samgr_proxy:samgr_proxy", + "//utils/native/base:utils", + "//third_party/libwebsockets:websockets", + "//third_party/openssl:libcrypto_static", + "//third_party/openssl:ssl_source", + "//third_party/zlib:libz", + ] + + external_deps = [ + "hiviewdfx_hilog_native:libhilog" + ] + + subsystem_name = "miscservices" + part_name =
"voiceassistant" +} + +ohos_shared_library("voiceassistant_service") { + cflags = ["-Wno-unused-variable", "-Wno-unused-function", "-Wno-implicit-function-declaration", "-Wno-unused-private-field"] + cflags_cc = ["-fexceptions"] + + include_dirs = [ + "//foundation/multimedia/audio_standard/interfaces/inner_api/native/audiocapturer/include", + "//foundation/multimedia/audio_standard/interfaces/inner_api/native/audiocommon/include", + "//base/miscservices/voiceassistant/frameworks/manager/include", + "//base/miscservices/voiceassistant/frameworks/pocketsphinx/include", + "//base/miscservices/voiceassistant/frameworks/vad/include", + "//base/miscservices/voiceassistant/frameworks/utils/include", + "//base/miscservices/voiceassistant/frameworks/websocket/include", + "//base/miscservices/voiceassistant/services/include/client", + "//base/miscservices/voiceassistant/services/include/server", + "//base/miscservices/voiceassistant/frameworks/voiceclouddll", + "//utils/native/base/include", + "//third_party/libwebsockets/include", + "//third_party/openssl/include", + "//third_party/json/single_include", + "//third_party/curl/include", + "//foundation/graphic/standard/interfaces/innerkits/wmclient", + "//foundation/graphic/standard/interfaces/innerkits/common", + "//foundation/multimedia/media_standard/interfaces/innerkits/native/media/include", + "//foundation/multimedia/media_standard/services/utils/include" + ] + + sources = [ + "../frameworks/manager/src/audio_record_manager.cpp", + "../frameworks/manager/src/tts_manager.cpp", + "../frameworks/manager/src/wakeup_manager.cpp", + "../frameworks/manager/src/voice_cloud_loader.cpp", + "src/server/voice_assistant_ability_stub.cpp", + "src/server/voice_assistant_agent_service.cpp", + "src/server/voice_assistant_callback_event_target.cpp", + "src/server/voice_assistant_client_callback_proxy.cpp" + ] + + deps = [ + "//base/miscservices/voiceassistant/frameworks/pocketsphinx:pocketsphinx", + 
"//base/miscservices/voiceassistant/frameworks/vad:ps_vad", + "//foundation/ace/napi/:ace_napi", + "//foundation/appexecfwk/standard/interfaces/innerkits/appexecfwk_base:appexecfwk_base", + "//foundation/appexecfwk/standard/interfaces/innerkits/appexecfwk_core:appexecfwk_core", + "//foundation/appexecfwk/standard/interfaces/innerkits/libeventhandler:libeventhandler", + "//foundation/communication/ipc/interfaces/innerkits/ipc_core:ipc_core", + "//foundation/communication/ipc/interfaces/innerkits/ipc_single:ipc_single", + "//foundation/distributedschedule/dmsfwk/interfaces/innerkits/uri:zuri", + "//foundation/distributedschedule/safwk/interfaces/innerkits/safwk:system_ability_fwk", + "//foundation/distributedschedule/samgr/interfaces/innerkits/samgr_proxy:samgr_proxy", + "//foundation/distributeddatamgr/appdatamgr/interfaces/innerkits/native_preferences:native_preferences", + "//utils/native/base:utils", + "//third_party/libwebsockets:websockets", + "//third_party/openssl:libcrypto_static", + "//third_party/openssl:ssl_source", + "//third_party/zlib:libz", + "//foundation/multimedia/audio_standard/interfaces/inner_api/native/audiocapturer:audio_capturer", + "//foundation/graphic/standard:libsurface", + "//foundation/graphic/standard:libwmclient", + "//foundation/graphic/standard/frameworks/surface:surface", + "//foundation/multimedia/image_standard/interfaces/innerkits:image_native", + "//third_party/libjpeg:libjpeg_static", + "//third_party/curl:curl" + ] + + external_deps = [ + "hiviewdfx_hilog_native:libhilog", + "multimedia_media_standard:media_client" + ] + + subsystem_name = "miscservices" + part_name = "voiceassistant" +} + +group("voiceassistant_service_group") { + deps = [":carvoiceassistant", + ":voiceassistant_service"] +} \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_ability_agent.h 
b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_ability_agent.h new file mode 100644 index 0000000000000000000000000000000000000000..d2030f52c46e05c20d2523290c9d97e31c5eb45c --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_ability_agent.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef CAR_VOICE_ASSISTANT_ABILITY_AGENT_H +#define CAR_VOICE_ASSISTANT_ABILITY_AGENT_H + +#include +#include +#include + +#include "iremote_broker.h" +#include "common_utils.h" + +namespace OHOS { +namespace CarVoiceAssistant { + /* Remote interface of the voice assistant ability: one command code per operation, followed by the matching pure-virtual operations. Note that most command names are spelled VOICE_ASSITANT_* while SET_COORD uses VOICE_ASSISTANT_*. */ + class IVoiceAssistantAbilityAgent : public IRemoteBroker { + public: + enum { + VOICE_ASSITANT_CMD_IS_ENABLE_WAKEUP = 0, // query whether wake-up is enabled + VOICE_ASSITANT_CMD_ENABLE_WAKEUP, // enable wake-up + VOICE_ASSITANT_CMD_DISABLE_WAKEUP, // disable wake-up + VOICE_ASSITANT_CMD_IS_RECOGNIZING, // query whether recognition is in progress + VOICE_ASSITANT_CMD_START_RECOGNIZE, // start recognition + VOICE_ASSITANT_CMD_STOP_RECOGNIZE, // stop recognition + VOICE_ASSITANT_CMD_PLAY_TTS, // play TTS + VOICE_ASSITANT_CMD_STOP_PLAY_TTS, // stop TTS playback + VOICE_ASSITANT_CMD_REGISTER_HOTWORDS, // register hotwords + VOICE_ASSISTANT_CMD_SET_COORD, // set latitude/longitude + VOICE_ASSITANT_CMD_REGISTER_CALLBACK, // register callback + VOICE_ASSITANT_CMD_CHANGE_SPEAKER_TYPE, // change the TTS voice + }; + + virtual int32_t IsEnableWakeUp(bool& isEnable) = 0; + virtual int32_t EnableWakeUp() = 0; + virtual int32_t DisableWakeUp() = 0; +
virtual int32_t IsRecognizing(bool& isRecognizing) = 0; + virtual int32_t StartRecognize(CommonUtils::VoiceAssistantErrorCode& result) = 0; + virtual int32_t StopRecognize() = 0; + virtual int32_t PlayTTS(CommonUtils::VoiceAssistantErrorCode& result, std::string& tts) = 0; + virtual int32_t StopPlayTTS() = 0; + virtual int32_t RegisterHotwords(std::string& hotwords) = 0; + virtual int32_t SetCoord(double latitude, double longitude) = 0; + virtual int32_t RegisterCallback() = 0; + virtual int32_t ChangeSpeakerType(std::string speaker) = 0; + + DECLARE_INTERFACE_DESCRIPTOR(u"ohos.miscservices.voiceassistant.IVoiceAssistantAbilityAgent"); + }; +} // namespace CarVoiceAssistant +} // namespace OHOS + +#endif \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_ability_proxy.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_ability_proxy.h new file mode 100644 index 0000000000000000000000000000000000000000..9470ee1aeb70245a835982fd50d830b07b5d6886 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_ability_proxy.h @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + + +#ifndef CAR_VOICE_ASSISTANT_ABILITY_PROXY_H +#define CAR_VOICE_ASSISTANT_ABILITY_PROXY_H + +#include "voice_assistant_ability_agent.h" +#include "common_utils.h" +#include "iremote_proxy.h" +#include "refbase.h" + +namespace OHOS { +namespace CarVoiceAssistant { + /* Proxy class that declares an override for each operation listed in voice_assistant_ability_agent.h. NOTE(review): the template arguments of IRemoteProxy, sptr and BrokerDelegator are missing in this copy of the header - confirm against the repository. */ + class VoiceAssistantAbilityAgentProxy : public IRemoteProxy { + public: + explicit VoiceAssistantAbilityAgentProxy(const sptr& object); + ~VoiceAssistantAbilityAgentProxy() = default; + virtual int32_t IsEnableWakeUp(bool& isEnable) override; + virtual int32_t EnableWakeUp() override; + virtual int32_t DisableWakeUp() override; + virtual int32_t IsRecognizing(bool& isRecognizing) override; + virtual int32_t StartRecognize(CommonUtils::VoiceAssistantErrorCode& result) override; + virtual int32_t StopRecognize() override; + virtual int32_t PlayTTS(CommonUtils::VoiceAssistantErrorCode& result,std::string& tts) override; + virtual int32_t StopPlayTTS() override; + virtual int32_t RegisterHotwords(std::string& hotwords) override; + virtual int32_t SetCoord(double latitude, double longitude) override; + virtual int32_t RegisterCallback() override; + virtual int32_t ChangeSpeakerType(std::string speaker) override; + + private: + static inline BrokerDelegator delegator_; + /* Takes a command code plus request/reply parcels and returns an error code; presumably sends the command to the remote side - verify in the .cpp. */ + CommonUtils::VoiceAssistantErrorCode DoDispatch(uint32_t cmd, MessageParcel& data, MessageParcel& reply); + }; +} +} + +#endif \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_agent_proxy_death_recipient.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_agent_proxy_death_recipient.h new file mode 100644 index 0000000000000000000000000000000000000000..8a0bebd2f3f2b20bec49fb392336aea15d17100b --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_agent_proxy_death_recipient.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef CAR_VOICE_ASSISTANT_AGENT_PROXY_DEATH_RECIPIENT_H +#define CAR_VOICE_ASSISTANT_AGENT_PROXY_DEATH_RECIPIENT_H + +#include "iremote_object.h" +#include "refbase.h" + +namespace OHOS +{ + namespace CarVoiceAssistant + { + /* Death recipient that forwards OnRemoteDied to a callback installed with SetNotifyCb. NOTE(review): this header uses std::function but no include for it is visible here - confirm it is provided. */ + class VoiceAssistantAgentProxyDeathRecipient : public IRemoteObject::DeathRecipient + { + public: + VoiceAssistantAgentProxyDeathRecipient() = default; + virtual ~VoiceAssistantAgentProxyDeathRecipient() = default; + + /* Invokes the installed callback with `remote`; does nothing when no callback has been set. */ + virtual void OnRemoteDied(const wptr &remote) + { + if (diedCb_ != nullptr) + { + diedCb_(remote); + } + } + + using NotifyFunc = std::function &)>; + /* Replaces the callback invoked by OnRemoteDied. */ + void SetNotifyCb(NotifyFunc func) + { + diedCb_ = func; + } + + private: + NotifyFunc diedCb_ = nullptr; + }; + } +} + +#endif \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_client_callback.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_client_callback.h new file mode 100644 index 0000000000000000000000000000000000000000..1b6322d5e591a9f4533edc04330becc27f702be2 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_client_callback.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +#ifndef CAR_VOICE_ASSISTANT_CLIENT_CALLBACK_H +#define CAR_VOICE_ASSISTANT_CLIENT_CALLBACK_H + +#include "iremote_broker.h" +#include + +namespace OHOS { +namespace CarVoiceAssistant { + /* Callback interface for voice assistant events: one code per event, followed by the matching pure-virtual Notify* operations. */ + class IVoiceAssistantClientCallback : public IRemoteBroker { + public: + enum { + VOICE_ASSITANT_CALLBACK_ON_WAKEUP, // assistant was woken up + VOICE_ASSITANT_CALLBACK_RECOGNIZE_STATE_CHANGED, // recognition state changed + VOICE_ASSITANT_CALLBACK_ASR_RESULT, // speech recognition (ASR) result returned + VOICE_ASSISTANT_CALLBACK_TTS_STATE_CHANGED, // TTS playback state changed + }; + + virtual size_t NotifyWakeUp() = 0; + virtual size_t NotifyRecognizeStateChanged(bool isRecognizing) = 0; + virtual size_t NotifyAsrResult(std::string result) = 0; + virtual size_t NotifyTTSPlayStateChanged(bool isPlaying) = 0; + + DECLARE_INTERFACE_DESCRIPTOR(u"ohos.miscservices.voiceassistant.IVoiceAssistantClientCallback"); + }; +} +} + +#endif \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_client_callback_stub.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_client_callback_stub.h new file mode 100644 index 0000000000000000000000000000000000000000..f35da1207e7300b3a50007a47c64e34e4e2a96ce --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_client_callback_stub.h @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef CAR_VOICE_ASSISTANT_CLIENT_CALLBACK_STUB_H
+#define CAR_VOICE_ASSISTANT_CLIENT_CALLBACK_STUB_H
+
+#include "voice_assistant_client_callback.h"
+#include "iremote_stub.h"
+
+namespace OHOS {
+namespace CarVoiceAssistant {
+    class VoiceAssistantClientCallbackStub : public IRemoteStub { // receiving end of IVoiceAssistantClientCallback
+    public:
+        virtual int OnRemoteRequest(uint32_t code,
+            MessageParcel& data, MessageParcel& reply, MessageOption& option) override; // reads the arguments for `code` from `data` and calls the matching Notify* method
+
+        virtual size_t NotifyWakeUp() override; // each Notify* forwards to the event target held by VoiceAssistantClientManager, if one is set
+        virtual size_t NotifyRecognizeStateChanged(bool isRecognizing) override;
+        virtual size_t NotifyAsrResult(std::string result) override;
+        virtual size_t NotifyTTSPlayStateChanged(bool isPlaying) override;
+    };
+} // namespace CarVoiceAssistant
+} // namespace OHOS
+
+#endif
\ No newline at end of file
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_client_manager.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_client_manager.h
new file mode 100644
index 0000000000000000000000000000000000000000..b709830a3d918c8a228c4e49f60a267cedd9378f
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/client/voice_assistant_client_manager.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef VOICE_ASSISTANT_CLIENT_MANAGER_H
+#define VOICE_ASSISTANT_CLIENT_MANAGER_H
+
+#include "voice_assistant_ability_proxy.h"
+#include "voice_assistant_agent_proxy_death_recipient.h"
+#include "voice_assistant_event_target.h"
+#include "common_utils.h"
+#include "refbase.h"
+#include 
+
+namespace OHOS {
+namespace CarVoiceAssistant {
+
+    class VoiceAssistantClientManager : public RefBase { // client-side entry point: owns the service proxy and forwards API calls to it
+    public:
+        static sptr GetInstance(); // returns the shared instance, creating it on first use
+
+        VoiceAssistantClientManager(); // calls CreateAbilityAgentProxy() once; the result is not checked
+        ~VoiceAssistantClientManager();
+
+        void SetEventTarget(sptr eventTarget); // stores the target that the callback stub emits events to
+        sptr GetEventTarget(); // returns the stored target; may be null if none was set
+
+        int32_t IsEnableWakeUp(bool& isEnable); // this and the calls below forward to the service proxy; VOICE_ASSISTANT_ERR when no proxy can be created
+        int32_t EnableWakeUp();
+        int32_t DisableWakeUp();
+        int32_t IsRecognizing(bool& isRecognizing);
+        int32_t StartRecognize(CommonUtils::VoiceAssistantErrorCode& result);
+        int32_t StopRecognize();
+        int32_t PlayTTS(CommonUtils::VoiceAssistantErrorCode& result,std::string& tts);
+        int32_t StopPlayTTS();
+        int32_t RegisterHotwords(std::string& hotwords);
+        int32_t SetCoord(double latitude, double longitude);
+        int32_t RegisterCallback(); // unlike its neighbours it uses mAbilityManager_ directly; CreateAbilityAgentProxy() calls it
+        int32_t ChangeSpeakerType(std::string speaker);
+
+    private:
+        sptr eventTarget_; // set by SetEventTarget()
+        sptr mAbilityManager_; // service proxy; cleared by ResetAgentProxy()
+        sptr deathRecipient_; // registered on the proxy's remote object; its callback is ResetAgentProxy()
+        std::mutex createProxyLock_; // held for the whole of CreateAbilityAgentProxy()
+
+        static std::mutex instanceLock_; // guards creation of instance_
+        static sptr instance_;
+
+        sptr CreateAbilityAgentProxy(); // returns the existing proxy or looks the service up; nullptr on failure
+        void ResetAgentProxy(); // removes the death recipient and drops the proxy
+    };
+
+} // namespace CarVoiceAssistant
+} // namespace OHOS
+
+#endif
\ No newline at end of file
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/server/voice_assistant_ability_stub.h
b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/server/voice_assistant_ability_stub.h
new file mode 100644
index 0000000000000000000000000000000000000000..ac576b9ecdc22ad08d4c241d9074f631ff04dcd6
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/server/voice_assistant_ability_stub.h
@@ -0,0 +1,101 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef CAR_VOICE_ASSISTANT_ABILITY_STUB_H
+#define CAR_VOICE_ASSISTANT_ABILITY_STUB_H
+
+#include "audio_record_manager.h"
+#include "common_utils.h"
+#include "i_voice_cloud_manager.h"
+#include "i_wakeup_manager.h"
+#include "iremote_stub.h"
+#include "tts_manager.h"
+#include "voice_assistant_ability_agent.h"
+#include "voice_assistant_callback_event_target.h"
+#include 
+#include 
+#include 
+
+namespace OHOS {
+namespace CarVoiceAssistant {
+
+    class VoiceAssistantAbilityAgentStub : public IRemoteStub, // service-side stub; also the callback sink for the four managers below
+                                           public IVoiceCloudManagerCallback,
+                                           public IAudioRecordCallback,
+                                           public IWakeUpCallback,
+                                           public ITTSManagerCallback {
+    public:
+        explicit VoiceAssistantAbilityAgentStub(); // creates the managers and registers this object as their callback
+        virtual ~VoiceAssistantAbilityAgentStub();
+        virtual int32_t OnRemoteRequest(uint32_t code,
+            MessageParcel& data,
+            MessageParcel& reply,
+            MessageOption& option) override;
+
+        virtual int32_t IsEnableWakeUp(bool& isEnable) override; // service-side counterparts of the calls VoiceAssistantAbilityAgentProxy sends
+        virtual int32_t EnableWakeUp() override;
+        virtual int32_t DisableWakeUp() override;
+        virtual int32_t IsRecognizing(bool& isRecognizing) override;
+        virtual int32_t StartRecognize(CommonUtils::VoiceAssistantErrorCode& result) override;
+        virtual int32_t StopRecognize() override;
+        virtual int32_t PlayTTS(CommonUtils::VoiceAssistantErrorCode& result, std::string& tts) override;
+        virtual int32_t StopPlayTTS() override;
+        virtual int32_t RegisterHotwords(std::string& hotwords) override;
+        virtual int32_t SetCoord(double latitude, double longitude) override;
+        virtual int32_t RegisterCallback() override;
+        virtual int32_t ChangeSpeakerType(std::string speaker) override;
+
+        bool ConnectWebsocket();
+        void RemoveCallback(const wptr& remoteObject);
+
+        virtual void VoiceCloudStatusChanged(VoiceCloudStatus status) override;
+        virtual void ReveiceVoiceCloudMessage(void* data, size_t length, bool isBinary) override; // NOTE(review): "Reveice" looks like a typo for "Receive"; it is an override, so the interface would have to change too
+
+        virtual void AudioRecordStatusChanged(AudioRecordStatus status) override;
+        virtual void ReceiveAudioBuffer(void* data, size_t length) override;
+
+        virtual void WakeUpCallback(std::string text) override;
+
+        virtual void AudioPlayerStatusChanged(bool isPlaying) override;
+
+        void SendAudioBufferToWebsocketIfNeeded(void* data, size_t length);
+        void CheckWakeUpIfNeeded(void* data, size_t length);
+
+    private:
+        bool isWakeUpEnabled_; // whether the wake-up feature is enabled; false after construction
+        bool isRecognizing_; // false after construction
+        std::pair coord_; // presumably (latitude, longitude) as in SetCoord(); the constructor sets (31.32751, 118.8921)
+        std::string hotwords_; // empty after construction
+
+        // time_t startRecognizingTime_; // time at which recognition started (disabled)
+
+        std::mutex mutex_;
+
+        sptr callbackEventTarget_; // created in the constructor
+        IVoiceCloudManager* voiceCloudManager_; // from CreateVoiceCloudManager(); the constructor allows for it being null
+        sptr audioRecordManager_; // created in the constructor
+        sptr ttsManager_; // created in the constructor and handed voiceCloudManager_
+        IWakeUpManager* wakeUpManager_; // allocated with new in the constructor; NOTE(review): confirm the destructor releases it
+
+        std::shared_ptr player_; // nullptr after construction
+
+        void SendStartAudioStreamIfNeeded();
+        void PlayStartRecoginizingSound(); // NOTE(review): "Recoginizing" is misspelt; private, so a rename only touches this class
+    };
+} // namespace CarVoiceAssistant
+} // namespace OHOS
+
+#endif
\ No newline at end of file
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/server/voice_assistant_agent_service.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/server/voice_assistant_agent_service.h
new file mode 100644
index 0000000000000000000000000000000000000000..0db5adfaf34cdba84aeaaa7510cfda8f0e9f85f4
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/server/voice_assistant_agent_service.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef CAR_VOICE_ASSISTANT_AGENT_SERVICE_H
+#define CAR_VOICE_ASSISTANT_AGENT_SERVICE_H
+
+#include "voice_assistant_ability_stub.h"
+#include "system_ability.h"
+
+namespace OHOS {
+namespace CarVoiceAssistant {
+
+    enum ServiceRunState { // type of VoiceAssistantAgentService::state_
+        ServiceRunStateNotStart,
+        ServiceRunStateRunning
+    };
+
+    class VoiceAssistantAgentService : public SystemAbility, // the system ability that hosts the VoiceAssistantAbilityAgentStub implementation
+                                       public VoiceAssistantAbilityAgentStub {
+        DECLARE_SYSTEM_ABILITY(VoiceAssistantAgentService);
+
+    public:
+        DISALLOW_COPY_AND_MOVE(VoiceAssistantAgentService);
+        VoiceAssistantAgentService(int32_t systemAbilityId, bool runOnCreate);
+        VoiceAssistantAgentService();
+        ~VoiceAssistantAgentService();
+
+    protected:
+        void OnStart() override; // SystemAbility lifecycle hooks; bodies are not in this header
+        void OnStop() override;
+
+    private:
+        ServiceRunState state_; // presumably switched between the two states by OnStart()/OnStop() - TODO confirm in the .cpp
+        int32_t Init();
+    };
+} // namespace CarVoiceAssistant
+} // namespace OHOS
+
+#endif
\ No newline at end of file
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/server/voice_assistant_callback_event_target.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/server/voice_assistant_callback_event_target.h
new file mode 100644
index
0000000000000000000000000000000000000000..29a29caa2db76688fcfa150df7bc025e4ff427c9
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/server/voice_assistant_callback_event_target.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CAR_VOICE_ASSISTANT_CALLBACK_EVENT_TARGET_H
+#define CAR_VOICE_ASSISTANT_CALLBACK_EVENT_TARGET_H
+
+#include "voice_assistant_callback_proxy_death_recipient.h"
+#include "voice_assistant_client_callback.h"
+#include "refbase.h"
+#include 
+
+namespace OHOS {
+namespace CarVoiceAssistant {
+    struct VoiceAssistantCallbackEventListener { // one registered listener: its proxy plus the death recipient that goes with it
+        sptr proxy_;
+        sptr deathRecipient_;
+    };
+
+    class VoiceAssistantCallbackEventTarget : public RefBase { // keeps the listener list and exposes one Emit* per client callback
+    public:
+        void AddListener(sptr proxy, sptr deathRecipient);
+        void RemoveListener(const wptr& remote);
+
+        void EmitOnWakeUp(); // the Emit* names mirror IVoiceAssistantClientCallback's Notify* methods
+        void EmitRecognizeStateChanged(bool isRecognizing);
+        void EmitAsrResult(std::string& result);
+        void EmitTTSPlayStateChanged(bool isPlaying);
+        template 
+        void DoEmit(Callback callback); // presumably applies `callback` to every listener - TODO confirm in the .cpp
+
+    private:
+        std::list listenerList_; // NOTE(review): no mutex is declared here; confirm how concurrent Add/Remove/Emit are serialised
+    };
+
+} // namespace CarVoiceAssistant
+} // namespace OHOS
+
+#endif
\ No newline at end of file
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/server/voice_assistant_callback_proxy_death_recipient.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/server/voice_assistant_callback_proxy_death_recipient.h
new file
mode 100644 index 0000000000000000000000000000000000000000..898535d86e9f4e9548451e74a31ee1a2f5cc0758 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/server/voice_assistant_callback_proxy_death_recipient.h @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef CAR_VOICE_ASSISTANT_CLIENT_CALLBACK_DEATH_RECIPIENT_H +#define CAR_VOICE_ASSISTANT_CLIENT_CALLBACK_DEATH_RECIPIENT_H + +#include "iremote_object.h" +#include "refbase.h" + +namespace OHOS +{ + namespace CarVoiceAssistant + { + class VoiceAssistantClientCallbackDeathRecipient : public IRemoteObject::DeathRecipient + { + public: + VoiceAssistantClientCallbackDeathRecipient() = default; + virtual ~VoiceAssistantClientCallbackDeathRecipient() = default; + + virtual void OnRemoteDied(const wptr &remote) + { + if (diedCb_ != nullptr) + { + diedCb_(remote); + } + } + + using NotifyFunc = std::function &)>; + void SetNotifyCb(NotifyFunc func) + { + diedCb_ = func; + } + + private: + NotifyFunc diedCb_ = nullptr; + }; + } +} + +#endif \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/server/voice_assistant_client_callback_proxy.h b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/server/voice_assistant_client_callback_proxy.h new file mode 100644 index 
0000000000000000000000000000000000000000..25464bf72935345c97648d6f4f9739892b8fae48
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/include/server/voice_assistant_client_callback_proxy.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CAR_VOICE_ASSISTANT_CLIENT_CALLBACK_PROXY_H
+#define CAR_VOICE_ASSISTANT_CLIENT_CALLBACK_PROXY_H
+
+#include "voice_assistant_client_callback.h"
+#include "common_utils.h"
+#include "iremote_object.h"
+#include "iremote_proxy.h"
+
+namespace OHOS {
+namespace CarVoiceAssistant {
+    class VoiceAssistantClientCallbackProxy : public IRemoteProxy { // sending end of IVoiceAssistantClientCallback
+    public:
+        explicit VoiceAssistantClientCallbackProxy(const sptr& impl); // impl: the remote object the requests are sent to
+
+        virtual size_t NotifyWakeUp() override; // presumably each Notify* sends the matching VOICE_*_CALLBACK_* code via DoDispatch - TODO confirm in the .cpp
+        virtual size_t NotifyRecognizeStateChanged(bool isRecognizing) override;
+        virtual size_t NotifyAsrResult(std::string result) override;
+        virtual size_t NotifyTTSPlayStateChanged(bool isPlaying) override;
+
+    private:
+        static inline BrokerDelegator delegator_;
+        CommonUtils::VoiceAssistantErrorCode DoDispatch(uint32_t cmd, MessageParcel& data, MessageParcel& reply);
+    };
+} // namespace CarVoiceAssistant
+} // namespace OHOS
+
+#endif
\ No newline at end of file
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/client/voice_assistant_ability_proxy.cpp b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/client/voice_assistant_ability_proxy.cpp
new file mode
100644 index 0000000000000000000000000000000000000000..d851601e5ade79210e52e2fab43c16f3a6eb02d7 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/client/voice_assistant_ability_proxy.cpp @@ -0,0 +1,268 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "voice_assistant_ability_proxy.h" +#include "voice_assistant_client_callback_stub.h" +#include "voice_assistant_log.h" +#include "common_utils.h" +#include + +using namespace OHOS::CarVoiceAssistant::CommonUtils; + +namespace OHOS { +namespace CarVoiceAssistant { + +#define WRITE_PARCEL_WITH_RET(parcel, type, data, retval) \ + do { \ + if (!(parcel).Write##type(data)) { \ + VOICE_ASSISTANT_LOGI("%{public}s write " #data " failed", __func__); \ + return (retval); \ + } \ + } while (0) + +#define READ_PARCEL_WITH_RET(parcel, type, out, retval) \ + do { \ + if (!(parcel).Read##type(out)) { \ + VOICE_ASSISTANT_LOGI("%{public}s read " #out " failed", __func__); \ + return (retval); \ + } \ + } while (0) + + VoiceAssistantAbilityAgentProxy::VoiceAssistantAbilityAgentProxy(const sptr& object) + : IRemoteProxy(object) + { + } + + int32_t VoiceAssistantAbilityAgentProxy::IsEnableWakeUp(bool& isEnable) + { + VOICE_ASSISTANT_LOGI("IsEnableWakeUp"); + MessageParcel data; + MessageParcel reply; + + CommonUtils::VoiceAssistantErrorCode code = DoDispatch(VOICE_ASSITANT_CMD_IS_ENABLE_WAKEUP, data, reply); + if (code != 
VOICE_ASSISTANT_OK) { + VOICE_ASSISTANT_LOGI("IsEnableWakeUp failed"); + return code; + } + + READ_PARCEL_WITH_RET(reply, Bool, isEnable, VOICE_ASSISTANT_ERR); + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentProxy::EnableWakeUp() + { + VOICE_ASSISTANT_LOGI("EnableWakeUp"); + MessageParcel data; + MessageParcel reply; + + CommonUtils::VoiceAssistantErrorCode code = DoDispatch(VOICE_ASSITANT_CMD_ENABLE_WAKEUP, data, reply); + if (code != VOICE_ASSISTANT_OK) { + VOICE_ASSISTANT_LOGI("EnableWakeUp failed"); + return code; + } + + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentProxy::DisableWakeUp() + { + VOICE_ASSISTANT_LOGI("DisableWakeUp"); + MessageParcel data; + MessageParcel reply; + + CommonUtils::VoiceAssistantErrorCode code = DoDispatch(VOICE_ASSITANT_CMD_DISABLE_WAKEUP, data, reply); + if (code != VOICE_ASSISTANT_OK) { + VOICE_ASSISTANT_LOGI("DisableWakeUp failed"); + return code; + } + + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentProxy::IsRecognizing(bool& isRecognizing) + { + VOICE_ASSISTANT_LOGI("IsRecognizing"); + MessageParcel data; + MessageParcel reply; + + CommonUtils::VoiceAssistantErrorCode code = DoDispatch(VOICE_ASSITANT_CMD_IS_RECOGNIZING, data, reply); + if (code != VOICE_ASSISTANT_OK) { + VOICE_ASSISTANT_LOGI("IsRecognizing failed"); + return code; + } + + READ_PARCEL_WITH_RET(reply, Bool, isRecognizing, VOICE_ASSISTANT_ERR); + + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentProxy::StartRecognize(CommonUtils::VoiceAssistantErrorCode& result) + { + VOICE_ASSISTANT_LOGI("StartRecognize"); + MessageParcel data; + MessageParcel reply; + + CommonUtils::VoiceAssistantErrorCode code = DoDispatch(VOICE_ASSITANT_CMD_START_RECOGNIZE, data, reply); + if (code != VOICE_ASSISTANT_OK) { + VOICE_ASSISTANT_LOGI("StartRecognize failed"); + return code; + } + + int32_t rst = VOICE_ASSISTANT_ERR; + READ_PARCEL_WITH_RET(reply, Int32, rst, VOICE_ASSISTANT_ERR); + result = 
static_cast(rst); + + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentProxy::StopRecognize() + { + VOICE_ASSISTANT_LOGI("StopRecognize"); + MessageParcel data; + MessageParcel reply; + + CommonUtils::VoiceAssistantErrorCode code = DoDispatch(VOICE_ASSITANT_CMD_STOP_RECOGNIZE, data, reply); + if (code != VOICE_ASSISTANT_OK) { + VOICE_ASSISTANT_LOGI("StopRecognize failed"); + return code; + } + + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentProxy::PlayTTS(CommonUtils::VoiceAssistantErrorCode& result, std::string& tts) + { + VOICE_ASSISTANT_LOGI("PlayTTS"); + MessageParcel data; + MessageParcel reply; + + WRITE_PARCEL_WITH_RET(data, String, tts, VOICE_ASSISTANT_ERR); + + CommonUtils::VoiceAssistantErrorCode code = DoDispatch(VOICE_ASSITANT_CMD_PLAY_TTS, data, reply); + if (code != VOICE_ASSISTANT_OK) { + VOICE_ASSISTANT_LOGI("PlayTTS failed"); + return code; + } + + int32_t rst = VOICE_ASSISTANT_ERR; + READ_PARCEL_WITH_RET(reply, Int32, rst, VOICE_ASSISTANT_ERR); + result = static_cast(rst); + + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentProxy::StopPlayTTS() + { + VOICE_ASSISTANT_LOGI("StopPlayTTS"); + MessageParcel data; + MessageParcel reply; + + CommonUtils::VoiceAssistantErrorCode code = DoDispatch(VOICE_ASSITANT_CMD_STOP_PLAY_TTS, data, reply); + if (code != VOICE_ASSISTANT_OK) { + VOICE_ASSISTANT_LOGI("StopPlayTTS failed"); + return code; + } + + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentProxy::RegisterHotwords(std::string& hotwords) + { + VOICE_ASSISTANT_LOGI("RegisterHotwords"); + MessageParcel data; + MessageParcel reply; + + WRITE_PARCEL_WITH_RET(data, String, hotwords, VOICE_ASSISTANT_ERR); + + CommonUtils::VoiceAssistantErrorCode code = DoDispatch(VOICE_ASSITANT_CMD_REGISTER_HOTWORDS, data, reply); + if (code != VOICE_ASSISTANT_OK) { + VOICE_ASSISTANT_LOGI("RegisterHotwords failed"); + return code; + } + + return VOICE_ASSISTANT_OK; + } + + int32_t 
VoiceAssistantAbilityAgentProxy::SetCoord(double latitude, double longitude) + { + VOICE_ASSISTANT_LOGI("SetCoord"); + MessageParcel data; + MessageParcel reply; + + WRITE_PARCEL_WITH_RET(data, Double, latitude, VOICE_ASSISTANT_ERR); + WRITE_PARCEL_WITH_RET(data, Double, longitude, VOICE_ASSISTANT_ERR); + + CommonUtils::VoiceAssistantErrorCode code = DoDispatch(VOICE_ASSISTANT_CMD_SET_COORD, data, reply); + if (code != VOICE_ASSISTANT_OK) { + VOICE_ASSISTANT_LOGI("SetCoord failed"); + return code; + } + + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentProxy::RegisterCallback() + { + VOICE_ASSISTANT_LOGI("RegisterCallback"); + MessageParcel data; + MessageParcel reply; + + sptr callback = new VoiceAssistantClientCallbackStub(); + sptr remoteObject = callback->AsObject(); + WRITE_PARCEL_WITH_RET(data, RemoteObject, remoteObject, VOICE_ASSISTANT_ERR); + + CommonUtils::VoiceAssistantErrorCode code = DoDispatch(VOICE_ASSITANT_CMD_REGISTER_CALLBACK, data, reply); + if (code != VOICE_ASSISTANT_OK) { + VOICE_ASSISTANT_LOGI("RegisterCallback failed"); + return code; + } + + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentProxy::ChangeSpeakerType(std::string speaker) + { + VOICE_ASSISTANT_LOGI("ChangeSpeakerType"); + MessageParcel data; + MessageParcel reply; + + WRITE_PARCEL_WITH_RET(data, String, speaker, VOICE_ASSISTANT_ERR); + + CommonUtils::VoiceAssistantErrorCode code = DoDispatch(VOICE_ASSITANT_CMD_CHANGE_SPEAKER_TYPE, data, reply); + if (code != VOICE_ASSISTANT_OK) { + VOICE_ASSISTANT_LOGI("ChangeSpeakerType failed"); + return code; + } + + return VOICE_ASSISTANT_OK; + } + + CommonUtils::VoiceAssistantErrorCode VoiceAssistantAbilityAgentProxy::DoDispatch(uint32_t cmd, MessageParcel& data, MessageParcel& reply) + { + VOICE_ASSISTANT_LOGI("%{public}s:%{public}d cmd:%{public}d", __func__, __LINE__, cmd); + + MessageOption option; + auto ret = Remote()->SendRequest(cmd, data, reply, option); + 
VOICE_ASSISTANT_LOGI("%{public}s:%{public}d SendRequest end cmd:%{public}d ", __func__, __LINE__, cmd); + if (ret != ERR_NONE) { + VOICE_ASSISTANT_LOGI("failed to send request, cmd: %{public}d, ret: %{public}d", cmd, ret); + return VOICE_ASSISTANT_ERR; + } + VOICE_ASSISTANT_LOGI(" success to dispatch cmd: %{public}d", cmd); + return VOICE_ASSISTANT_OK; + } + +} +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/client/voice_assistant_client_callback_stub.cpp b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/client/voice_assistant_client_callback_stub.cpp new file mode 100644 index 0000000000000000000000000000000000000000..e54ba746a99d81dda2278324fed3dc46ab394983 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/client/voice_assistant_client_callback_stub.cpp @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "voice_assistant_client_callback_stub.h" +#include "common_utils.h" +#include "voice_assistant_client_manager.h" +#include "voice_assistant_log.h" + +using namespace OHOS::CarVoiceAssistant::CommonUtils; + +namespace OHOS { +namespace CarVoiceAssistant { + + int VoiceAssistantClientCallbackStub::OnRemoteRequest(uint32_t code, + MessageParcel& data, MessageParcel& reply, MessageOption& option) + { + switch (code) { + case VOICE_ASSITANT_CALLBACK_ON_WAKEUP: + NotifyWakeUp(); + break; + case VOICE_ASSITANT_CALLBACK_RECOGNIZE_STATE_CHANGED: { + bool isRecognizing = data.ReadBool(); + NotifyRecognizeStateChanged(isRecognizing); + } break; + case VOICE_ASSITANT_CALLBACK_ASR_RESULT: { + std::string result = data.ReadString(); + NotifyAsrResult(result); + } break; + case VOICE_ASSISTANT_CALLBACK_TTS_STATE_CHANGED: { + bool isPlaying = data.ReadBool(); + NotifyTTSPlayStateChanged(isPlaying); + } break; + default: + break; + } + + return 0; + } + + size_t VoiceAssistantClientCallbackStub::NotifyWakeUp() + { + VOICE_ASSISTANT_LOGI("VoiceAssistantClientCallbackStub::NotifyWakeUp"); + sptr eventTarget = VoiceAssistantClientManager::GetInstance()->GetEventTarget(); + if (eventTarget) { + eventTarget->EmitOnWakeUp(); + } + return VOICE_ASSISTANT_OK; + } + + size_t VoiceAssistantClientCallbackStub::NotifyRecognizeStateChanged(bool isRecognizing) + { + VOICE_ASSISTANT_LOGI("VoiceAssistantClientCallbackStub::NotifyRecognizeStateChanged"); + sptr eventTarget = VoiceAssistantClientManager::GetInstance()->GetEventTarget(); + if (eventTarget) { + eventTarget->EmitRecognizeStateChanged(isRecognizing); + } + return VOICE_ASSISTANT_OK; + } + + size_t VoiceAssistantClientCallbackStub::NotifyAsrResult(std::string result) + { + VOICE_ASSISTANT_LOGI("VoiceAssistantClientCallbackStub::NotifyAsrResult"); + sptr eventTarget = VoiceAssistantClientManager::GetInstance()->GetEventTarget(); + if (eventTarget) { + eventTarget->EmitAsrResult(result); + } + return VOICE_ASSISTANT_OK; + 
} + + size_t VoiceAssistantClientCallbackStub::NotifyTTSPlayStateChanged(bool isPlaying) + { + VOICE_ASSISTANT_LOGI("VoiceAssistantClientCallbackStub::NotifyTTSPlayStateChanged"); + sptr eventTarget = VoiceAssistantClientManager::GetInstance()->GetEventTarget(); + if (eventTarget) { + eventTarget->EmitTTSPlayStateChanged(isPlaying); + } + return VOICE_ASSISTANT_OK; + } +} +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/client/voice_assistant_client_manager.cpp b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/client/voice_assistant_client_manager.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8fc3b57dee9d085e9399be38216dbeeddeb76d66 --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/client/voice_assistant_client_manager.cpp @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "voice_assistant_client_manager.h" +#include "voice_assistant_log.h" +#include "common_utils.h" +#include "iservice_registry.h" + +using namespace OHOS::CarVoiceAssistant::CommonUtils; + +namespace OHOS { +namespace CarVoiceAssistant { + +#define CHECK_PROXY_VALID() \ + if (CreateAbilityAgentProxy() == NULL) { \ + VOICE_ASSISTANT_LOGI("CreateAbilityAgentProxy failed"); \ + return VOICE_ASSISTANT_ERR; \ + } + + sptr VoiceAssistantClientManager::instance_; + std::mutex VoiceAssistantClientManager::instanceLock_; + + sptr VoiceAssistantClientManager::GetInstance() + { + VOICE_ASSISTANT_LOGI("VoiceAssistantClientManager::GetInstance"); + if (instance_ == nullptr) { + std::lock_guard autoLock(instanceLock_); + if (instance_ == nullptr) { + VOICE_ASSISTANT_LOGI("VoiceAssistantClientManager::GetInstance"); + instance_ = new VoiceAssistantClientManager(); + } + } + return instance_; + } + + VoiceAssistantClientManager::VoiceAssistantClientManager() + { + CreateAbilityAgentProxy(); + } + + VoiceAssistantClientManager::~VoiceAssistantClientManager() + { + VOICE_ASSISTANT_LOGI("VoiceAssistantClientManager::~VoiceAssistantClientManager"); + instance_ = NULL; + } + + void VoiceAssistantClientManager::SetEventTarget(sptr eventTarget) + { + eventTarget_ = eventTarget; + } + + sptr VoiceAssistantClientManager::GetEventTarget() + { + return eventTarget_; + } + + sptr VoiceAssistantClientManager::CreateAbilityAgentProxy() + { + VOICE_ASSISTANT_LOGI("VoiceAssistantClientManager::VoiceAssistantAbilityAgentProxy"); + + std::lock_guard autoLock(createProxyLock_); + + VOICE_ASSISTANT_LOGI("VoiceAssistantClientManager::VoiceAssistantAbilityAgentProxy Start"); + + if (mAbilityManager_) { + VOICE_ASSISTANT_LOGI("VoiceAssistantClientManager::VoiceAssistantAbilityAgentProxy isExist"); + return mAbilityManager_; + } + + sptr systemAbilityManager = SystemAbilityManagerClient::GetInstance().GetSystemAbilityManager(); + if (systemAbilityManager == nullptr) { + 
VOICE_ASSISTANT_LOGI("VoiceAssistantClientManager::VoiceAssistantAbilityAgentProxy systemAbilityManager is nullptr");
+            return nullptr;
+        }
+
+        auto systemAbility = systemAbilityManager->GetSystemAbility(CAR_VOICE_ASSISTANT_SERVICE_SA_ID, "");
+        if (systemAbility == nullptr) {
+            VOICE_ASSISTANT_LOGI("VoiceAssistantClientManager::VoiceAssistantAbilityAgentProxy systemAbility is nullptr");
+            return nullptr;
+        }
+
+        mAbilityManager_ = iface_cast(systemAbility);
+
+        // Watch the remote object so ResetAgentProxy() runs when the service side dies.
+        deathRecipient_ = new VoiceAssistantAgentProxyDeathRecipient();
+        deathRecipient_->SetNotifyCb(std::bind(&VoiceAssistantClientManager::ResetAgentProxy, this));
+        mAbilityManager_->AsObject()->AddDeathRecipient(deathRecipient_);
+
+        RegisterCallback();
+
+        VOICE_ASSISTANT_LOGI("VoiceAssistantClientManager::VoiceAssistantAbilityAgentProxy finished");
+
+        return mAbilityManager_;
+    }
+
+    // Drops the cached agent proxy: unregisters deathRecipient_ from the remote
+    // object (when both still exist) and clears mAbilityManager_.
+    void VoiceAssistantClientManager::ResetAgentProxy()
+    {
+        VOICE_ASSISTANT_LOGI("VoiceAssistantClientManager::ResetAgentProxy");
+        if (mAbilityManager_ != nullptr && mAbilityManager_->AsObject() != nullptr) {
+            mAbilityManager_->AsObject()->RemoveDeathRecipient(deathRecipient_);
+        }
+        mAbilityManager_ = nullptr;
+    }
+
+    // The methods below forward one call each to the remote agent through
+    // mAbilityManager_ and return its result unchanged.
+    // NOTE(review): CHECK_PROXY_VALID() is defined outside this chunk; presumably it
+    // makes sure mAbilityManager_ is usable (or returns early) -- confirm.
+
+    // Queries whether wake-up detection is enabled; the answer is written to isEnable.
+    int32_t VoiceAssistantClientManager::IsEnableWakeUp(bool& isEnable)
+    {
+        CHECK_PROXY_VALID();
+        return mAbilityManager_->IsEnableWakeUp(isEnable);
+    }
+
+    // Asks the service to enable wake-up detection.
+    int32_t VoiceAssistantClientManager::EnableWakeUp()
+    {
+        CHECK_PROXY_VALID();
+        return mAbilityManager_->EnableWakeUp();
+    }
+
+    // Asks the service to disable wake-up detection.
+    int32_t VoiceAssistantClientManager::DisableWakeUp()
+    {
+        CHECK_PROXY_VALID();
+        return mAbilityManager_->DisableWakeUp();
+    }
+
+    // Queries whether a recognition session is running; the answer is written to isRecognizing.
+    int32_t VoiceAssistantClientManager::IsRecognizing(bool& isRecognizing)
+    {
+        CHECK_PROXY_VALID();
+        return mAbilityManager_->IsRecognizing(isRecognizing);
+    }
+
+    // Starts a recognition session; the service-side outcome is written to result.
+    int32_t VoiceAssistantClientManager::StartRecognize(CommonUtils::VoiceAssistantErrorCode& result)
+    {
+        CHECK_PROXY_VALID();
+        return mAbilityManager_->StartRecognize(result);
+    }
+
+    // Stops the current recognition session.
+    int32_t VoiceAssistantClientManager::StopRecognize()
+    {
+        CHECK_PROXY_VALID();
+        return mAbilityManager_->StopRecognize();
+    }
+
+    // Requests playback of the text in tts; the service-side outcome is written to result.
+    int32_t VoiceAssistantClientManager::PlayTTS(CommonUtils::VoiceAssistantErrorCode& result, std::string& tts)
+    {
+        CHECK_PROXY_VALID();
+        return mAbilityManager_->PlayTTS(result, tts);
+    }
+
+    // Stops TTS playback.
+    int32_t VoiceAssistantClientManager::StopPlayTTS()
+    {
+        CHECK_PROXY_VALID();
+        return mAbilityManager_->StopPlayTTS();
+    }
+
+    // Passes the hot-word payload string to the service unchanged.
+    int32_t VoiceAssistantClientManager::RegisterHotwords(std::string& hotwords)
+    {
+        CHECK_PROXY_VALID();
+        return mAbilityManager_->RegisterHotwords(hotwords);
+    }
+
+    // Passes the current coordinate (latitude, longitude) to the service.
+    int32_t VoiceAssistantClientManager::SetCoord(double latitude, double longitude)
+    {
+        CHECK_PROXY_VALID();
+        return mAbilityManager_->SetCoord(latitude, longitude);
+    }
+
+    // Registers this client's callback with the service. Unlike its siblings it
+    // dereferences mAbilityManager_ without CHECK_PROXY_VALID().
+    int32_t VoiceAssistantClientManager::RegisterCallback()
+    { // Called from CreateAbilityAgentProxy, so CHECK_PROXY_VALID is not needed here
+        return mAbilityManager_->RegisterCallback();
+    }
+
+    // Asks the service to switch the TTS speaker to the given type.
+    int32_t VoiceAssistantClientManager::ChangeSpeakerType(std::string speaker)
+    {
+        CHECK_PROXY_VALID();
+        return mAbilityManager_->ChangeSpeakerType(speaker);
+    }
+
+}
+}
\ No newline at end of file
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/server/voice_assistant_ability_stub.cpp b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/server/voice_assistant_ability_stub.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8e4da92d51f1690176bf96164b8e18359109a759
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/server/voice_assistant_ability_stub.cpp
@@ -0,0 +1,512 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "voice_assistant_ability_stub.h" +#include "common_utils.h" +#include "nlohmann/json.hpp" +#include "voice_assistant_log.h" +#include "voice_cloud_loader.h" +#include "wakeup_manager.h" +#include +#include + +using namespace OHOS::CarVoiceAssistant::CommonUtils; + +namespace OHOS { +namespace CarVoiceAssistant { + +#define WRITE_PARCEL_WITH_RET(parcel, type, data, retval) \ + do { \ + if (!(parcel).Write##type(data)) { \ + VOICE_ASSISTANT_LOGI("%{public}s write " #data " failed", __func__); \ + return (retval); \ + } \ + } while (0) + +#define READ_PARCEL_WITH_RET(parcel, type, out, retval) \ + do { \ + if (!(parcel).Read##type(out)) { \ + VOICE_ASSISTANT_LOGI("%{public}s read " #out " failed", __func__); \ + return (retval); \ + } \ + } while (0) + + VoiceAssistantAbilityAgentStub::VoiceAssistantAbilityAgentStub() + { + isWakeUpEnabled_ = false; + isRecognizing_ = false; + coord_ = std::make_pair(31.32751, 118.8921); + hotwords_ = ""; + // startRecognizingTime_ = 0; + player_ = nullptr; + + callbackEventTarget_ = new VoiceAssistantCallbackEventTarget(); + + voiceCloudManager_ = CreateVoiceCloudManager(); + if (voiceCloudManager_) { + voiceCloudManager_->SetCallback(static_cast(this)); + } + + audioRecordManager_ = new AudioRecordManager(); + audioRecordManager_->SetCallback(static_cast(this)); + + ttsManager_ = new TTSManager(); + ttsManager_->SetCallback(static_cast(this)); + ttsManager_->voiceCloudManager_ = voiceCloudManager_; + + wakeUpManager_ = new WakeUpManager(); + wakeUpManager_->SetCallback(static_cast(this)); + 
wakeUpManager_->Init(); + } + + VoiceAssistantAbilityAgentStub::~VoiceAssistantAbilityAgentStub() + { + callbackEventTarget_ = nullptr; + if (voiceCloudManager_) { + DestoryVoiceCloudManager(voiceCloudManager_); + voiceCloudManager_ = nullptr; + } + audioRecordManager_ = nullptr; + ttsManager_ = nullptr; + if (wakeUpManager_) { + delete wakeUpManager_; + wakeUpManager_ = nullptr; + } + } + + int32_t VoiceAssistantAbilityAgentStub::OnRemoteRequest(uint32_t code, + MessageParcel& data, + MessageParcel& reply, + MessageOption& option) + { + VOICE_ASSISTANT_LOGI("VoiceAssistantAbilityAgentStub::OnRemoteRequest code = %{public}u, pid=%{public}lu", code, pthread_self()); + switch (code) { + case VOICE_ASSITANT_CMD_IS_ENABLE_WAKEUP: { + bool isEnableWakeUp = false; + IsEnableWakeUp(isEnableWakeUp); + WRITE_PARCEL_WITH_RET(reply, Bool, isEnableWakeUp, VOICE_ASSISTANT_ERR); + } break; + case VOICE_ASSITANT_CMD_ENABLE_WAKEUP: + EnableWakeUp(); + break; + case VOICE_ASSITANT_CMD_DISABLE_WAKEUP: + DisableWakeUp(); + break; + case VOICE_ASSITANT_CMD_IS_RECOGNIZING: { + bool isRecognizing; + IsRecognizing(isRecognizing); + WRITE_PARCEL_WITH_RET(reply, Bool, isRecognizing, VOICE_ASSISTANT_ERR); + } break; + case VOICE_ASSITANT_CMD_START_RECOGNIZE: { + VoiceAssistantErrorCode result = VOICE_ASSISTANT_OK; + StartRecognize(result); + WRITE_PARCEL_WITH_RET(reply, Int32, result, VOICE_ASSISTANT_ERR); + + } break; + case VOICE_ASSITANT_CMD_STOP_RECOGNIZE: + StopRecognize(); + break; + case VOICE_ASSITANT_CMD_PLAY_TTS: { + std::string tts; + VoiceAssistantErrorCode result = VOICE_ASSISTANT_OK; + READ_PARCEL_WITH_RET(data, String, tts, VOICE_ASSISTANT_ERR); + PlayTTS(result, tts); + WRITE_PARCEL_WITH_RET(reply, Int32, result, VOICE_ASSISTANT_ERR); + + } break; + case VOICE_ASSITANT_CMD_STOP_PLAY_TTS: + StopPlayTTS(); + break; + case VOICE_ASSITANT_CMD_REGISTER_HOTWORDS: { + std::string hotwords; + READ_PARCEL_WITH_RET(data, String, hotwords, VOICE_ASSISTANT_ERR); + 
RegisterHotwords(hotwords); + } break; + case VOICE_ASSISTANT_CMD_SET_COORD: { + double latitude; + double longitude; + READ_PARCEL_WITH_RET(data, Double, latitude, VOICE_ASSISTANT_ERR); + READ_PARCEL_WITH_RET(data, Double, longitude, VOICE_ASSISTANT_ERR); + SetCoord(latitude, longitude); + } break; + case VOICE_ASSITANT_CMD_REGISTER_CALLBACK: { + sptr proxy = iface_cast(data.ReadRemoteObject()); + if (!proxy) { + VOICE_ASSISTANT_LOGI("OnRemoteRequest-VOICE_ASSITANT_CMD_REGISTER_CALLBACK: is null"); + return 0; + } + sptr deathRecipient = new VoiceAssistantClientCallbackDeathRecipient(); + deathRecipient->SetNotifyCb(std::bind(&VoiceAssistantAbilityAgentStub::RemoveCallback, this, std::placeholders::_1)); + proxy->AsObject()->AddDeathRecipient(deathRecipient); + callbackEventTarget_->AddListener(proxy, deathRecipient); + } break; + case VOICE_ASSITANT_CMD_CHANGE_SPEAKER_TYPE: { + std::string speakerType; + READ_PARCEL_WITH_RET(data, String, speakerType, VOICE_ASSISTANT_ERR); + ChangeSpeakerType(speakerType); + } break; + default: + break; + } + return 0; + } + + int32_t VoiceAssistantAbilityAgentStub::IsEnableWakeUp(bool& isEnable) + { + VOICE_ASSISTANT_LOGI("IsEnableWakeUp:%{public}s", isWakeUpEnabled_ ? 
"true" : "false"); + std::lock_guard lock(mutex_); + isEnable = isWakeUpEnabled_; + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentStub::EnableWakeUp() + { + VOICE_ASSISTANT_LOGI("EnableWakeUp"); + std::lock_guard lock(mutex_); + if (isWakeUpEnabled_) { + VOICE_ASSISTANT_LOGI("EnableWakeUp: is already enabled"); + return VOICE_ASSISTANT_OK; + } + if (audioRecordManager_->GetStatus() != AudioRecordStatusNone) { + isWakeUpEnabled_ = true; + VOICE_ASSISTANT_LOGI("EnableWakeUp: recording is running"); + return VOICE_ASSISTANT_OK; + } + VOICE_ASSISTANT_LOGI("EnableWakeUp: start record"); + bool rst = audioRecordManager_->StartRecord(); + if (rst) { + VOICE_ASSISTANT_LOGI("EnableWakeUp: start record success"); + isWakeUpEnabled_ = true; + } else { + VOICE_ASSISTANT_LOGI("EnableWakeUp: start record failed"); + } + + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentStub::DisableWakeUp() + { + VOICE_ASSISTANT_LOGI("DisableWakeUp"); + std::lock_guard lock(mutex_); + if (!isWakeUpEnabled_) { + VOICE_ASSISTANT_LOGI("DisableWakeUp: is already disabled"); + return VOICE_ASSISTANT_OK; + } + + if (audioRecordManager_->GetStatus() == AudioRecordStatusNone) { + isWakeUpEnabled_ = false; + VOICE_ASSISTANT_LOGI("DisableWakeUp: audio is not recording"); + return VOICE_ASSISTANT_OK; + } + + if (isRecognizing_) { + VOICE_ASSISTANT_LOGI("DisableWakeUp: isRecognizing, do not stop recording"); + isWakeUpEnabled_ = false; + return VOICE_ASSISTANT_OK; + } + + VOICE_ASSISTANT_LOGI("DisableWakeUp: stop recording"); + audioRecordManager_->StopRecord(); + isWakeUpEnabled_ = false; + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentStub::IsRecognizing(bool& isRecognizing) + { + VOICE_ASSISTANT_LOGI("IsRecognizing:%{public}s", isRecognizing_ ? 
"true" : "false"); + std::lock_guard lock(mutex_); + isRecognizing = isRecognizing_; + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentStub::StartRecognize(CommonUtils::VoiceAssistantErrorCode& result) + { + VOICE_ASSISTANT_LOGI("StartRecognize"); + std::lock_guard lock(mutex_); + + if (isRecognizing_) { + VOICE_ASSISTANT_LOGI("StopRecognize: is already recognizing"); + result = VOICE_ASSISTANT_OK; + return VOICE_ASSISTANT_OK; + } + + ttsManager_->CancelAll(); + + if (audioRecordManager_->GetStatus() == AudioRecordStatusNone) { + bool rst = audioRecordManager_->StartRecord(); + if (rst) { + VOICE_ASSISTANT_LOGI("StartRecognize: start record success"); + } else { + VOICE_ASSISTANT_LOGI("StartRecognize: start record failed"); + result = VOICE_ASSISTANT_START_RECORD_FAILED; + return VOICE_ASSISTANT_ERR; + } + } + + //启动websocket + bool rst = ConnectWebsocket(); + if (!rst) { + VOICE_ASSISTANT_LOGI("StartRecognize: connect websocket failed"); + result = VOICE_ASSISTANT_START_WEBSOCKET_CONNECT_FAILED; + return VOICE_ASSISTANT_ERR; + } + + PlayStartRecoginizingSound(); + isRecognizing_ = true; + // startRecognizingTime_ = CommonUtils::GetTimestamp(); + + callbackEventTarget_->EmitRecognizeStateChanged(isRecognizing_); + + std::thread startAudioStreamThread(&VoiceAssistantAbilityAgentStub::SendStartAudioStreamIfNeeded, this); + startAudioStreamThread.detach(); + + result = VOICE_ASSISTANT_OK; + + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentStub::StopRecognize() + { + VOICE_ASSISTANT_LOGI("StopRecognize"); + std::lock_guard lock(mutex_); + if (!isRecognizing_) { + VOICE_ASSISTANT_LOGI("StopRecognize: is already not recognizing"); + return VOICE_ASSISTANT_OK; + } + + if (!isWakeUpEnabled_ && audioRecordManager_->GetStatus() != AudioRecordStatusNone) { + VOICE_ASSISTANT_LOGI("StopRecognize:stop record"); + audioRecordManager_->StopRecord(); + } + + voiceCloudManager_->SendEndAudioStream(); + + 
wakeUpManager_->SetNeedClearBeforeProcess(); + isRecognizing_ = false; + + callbackEventTarget_->EmitRecognizeStateChanged(isRecognizing_); + + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentStub::PlayTTS(CommonUtils::VoiceAssistantErrorCode& result, std::string& tts) + { + VOICE_ASSISTANT_LOGI("PlayTTS:%{public}s", tts.c_str()); + ttsManager_->RequestPlay(tts); + result = VOICE_ASSISTANT_OK; + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentStub::StopPlayTTS() + { + VOICE_ASSISTANT_LOGI("StopPlayTTS"); + ttsManager_->CancelAll(); + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentStub::RegisterHotwords(std::string& hotwords) + { + VOICE_ASSISTANT_LOGI("RegisterHotwords:%{public}s", hotwords.c_str()); + std::lock_guard lock(mutex_); + hotwords_ = hotwords; + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentStub::SetCoord(double latitude, double longitude) + { + VOICE_ASSISTANT_LOGI("SetCoord:%{public}f,%{public}f", latitude, longitude); + coord_ = std::make_pair(latitude, longitude); + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentStub::RegisterCallback() + { + return VOICE_ASSISTANT_OK; + } + + int32_t VoiceAssistantAbilityAgentStub::ChangeSpeakerType(std::string speakerType) + { + VOICE_ASSISTANT_LOGI("ChangeSpeakerType:%{public}s", speakerType.c_str()); + ttsManager_->ChangeSpeakerType(speakerType); + return VOICE_ASSISTANT_OK; + } + + void VoiceAssistantAbilityAgentStub::RemoveCallback(const wptr& remoteObject) + { + callbackEventTarget_->RemoveListener(remoteObject); + } + + bool VoiceAssistantAbilityAgentStub::ConnectWebsocket() + { + VOICE_ASSISTANT_LOGI("ConnectWebsocket"); + if (voiceCloudManager_->GetStatus() == VoiceCloudStatusConnected || voiceCloudManager_->GetStatus() == VoiceCloudStatusConnecting) { + VOICE_ASSISTANT_LOGI("ConnectWebsocket: is connect or connectting, need not connect"); + return true; + } + + return voiceCloudManager_->Connect(); 
+ } + + void VoiceAssistantAbilityAgentStub::SendStartAudioStreamIfNeeded() + { + VOICE_ASSISTANT_LOGI("SendStartAudioStreamIfNeeded"); + usleep(1000000); + + if (!isRecognizing_) { + VOICE_ASSISTANT_LOGI("SendStartAudioStreamIfNeeded: Is not recognizing, not need send start"); + return; + } + + if (voiceCloudManager_->IsSendingAudioStream()) { + VOICE_ASSISTANT_LOGI("SendStartAudioStreamIfNeeded: Is sending stream, not need send start"); + return; + } + if (voiceCloudManager_->GetStatus() == VoiceCloudStatusConnected) { + voiceCloudManager_->SendTrackStat(coord_.first, coord_.second, hotwords_); + voiceCloudManager_->SendStartAudioStream(); + VOICE_ASSISTANT_LOGI("SendStartAudioStreamIfNeeded: send start audio stream"); + } + } + + void VoiceAssistantAbilityAgentStub::VoiceCloudStatusChanged(VoiceCloudStatus status) + { + switch (status) { + case VoiceCloudStatusNone: + if (isRecognizing_) { + StopRecognize(); + } + break; + case VoiceCloudStatusConnected: { + std::thread startAudioStreamThread(&VoiceAssistantAbilityAgentStub::SendStartAudioStreamIfNeeded, this); + startAudioStreamThread.detach(); + } break; + default: + break; + } + } + + void VoiceAssistantAbilityAgentStub::ReveiceVoiceCloudMessage(void* data, size_t length, bool isBinary) + { + if (isBinary) { + VOICE_ASSISTANT_LOGI("ReveiceVoiceCloudMessage: not need parse binary"); + return; + } + + if (length == 0) { + return; + } + + std::string dataStr(static_cast(data), length); + VOICE_ASSISTANT_LOGI("ReveiceVoiceCloudMessage:length:%{public}zu, %{public}s", length, dataStr.c_str()); + nlohmann::json json = nlohmann::json::parse(dataStr, nullptr, false); + if (json.is_discarded()) { + VOICE_ASSISTANT_LOGI("ReveiceVoiceCloudMessage: json parse failed"); + return; + } + + std::string op = json.at("op"); + if (op == "stopListen") { + StopRecognize(); + } else { + if (op == "realTimeASRResult") { + bool isFinish = json.at("isFinish"); + if (isFinish) { + StopRecognize(); + } + } + + 
callbackEventTarget_->EmitAsrResult(dataStr); + } + } + + void VoiceAssistantAbilityAgentStub::AudioRecordStatusChanged(AudioRecordStatus status) + { + switch (status) { + case AudioRecordStatusNone: + if (isRecognizing_) { + StopRecognize(); + } + if (isWakeUpEnabled_) { + DisableWakeUp(); + } + break; + default: + break; + } + } + + void VoiceAssistantAbilityAgentStub::ReceiveAudioBuffer(void* data, size_t length) + { + SendAudioBufferToWebsocketIfNeeded(data, length); + CheckWakeUpIfNeeded(data, length); + } + + void VoiceAssistantAbilityAgentStub::WakeUpCallback(std::string text) + { + if (isRecognizing_ || !isWakeUpEnabled_) { + return; + } + + if (text == "你好博泰") { + callbackEventTarget_->EmitOnWakeUp(); + CommonUtils::VoiceAssistantErrorCode result = VOICE_ASSISTANT_OK; + StartRecognize(result); + } + } + + void VoiceAssistantAbilityAgentStub::AudioPlayerStatusChanged(bool isPlaying) + { + if (callbackEventTarget_ != nullptr) { + callbackEventTarget_->EmitTTSPlayStateChanged(isPlaying); + } + } + + void VoiceAssistantAbilityAgentStub::SendAudioBufferToWebsocketIfNeeded(void* data, size_t length) + { + if (!isRecognizing_) { + return; + } + + if (!voiceCloudManager_->IsSendingAudioStream()) { + return; + } + + voiceCloudManager_->SendBinary(data, length); + } + + void VoiceAssistantAbilityAgentStub::CheckWakeUpIfNeeded(void* data, size_t length) + { + if (!isWakeUpEnabled_) { + return; + } + + if (isRecognizing_) { + return; + } + + wakeUpManager_->Process(data, length); + } + + void VoiceAssistantAbilityAgentStub::PlayStartRecoginizingSound() + { + if (player_ == nullptr) { + player_ = OHOS::Media::PlayerFactory::CreatePlayer(); + } + player_->SetSource("/system/etc/pocketsphinx/voice_tip.mp3"); + player_->Prepare(); + player_->Play(); + } +} +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/server/voice_assistant_agent_service.cpp 
b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/server/voice_assistant_agent_service.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9810a67fc11dfcb2c1ad5762867d32711983ca2c --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/server/voice_assistant_agent_service.cpp @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "voice_assistant_agent_service.h" +#include "voice_assistant_log.h" +#include "ipc_skeleton.h" +#include "iservice_registry.h" +#include "system_ability.h" +#include "common_utils.h" +#include "voice_cloud_loader.h" + +using namespace OHOS::CarVoiceAssistant::CommonUtils; + +namespace OHOS { +namespace CarVoiceAssistant { + + REGISTER_SYSTEM_ABILITY_BY_ID(VoiceAssistantAgentService, CAR_VOICE_ASSISTANT_SERVICE_SA_ID, true); + + VoiceAssistantAgentService::VoiceAssistantAgentService(int32_t systemAbilityId, bool runOnCreate) + : SystemAbility(systemAbilityId, runOnCreate) + , state_(ServiceRunStateNotStart) + { + } + + VoiceAssistantAgentService::VoiceAssistantAgentService() + : state_(ServiceRunStateNotStart) + { + } + + VoiceAssistantAgentService::~VoiceAssistantAgentService() + { + } + + void VoiceAssistantAgentService::OnStart() + { + std::cout<<"Version: 0.0.1"< proxy, sptr deathRecipient) + { + VOICE_ASSISTANT_LOGI("VoiceAssistantCallbackEventTarget::addListener"); + 
listenerList_.push_back({ proxy, deathRecipient }); + } + + void VoiceAssistantCallbackEventTarget::RemoveListener(const wptr& remote) + { + listenerList_.remove_if([remote](VoiceAssistantCallbackEventListener listener) -> bool { + if (listener.proxy_ == nullptr) { + return false; + } + sptr object = listener.proxy_->AsObject(); + if (object != nullptr && remote == object) { + if (listener.deathRecipient_) { + object->RemoveDeathRecipient(listener.deathRecipient_); + } + return true; + } + + return false; + }); + VOICE_ASSISTANT_LOGI("VoiceAssistantCallbackEventTarget::RemoveListener by remote:%{public}d", listenerList_.size()); + } + + void VoiceAssistantCallbackEventTarget::EmitOnWakeUp() + { + VOICE_ASSISTANT_LOGI("VoiceAssistantCallbackEventTarget::EmitOnWakeUp"); + DoEmit([](sptr& proxy) { + proxy->NotifyWakeUp(); + }); + } + + void VoiceAssistantCallbackEventTarget::EmitRecognizeStateChanged(bool isRecognizing) + { + VOICE_ASSISTANT_LOGI("VoiceAssistantCallbackEventTarget::EmitRecognizeStateChanged:%{public}s", isRecognizing ? "true" : "false"); + DoEmit([isRecognizing](sptr& proxy) { + proxy->NotifyRecognizeStateChanged(isRecognizing); + }); + } + + void VoiceAssistantCallbackEventTarget::EmitAsrResult(std::string& result) + { + VOICE_ASSISTANT_LOGI("VoiceAssistantCallbackEventTarget::EmitFavoriteListChanged:%{public}s", result.c_str()); + DoEmit([result](sptr& proxy) { + proxy->NotifyAsrResult(result); + }); + } + + void VoiceAssistantCallbackEventTarget::EmitTTSPlayStateChanged(bool isPlaying){ + VOICE_ASSISTANT_LOGI("VoiceAssistantCallbackEventTarget::EmitTTSPlayStateChanged:%{public}s", isPlaying ? 
"true" : "false"); + DoEmit([isPlaying](sptr& proxy) { + proxy->NotifyTTSPlayStateChanged(isPlaying); + }); + } + + template + void VoiceAssistantCallbackEventTarget::DoEmit(Callback callback) + { + VOICE_ASSISTANT_LOGI("VoiceAssistantCallbackEventTarget::DoEmit"); + for (std::list::iterator it = listenerList_.begin(); it != listenerList_.end(); ++it) { + VOICE_ASSISTANT_LOGI("VoiceAssistantCallbackEventTarget::DoEmit schedule"); + if (it->proxy_ == nullptr) { + VOICE_ASSISTANT_LOGI("VoiceAssistantCallbackEventTarget::DoEmit:proxy_ is null"); + continue; + } else { + callback(it->proxy_); + } + } + } + +} +} diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/server/voice_assistant_client_callback_proxy.cpp b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/server/voice_assistant_client_callback_proxy.cpp new file mode 100644 index 0000000000000000000000000000000000000000..acff1ba784623ead0a0ad8734d796a9854feebde --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/server/voice_assistant_client_callback_proxy.cpp @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "voice_assistant_client_callback_proxy.h" +#include "common_utils.h" +#include "iremote_object.h" +#include "iremote_proxy.h" +#include "voice_assistant_log.h" + +using namespace OHOS::CarVoiceAssistant::CommonUtils; + +namespace OHOS { +namespace CarVoiceAssistant { + +#define WRITE_PARCEL_WITH_RET(parcel, type, data, retval) \ + do { \ + if (!(parcel).Write##type(data)) { \ + VOICE_ASSISTANT_LOGI("%{public}s write " #data " failed", __func__); \ + return (retval); \ + } \ + } while (0) + +#define READ_PARCEL_WITH_RET(parcel, type, out, retval) \ + do { \ + if (!(parcel).Read##type(out)) { \ + VOICE_ASSISTANT_LOGI("%{public}s read " #out " failed", __func__); \ + return (retval); \ + } \ + } while (0) + + VoiceAssistantClientCallbackProxy::VoiceAssistantClientCallbackProxy(const sptr& impl) + : IRemoteProxy(impl) + { + } + + size_t VoiceAssistantClientCallbackProxy::NotifyWakeUp() + { + VOICE_ASSISTANT_LOGI("NotifyWakeUp"); + MessageParcel data; + MessageParcel reply; + CommonUtils::VoiceAssistantErrorCode code = DoDispatch(VOICE_ASSITANT_CALLBACK_ON_WAKEUP, data, reply); + if (code != VOICE_ASSISTANT_OK) { + VOICE_ASSISTANT_LOGI("NotifyWakeUp failed"); + return code; + } + return VOICE_ASSISTANT_OK; + } + + size_t VoiceAssistantClientCallbackProxy::NotifyRecognizeStateChanged(bool isRecognizing) + { + VOICE_ASSISTANT_LOGI("NotifyRecognizeStateChanged:%{public}s", isRecognizing ? 
"true" : "false"); + MessageParcel data; + MessageParcel reply; + + WRITE_PARCEL_WITH_RET(data, Bool, isRecognizing, VOICE_ASSISTANT_ERR); + + CommonUtils::VoiceAssistantErrorCode code = DoDispatch(VOICE_ASSITANT_CALLBACK_RECOGNIZE_STATE_CHANGED, data, reply); + if (code != VOICE_ASSISTANT_OK) { + VOICE_ASSISTANT_LOGI("NotifyRecognizeStateChanged failed"); + return code; + } + return VOICE_ASSISTANT_OK; + } + + size_t VoiceAssistantClientCallbackProxy::NotifyAsrResult(std::string result) + { + VOICE_ASSISTANT_LOGI("NotifyAsrResult:%{public}s", result.c_str()); + MessageParcel data; + MessageParcel reply; + + WRITE_PARCEL_WITH_RET(data, String, result, VOICE_ASSISTANT_ERR); + + CommonUtils::VoiceAssistantErrorCode code = DoDispatch(VOICE_ASSITANT_CALLBACK_ASR_RESULT, data, reply); + if (code != VOICE_ASSISTANT_OK) { + VOICE_ASSISTANT_LOGI("NotifyAsrResult failed"); + return code; + } + return VOICE_ASSISTANT_OK; + } + + size_t VoiceAssistantClientCallbackProxy::NotifyTTSPlayStateChanged(bool isPlaying) + { + VOICE_ASSISTANT_LOGI("NotifyTTSPlayStateChanged:%{public}s", isPlaying ? 
"true" : "false"); + MessageParcel data; + MessageParcel reply; + + WRITE_PARCEL_WITH_RET(data, Bool, isPlaying, VOICE_ASSISTANT_ERR); + + CommonUtils::VoiceAssistantErrorCode code = DoDispatch(VOICE_ASSISTANT_CALLBACK_TTS_STATE_CHANGED, data, reply); + if (code != VOICE_ASSISTANT_OK) { + VOICE_ASSISTANT_LOGI("NotifyTTSPlayStateChanged failed"); + return code; + } + return VOICE_ASSISTANT_OK; + } + + CommonUtils::VoiceAssistantErrorCode VoiceAssistantClientCallbackProxy::DoDispatch(uint32_t cmd, MessageParcel& data, MessageParcel& reply) + { + VOICE_ASSISTANT_LOGI("%{public}s:%{public}d cmd:%{public}d", __func__, __LINE__, cmd); + + MessageOption option; + auto ret = Remote()->SendRequest(cmd, data, reply, option); + VOICE_ASSISTANT_LOGI("%{public}s:%{public}d SendRequest end cmd:%{public}d ", __func__, __LINE__, cmd); + if (ret != ERR_NONE) { + VOICE_ASSISTANT_LOGI("failed to send request, cmd: %{public}d, ret: %{public}d", cmd, ret); + return VOICE_ASSISTANT_ERR; + } + VOICE_ASSISTANT_LOGI(" success to dispatch cmd: %{public}d", cmd); + return VOICE_ASSISTANT_OK; + } +} +} \ No newline at end of file diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/voice_cloud_doc.txt b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/voice_cloud_doc.txt new file mode 100644 index 0000000000000000000000000000000000000000..dce4fb2a1c4b1316698ba9d173fa5deffd73246c --- /dev/null +++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/services/src/voice_cloud_doc.txt @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +voicecloud语音识别返回数据格式: + +op: string // 消息类型 realTimeASRResult 实时上屏文字 / nluResult 语义解析返回 / stopListen 停止识别 +isFinish: true/false //识别是否结束 +text: string //识别的文字 +needDeclare: true/false //是否多轮 +tts: string //回复文字 +intentName: string //意图 +control:string //热词 intentName为UIControl时有效 +modeType: string //模式 +action:string //动作 +value: int //值 +positions: string //位置 + +示例: + +// 实时上屏 +op: realTimeASRResult +isFinish: false +text: 你好 + +//停止识别 +op: stopListen + +// 闲聊,例如 今天天气怎么样 +op: nluResult +intentName: CHAT +needDeclare: false +tts: "今天白天20摄氏度...." + + +//热词 +op: nluResult +intentName: UIControl +control: uicontrol_commo$OpenAir + +//空调温度/风量增减 +op: nluResult +intentName: CAR_AIR_CONDITION_CTRL +modeType: TEMPERATURE_ADJUST/AIRVOLUME_ADJUST +action:INCREASE/DECREASE + +//空调温度、风量设置 +op: nluResult +intentName: CAR_AIR_CONDITION_CTRL +modeType: TEMPERATURE_SET/AIRVOLUME_SET +value: 20 + +//空调外循环/内循环/制热/制冷/通风/除湿/自动/开关 +op: nluResult +intentName: CAR_AIR_CONDITION_CTRL +modeType: OUT_LOOP/IN_LOOP/HOT/COLD/VENTILATION/AREF/AUTO/DOACTION +action: OPEN/CLOSE + +//空调除雾/除霜 +op: nluResult +intentName: CAR_AIR_CONDITION_CTRL +modeType: DEFROST/DEMIST +action: OPEN/CLOSE +positions: FRONT/BACK/ + +//天窗开关 +op: nluResult +intentName: CAR_SKYLIGHT_CTRL +modeType: DOACTION +action: OPEN/CLOSE + +op: nluResult +intentName: CAR_SKYLIGHT_CTRL +modeType: SKYLIGHT_SET +value: 0.5 + +//遮阳帘开关 +op: nluResult +intentName: CAR_SUNSHADE_CTRL +modeType: DOACTION +action: OPEN/CLOSE \ No newline at end of file diff --git 
a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/test/client_test/BUILD.gn b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/test/client_test/BUILD.gn
new file mode 100644
index 0000000000000000000000000000000000000000..5041c86aec70523773eac936fb59b6b671005db0
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/test/client_test/BUILD.gn
@@ -0,0 +1,68 @@
+# Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+
+# http://www.apache.org/licenses/LICENSE-2.0
+
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import("//build/ohos.gni")
+import("//build/ohos/ndk/ndk.gni")
+
+
+
+# Command-line test client built from client_test.cpp; it links against the
+# carvoiceassistant services target listed in deps.
+ohos_executable("client_test") {
+  # Silence unused-symbol and implicit-declaration warnings for this target.
+  cflags = ["-Wno-unused-variable", "-Wno-unused-function", "-Wno-implicit-function-declaration", "-Wno-unused-private-field"]
+  # Build the C++ sources with exception support enabled.
+  cflags_cc = ["-fexceptions"]
+
+  install_enable = true
+  include_dirs = [
+    "//base/miscservices/voiceassistant/frameworks/utils/include",
+    "//base/miscservices/voiceassistant/interfaces/kits/js/napi/include",
+    "//base/miscservices/voiceassistant/services/include/client",
+    "//foundation/ace/napi/interfaces/kits",
+    "//utils/native/base/include",
+    "//utils/system/safwk/native/include",
+    "//third_party/openssl/include",
+    "//third_party/json/single_include",
+    "//third_party"
+  ]
+
+  sources = [
+    "client_test.cpp"
+  ]
+
+  deps = [
+    "//base/miscservices/voiceassistant/services:carvoiceassistant",
+    "//foundation/ace/napi/:ace_napi",
+    "//foundation/appexecfwk/standard/interfaces/innerkits/appexecfwk_base:appexecfwk_base",
+    "//foundation/appexecfwk/standard/interfaces/innerkits/appexecfwk_core:appexecfwk_core",
+    "//foundation/appexecfwk/standard/interfaces/innerkits/libeventhandler:libeventhandler",
+    "//foundation/communication/ipc/interfaces/innerkits/ipc_core:ipc_core",
+    "//foundation/communication/ipc/interfaces/innerkits/ipc_single:ipc_single",
+    "//foundation/distributedschedule/dmsfwk/interfaces/innerkits/uri:zuri",
+    "//foundation/distributedschedule/samgr/interfaces/innerkits/samgr_proxy:samgr_proxy",
+    "//utils/native/base:utils",
+    "//third_party/libwebsockets:websockets",
+    "//third_party/openssl:libcrypto_static",
+    "//third_party/openssl:ssl_source",
+    "//third_party/zlib:libz"
+  ]
+
+  external_deps = [
+    "hiviewdfx_hilog_native:libhilog"
+  ]
+
+  subsystem_name = "miscservices"
+  part_name = "voiceassistant"
+  # NOTE(review): the binary is installed into both the system and the updater
+  # image; confirm the updater image is intended for a test tool.
+  install_images = [
+    "system",
+    "updater",
+  ]
+
+}
diff --git a/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/test/client_test/client_test.cpp b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/test/client_test/client_test.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..838d79abddbb2f74016dcb125cd800eb4bde5f16
--- /dev/null
+++ b/dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/test/client_test/client_test.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2022 PATEO CONNECT+ (Nanjing) Co., Ltd.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "voice_assistant_client_manager.h" +#include "common_utils.h" +#include +#include +#include + +using namespace OHOS; +using namespace OHOS::CarVoiceAssistant; +using namespace std; + +int main() +{ + VoiceAssistantClientManager* manager = VoiceAssistantClientManager::GetInstance(); + + bool isEnableWakeUp = true; + manager->IsEnableWakeUp(isEnableWakeUp); + cout << "isEnableWakeUp:" << isEnableWakeUp << endl; + + while (true) { + cout << "**************************************" << endl; + cout << "1. change speaker" << endl; + cout << "2. speak tts" << endl; + cout << "3. start recognizer" << endl; + cout << "4. stop recognizer" << endl; + cout << "5. register hotwords" << endl; + cout << "6. enable wakeup" << endl; + cout << "7. disable wakeup" << endl; + cout << "8. stop speak tts" << endl; + cout << "**************************************" << endl; + + std::string line; + getline(cin, line); + if (line == "1") { + cout << "input speaker:" << endl; + std::string speaker; + getline(cin, speaker); + if (speaker.length() > 0) { + manager->ChangeSpeakerType(speaker); + } + } else if (line == "2") { + std::string tts = "大连市今天多云,气温15~21℃,西北风6~7级, 温度适宜。"; + CommonUtils::VoiceAssistantErrorCode result = CommonUtils::VOICE_ASSISTANT_OK; + manager->PlayTTS(result, tts); + } else if (line == "3") { + CommonUtils::VoiceAssistantErrorCode result = CommonUtils::VOICE_ASSISTANT_OK; + manager->StartRecognize(result); + cout << "result:" << result << endl; + } else if (line == "4") { + manager->StopRecognize(); + } else if (line == "5") { + std::list> items; + items.push_back({ { "title", "打开车窗" }, { "url", "uicontrol_common$Open_Car_Window" } }); + items.push_back({ { "title", "关闭车窗" }, { "url", "uicontrol_common$Close_Car_Window" } }); + items.push_back({ { "title", "打开空调" }, { "url", "uicontrol_common$Open_Car_Air" } }); + items.push_back({ { "title", "关闭空调" }, { "url", "uicontrol_common$Close_Car_Air" } }); + + nlohmann::json j = items; + std::string jsonStr 
= j.dump(); + + manager->RegisterHotwords(jsonStr); + } else if (line == "6") { + manager->EnableWakeUp(); + } else if (line == "7") { + manager->DisableWakeUp(); + } else if (line == "8") { + manager->StopPlayTTS(); + } + } + + return 0; +} \ No newline at end of file diff --git a/docs/PATEO_CarVoiceAssistant/README_zh.md b/docs/PATEO_CarVoiceAssistant/README_zh.md new file mode 100644 index 0000000000000000000000000000000000000000..4e7e6ec0d5f3283d95b2455eeb4250aada3a7359 --- /dev/null +++ b/docs/PATEO_CarVoiceAssistant/README_zh.md @@ -0,0 +1,80 @@ +[博泰OpenHarmony语音助理](../../FA/PATEO_CarVoiceAssistant) +========= +### 概述 + +博泰OpenHarmony语音助理项目,是由博泰&开放原子基金会联合立项的项目,由博泰车联网(南京)有限公司负责研发。 + +该项目是基于OpenHarmony系统研发的一款语音类产品,包含了语音AI子系统和语音助理应用两大功能模块。 + +OpenHarmony语音助理突破了层层技术难关,实现了OH设备可见即可说的闭环能力,可对开发者基于语音能力的拓展开发进行赋能,对OpenHarmony系统生态共建具有里程碑的意义。 + +### UI效果图 + +![](./media/AppUI.jpg) + +### 视频演示 + +[视频链接](https://www.bilibili.com/video/BV1Ed4y1t7SW/) + +### 涉及OpenHarmony技术特性 + +- eTSUI + +- 音频数据采集AudioStandard::AudioCapturer + +- 音频播放OHOS::Media::Player + +- 子系统开发 + +- IPC通信 + +- NAPI + +### 基础信息 + +| 开发平台 | 系统类型 | 系统版本 | 开发语言 | IDE | +| -------- | -------- | ----------------------- | ------------ | ---------------- | +| DAYU200 | 标准系统 | OpenHarmony 3.1 Release | C++、JS、eTS | VS code、Dev Eco | + +**语音助理App源码目录:[源码目录](../../FA/PATEO_CarVoiceAssistant)** + +**语音AI子系统源码目录:[源码目录](../../dev/team_x/PATEO_CarVoiceAssistant)** + + +### 软件架构 +![框架图](media/架构图.jpg) + +服务补充说明: + +**QingAI:**博泰云服务,音频/文字互转,自然语言解析,最终语义反馈 + +**Sphinx:**唤醒词训练、声音采集,唤醒 + +**VAD:**语音活动检测,截取有效音频 + +**QGSpeechKit:**与QingAI云端通信的服务,含WebSocket网络连接,音频传输,云端ASR解析,NLU解析 + +**TTS****:**语音播报服务 + +**SA Service:**服务管理,跨进程通信服务 + +**NAPI:**服务层C++与应用层eTS转译服务 + +### 功能用例图 + +![用例图](media/功能图.png) + +### 交互流程图 + +![流程图](media/流程图.jpg) + + + +### 开发文档 + +本项目分为语音助理App和语音AI子系统两部分; + +如果你只关注语音助理App部分,可以参考[语音AI子系统集成文档](./语音AI子系统集成文档.md)将语音子系统服务集成到开发板中,然后参考[语音助理App开发文档](./语音助理App开发文档.md)安装App体验。 + 
+如果你想深入了解语音子系统源码,可以参考[语音AI子系统开发文档](./语音AI子系统开发文档.md)。 + diff --git a/docs/PATEO_CarVoiceAssistant/media/AppUI.jpg b/docs/PATEO_CarVoiceAssistant/media/AppUI.jpg new file mode 100644 index 0000000000000000000000000000000000000000..afc6f3a2a995caa4133ea11849094451e668aca8 Binary files /dev/null and b/docs/PATEO_CarVoiceAssistant/media/AppUI.jpg differ diff --git "a/docs/PATEO_CarVoiceAssistant/media/\345\212\237\350\203\275\345\233\276.png" "b/docs/PATEO_CarVoiceAssistant/media/\345\212\237\350\203\275\345\233\276.png" new file mode 100644 index 0000000000000000000000000000000000000000..49e5f595041f34c795fb66891e7fc77acaafbc7d Binary files /dev/null and "b/docs/PATEO_CarVoiceAssistant/media/\345\212\237\350\203\275\345\233\276.png" differ diff --git "a/docs/PATEO_CarVoiceAssistant/media/\345\255\220\347\263\273\347\273\237\345\224\244\351\206\222\346\265\201\347\250\213\345\233\276.jpg" "b/docs/PATEO_CarVoiceAssistant/media/\345\255\220\347\263\273\347\273\237\345\224\244\351\206\222\346\265\201\347\250\213\345\233\276.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..bf2cda84db2fb1a33705d471b63e2d5c39075584 Binary files /dev/null and "b/docs/PATEO_CarVoiceAssistant/media/\345\255\220\347\263\273\347\273\237\345\224\244\351\206\222\346\265\201\347\250\213\345\233\276.jpg" differ diff --git "a/docs/PATEO_CarVoiceAssistant/media/\346\236\266\346\236\204\345\233\276.jpg" "b/docs/PATEO_CarVoiceAssistant/media/\346\236\266\346\236\204\345\233\276.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..765d1895805d38121ba30a910c7a63a6fc14791e Binary files /dev/null and "b/docs/PATEO_CarVoiceAssistant/media/\346\236\266\346\236\204\345\233\276.jpg" differ diff --git "a/docs/PATEO_CarVoiceAssistant/media/\346\265\201\347\250\213\345\233\276.jpg" "b/docs/PATEO_CarVoiceAssistant/media/\346\265\201\347\250\213\345\233\276.jpg" new file mode 100644 index 0000000000000000000000000000000000000000..c62af73f38e1b5afc9da3907815ec47d0e2f85c2 
Binary files /dev/null and "b/docs/PATEO_CarVoiceAssistant/media/\346\265\201\347\250\213\345\233\276.jpg" differ diff --git "a/docs/PATEO_CarVoiceAssistant/\350\257\255\351\237\263AI\345\255\220\347\263\273\347\273\237\345\274\200\345\217\221\346\226\207\346\241\243.md" "b/docs/PATEO_CarVoiceAssistant/\350\257\255\351\237\263AI\345\255\220\347\263\273\347\273\237\345\274\200\345\217\221\346\226\207\346\241\243.md" new file mode 100644 index 0000000000000000000000000000000000000000..a97fc60b3a6201aa6f503407de4954dbe308eb88 --- /dev/null +++ "b/docs/PATEO_CarVoiceAssistant/\350\257\255\351\237\263AI\345\255\220\347\263\273\347\273\237\345\274\200\345\217\221\346\226\207\346\241\243.md" @@ -0,0 +1,121 @@ +## [语音AI子系统开发文档](../../dev/team_x/PATEO_CarVoiceAssistant) + +### 1. 子系统介绍 + +语音子系统作为`miscservices`子系统下一个`part`(`voiceassistant`)集成在系统代码中;包含9个`module`,实现语音唤醒、语义解析、TTS播报、录音等功能;App调用JS接口,通过NAPI调用client端代码,client发消息给server端,server端执行具体逻辑。 + +**代码目录: [语音AI子系统目录](../../dev/team_x/PATEO_CarVoiceAssistant/voiceassistant)** + +### 2. 编译代码 + +1. 准备好OpenHarmony源码编译环境,参考[标准系统编译构建指导](https://docs.openharmony.cn/pages/v3.1/zh-cn/device-dev/subsystems/subsys-build-standard-large.md/); +2. 执行`prebuild.sh`( [脚本所在目录](../../dev/team_x/PATEO_CarVoiceAssistant/voiceassistant))脚本,将云语音动态库、唤醒语言模型等资源解压到代码目录中; +3. 将`voiceassistant`目录拷贝到系统源码`系统源码目录/base/miscservices/`下; +4. 修改`系统源码目录/productdefine/common/products/DAYU.json`文件,添加`"miscservices:voiceassistant":{},`; +5. 执行编译指令`build.sh --product-name DAYU` + +### 3. 开发板集成 + +集成有两种方式: + +1. 将源码编译生成的镜像刷到开发板中;具体参考开发板的刷机教程; + +2. 如果您开发板已经安装好OpenHarmony 3.1 Release系统,可以将生成的动态库、资源文件,参考[语音AI子系统集成文档](语音AI子系统集成文档.md)集成。 + +### 4. 代码架构 + +1. 
代码目录如下: + +```bash +├── etc #语音助理SA服务启动配置 +├── frameworks #FWK目录 +│   ├── manager #管理类,包括录音管理、唤醒管理、TTS播报管理、云语音动态库管理 +│   ├── pocketsphinx #开源库pocketsphinx,用于语音识别 +│   ├── utils #工具类、日志等 +│   ├── vad #开源库VAD检测,用于分割录音中有效音频部分 +│   └── voiceclouddll #云语音动态库,用于语义解析、TTS文字转语音 +├── interfaces #NAPI模块 +│   └── kits +│   └── js +│   ├── declaration #定义ts接口文件 +│   └── napi #NAPI实现类 +├── profile #语音助理SA服务配置 +├── resources #资源文件:语言模型、提示音文件 +├── services #SA服务server、client代码 +└── test #测试类 + +``` + +2. 唤醒、语义识别流程图如下: + +![子系统唤醒流程图](./media/子系统唤醒流程图.jpg) + +3. module说明: + + `pocketsphinx`:语音唤醒库; + + `ps_vad`:VAD检测库,用于检测有效音频流; + + `voicecloud_dll`:云语音动态库; + + `voice_assistant_service.rc`:语音助理SA服务启动配置,开机后自动启动语音助理服务; + + `voice_assistant_sa_profiles`:语音助理SA服务配置; + + `pocketsphinx_all_source`:语音助理相关资源,包含语言模型、提示音文件; + + `voiceassistant_js`: JS声明文件; + + `voiceassistant_service_group`:包含SA服务动态库和语音助理动态库 + + `carvoiceassistant`:语音助理动态库,App层调用引用JS声明文件,自动加载该动态库,JS接口的具体实现代码也在该动态库中; + + `voiceassistant_service`:SA服务动态库,系统启动时加载该动态库启动语音助理SA服务; + + `client_test`:测试程序。 + +4. 
相关类和文件说明: + + NAPI: + + `@ohos.carvoiceassistant.d.ts`:语音助理ts声明文件; + + `VoiceAssistantEventTarget`:client端回调到App的NAPI接口实现; + + `voice_assistant_napi.cpp`:NAPI接口实现; + + framework: + + `AudioRecordManager`:音频录制类,用于录制音频,返回音频流; + + `IWakeUpManager`:唤醒抽象类,包含音频流处理接口,唤醒结果回调接口,开发者可以实现该抽象类接口来替换样例的唤醒引擎; + + `WakeUpManager`: `IWakeUpManager`的具体实现类,使用 WebRtcVad分割音频,pocketsphinx语音识别; + + `TTSManager`:TTS管理类,用于TTS文字转语音(调用云语音接口),并调用播放器播放; + + `voice_cloud_loader`:云语音动态库加载,提供文字转语音接口和语义解析功能。 + + `IVoiceCloudManager`:云语音解析抽象类,包含语义解析、TTS文字转语音接口,开发者可以实现该抽象类接口来替换样例的云语音动态库; + + server端: + + `VoiceAssistantAbilityAgentStub`:负责接收client端发来的消息,并调用相关管理类处理消息; + + `VoiceAssistantAgentService`:SA服务管理类; + + `VoiceAssistantCallbackEventTarget`:client回调管理类,用于client回调的注册、注销、回调处理; + + `VoiceAssistantClientCallbackProxy`:client回调类,用于回调语义解析消息、唤醒状态、识别状态等到client端; + + `VoiceAssistantClientCallbackDeathRecipient`:client回调状态监听类,client断开时注销回调; + + client端: + + `IVoiceAssistantAbilityAgent`:与server通信抽象类; + + `VoiceAssistantAbilityAgentProxy`:实现与server具体通信逻辑; + + `VoiceAssistantClientCallbackStub`:接收server回调,并通过NAPI接口回调到App; + + `VoiceAssistantClientManager`:client端管理类,用于与server端建立连接,发送消息。 \ No newline at end of file diff --git "a/docs/PATEO_CarVoiceAssistant/\350\257\255\351\237\263AI\345\255\220\347\263\273\347\273\237\351\233\206\346\210\220\346\226\207\346\241\243.md" "b/docs/PATEO_CarVoiceAssistant/\350\257\255\351\237\263AI\345\255\220\347\263\273\347\273\237\351\233\206\346\210\220\346\226\207\346\241\243.md" new file mode 100644 index 0000000000000000000000000000000000000000..c2dc9fc4dac1add1c8fb361d362c8ef65204b145 --- /dev/null +++ "b/docs/PATEO_CarVoiceAssistant/\350\257\255\351\237\263AI\345\255\220\347\263\273\347\273\237\351\233\206\346\210\220\346\226\207\346\241\243.md" @@ -0,0 +1,44 @@ +## 语音AI子系统集成 + +1. 系统版本要求:OpenHarmony 3.1Release, 请确认当前开发板的版本**录音功能、网络功能**是否正常。 + +2. 解压[data.zip文件](../../dev/team_x/PATEO_CarVoiceAssistant/data.zip) + +3. 
使用hdc工具将data中的文件发送到OpenHarmony系统中: + + ```bash + #1. 将动态库和资源文件发送到OpenHarmony系统中 + # 如果提示Read only system;进入OH系统后执行:"mount -o rw,remount /"命令后再发送文件 + hdc_std.exe file send voice_assistant_service.xml /system/profile/ + hdc_std.exe file send libcarvoiceassistant.z.so /system/lib/module/libcarvoiceassistant.z.so + hdc_std.exe file send libvoiceassistant_service.z.so /system/lib/libvoiceassistant_service.z.so + hdc_std.exe file send libpocketsphinx.z.so /system/lib/module/libpocketsphinx.z.so + hdc_std.exe file send libps_vad.z.so /system/lib/module/libps_vad.z.so + hdc_std.exe file send libvoicecloud.z.so /system/lib/libvoicecloud.z.so + hdc_std.exe file send voice_assistant_service.cfg /system/etc/init/ + + #在系统/system/etc/下,创建目录pocketsphinx; 创建目录命令: mkdir /system/etc/pocketsphinx + hdc_std.exe file send voice_tip.mp3 /system/etc/pocketsphinx/ + hdc_std.exe file send zh.tar /system/etc/pocketsphinx/ + + #在OpenHarmony系统中解压zh.tar + tar xvf zh.tar + + #确保/system/etc/pocketsphinx/下文件目录结构如下: + ├── zh + │   ├── zh + │   │   ├── feat.params + │   │   ├── feature_transform + │   │   ├── mdef + │   │   ├── means + │   │   ├── mixture_weights + │   │   ├── noisedict + │   │   ├── transition_matrices + │   │   └── variances + │   ├── zh_cn.dic + │   └── zh_cn.lm.bin + ├── voice_tip.mp3 + + #重启系统 + ``` + diff --git "a/docs/PATEO_CarVoiceAssistant/\350\257\255\351\237\263\345\212\251\347\220\206App\345\274\200\345\217\221\346\226\207\346\241\243.md" "b/docs/PATEO_CarVoiceAssistant/\350\257\255\351\237\263\345\212\251\347\220\206App\345\274\200\345\217\221\346\226\207\346\241\243.md" new file mode 100644 index 0000000000000000000000000000000000000000..b7093889c9b471f56e8a9be3e97ecb5b07fb5f21 --- /dev/null +++ "b/docs/PATEO_CarVoiceAssistant/\350\257\255\351\237\263\345\212\251\347\220\206App\345\274\200\345\217\221\346\226\207\346\241\243.md" @@ -0,0 +1,74 @@ +## [语音助理App开发文档](../../FA/PATEO_CarVoiceAssistant) + +### 1. 
介绍 + +语音助理App主要实现语音唤醒、语音识别、热词注册、文字上屏、车辆控制等功能;使用eTS语言开发。运行此App时,请确认开发板已集成语音子系统服务,语音子系统集成文档链接:[语音AI子系统集成文档](./语音AI子系统集成文档.md)。 + +**代码目录:[语音助理App目录](../../FA/PATEO_CarVoiceAssistant)** + +### 2. 主要功能介绍 + +1. 引入语音助理声明文件,声明文件:[文件链接](../../dev/team_x/PATEO_CarVoiceAssistant/voiceassistant/interfaces/kits/js/declaration/@ohos.carvoiceassistant.d.ts) + + ```js + import carvoiceassistant from '@ohos.carvoiceassistant' + let voiceManager = carvoiceassistant.getManager(); // 获取语音助理管理类 + ``` + +2. 开启唤醒 + + ```js + voiceManager.enableWakeUp() + ``` + +3. 注册热词 + + ```js + voiceManager.registerHotwords(JSON.stringify(hotwords)) + ``` + +4. 经纬度设置,用于云语音定位地理位置;例如“今天天气怎么样?”语义可以返回设置的经纬度地区的天气信息。 + + ```js + voiceManager.setCoord(23.025978, 113.754969) + ``` + +5. 监听回调,可以监听识别状态、语义解析回调、TTS播报状态。 + + ```js + voiceManager.on(carvoiceassistant.EventType.VoiceAssistantEventTypeRecognizeStateChanged, (err, data) => { + this.isRecognizing = data['isRecognizing'] + if (this.isRecognizing) { + this.voiceText = "我正在听..." + } else if (this.voiceText == "我正在听...") { + this.voiceText = '' + } + }) + voiceManager.on(carvoiceassistant.EventType.VoiceAssistantEventTypeAsrResult, (err, data) => { + let json: AsrModel = JSON.parse(data['result']) + ... + }) + voiceManager.on(carvoiceassistant.EventType.VoiceAssistantEventTypeTTSPlayStateChanged, (err, data) => { + let isPlaying = data["isPlaying"] + if (isPlaying == false) { + if (this.needDeclare) { + this.isUserStopRecognizing = false; + this.needDeclare = false; + voiceManager.startRecognize(); + } + this.voiceText = ''; + } + }) + } + ``` + +6. 识别接口 + + ```js + voiceManager.startRecognize(); //开始识别 + voiceManager.stopRecognize(); //停止识别 + ``` + + + +​ \ No newline at end of file