47#include <sphinxbase/ckd_alloc.h>
48#include <sphinxbase/listelem_alloc.h>
49#include <sphinxbase/err.h>
59static int ngram_search_step(
ps_search_t *search,
int frame_idx);
62static char const *ngram_search_hyp(
ps_search_t *search, int32 *out_score);
75 ngram_search_seg_iter,
78static ngram_model_t *default_lm;
87 n_words = ps_search_n_words(ngs);
88 words = (
char const**)ckd_calloc(n_words,
sizeof(*words));
90 for (i = 0; i < n_words; ++i)
91 words[i] = dict_wordstr(ps_search_dict(ngs), i);
92 ngram_model_set_map_words(ngs->
lmset, words, n_words);
102 config = ps_search_config(ngs);
103 acmod = ps_search_acmod(ngs);
106 ngs->beam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-beam"))>>
SENSCR_SHIFT;
107 ngs->wbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-wbeam"))>>
SENSCR_SHIFT;
108 ngs->pbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-pbeam"))>>
SENSCR_SHIFT;
109 ngs->lpbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-lpbeam"))>>
SENSCR_SHIFT;
110 ngs->lponlybeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-lponlybeam"))>>
SENSCR_SHIFT;
111 ngs->fwdflatbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-fwdflatbeam"))>>
SENSCR_SHIFT;
112 ngs->fwdflatwbeam = logmath_log(acmod->
lmath, cmd_ln_float64_r(config,
"-fwdflatwbeam"))>>
SENSCR_SHIFT;
115 ngs->maxwpf = cmd_ln_int32_r(config,
"-maxwpf");
116 ngs->maxhmmpf = cmd_ln_int32_r(config,
"-maxhmmpf");
119 ngs->wip = logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-wip")) >>
SENSCR_SHIFT;
120 ngs->nwpen = logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-nwpen")) >>
SENSCR_SHIFT;
121 ngs->pip = logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-pip")) >>
SENSCR_SHIFT;
122 ngs->silpen = ngs->pip
123 + (logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-silprob"))>>
SENSCR_SHIFT);
124 ngs->fillpen = ngs->pip
125 + (logmath_log(acmod->
lmath, cmd_ln_float32_r(config,
"-fillprob"))>>
SENSCR_SHIFT);
128 ngs->fwdflat_fwdtree_lw_ratio =
129 cmd_ln_float32_r(config,
"-fwdflatlw")
130 / cmd_ln_float32_r(config,
"-lw");
131 ngs->bestpath_fwdtree_lw_ratio =
132 cmd_ln_float32_r(config,
"-bestpathlw")
133 / cmd_ln_float32_r(config,
"-lw");
136 ngs->
ascale = 1.0 / cmd_ln_float32_r(config,
"-ascale");
148 static char *lmname =
"default";
153 cmd_ln_boolean_r(config,
"-fwdtree"));
155 ngs = ckd_calloc(1,
sizeof(*ngs));
156 ps_search_init(&ngs->base, &ngram_funcs, PS_SEARCH_TYPE_NGRAM, name, config, acmod, dict, d2p);
158 ngs->
hmmctx = hmm_context_init(bin_mdef_n_emit_state(acmod->
mdef),
160 if (ngs->
hmmctx == NULL) {
161 ps_search_free(ps_search_base(ngs));
169 ngram_search_calc_beams(ngs);
174 ngs->word_lat_idx = ckd_calloc(
dict_size(dict),
175 sizeof(*ngs->word_lat_idx));
177 ngs->last_ltrans = ckd_calloc(
dict_size(dict),
178 sizeof(*ngs->last_ltrans));
182 ngs->bp_table_size = cmd_ln_int32_r(config,
"-latsize");
183 ngs->bp_table = ckd_calloc(ngs->bp_table_size,
184 sizeof(*ngs->bp_table));
186 ngs->bscore_stack_size = ngs->bp_table_size * 20;
187 ngs->bscore_stack = ckd_calloc(ngs->bscore_stack_size,
188 sizeof(*ngs->bscore_stack));
191 sizeof(*ngs->bp_table_idx));
198 ngs->
lmset = ngram_model_set_init(config, &lm, &lmname, NULL, 1);
202 if (ngram_wid(ngs->
lmset, S3_FINISH_WORD) ==
203 ngram_unknown_wid(ngs->
lmset))
205 E_ERROR(
"Language model/set does not contain </s>, "
206 "recognition will fail\n");
211 ngram_search_update_widmap(ngs);
214 if (cmd_ln_boolean_r(config,
"-fwdtree")) {
217 ngs->fwdtree_perf.name =
"fwdtree";
218 ptmr_init(&ngs->fwdtree_perf);
220 if (cmd_ln_boolean_r(config,
"-fwdflat")) {
223 ngs->fwdflat_perf.name =
"fwdflat";
224 ptmr_init(&ngs->fwdflat_perf);
226 if (cmd_ln_boolean_r(config,
"-bestpath")) {
227 ngs->bestpath = TRUE;
228 ngs->bestpath_perf.name =
"bestpath";
229 ptmr_init(&ngs->bestpath_perf);
251 ckd_free(ngs->word_lat_idx);
253 ckd_free(ngs->last_ltrans);
255 ngs->word_lat_idx = ckd_calloc(search->
n_words,
sizeof(*ngs->word_lat_idx));
257 ngs->last_ltrans = ckd_calloc(search->
n_words,
sizeof(*ngs->last_ltrans));
259 = ckd_calloc_2d(2, search->
n_words,
264 ps_search_base_reinit(search, dict, d2p);
266 if (ngs->
lmset == NULL)
270 ngram_search_calc_beams(ngs);
273 ngram_search_update_widmap(ngs);
298 double n_speech = (double)ngs->n_tot_frame
299 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
301 E_INFO(
"TOTAL bestpath %.2f CPU %.3f xRT\n",
302 ngs->bestpath_perf.t_tot_cpu,
303 ngs->bestpath_perf.t_tot_cpu / n_speech);
304 E_INFO(
"TOTAL bestpath %.2f wall %.3f xRT\n",
305 ngs->bestpath_perf.t_tot_elapsed,
306 ngs->bestpath_perf.t_tot_elapsed / n_speech);
309 ps_search_base_free(search);
310 hmm_context_free(ngs->
hmmctx);
314 ngram_model_free(ngs->
lmset);
317 ckd_free(ngs->word_lat_idx);
319 ckd_free(ngs->bp_table);
320 ckd_free(ngs->bscore_stack);
321 if (ngs->bp_table_idx != NULL)
322 ckd_free(ngs->bp_table_idx - 1);
324 ckd_free(ngs->last_ltrans);
333 ngs->bp_table_idx = ckd_realloc(ngs->bp_table_idx - 1,
335 *
sizeof(*ngs->bp_table_idx));
343 ngs->bp_table_idx[frame_idx] = ngs->bpidx;
353 ent = ngs->bp_table + bp;
354 if (ent->
bp == NO_BP)
357 prev = ngs->bp_table + ent->
bp;
360 if (dict_filler_word(ps_search_dict(ngs), ent->
wid)) {
366 ent->
real_wid = dict_basewid(ps_search_dict(ngs),
372 ent->
real_wid = dict_basewid(ps_search_dict(ngs), ent->
wid);
380#define NGRAM_HISTORY_LONG_WORD 2000
384 int32 w, int32 score, int32 path, int32 rc)
391 bp = ngs->word_lat_idx[w];
394 if (frame_idx - ngs->bp_table[path].
frame > NGRAM_HISTORY_LONG_WORD) {
395 E_WARN(
"Word '%s' survived for %d frames, potential overpruning\n", dict_wordstr(ps_search_dict(ngs), w),
396 frame_idx - ngs->bp_table[path].
frame);
404 if (ngs->bp_table[bp].
bp != path) {
405 int32 bplh[2], newlh[2];
409 E_DEBUG(2,(
"Updating path history %d => %d frame %d\n",
410 ngs->bp_table[bp].
bp, path, frame_idx));
411 bplh[0] = ngs->bp_table[bp].
bp == -1
413 bplh[1] = ngs->bp_table[bp].
bp == -1
414 ? -1 : ngs->bp_table[ngs->bp_table[bp].
bp].
real_wid;
415 newlh[0] = path == -1
417 newlh[1] = path == -1
418 ? -1 : ngs->bp_table[path].
real_wid;
421 if (bplh[0] != newlh[0] || bplh[1] != newlh[1]) {
425 E_DEBUG(1, (
"Updating language model state %s,%s => %s,%s frame %d\n",
426 dict_wordstr(ps_search_dict(ngs), bplh[0]),
427 dict_wordstr(ps_search_dict(ngs), bplh[1]),
428 dict_wordstr(ps_search_dict(ngs), newlh[0]),
429 dict_wordstr(ps_search_dict(ngs), newlh[1]),
431 set_real_wid(ngs, bp);
433 ngs->bp_table[bp].
bp = path;
435 ngs->bp_table[bp].
score = score;
440 if (ngs->bp_table[bp].
s_idx != -1)
441 ngs->bscore_stack[ngs->bp_table[bp].
s_idx + rc] = score;
448 if (ngs->bpidx == NO_BP) {
449 E_ERROR(
"No entries in backpointer table!");
454 if (ngs->bpidx >= ngs->bp_table_size) {
455 ngs->bp_table_size *= 2;
456 ngs->bp_table = ckd_realloc(ngs->bp_table,
458 *
sizeof(*ngs->bp_table));
459 E_INFO(
"Resized backpointer table to %d entries\n", ngs->bp_table_size);
461 if (ngs->bss_head >= ngs->bscore_stack_size
462 - bin_mdef_n_ciphone(ps_search_acmod(ngs)->mdef)) {
463 ngs->bscore_stack_size *= 2;
464 ngs->bscore_stack = ckd_realloc(ngs->bscore_stack,
465 ngs->bscore_stack_size
466 *
sizeof(*ngs->bscore_stack));
467 E_INFO(
"Resized score stack to %d entries\n", ngs->bscore_stack_size);
470 ngs->word_lat_idx[w] = ngs->bpidx;
471 be = &(ngs->bp_table[ngs->bpidx]);
473 be->
frame = frame_idx;
476 be->
s_idx = ngs->bss_head;
478 assert(path != ngs->bpidx);
482 be->
last_phone = dict_last_phone(ps_search_dict(ngs),w);
483 if (dict_is_single_phone(ps_search_dict(ngs), w)) {
489 be->
last2_phone = dict_second_last_phone(ps_search_dict(ngs),w);
494 for (i = 0; i < rcsize; ++i)
495 ngs->bscore_stack[ngs->bss_head + i] =
WORST_SCORE;
497 ngs->bscore_stack[ngs->bss_head + rc] = score;
498 set_real_wid(ngs, ngs->bpidx);
501 ngs->bss_head += rcsize;
517 if (frame_idx == -1 || frame_idx >= ngs->
n_frame)
519 end_bpidx = ngs->bp_table_idx[frame_idx];
525 while (frame_idx >= 0 && ngs->bp_table_idx[frame_idx] == end_bpidx)
532 assert(end_bpidx < ngs->bp_table_size);
533 for (bp = ngs->bp_table_idx[frame_idx]; bp < end_bpidx; ++bp) {
534 if (ngs->bp_table[bp].
wid == ps_search_finish_wid(ngs)
536 best_score = ngs->bp_table[bp].
score;
539 if (ngs->bp_table[bp].
wid == ps_search_finish_wid(ngs))
543 if (out_best_score) {
544 *out_best_score = best_score;
562 while (bp != NO_BP) {
563 bptbl_t *be = &ngs->bp_table[bp];
565 if (dict_real_word(ps_search_dict(ngs), be->
wid))
566 len += strlen(dict_basestr(ps_search_dict(ngs), be->
wid)) + 1;
574 base->
hyp_str = ckd_calloc(1, len);
578 while (bp != NO_BP) {
579 bptbl_t *be = &ngs->bp_table[bp];
583 if (dict_real_word(ps_search_dict(ngs), be->
wid)) {
584 len = strlen(dict_basestr(ps_search_dict(ngs), be->
wid));
586 memcpy(c, dict_basestr(ps_search_dict(ngs), be->
wid), len);
602 int32 i, tmatid, ciphone;
606 assert(!dict_is_single_phone(ps_search_dict(ngs), w));
607 ciphone = dict_last_phone(ps_search_dict(ngs),w);
610 dict_second_last_phone(ps_search_dict(ngs),w));
611 tmatid = bin_mdef_pid2tmatid(ps_search_acmod(ngs)->mdef, ciphone);
613 if ((hmm == NULL) || (hmm_nonmpx_ssid(&hmm->
hmm) != rssid->
ssid[0])) {
620 hmm_init(ngs->
hmmctx, &hmm->
hmm, FALSE, rssid->
ssid[0], tmatid);
621 E_DEBUG(3,(
"allocated rc_id 0 ssid %d ciphone %d lc %d word %s\n",
623 dict_second_last_phone(ps_search_dict(ngs),w),
624 dict_wordstr(ps_search_dict(ngs),w)));
626 for (i = 1; i < rssid->
n_ssid; ++i) {
627 if ((hmm->
next == NULL) || (hmm_nonmpx_ssid(&hmm->
next->
hmm) != rssid->
ssid[i])) {
635 hmm_init(ngs->
hmmctx, &hmm->
hmm, FALSE, rssid->
ssid[i], tmatid);
636 E_DEBUG(3,(
"allocated rc_id %d ssid %d ciphone %d lc %d word %s\n",
638 dict_second_last_phone(ps_search_dict(ngs),w),
639 dict_wordstr(ps_search_dict(ngs),w)));
651 for (hmm = ngs->
word_chan[w]; hmm; hmm = thmm) {
653 hmm_deinit(&hmm->
hmm);
677 return ngs->bscore_stack[pbe->
s_idx + rssid->
cimap[rcphone]];
686 int32 *out_ascr, int32 *out_lscr)
692 if (be->
bp == NO_BP) {
693 *out_ascr = be->
score;
699 pbe = ngs->bp_table + be->
bp;
701 dict_first_phone(ps_search_dict(ngs),be->
wid));
708 if (be->
wid == ps_search_silence_wid(ngs)) {
709 *out_lscr = ngs->silpen;
711 else if (dict_filler_word(ps_search_dict(ngs), be->
wid)) {
712 *out_lscr = ngs->fillpen;
716 *out_lscr = ngram_tg_score(ngs->
lmset,
721 *out_lscr = *out_lscr * lwf;
723 *out_ascr = be->
score - start_score - *out_lscr;
732 ngram_model_flush(ngs->
lmset);
735 else if (ngs->fwdflat)
743ngram_search_step(
ps_search_t *search,
int frame_idx)
749 else if (ngs->fwdflat)
759 E_INFO(
"Backpointer table (%d entries):\n", ngs->bpidx);
760 for (i = 0; i < ngs->bpidx; ++i) {
761 bptbl_t *bpe = ngs->bp_table + i;
764 E_INFO_NOFN(
"%-5d %-10s start %-3d end %-3d score %-8d bp %-3d real_wid %-5d prev_real_wid %-5d",
765 i, dict_wordstr(ps_search_dict(ngs), bpe->
wid),
767 ? 0 : ngs->bp_table[bpe->bp].frame + 1),
768 bpe->frame, bpe->score, bpe->bp,
769 bpe->real_wid, bpe->prev_real_wid);
778 for (j = 0; j < rcsize; ++j)
780 E_INFOCONT(
" %d", bpe->
score - ngs->bscore_stack[bpe->
s_idx + j]);
791 ngs->n_tot_frame += ngs->
n_frame;
805 while (ps_search_acmod(ngs)->n_feat_frame > 0) {
817 else if (ngs->fwdflat) {
827ngram_search_bestpath(
ps_search_t *search, int32 *out_score,
int backward)
833 ngs->bestpath_fwdtree_lw_ratio,
839 if (search->
post == 0)
849ngram_search_hyp(
ps_search_t *search, int32 *out_score)
854 if (ngs->bestpath && ngs->done) {
860 ptmr_reset(&ngs->bestpath_perf);
861 ptmr_start(&ngs->bestpath_perf);
864 if ((link = ngram_search_bestpath(search, out_score, FALSE)) == NULL)
867 ptmr_stop(&ngs->bestpath_perf);
869 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
870 E_INFO(
"bestpath %.2f CPU %.3f xRT\n",
871 ngs->bestpath_perf.t_cpu,
872 ngs->bestpath_perf.t_cpu / n_speech);
873 E_INFO(
"bestpath %.2f wall %.3f xRT\n",
874 ngs->bestpath_perf.t_elapsed,
875 ngs->bestpath_perf.t_elapsed / n_speech);
891ngram_search_bp2itor(
ps_seg_t *seg,
int bp)
896 be = &ngs->bp_table[bp];
897 pbe = be->
bp == -1 ? NULL : &ngs->bp_table[be->
bp];
898 seg->word = dict_wordstr(ps_search_dict(ngs), be->
wid);
900 seg->sf = pbe ? pbe->
frame + 1 : 0;
904 seg->ascr = be->
score;
913 dict_first_phone(ps_search_dict(ngs), be->
wid));
915 if (be->
wid == ps_search_silence_wid(ngs)) {
916 seg->lscr = ngs->silpen;
918 else if (dict_filler_word(ps_search_dict(ngs), be->
wid)) {
919 seg->lscr = ngs->fillpen;
922 seg->lscr = ngram_tg_score(ngs->
lmset,
927 seg->lscr = (int32)(seg->lscr * seg->lwf);
929 seg->ascr = be->
score - start_score - seg->lscr;
938 ckd_free(itor->
bpidx);
948 ngram_bp_seg_free(seg);
952 ngram_search_bp2itor(seg, itor->
bpidx[itor->
cur]);
971 itor = ckd_calloc(1,
sizeof(*itor));
972 itor->
base.
vt = &ngram_bp_segfuncs;
977 while (bp != NO_BP) {
978 bptbl_t *be = &ngs->bp_table[bp];
989 while (bp != NO_BP) {
990 bptbl_t *be = &ngs->bp_table[bp];
991 itor->
bpidx[cur] = bp;
997 ngram_search_bp2itor((
ps_seg_t *)itor, itor->bpidx[0]);
1008 if (ngs->bestpath && ngs->done) {
1014 ptmr_reset(&ngs->bestpath_perf);
1015 ptmr_start(&ngs->bestpath_perf);
1018 if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
1021 ngs->bestpath_fwdtree_lw_ratio);
1022 ptmr_stop(&ngs->bestpath_perf);
1024 / cmd_ln_int32_r(ps_search_config(ngs),
"-frate");
1025 E_INFO(
"bestpath %.2f CPU %.3f xRT\n",
1026 ngs->bestpath_perf.t_cpu,
1027 ngs->bestpath_perf.t_cpu / n_speech);
1028 E_INFO(
"bestpath %.2f wall %.3f xRT\n",
1029 ngs->bestpath_perf.t_elapsed,
1030 ngs->bestpath_perf.t_elapsed / n_speech);
1038 return ngram_search_bp_iter(ngs, bpidx,
1040 (ngs->done && ngs->fwdflat)
1041 ? ngs->fwdflat_fwdtree_lw_ratio : 1.0);
1053 if (ngs->bestpath && ngs->done) {
1059 if ((link = ngram_search_bestpath(search, NULL, TRUE)) == NULL)
1061 return search->
post;
1075 for (i = 0, bp_ptr = ngs->bp_table; i < ngs->bpidx; ++i, ++bp_ptr) {
1083 sf = (bp_ptr->
bp < 0) ? 0 : ngs->bp_table[bp_ptr->bp].frame + 1;
1087 assert(ef < dag->n_frames);
1089 if ((wid == ps_search_finish_wid(ngs)) && (ef < dag->n_frames - 1))
1093 if ((!dict_filler_word(ps_search_dict(ngs), wid))
1094 && (!ngram_model_set_known_wid(ngs->
lmset,
1095 dict_basewid(ps_search_dict(ngs), wid))))
1099 for (node = dag->
nodes; node; node = node->
next) {
1100 if ((node->
wid == wid) && (node->
sf == sf))
1112 node->
fef = node->
lef = i;
1133 for (node = dag->
nodes; node; node = node->
next) {
1134 if ((node->
wid == ps_search_start_wid(ngs)) && (node->
sf == 0))
1139 E_ERROR(
"Couldn't find <s> in first frame\n");
1149 int32 ef, bestbp, bp, bestscore;
1152 for (node = dag->
nodes; node; node = node->
next) {
1153 int32 lef = ngs->bp_table[node->
lef].
frame;
1154 if ((node->
wid == ps_search_finish_wid(ngs))
1165 ef >= 0 && ngs->bp_table_idx[ef] == ngs->bpidx;
1168 E_ERROR(
"Empty backpointer table: can not build DAG.\n");
1175 for (bp = ngs->bp_table_idx[ef]; bp < ngs->bp_table_idx[ef + 1]; ++bp) {
1176 int32 n_used, l_scr, wid, prev_wid;
1180 if (wid == ps_search_finish_wid(ngs)) {
1184 l_scr = ngram_tg_score(ngs->
lmset, ps_search_finish_wid(ngs),
1186 l_scr = l_scr * lwf;
1188 bestscore = ngs->bp_table[bp].
score + l_scr;
1192 if (bestbp == NO_BP) {
1193 E_ERROR(
"No word exits found in last frame (%d), assuming no recognition\n", ef);
1196 E_INFO(
"</s> not found in last frame, using %s.%d instead\n",
1197 dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].
wid), ef);
1200 for (node = dag->
nodes; node; node = node->
next) {
1201 if (node->
lef == bestbp)
1206 E_ERROR(
"Failed to find DAG node corresponding to %s\n",
1207 dict_basestr(ps_search_dict(ngs), ngs->bp_table[bestbp].
wid));
1217 int32 i, score, ascr, lscr;
1221 int min_endfr, nlink;
1225 min_endfr = cmd_ln_int32_r(ps_search_config(search),
"-min_endfr");
1242 lwf = ngs->fwdflat ? ngs->fwdflat_fwdtree_lw_ratio : 1.0;
1243 create_dag_nodes(ngs, dag);
1244 if ((dag->
start = find_start_node(ngs, dag)) == NULL)
1246 if ((dag->
end = find_end_node(ngs, dag, ngs->bestpath_fwdtree_lw_ratio)) == NULL)
1248 E_INFO(
"lattice start node %s.%d end node %s.%d\n",
1252 ngram_compute_seg_score(ngs, ngs->bp_table + dag->
end->
lef, lwf,
1276 E_INFO(
"Eliminated %d nodes before end node\n", i);
1279 for (to = dag->
end; to; to = to->
next) {
1288 fef = ngs->bp_table[to->
fef].
frame;
1289 lef = ngs->bp_table[to->
lef].
frame;
1290 if (to != dag->
end && lef - fef < min_endfr) {
1296 for (from = to->
next; from; from = from->
next) {
1299 fef = ngs->bp_table[from->
fef].
frame;
1300 lef = ngs->bp_table[from->
lef].
frame;
1302 if ((to->
sf <= fef) || (to->
sf > lef + 1))
1304 if (lef - fef < min_endfr) {
1311 from_bpe = ngs->bp_table + i;
1312 for (; i <= from->
lef; i++, from_bpe++) {
1313 if (from_bpe->
wid != from->
wid)
1315 if (from_bpe->
frame >= to->
sf - 1)
1319 if ((i > from->
lef) || (from_bpe->
frame != to->
sf - 1))
1324 ngram_compute_seg_score(ngs, from_bpe, lwf,
1330 dict_first_phone(ps_search_dict(ngs), to->
wid));
1336 score = ascr + (score - from_bpe->
score);
1357 E_ERROR(
"End node of lattice isolated; unreachable\n");
1361 for (node = dag->
nodes; node; node = node->
next) {
1370 for (node = dag->
nodes; node; node = node->
next) {
1373 for (alt = node->
next; alt && alt->
sf == node->
sf; alt = alt->
next) {
1381 E_INFO(
"Lattice has %d nodes, %d links\n", dag->
n_nodes, nlink);
1386 if (dict_filler_word(ps_search_dict(ngs), dag->
end->
wid))
1387 dag->
end->
basewid = ps_search_finish_wid(ngs);
1405 default_lm = ngram_model_retain(lm);
int acmod_set_grow(acmod_t *acmod, int grow_feat)
Set memory allocation policy for utterance processing.
int acmod_advance(acmod_t *acmod)
Advance the frame index.
int acmod_rewind(acmod_t *acmod)
Rewind the current utterance, allowing it to be rescored.
#define dict2pid_rssid(d, ci, lc)
Access macros; not designed for arbitrary use.
#define dict_size(d)
Packaged macro access to dictionary members.
#define BETTER_THAN
Is one score better than another?
#define WORST_SCORE
Large "bad" score.
#define WORSE_THAN
Is one score worse than another?
#define SENSCR_SHIFT
Shift count for senone scores.
void ngram_search_set_lm(ngram_model_t *lm)
Sets the global language model.
void ngram_search_free_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
void ngram_search_alloc_all_rc(ngram_search_t *ngs, int32 w)
Allocate last phone channels for all possible right contexts for word w.
int32 ngram_search_exit_score(ngram_search_t *ngs, bptbl_t *pbe, int rcphone)
Get the exit score for a backpointer entry with a given right context.
ps_search_t * ngram_search_init(const char *name, ngram_model_t *lm, cmd_ln_t *config, acmod_t *acmod, dict_t *dict, dict2pid_t *d2p)
Initialize the N-Gram search module.
int ngram_search_mark_bptable(ngram_search_t *ngs, int frame_idx)
Record the current frame's index in the backpointer table.
ps_lattice_t * ngram_search_lattice(ps_search_t *search)
Construct a word lattice from the current hypothesis.
int ngram_search_find_exit(ngram_search_t *ngs, int frame_idx, int32 *out_best_score)
Find the best word exit for the current frame in the backpointer table.
char const * ngram_search_bp_hyp(ngram_search_t *ngs, int bpidx)
Backtrace from a given backpointer index to obtain a word hypothesis.
void ngram_search_save_bp(ngram_search_t *ngs, int frame_idx, int32 w, int32 score, int32 path, int32 rc)
Enter a word in the backpointer table.
void ngram_search_free(ps_search_t *search)
Finalize the N-Gram search module.
N-Gram based multi-pass search ("FBS")
void ngram_fwdflat_start(ngram_search_t *ngs)
Start fwdflat decoding for an utterance.
void ngram_fwdflat_deinit(ngram_search_t *ngs)
Release memory associated with fwdflat decoding.
int ngram_fwdflat_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
void ngram_fwdflat_finish(ngram_search_t *ngs)
Finish fwdflat decoding for an utterance.
void ngram_fwdflat_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdflat decoding.
int ngram_fwdflat_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
Flat lexicon based Viterbi search.
void ngram_fwdtree_deinit(ngram_search_t *ngs)
Release memory associated with fwdtree decoding.
void ngram_fwdtree_init(ngram_search_t *ngs)
Initialize N-Gram search for fwdtree decoding.
int ngram_fwdtree_search(ngram_search_t *ngs, int frame_idx)
Search one frame forward in an utterance.
int ngram_fwdtree_reinit(ngram_search_t *ngs)
Rebuild search structures for updated language models.
void ngram_fwdtree_finish(ngram_search_t *ngs)
Finish fwdtree decoding for an utterance.
void ngram_fwdtree_start(ngram_search_t *ngs)
Start fwdtree decoding for an utterance.
Lexicon tree based Viterbi search.
Internal implementation of PocketSphinx decoder.
ps_lattice_t * ps_lattice_init_search(ps_search_t *search, int n_frame)
Construct an empty word graph with reference to a search structure.
void ps_lattice_penalize_fillers(ps_lattice_t *dag, int32 silpen, int32 fillpen)
Insert penalty for fillers.
char const * ps_lattice_hyp(ps_lattice_t *dag, ps_latlink_t *link)
Get hypothesis string after bestpath search.
void ps_lattice_delete_unreachable(ps_lattice_t *dag)
Remove nodes marked as unreachable.
ps_seg_t * ps_lattice_seg_iter(ps_lattice_t *dag, ps_latlink_t *link, float32 lwf)
Get hypothesis segmentation iterator after bestpath search.
POCKETSPHINX_EXPORT int ps_lattice_free(ps_lattice_t *dag)
Free a lattice.
POCKETSPHINX_EXPORT void ps_lattice_link(ps_lattice_t *dag, ps_latnode_t *from, ps_latnode_t *to, int32 score, int32 ef)
Create a directed link between "from" and "to" nodes, but if a link already exists,...
POCKETSPHINX_EXPORT int32 ps_lattice_posterior(ps_lattice_t *dag, ngram_model_t *lmset, float32 ascale)
Calculate link posterior probabilities on a word graph.
POCKETSPHINX_EXPORT ps_latlink_t * ps_lattice_bestpath(ps_lattice_t *dag, ngram_model_t *lmset, float32 lwf, float32 ascale)
Do N-Gram based best-path search on a word graph.
Word graph search implementation.
#define BAD_S3WID
Dictionary word id.
Acoustic model structure.
bin_mdef_t * mdef
Model definition.
logmath_t * lmath
Log-math computation.
tmat_t * tmat
Transition matrices.
uint16 ** sseq
Unique senone sequences (2D array built at load time)
Back pointer table (forward pass lattice; actually a tree)
int16 last2_phone
next-to-last phone of this word
uint8 valid
For absolute pruning.
int32 prev_real_wid
wid of second-last real word
int32 real_wid
wid of this or latest predecessor real word
int32 score
Score (best among all right contexts)
int16 last_phone
last phone of this word
int32 s_idx
Start of BScoreStack for various right contexts.
frame_idx_t frame
start or end frame
Segmentation "iterator" for backpointer table results.
int16 cur
Current position in bpidx.
int32 * bpidx
Sequence of backpointer IDs.
int16 n_bpidx
Number of backpointer IDs.
ps_seg_t base
Base structure.
Lexical tree node data type.
struct chan_s * next
first descendant of this channel; or, in the case of the last phone of a word, the next alternative r...
int32 ciphone
ciphone for this node
hmm_t hmm
Basic HMM structure.
int32 rc_id
right-context id for last phone of words
Building composite triphone (as well as word internal triphones) with the dictionary.
a structure for a dictionary.
N-Gram search module structure.
int32 best_score
Best Viterbi path score.
float32 ascale
Acoustic score scale for posterior probabilities.
listelem_alloc_t * latnode_alloc
For latnode_t.
int32 n_frame_alloc
Number of frames allocated in bp_table_idx and friends.
int32 ** active_word_list
Array of active multi-phone words for current and next frame.
int32 n_frame
Number of frames actually present.
listelem_alloc_t * root_chan_alloc
For root_chan_t.
ngram_model_t * lmset
Set of language models.
chan_t ** word_chan
Channels associated with a given word (only used for right contexts, single-phone words in fwdtree se...
ps_latnode_t ** frm_wordlist
List of active words in each frame.
listelem_alloc_t * chan_alloc
For chan_t.
hmm_context_t * hmmctx
HMM context.
bitvec_t * word_active
array of active flags for all words.
int32 path_scr
Best path score from root of DAG.
latlink_list_t * entries
Links into this node.
frame_idx_t sf
Start frame.
latlink_list_t * exits
Links out of this node.
int32 fef
First end frame.
struct ps_latnode_s * alt
Node with alternate pronunciation for this word.
struct ps_latnode_s * next
Next node in DAG (no ordering implied)
int32 basewid
Dictionary base word id.
int32 wid
Dictionary word id.
Word graph structure used in bestpath/nbest search.
ps_latnode_t * end
Ending node.
listelem_alloc_t * latnode_alloc
Node allocator for this DAG.
frame_idx_t n_frames
Number of frames for this utterance.
ps_latnode_t * start
Starting node.
ps_latnode_t * nodes
List of all nodes.
int32 final_node_ascr
Acoustic score of implicit link exiting final node.
int32 n_nodes
Number of nodes in this lattice.
Base structure for search module.
int32 post
Utterance posterior probability.
ps_lattice_t * dag
Current hypothesis word graph.
dict_t * dict
Pronunciation dictionary.
ps_latlink_t * last_link
Final link in best path.
char * hyp_str
Current hypothesis string.
int32 n_words
Number of words known to search (may be less than in the dictionary)
V-table for search algorithm.
Base structure for hypothesis segmentation iterator.
ps_search_t * search
Search object from whence this came.
float32 lwf
Language weight factor (for second-pass searches)
ps_segfuncs_t * vt
V-table of seg methods.
Lexical tree node data type for the first phone (root) of each dynamic HMM tree structure.
uint8 *** tp
The transition matrices; kept in the same scale as acoustic scores; tp[tmatid][from-state][to-state].
cross word triphone model structure
s3cipid_t * cimap
Index into ssid[] above for each ci phone.
int32 n_ssid
#Unique ssid in above, compressed ssid list
s3ssid_t * ssid
Senone Sequence ID list for all context ciphones.