From 0655646ede7b1244c28bca0d107b6e64dd813420 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 28 Sep 2022 22:18:04 -0400 Subject: [PATCH 1/7] feat: resurrect gstreamer code --- MANIFEST.in | 1 + gst/Makefile.am | 29 ++ gst/gstpocketsphinx.c | 808 ++++++++++++++++++++++++++++++++++++++++++ gst/gstpocketsphinx.h | 98 +++++ gst/livedemo.c | 108 ++++++ gst/livedemo.py | 103 ++++++ 6 files changed, 1147 insertions(+) create mode 100644 gst/Makefile.am create mode 100644 gst/gstpocketsphinx.c create mode 100644 gst/gstpocketsphinx.h create mode 100644 gst/livedemo.c create mode 100644 gst/livedemo.py diff --git a/MANIFEST.in b/MANIFEST.in index 7465d6f71..414beadf8 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -15,6 +15,7 @@ include setup.cfg include setup.py include sphinx_config.h.in recursive-include cython * +recursive-include gst * recursive-include docs * recursive-include doxygen * recursive-include examples * diff --git a/gst/Makefile.am b/gst/Makefile.am new file mode 100644 index 000000000..d3f70bf01 --- /dev/null +++ b/gst/Makefile.am @@ -0,0 +1,29 @@ +my_plugins = +my_headers = +my_files = +if BUILD_GST +my_plugins += libgstpocketsphinx.la +endif + +plugin_LTLIBRARIES = $(my_plugins) + +libgstpocketsphinx_la_SOURCES = gstpocketsphinx.c + +libgstpocketsphinx_la_LIBADD = \ + $(GST_LIBS) \ + -lgstaudio-$(GST_MAJORMINOR) \ + $(top_builddir)/src/libpocketsphinx/libpocketsphinx.la \ + -lsphinxbase + +libgstpocketsphinx_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS) + +noinst_HEADERS = gstpocketsphinx.h + +AM_CFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_builddir)/include \ + $(GST_CFLAGS) \ + -DMODELDIR=\"$(pkgdatadir)/model\" + +EXTRA_DIST = livedemo.py livedemo.c + diff --git a/gst/gstpocketsphinx.c b/gst/gstpocketsphinx.c new file mode 100644 index 000000000..32442d1cd --- /dev/null +++ b/gst/gstpocketsphinx.c @@ -0,0 +1,808 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + 
* Copyright (c) 2014 Alpha Cephei Inc. + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. + * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * ==================================================================== + * + * Author: David Huggins-Daines + */ + +/** + * SECTION:element-pocketsphix + * + * The element runs the speech recohgnition on incomming audio buffers and + * generates an element messages named "pocketsphinx" + * for each hypothesis and one for the final result. The message's structure + * contains these fields: + * + * + * + * + * #GstClockTime + * "timestamp": + * the timestamp of the buffer that triggered the message. + * + * + * + * + * #gboolean + * "final": + * %FALSE for intermediate messages. + * + * + * + * + * #gin32 + * "confidence": + * posterior probability (confidence) of the result in log domain + * + * + * + * + * #gchar + * "hypothesis": + * the recognized text + * + * + * + * + * + * Example pipeline + * |[ + * gst-launch-1.0 -m autoaudiosrc ! audioconvert ! audioresample ! pocketsphinx ! fakesink + * ]| + * + */ + +#ifdef HAVE_CONFIG_H +#include +#endif + +#include +#include + +#include +#include + +#include "gstpocketsphinx.h" + +GST_DEBUG_CATEGORY_STATIC(pocketsphinx_debug); +#define GST_CAT_DEFAULT pocketsphinx_debug + + +static void +gst_pocketsphinx_set_property(GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec); + +static void +gst_pocketsphinx_get_property(GObject * object, guint prop_id, + GValue * value, GParamSpec * pspec); + +static GstStateChangeReturn +gst_pocketsphinx_change_state(GstElement *element, GstStateChange transition); + +static GstFlowReturn +gst_pocketsphinx_chain(GstPad * pad, GstObject *parent, GstBuffer * buffer); + +static gboolean +gst_pocketsphinx_event(GstPad *pad, GstObject *parent, GstEvent *event); + +static void +gst_pocketsphinx_finalize_utt(GstPocketSphinx *ps); + +static void +gst_pocketsphinx_finalize(GObject * gobject); + +enum +{ + PROP_0, + PROP_HMM_DIR, + PROP_LM_FILE, + PROP_LMCTL_FILE, + PROP_DICT_FILE, + PROP_MLLR_FILE, + PROP_FSG_FILE, + PROP_ALLPHONE_FILE, + PROP_KWS_FILE, + 
PROP_JSGF_FILE, + PROP_FWDFLAT, + PROP_BESTPATH, + PROP_MAXHMMPF, + PROP_MAXWPF, + PROP_BEAM, + PROP_WBEAM, + PROP_PBEAM, + PROP_DSRATIO, + + PROP_LATDIR, + PROP_LM_NAME, + PROP_DECODER +}; + +/* + * Static data. + */ + +/* Default command line. (will go away soon and be constructed using properties) */ +static char *default_argv[] = { + "gst-pocketsphinx", +}; +static const int default_argc = sizeof(default_argv)/sizeof(default_argv[0]); + +static GstStaticPadTemplate sink_factory = + GST_STATIC_PAD_TEMPLATE("sink", + GST_PAD_SINK, + GST_PAD_ALWAYS, + GST_STATIC_CAPS("audio/x-raw, " + "format = (string) { S16LE }, " + "channels = (int) 1, " + "rate = (int) 16000") + ); + +static GstStaticPadTemplate src_factory = + GST_STATIC_PAD_TEMPLATE("src", + GST_PAD_SRC, + GST_PAD_ALWAYS, + GST_STATIC_CAPS("text/plain") + ); + +/* + * Boxing of ps_decoder_t. + */ +GType +ps_decoder_get_type(void) +{ + static GType ps_decoder_type = 0; + + if (G_UNLIKELY(ps_decoder_type == 0)) { + ps_decoder_type = g_boxed_type_register_static + ("PSDecoder", + /* Conveniently, these should just work. */ + (GBoxedCopyFunc) ps_retain, + (GBoxedFreeFunc) ps_free); + } + + return ps_decoder_type; +} + + +G_DEFINE_TYPE(GstPocketSphinx, gst_pocketsphinx, GST_TYPE_ELEMENT); + +static void +gst_pocketsphinx_class_init(GstPocketSphinxClass * klass) +{ + GObjectClass *gobject_class; + GstElementClass *element_class;; + + gobject_class =(GObjectClass *) klass; + element_class = (GstElementClass *)klass; + + gobject_class->set_property = gst_pocketsphinx_set_property; + gobject_class->get_property = gst_pocketsphinx_get_property; + gobject_class->finalize = gst_pocketsphinx_finalize; + + /* TODO: We will bridge cmd_ln.h properties to GObject + * properties here somehow eventually. 
*/ + g_object_class_install_property + (gobject_class, PROP_HMM_DIR, + g_param_spec_string("hmm", "HMM Directory", + "Directory containing acoustic model parameters", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_LM_FILE, + g_param_spec_string("lm", "LM File", + "Language model file", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_LMCTL_FILE, + g_param_spec_string("lmctl", "LM Control File", + "Language model control file (for class LMs)", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_FSG_FILE, + g_param_spec_string("fsg", "FSG File", + "Finite state grammar file", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_ALLPHONE_FILE, + g_param_spec_string("allphone", "Allphone File", + "Phonetic language model file", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_KWS_FILE, + g_param_spec_string("kws", "Keyphrases File", + "List of keyphrases for spotting", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_JSGF_FILE, + g_param_spec_string("jsgf", "Grammer file", + "File with grammer in Java Speech Grammar Format (JSGF)", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_DICT_FILE, + g_param_spec_string("dict", "Dictionary File", + "Dictionary File", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_MLLR_FILE, + g_param_spec_string("mllr", "MLLR transformation file", + "Transformation to apply to means and variances", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_FWDFLAT, + g_param_spec_boolean("fwdflat", "Flat Lexicon Search", + "Enable Flat Lexicon Search", + FALSE, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_BESTPATH, + g_param_spec_boolean("bestpath", "Graph Search", + "Enable 
Graph Search", + FALSE, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_MAXHMMPF, + g_param_spec_int("maxhmmpf", "Maximum HMMs per frame", + "Maximum number of HMMs searched per frame", + 1, 100000, 1000, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_MAXWPF, + g_param_spec_int("maxwpf", "Maximum words per frame", + "Maximum number of words searched per frame", + 1, 100000, 10, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_BEAM, + g_param_spec_double("beam", "Beam width applied to every frame in Viterbi search", + "Beam width applied to every frame in Viterbi search", + -1, 1, 1e-48, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_PBEAM, + g_param_spec_double("pbeam", "Beam width applied to phone transitions", + "Beam width applied to phone transitions", + -1, 1, 1e-48, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_WBEAM, + g_param_spec_double("wbeam", "Beam width applied to word exits", + "Beam width applied to phone transitions", + -1, 1, 7e-29, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_DSRATIO, + g_param_spec_int("dsratio", "Frame downsampling ratio", + "Evaluate acoustic model every N frames", + 1, 10, 1, + G_PARAM_READWRITE)); + + /* Could be changed on runtime when ps is already initialized */ + g_object_class_install_property + (gobject_class, PROP_LM_NAME, + g_param_spec_string("lmname", "LM Name", + "Language model name (to select LMs from lmctl)", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_LATDIR, + g_param_spec_string("latdir", "Lattice Directory", + "Output Directory for Lattices", + NULL, + G_PARAM_READWRITE)); + g_object_class_install_property + (gobject_class, PROP_DECODER, + g_param_spec_boxed("decoder", "Decoder object", + "The underlying decoder", + PS_DECODER_TYPE, + G_PARAM_READABLE)); + + + 
GST_DEBUG_CATEGORY_INIT(pocketsphinx_debug, "pocketsphinx", 0, + "Automatic Speech Recognition"); + + + element_class->change_state = gst_pocketsphinx_change_state; + + gst_element_class_add_pad_template(element_class, + gst_static_pad_template_get(&sink_factory)); + gst_element_class_add_pad_template(element_class, + gst_static_pad_template_get(&src_factory)); + + gst_element_class_set_static_metadata(element_class, "PocketSphinx", "Filter/Audio", "Convert speech to text", "CMUSphinx-devel "); + +} + +static void +gst_pocketsphinx_set_string(GstPocketSphinx *ps, + const gchar *key, const GValue *value) +{ + if (value != NULL) { + cmd_ln_set_str_r(ps->config, key, g_value_get_string(value)); + } else { + cmd_ln_set_str_r(ps->config, key, NULL); + } +} + +static void +gst_pocketsphinx_set_int(GstPocketSphinx *ps, + const gchar *key, const GValue *value) +{ + cmd_ln_set_int32_r(ps->config, key, g_value_get_int(value)); +} + +static void +gst_pocketsphinx_set_boolean(GstPocketSphinx *ps, + const gchar *key, const GValue *value) +{ + cmd_ln_set_boolean_r(ps->config, key, g_value_get_boolean(value)); +} + +static void +gst_pocketsphinx_set_double(GstPocketSphinx *ps, + const gchar *key, const GValue *value) +{ + cmd_ln_set_float_r(ps->config, key, g_value_get_double(value)); +} + +static void +gst_pocketsphinx_set_property(GObject * object, guint prop_id, + const GValue * value, GParamSpec * pspec) +{ + GstPocketSphinx *ps = GST_POCKETSPHINX(object); + + switch (prop_id) { + + case PROP_HMM_DIR: + gst_pocketsphinx_set_string(ps, "-hmm", value); + break; + case PROP_LM_FILE: + /* FSG and LM are mutually exclusive. 
*/ + gst_pocketsphinx_set_string(ps, "-lm", value); + gst_pocketsphinx_set_string(ps, "-lmctl", NULL); + gst_pocketsphinx_set_string(ps, "-fsg", NULL); + gst_pocketsphinx_set_string(ps, "-allphone", NULL); + gst_pocketsphinx_set_string(ps, "-kws", NULL); + gst_pocketsphinx_set_string(ps, "-jsgf", NULL); + break; + case PROP_LMCTL_FILE: + /* FSG and LM are mutually exclusive. */ + gst_pocketsphinx_set_string(ps, "-lm", NULL); + gst_pocketsphinx_set_string(ps, "-lmctl", value); + gst_pocketsphinx_set_string(ps, "-fsg", NULL); + gst_pocketsphinx_set_string(ps, "-allphone", NULL); + gst_pocketsphinx_set_string(ps, "-kws", NULL); + gst_pocketsphinx_set_string(ps, "-jsgf", NULL); + break; + case PROP_DICT_FILE: + gst_pocketsphinx_set_string(ps, "-dict", value); + break; + case PROP_MLLR_FILE: + gst_pocketsphinx_set_string(ps, "-mllr", value); + break; + case PROP_FSG_FILE: + /* FSG and LM are mutually exclusive */ + gst_pocketsphinx_set_string(ps, "-lm", NULL); + gst_pocketsphinx_set_string(ps, "-lmctl", NULL); + gst_pocketsphinx_set_string(ps, "-fsg", value); + gst_pocketsphinx_set_string(ps, "-allphone", NULL); + gst_pocketsphinx_set_string(ps, "-kws", NULL); + gst_pocketsphinx_set_string(ps, "-jsgf", NULL); + break; + case PROP_ALLPHONE_FILE: + /* FSG and LM are mutually exclusive. */ + gst_pocketsphinx_set_string(ps, "-lm", NULL); + gst_pocketsphinx_set_string(ps, "-lmctl", NULL); + gst_pocketsphinx_set_string(ps, "-fsg", NULL); + gst_pocketsphinx_set_string(ps, "-allphone", value); + gst_pocketsphinx_set_string(ps, "-kws", NULL); + gst_pocketsphinx_set_string(ps, "-jsgf", NULL); + break; + case PROP_KWS_FILE: + /* FSG and LM are mutually exclusive. 
*/ + gst_pocketsphinx_set_string(ps, "-lm", NULL); + gst_pocketsphinx_set_string(ps, "-lmctl", NULL); + gst_pocketsphinx_set_string(ps, "-fsg", NULL); + gst_pocketsphinx_set_string(ps, "-allphone", NULL); + gst_pocketsphinx_set_string(ps, "-jsgf", NULL); + gst_pocketsphinx_set_string(ps, "-kws", value); + break; + case PROP_JSGF_FILE: + /* FSG and LM are mutually exclusive. */ + gst_pocketsphinx_set_string(ps, "-lm", NULL); + gst_pocketsphinx_set_string(ps, "-lmctl", NULL); + gst_pocketsphinx_set_string(ps, "-fsg", NULL); + gst_pocketsphinx_set_string(ps, "-allphone", NULL); + gst_pocketsphinx_set_string(ps, "-kws", NULL); + gst_pocketsphinx_set_string(ps, "-jsgf", value); + break; + case PROP_FWDFLAT: + gst_pocketsphinx_set_boolean(ps, "-fwdflat", value); + break; + case PROP_BESTPATH: + gst_pocketsphinx_set_boolean(ps, "-bestpath", value); + break; + case PROP_MAXHMMPF: + gst_pocketsphinx_set_int(ps, "-maxhmmpf", value); + break; + case PROP_MAXWPF: + gst_pocketsphinx_set_int(ps, "-maxwpf", value); + break; + case PROP_BEAM: + gst_pocketsphinx_set_double(ps, "-beam", value); + break; + case PROP_PBEAM: + gst_pocketsphinx_set_double(ps, "-pbeam", value); + break; + case PROP_WBEAM: + gst_pocketsphinx_set_double(ps, "-wbeam", value); + break; + case PROP_DSRATIO: + gst_pocketsphinx_set_int(ps, "-ds", value); + break; + + + case PROP_LATDIR: + if (ps->latdir) + g_free(ps->latdir); + ps->latdir = g_strdup(g_value_get_string(value)); + break; + case PROP_LM_NAME: + gst_pocketsphinx_set_string(ps, "-fsg", NULL); + gst_pocketsphinx_set_string(ps, "-lm", NULL); + gst_pocketsphinx_set_string(ps, "-allphone", NULL); + gst_pocketsphinx_set_string(ps, "-kws", NULL); + gst_pocketsphinx_set_string(ps, "-jsgf", NULL); + gst_pocketsphinx_set_string(ps, "-lmname", value); + + /** + * Chances are that lmctl is already loaded and all + * corresponding searches are configured, so we simply + * try to set the search + */ + + if (value != NULL && ps->ps) { + ps_set_search(ps->ps, 
g_value_get_string(value)); + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + return; + } + + /* If decoder was already initialized, reinit */ + if (ps->ps && prop_id != PROP_LATDIR && prop_id != PROP_LM_NAME) + ps_reinit(ps->ps, ps->config); +} + +static void +gst_pocketsphinx_get_property(GObject * object, guint prop_id, + GValue * value, GParamSpec * pspec) +{ + GstPocketSphinx *ps = GST_POCKETSPHINX(object); + + switch (prop_id) { + case PROP_DECODER: + g_value_set_boxed(value, ps->ps); + break; + case PROP_HMM_DIR: + g_value_set_string(value, cmd_ln_str_r(ps->config, "-hmm")); + break; + case PROP_LM_FILE: + g_value_set_string(value, cmd_ln_str_r(ps->config, "-lm")); + break; + case PROP_LMCTL_FILE: + g_value_set_string(value, cmd_ln_str_r(ps->config, "-lmctl")); + break; + case PROP_LM_NAME: + g_value_set_string(value, cmd_ln_str_r(ps->config, "-lmname")); + break; + case PROP_DICT_FILE: + g_value_set_string(value, cmd_ln_str_r(ps->config, "-dict")); + break; + case PROP_MLLR_FILE: + g_value_set_string(value, cmd_ln_str_r(ps->config, "-mllr")); + break; + case PROP_FSG_FILE: + g_value_set_string(value, cmd_ln_str_r(ps->config, "-fsg")); + break; + case PROP_ALLPHONE_FILE: + g_value_set_string(value, cmd_ln_str_r(ps->config, "-allphone")); + break; + case PROP_KWS_FILE: + g_value_set_string(value, cmd_ln_str_r(ps->config, "-kws")); + break; + case PROP_JSGF_FILE: + g_value_set_string(value, cmd_ln_str_r(ps->config, "-jsgf")); + break; + case PROP_FWDFLAT: + g_value_set_boolean(value, cmd_ln_boolean_r(ps->config, "-fwdflat")); + break; + case PROP_BESTPATH: + g_value_set_boolean(value, cmd_ln_boolean_r(ps->config, "-bestpath")); + break; + case PROP_LATDIR: + g_value_set_string(value, ps->latdir); + break; + case PROP_MAXHMMPF: + g_value_set_int(value, cmd_ln_int32_r(ps->config, "-maxhmmpf")); + break; + case PROP_MAXWPF: + g_value_set_int(value, cmd_ln_int32_r(ps->config, "-maxwpf")); + break; + case PROP_BEAM: + 
g_value_set_double(value, cmd_ln_float_r(ps->config, "-beam")); + break; + case PROP_PBEAM: + g_value_set_double(value, cmd_ln_float_r(ps->config, "-pbeam")); + break; + case PROP_WBEAM: + g_value_set_double(value, cmd_ln_float_r(ps->config, "-wbeam")); + break; + case PROP_DSRATIO: + g_value_set_int(value, cmd_ln_int32_r(ps->config, "-ds")); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +gst_pocketsphinx_finalize(GObject * gobject) +{ + GstPocketSphinx *ps = GST_POCKETSPHINX(gobject); + + ps_free(ps->ps); + cmd_ln_free_r(ps->config); + g_free(ps->last_result); + g_free(ps->latdir); + + G_OBJECT_CLASS(gst_pocketsphinx_parent_class)->finalize(gobject); +} + +static void +gst_pocketsphinx_init(GstPocketSphinx * ps) +{ + ps->sinkpad = + gst_pad_new_from_static_template(&sink_factory, "sink"); + ps->srcpad = + gst_pad_new_from_static_template(&src_factory, "src"); + + /* Parse default command-line options. */ + ps->config = cmd_ln_parse_r(NULL, ps_args(), default_argc, default_argv, FALSE); + ps_default_search_args(ps->config); + + /* Set up pads. */ + gst_element_add_pad(GST_ELEMENT(ps), ps->sinkpad); + gst_pad_set_chain_function(ps->sinkpad, gst_pocketsphinx_chain); + gst_pad_set_event_function(ps->sinkpad, gst_pocketsphinx_event); + gst_pad_use_fixed_caps(ps->sinkpad); + + gst_element_add_pad(GST_ELEMENT(ps), ps->srcpad); + gst_pad_use_fixed_caps(ps->srcpad); + + /* Initialize time. 
*/ + ps->last_result_time = 0; + ps->last_result = NULL; +} + +static GstStateChangeReturn +gst_pocketsphinx_change_state(GstElement *element, GstStateChange transition) +{ + GstPocketSphinx *ps = GST_POCKETSPHINX(element); + + switch (transition) { + case GST_STATE_CHANGE_NULL_TO_READY: + ps->ps = ps_init(ps->config); + if (ps->ps == NULL) { + GST_ELEMENT_ERROR(GST_ELEMENT(ps), LIBRARY, INIT, + ("Failed to initialize PocketSphinx"), + ("Failed to initialize PocketSphinx")); + return GST_STATE_CHANGE_FAILURE; + } + break; + case GST_STATE_CHANGE_READY_TO_NULL: + ps_free(ps->ps); + ps->ps = NULL; + default: + break; + } + + return GST_ELEMENT_CLASS(gst_pocketsphinx_parent_class)->change_state(element, transition); +} + +static void +gst_pocketsphinx_post_message(GstPocketSphinx *ps, gboolean final, + GstClockTime timestamp, gint32 prob, const gchar *hyp) +{ + GstStructure *s = gst_structure_new ("pocketsphinx", + "timestamp", G_TYPE_UINT64, timestamp, + "final", G_TYPE_BOOLEAN, final, + "confidence", G_TYPE_LONG, prob, + "hypothesis", G_TYPE_STRING, hyp, NULL); + + gst_element_post_message (GST_ELEMENT (ps), gst_message_new_element (GST_OBJECT (ps), s)); +} + +static GstFlowReturn +gst_pocketsphinx_chain(GstPad * pad, GstObject *parent, GstBuffer * buffer) +{ + GstPocketSphinx *ps; + GstMapInfo info; + gboolean in_speech; + + ps = GST_POCKETSPHINX(parent); + + /* Start an utterance for the first buffer we get */ + if (!ps->listening_started) { + ps->listening_started = TRUE; + ps->speech_started = FALSE; + ps_start_utt(ps->ps); + } + + gst_buffer_map (buffer, &info, GST_MAP_READ); + ps_process_raw(ps->ps, + (short*) info.data, + info.size / sizeof(short), + FALSE, FALSE); + gst_buffer_unmap (buffer, &info); + + in_speech = ps_get_in_speech(ps->ps); + if (in_speech && !ps->speech_started) { + ps->speech_started = TRUE; + } + if (!in_speech && ps->speech_started) { + gst_pocketsphinx_finalize_utt(ps); + } else if (ps->last_result_time == 0 + /* Get a partial result 
every now and then, see if it is different. */ + /* Check every 100 milliseconds. */ + || (GST_BUFFER_TIMESTAMP(buffer) - ps->last_result_time) > 100*10*1000) { + int32 score; + char const *hyp; + + hyp = ps_get_hyp(ps->ps, &score); + ps->last_result_time = GST_BUFFER_TIMESTAMP(buffer); + if (hyp && strlen(hyp) > 0) { + if (ps->last_result == NULL || 0 != strcmp(ps->last_result, hyp)) { + g_free(ps->last_result); + ps->last_result = g_strdup(hyp); + gst_pocketsphinx_post_message(ps, FALSE, ps->last_result_time, + ps_get_prob(ps->ps), hyp); + } + } + } + + gst_buffer_unref(buffer); + return GST_FLOW_OK; +} + + +static void +gst_pocketsphinx_finalize_utt(GstPocketSphinx *ps) +{ + GstBuffer *buffer; + char const *hyp; + int32 score; + + hyp = NULL; + if (!ps->listening_started) + return; + + ps_end_utt(ps->ps); + ps->listening_started = FALSE; + hyp = ps_get_hyp(ps->ps, &score); + + if (hyp) { + gst_pocketsphinx_post_message(ps, TRUE, GST_CLOCK_TIME_NONE, + ps_get_prob(ps->ps), hyp); + buffer = gst_buffer_new_and_alloc(strlen(hyp) + 1); + gst_buffer_fill(buffer, 0, hyp, strlen(hyp)); + gst_buffer_fill(buffer, strlen(hyp), "\n", 1); + gst_pad_push(ps->srcpad, buffer); + } + + if (ps->latdir) { + char *latfile; + char uttid[16]; + + sprintf(uttid, "%09u", ps->uttno); + ps->uttno++; + latfile = string_join(ps->latdir, "/", uttid, ".lat", NULL); + ps_lattice_t *dag; + if ((dag = ps_get_lattice(ps->ps))) + ps_lattice_write(dag, latfile); + ckd_free(latfile); + } +} + +static gboolean +gst_pocketsphinx_event(GstPad *pad, GstObject *parent, GstEvent *event) +{ + GstPocketSphinx *ps; + + ps = GST_POCKETSPHINX(parent); + + switch (event->type) { + case GST_EVENT_EOS: + { + gst_pocketsphinx_finalize_utt(ps); + return gst_pad_event_default(pad, parent, event); + } + default: + return gst_pad_event_default(pad, parent, event); + } +} + +static void +gst_pocketsphinx_log(void *user_data, err_lvl_t lvl, const char *fmt, ...) 
+{ + static const int gst_level[ERR_MAX] = {GST_LEVEL_DEBUG, GST_LEVEL_INFO, + GST_LEVEL_INFO, GST_LEVEL_WARNING, GST_LEVEL_ERROR, GST_LEVEL_ERROR}; + + va_list ap; + va_start(ap, fmt); + gst_debug_log_valist(pocketsphinx_debug, gst_level[lvl], "", "", 0, NULL, fmt, ap); + va_end(ap); +} + + +static gboolean +plugin_init(GstPlugin * plugin) +{ + + err_set_callback(gst_pocketsphinx_log, NULL); + + if (!gst_element_register(plugin, "pocketsphinx", + GST_RANK_NONE, GST_TYPE_POCKETSPHINX)) + return FALSE; + return TRUE; +} + +#define PACKAGE PACKAGE_NAME +GST_PLUGIN_DEFINE(GST_VERSION_MAJOR, + GST_VERSION_MINOR, + pocketsphinx, + "PocketSphinx plugin", + plugin_init, PACKAGE_VERSION, + "BSD", + "PocketSphinx", "http://cmusphinx.sourceforge.net/") diff --git a/gst/gstpocketsphinx.h b/gst/gstpocketsphinx.h new file mode 100644 index 000000000..31b9555e8 --- /dev/null +++ b/gst/gstpocketsphinx.h @@ -0,0 +1,98 @@ +/* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */ +/* ==================================================================== + * Copyright (c) 2007 Carnegie Mellon University. All rights + * reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * This work was supported in part by funding from the Defense Advanced + * Research Projects Agency and the National Science Foundation of the + * United States of America, and the CMU Sphinx Speech Consortium. 
+ * + * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND + * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY + * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * ==================================================================== + * + * Author: David Huggins-Daines + */ + +#ifndef __GST_POCKETSPHINX_H__ +#define __GST_POCKETSPHINX_H__ + +#include +#include + +G_BEGIN_DECLS + +#define GST_TYPE_POCKETSPHINX \ + (gst_pocketsphinx_get_type()) +#define GST_POCKETSPHINX(obj) \ + (G_TYPE_CHECK_INSTANCE_CAST((obj),GST_TYPE_POCKETSPHINX,GstPocketSphinx)) +#define GST_POCKETSPHINX_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_CAST((klass),GST_TYPE_POCKETSPHINX,GstPocketSphinxClass)) +#define GST_IS_POCKETSPHINX(obj) \ + (G_TYPE_CHECK_INSTANCE_TYPE((obj),GST_TYPE_POCKETSPHINX)) +#define GST_IS_POCKETSPHINX_CLASS(klass) \ + (G_TYPE_CHECK_CLASS_TYPE((klass),GST_TYPE_POCKETSPHINX)) + +typedef struct _GstPocketSphinx GstPocketSphinx; +typedef struct _GstPocketSphinxClass GstPocketSphinxClass; + +struct _GstPocketSphinx +{ + GstElement element; + + GstPad *sinkpad, *srcpad; + + ps_decoder_t *ps; + cmd_ln_t *config; + + gchar *latdir; /**< Output directory for word lattices. */ + + gboolean speech_started; + gboolean listening_started; + gint uttno; + + GstClockTime last_result_time; /**< Timestamp of last partial result. 
*/ + char *last_result; /**< String of last partial result. */ +}; + +struct _GstPocketSphinxClass +{ + GstElementClass parent_class; + + void (*partial_result) (GstElement *element, const gchar *hyp_str); + void (*result) (GstElement *element, const gchar *hyp_str); +}; + +GType gst_pocketsphinx_get_type(void); + +/* + * Boxing of decoder. + */ +#define PS_DECODER_TYPE (ps_decoder_get_type()) +GType ps_decoder_get_type(void); + +G_END_DECLS + +#endif /* __GST_POCKETSPHINX_H__ */ diff --git a/gst/livedemo.c b/gst/livedemo.c new file mode 100644 index 000000000..44290f19d --- /dev/null +++ b/gst/livedemo.c @@ -0,0 +1,108 @@ +#include +#include + +static gboolean +bus_call(GstBus * bus, GstMessage * msg, gpointer data) +{ + GMainLoop *loop = (GMainLoop *) data; + + switch (GST_MESSAGE_TYPE(msg)) { + + case GST_MESSAGE_EOS: + g_print("End of stream\n"); + g_main_loop_quit(loop); + break; + + case GST_MESSAGE_ERROR:{ + gchar *debug; + GError *error; + + gst_message_parse_error(msg, &error, &debug); + g_free(debug); + + g_printerr("Error: %s\n", error->message); + g_error_free(error); + + g_main_loop_quit(loop); + break; + } + default: + break; + } + + const GstStructure *st = gst_message_get_structure(msg); + if (st && strcmp(gst_structure_get_name(st), "pocketsphinx") == 0) { + if (g_value_get_boolean(gst_structure_get_value(st, "final"))) + g_print("Got result %s\n", g_value_get_string(gst_structure_get_value(st, "hypothesis"))); + } + + return TRUE; +} + + +int +main(int argc, char *argv[]) +{ + GMainLoop *loop; + + GstElement *pipeline, *source, *decoder, *sink; + GstBus *bus; + guint bus_watch_id; + + /* Initialisation */ + gst_init(&argc, &argv); + + loop = g_main_loop_new(NULL, FALSE); + + /* Check input arguments */ + if (argc != 2) { + g_printerr("Usage: %s \n", argv[0]); + return -1; + } + + /* Create gstreamer elements */ + pipeline = gst_pipeline_new("pipeline"); + source = gst_element_factory_make("filesrc", "file-source"); + decoder = 
gst_element_factory_make("pocketsphinx", "asr"); + sink = gst_element_factory_make("fakesink", "output"); + + if (!pipeline || !source || !decoder || !sink) { + g_printerr("One element could not be created. Exiting.\n"); + return -1; + } + + /* Set up the pipeline */ + /* we set the input filename to the source element */ + g_object_set(G_OBJECT(source), "location", argv[1], NULL); + + g_object_set(G_OBJECT(decoder), "lmctl", "test.lmctl", NULL); + g_object_set(G_OBJECT(decoder), "lmname", "tidigits", NULL); + + /* we add a message handler */ + bus = gst_pipeline_get_bus(GST_PIPELINE(pipeline)); + bus_watch_id = gst_bus_add_watch(bus, bus_call, loop); + gst_object_unref(bus); + + /* we add all elements into the pipeline */ + gst_bin_add_many(GST_BIN(pipeline), source, decoder, sink, NULL); + + /* we link the elements together */ + gst_element_link_many(source, decoder, sink, NULL); + + gst_element_set_state(pipeline, GST_STATE_PLAYING); + + /* Iterate */ + g_print("Running...\n"); + g_main_loop_run(loop); + + /* Out of the main loop, clean up nicely */ + g_print("Returned, stopping playback\n"); + gst_element_set_state(pipeline, GST_STATE_NULL); + + g_print("Deleting pipeline\n"); + gst_object_unref(GST_OBJECT(pipeline)); + g_source_remove(bus_watch_id); + g_main_loop_unref(loop); + + return 0; +} diff --git a/gst/livedemo.py b/gst/livedemo.py new file mode 100644 index 000000000..e73cf8be8 --- /dev/null +++ b/gst/livedemo.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python + +# Copyright (c) 2008 Carnegie Mellon University. +# +# You may modify and redistribute this file under the same terms as +# the CMU Sphinx system. See LICENSE for more information. 
class DemoApp(object):
    """GStreamer/PocketSphinx Demo Application"""

    def __init__(self):
        """Build the window first, then the recognition pipeline."""
        self.init_gui()
        self.init_gst()

    def init_gui(self):
        """Create the window: a text view above a "Speak" toggle button."""
        window = gtk.Window()
        window.connect("delete-event", gtk.main_quit)
        window.set_default_size(400,200)
        window.set_border_width(10)
        self.window = window

        layout = gtk.VBox()

        self.textbuf = gtk.TextBuffer()
        self.text = gtk.TextView(buffer=self.textbuf)
        self.text.set_wrap_mode(gtk.WRAP_WORD)
        layout.pack_start(self.text)

        self.button = gtk.ToggleButton("Speak")
        self.button.connect('clicked', self.button_clicked)
        layout.pack_start(self.button, False, False, 5)

        window.add(layout)
        window.show_all()

    def init_gst(self):
        """Create the pipeline, watch its bus for element messages, pause it."""
        self.pipeline = gst.parse_launch(
            'autoaudiosrc ! audioconvert ! audioresample '
            '! pocketsphinx ! fakesink')

        message_bus = self.pipeline.get_bus()
        message_bus.add_signal_watch()
        message_bus.connect('message::element', self.element_message)

        self.pipeline.set_state(gst.State.PAUSED)

    def element_message(self, bus, msg):
        """Dispatch 'pocketsphinx' element messages; ignore all others."""
        structure = msg.get_structure()
        if structure.get_name() != 'pocketsphinx':
            return

        if structure.get_value('final'):
            # Final result: show it, then pause and release the button.
            self.final_result(structure.get_value('hypothesis'),
                              structure.get_value('confidence'))
            self.pipeline.set_state(gst.State.PAUSED)
            self.button.set_active(False)
            return

        if structure.get_value('hypothesis'):
            self.partial_result(structure.get_value('hypothesis'))

    def partial_result(self, hyp):
        """Replace the current selection with hyp and leave hyp selected."""
        buf = self.textbuf
        # All this stuff appears as one single action
        buf.begin_user_action()
        buf.delete_selection(True, self.text.get_editable())
        buf.insert_at_cursor(hyp)
        # Move the insert mark back over the new text so that it stays
        # selected and the next result replaces it.
        insert_mark = buf.get_insert()
        cursor = buf.get_iter_at_mark(insert_mark)
        cursor.backward_chars(len(hyp))
        buf.move_mark(insert_mark, cursor)
        buf.end_user_action()

    def final_result(self, hyp, confidence):
        """Replace the current selection with the final hypothesis."""
        buf = self.textbuf
        # All this stuff appears as one single action
        buf.begin_user_action()
        buf.delete_selection(True, self.text.get_editable())
        buf.insert_at_cursor(hyp)
        buf.end_user_action()

    def button_clicked(self, button):
        """Toggle between listening (PLAYING) and idle (PAUSED)."""
        listening = button.get_active()
        button.set_label("Stop" if listening else "Speak")
        self.pipeline.set_state(
            gst.State.PLAYING if listening else gst.State.PAUSED)
----------------------------- 2 files changed, 19 insertions(+), 29 deletions(-) create mode 100644 gst/CMakeLists.txt delete mode 100644 gst/Makefile.am diff --git a/gst/CMakeLists.txt b/gst/CMakeLists.txt new file mode 100644 index 000000000..bb69452ac --- /dev/null +++ b/gst/CMakeLists.txt @@ -0,0 +1,19 @@ +find_package(PkgConfig REQUIRED) +pkg_check_modules(GOBJECT gobject-2.0 REQUIRED) +pkg_check_modules(GSTREAMER gstreamer-1.0 gstreamer-base-1.0 gstreamer-plugins-base-1.0) +add_library(gstpocketsphinx SHARED gstpocketsphinx.c) +set_property(TARGET pocketsphinx PROPERTY POSITION_INDEPENDENT_CODE on) +target_link_libraries(gstpocketsphinx PUBLIC + pocketsphinx + ${GSTREAMER_LIBRARIES} + ${GOBJECT_LIBRARIES} + ) +target_include_directories( + gstpocketsphinx PRIVATE ${CMAKE_BINARY_DIR} + gstpocketsphinx PRIVATE ${CMAKE_SOURCE_DIR}/src + gstpocketsphinx PUBLIC ${CMAKE_SOURCE_DIR}/include + gstpocketsphinx PUBLIC ${CMAKE_BINARY_DIR}/include + gstpocketsphinx INTERFACE ${CMAKE_SOURCE_DIR}/include + gstpocketsphinx INTERFACE ${CMAKE_BINARY_DIR}/include + gstpocketsphinx PUBLIC ${GSTREAMER_INCLUDE_DIRS} ${GOBJECT_INCLUDE_DIRS} + ) diff --git a/gst/Makefile.am b/gst/Makefile.am deleted file mode 100644 index d3f70bf01..000000000 --- a/gst/Makefile.am +++ /dev/null @@ -1,29 +0,0 @@ -my_plugins = -my_headers = -my_files = -if BUILD_GST -my_plugins += libgstpocketsphinx.la -endif - -plugin_LTLIBRARIES = $(my_plugins) - -libgstpocketsphinx_la_SOURCES = gstpocketsphinx.c - -libgstpocketsphinx_la_LIBADD = \ - $(GST_LIBS) \ - -lgstaudio-$(GST_MAJORMINOR) \ - $(top_builddir)/src/libpocketsphinx/libpocketsphinx.la \ - -lsphinxbase - -libgstpocketsphinx_la_LDFLAGS = $(GST_PLUGIN_LDFLAGS) - -noinst_HEADERS = gstpocketsphinx.h - -AM_CFLAGS = \ - -I$(top_srcdir)/include \ - -I$(top_builddir)/include \ - $(GST_CFLAGS) \ - -DMODELDIR=\"$(pkgdatadir)/model\" - -EXTRA_DIST = livedemo.py livedemo.c - From 29c391b0b7f294afaee7b402e68c6e9435e1d1b7 Mon Sep 17 00:00:00 2001 From: David 
Huggins-Daines Date: Wed, 28 Sep 2022 22:18:55 -0400 Subject: [PATCH 3/7] fix: minimally update GStreamer plugin for new API --- gst/gstpocketsphinx.c | 182 +++++++++++++++++++++--------------------- gst/gstpocketsphinx.h | 2 +- 2 files changed, 94 insertions(+), 90 deletions(-) diff --git a/gst/gstpocketsphinx.c b/gst/gstpocketsphinx.c index 32442d1cd..0906987df 100644 --- a/gst/gstpocketsphinx.c +++ b/gst/gstpocketsphinx.c @@ -91,8 +91,9 @@ #include #include -#include -#include +#include +#include "util/strfuncs.h" +#include "util/ckd_alloc.h" #include "gstpocketsphinx.h" @@ -153,12 +154,6 @@ enum * Static data. */ -/* Default command line. (will go away soon and be constructed using properties) */ -static char *default_argv[] = { - "gst-pocketsphinx", -}; -static const int default_argc = sizeof(default_argv)/sizeof(default_argv[0]); - static GstStaticPadTemplate sink_factory = GST_STATIC_PAD_TEMPLATE("sink", GST_PAD_SINK, @@ -176,6 +171,12 @@ static GstStaticPadTemplate src_factory = GST_STATIC_CAPS("text/plain") ); +static void +wrap_ps_free(void *ps) +{ + (void)ps_free((ps_decoder_t *)ps); +} + /* * Boxing of ps_decoder_t. */ @@ -189,7 +190,7 @@ ps_decoder_get_type(void) ("PSDecoder", /* Conveniently, these should just work. 
*/ (GBoxedCopyFunc) ps_retain, - (GBoxedFreeFunc) ps_free); + (GBoxedFreeFunc) wrap_ps_free); } return ps_decoder_type; @@ -357,9 +358,9 @@ gst_pocketsphinx_set_string(GstPocketSphinx *ps, const gchar *key, const GValue *value) { if (value != NULL) { - cmd_ln_set_str_r(ps->config, key, g_value_get_string(value)); + ps_config_set_str(ps->config, key, g_value_get_string(value)); } else { - cmd_ln_set_str_r(ps->config, key, NULL); + ps_config_set_str(ps->config, key, NULL); } } @@ -367,21 +368,21 @@ static void gst_pocketsphinx_set_int(GstPocketSphinx *ps, const gchar *key, const GValue *value) { - cmd_ln_set_int32_r(ps->config, key, g_value_get_int(value)); + ps_config_set_int(ps->config, key, g_value_get_int(value)); } static void gst_pocketsphinx_set_boolean(GstPocketSphinx *ps, const gchar *key, const GValue *value) { - cmd_ln_set_boolean_r(ps->config, key, g_value_get_boolean(value)); + ps_config_set_bool(ps->config, key, g_value_get_boolean(value)); } static void gst_pocketsphinx_set_double(GstPocketSphinx *ps, const gchar *key, const GValue *value) { - cmd_ln_set_float_r(ps->config, key, g_value_get_double(value)); + ps_config_set_float(ps->config, key, g_value_get_double(value)); } static void @@ -393,91 +394,91 @@ gst_pocketsphinx_set_property(GObject * object, guint prop_id, switch (prop_id) { case PROP_HMM_DIR: - gst_pocketsphinx_set_string(ps, "-hmm", value); + gst_pocketsphinx_set_string(ps, "hmm", value); break; case PROP_LM_FILE: /* FSG and LM are mutually exclusive. 
*/ - gst_pocketsphinx_set_string(ps, "-lm", value); - gst_pocketsphinx_set_string(ps, "-lmctl", NULL); - gst_pocketsphinx_set_string(ps, "-fsg", NULL); - gst_pocketsphinx_set_string(ps, "-allphone", NULL); - gst_pocketsphinx_set_string(ps, "-kws", NULL); - gst_pocketsphinx_set_string(ps, "-jsgf", NULL); + gst_pocketsphinx_set_string(ps, "lm", value); + gst_pocketsphinx_set_string(ps, "lmctl", NULL); + gst_pocketsphinx_set_string(ps, "fsg", NULL); + gst_pocketsphinx_set_string(ps, "allphone", NULL); + gst_pocketsphinx_set_string(ps, "kws", NULL); + gst_pocketsphinx_set_string(ps, "jsgf", NULL); break; case PROP_LMCTL_FILE: /* FSG and LM are mutually exclusive. */ - gst_pocketsphinx_set_string(ps, "-lm", NULL); - gst_pocketsphinx_set_string(ps, "-lmctl", value); - gst_pocketsphinx_set_string(ps, "-fsg", NULL); - gst_pocketsphinx_set_string(ps, "-allphone", NULL); - gst_pocketsphinx_set_string(ps, "-kws", NULL); - gst_pocketsphinx_set_string(ps, "-jsgf", NULL); + gst_pocketsphinx_set_string(ps, "lm", NULL); + gst_pocketsphinx_set_string(ps, "lmctl", value); + gst_pocketsphinx_set_string(ps, "fsg", NULL); + gst_pocketsphinx_set_string(ps, "allphone", NULL); + gst_pocketsphinx_set_string(ps, "kws", NULL); + gst_pocketsphinx_set_string(ps, "jsgf", NULL); break; case PROP_DICT_FILE: - gst_pocketsphinx_set_string(ps, "-dict", value); + gst_pocketsphinx_set_string(ps, "dict", value); break; case PROP_MLLR_FILE: - gst_pocketsphinx_set_string(ps, "-mllr", value); + gst_pocketsphinx_set_string(ps, "mllr", value); break; case PROP_FSG_FILE: /* FSG and LM are mutually exclusive */ - gst_pocketsphinx_set_string(ps, "-lm", NULL); - gst_pocketsphinx_set_string(ps, "-lmctl", NULL); - gst_pocketsphinx_set_string(ps, "-fsg", value); - gst_pocketsphinx_set_string(ps, "-allphone", NULL); - gst_pocketsphinx_set_string(ps, "-kws", NULL); - gst_pocketsphinx_set_string(ps, "-jsgf", NULL); + gst_pocketsphinx_set_string(ps, "lm", NULL); + gst_pocketsphinx_set_string(ps, "lmctl", NULL); + 
gst_pocketsphinx_set_string(ps, "fsg", value); + gst_pocketsphinx_set_string(ps, "allphone", NULL); + gst_pocketsphinx_set_string(ps, "kws", NULL); + gst_pocketsphinx_set_string(ps, "jsgf", NULL); break; case PROP_ALLPHONE_FILE: /* FSG and LM are mutually exclusive. */ - gst_pocketsphinx_set_string(ps, "-lm", NULL); - gst_pocketsphinx_set_string(ps, "-lmctl", NULL); - gst_pocketsphinx_set_string(ps, "-fsg", NULL); - gst_pocketsphinx_set_string(ps, "-allphone", value); - gst_pocketsphinx_set_string(ps, "-kws", NULL); - gst_pocketsphinx_set_string(ps, "-jsgf", NULL); + gst_pocketsphinx_set_string(ps, "lm", NULL); + gst_pocketsphinx_set_string(ps, "lmctl", NULL); + gst_pocketsphinx_set_string(ps, "fsg", NULL); + gst_pocketsphinx_set_string(ps, "allphone", value); + gst_pocketsphinx_set_string(ps, "kws", NULL); + gst_pocketsphinx_set_string(ps, "jsgf", NULL); break; case PROP_KWS_FILE: /* FSG and LM are mutually exclusive. */ - gst_pocketsphinx_set_string(ps, "-lm", NULL); - gst_pocketsphinx_set_string(ps, "-lmctl", NULL); - gst_pocketsphinx_set_string(ps, "-fsg", NULL); - gst_pocketsphinx_set_string(ps, "-allphone", NULL); - gst_pocketsphinx_set_string(ps, "-jsgf", NULL); - gst_pocketsphinx_set_string(ps, "-kws", value); + gst_pocketsphinx_set_string(ps, "lm", NULL); + gst_pocketsphinx_set_string(ps, "lmctl", NULL); + gst_pocketsphinx_set_string(ps, "fsg", NULL); + gst_pocketsphinx_set_string(ps, "allphone", NULL); + gst_pocketsphinx_set_string(ps, "jsgf", NULL); + gst_pocketsphinx_set_string(ps, "kws", value); break; case PROP_JSGF_FILE: /* FSG and LM are mutually exclusive. 
*/ - gst_pocketsphinx_set_string(ps, "-lm", NULL); - gst_pocketsphinx_set_string(ps, "-lmctl", NULL); - gst_pocketsphinx_set_string(ps, "-fsg", NULL); - gst_pocketsphinx_set_string(ps, "-allphone", NULL); - gst_pocketsphinx_set_string(ps, "-kws", NULL); - gst_pocketsphinx_set_string(ps, "-jsgf", value); + gst_pocketsphinx_set_string(ps, "lm", NULL); + gst_pocketsphinx_set_string(ps, "lmctl", NULL); + gst_pocketsphinx_set_string(ps, "fsg", NULL); + gst_pocketsphinx_set_string(ps, "allphone", NULL); + gst_pocketsphinx_set_string(ps, "kws", NULL); + gst_pocketsphinx_set_string(ps, "jsgf", value); break; case PROP_FWDFLAT: - gst_pocketsphinx_set_boolean(ps, "-fwdflat", value); + gst_pocketsphinx_set_boolean(ps, "fwdflat", value); break; case PROP_BESTPATH: - gst_pocketsphinx_set_boolean(ps, "-bestpath", value); + gst_pocketsphinx_set_boolean(ps, "bestpath", value); break; case PROP_MAXHMMPF: - gst_pocketsphinx_set_int(ps, "-maxhmmpf", value); + gst_pocketsphinx_set_int(ps, "maxhmmpf", value); break; case PROP_MAXWPF: - gst_pocketsphinx_set_int(ps, "-maxwpf", value); + gst_pocketsphinx_set_int(ps, "maxwpf", value); break; case PROP_BEAM: - gst_pocketsphinx_set_double(ps, "-beam", value); + gst_pocketsphinx_set_double(ps, "beam", value); break; case PROP_PBEAM: - gst_pocketsphinx_set_double(ps, "-pbeam", value); + gst_pocketsphinx_set_double(ps, "pbeam", value); break; case PROP_WBEAM: - gst_pocketsphinx_set_double(ps, "-wbeam", value); + gst_pocketsphinx_set_double(ps, "wbeam", value); break; case PROP_DSRATIO: - gst_pocketsphinx_set_int(ps, "-ds", value); + gst_pocketsphinx_set_int(ps, "ds", value); break; @@ -487,12 +488,12 @@ gst_pocketsphinx_set_property(GObject * object, guint prop_id, ps->latdir = g_strdup(g_value_get_string(value)); break; case PROP_LM_NAME: - gst_pocketsphinx_set_string(ps, "-fsg", NULL); - gst_pocketsphinx_set_string(ps, "-lm", NULL); - gst_pocketsphinx_set_string(ps, "-allphone", NULL); - gst_pocketsphinx_set_string(ps, "-kws", NULL); - 
gst_pocketsphinx_set_string(ps, "-jsgf", NULL); - gst_pocketsphinx_set_string(ps, "-lmname", value); + gst_pocketsphinx_set_string(ps, "fsg", NULL); + gst_pocketsphinx_set_string(ps, "lm", NULL); + gst_pocketsphinx_set_string(ps, "allphone", NULL); + gst_pocketsphinx_set_string(ps, "kws", NULL); + gst_pocketsphinx_set_string(ps, "jsgf", NULL); + gst_pocketsphinx_set_string(ps, "lmname", value); /** * Chances are that lmctl is already loaded and all @@ -501,7 +502,7 @@ gst_pocketsphinx_set_property(GObject * object, guint prop_id, */ if (value != NULL && ps->ps) { - ps_set_search(ps->ps, g_value_get_string(value)); + ps_activate_search(ps->ps, g_value_get_string(value)); } break; default: @@ -525,61 +526,61 @@ gst_pocketsphinx_get_property(GObject * object, guint prop_id, g_value_set_boxed(value, ps->ps); break; case PROP_HMM_DIR: - g_value_set_string(value, cmd_ln_str_r(ps->config, "-hmm")); + g_value_set_string(value, ps_config_str(ps->config, "hmm")); break; case PROP_LM_FILE: - g_value_set_string(value, cmd_ln_str_r(ps->config, "-lm")); + g_value_set_string(value, ps_config_str(ps->config, "lm")); break; case PROP_LMCTL_FILE: - g_value_set_string(value, cmd_ln_str_r(ps->config, "-lmctl")); + g_value_set_string(value, ps_config_str(ps->config, "lmctl")); break; case PROP_LM_NAME: - g_value_set_string(value, cmd_ln_str_r(ps->config, "-lmname")); + g_value_set_string(value, ps_config_str(ps->config, "lmname")); break; case PROP_DICT_FILE: - g_value_set_string(value, cmd_ln_str_r(ps->config, "-dict")); + g_value_set_string(value, ps_config_str(ps->config, "dict")); break; case PROP_MLLR_FILE: - g_value_set_string(value, cmd_ln_str_r(ps->config, "-mllr")); + g_value_set_string(value, ps_config_str(ps->config, "mllr")); break; case PROP_FSG_FILE: - g_value_set_string(value, cmd_ln_str_r(ps->config, "-fsg")); + g_value_set_string(value, ps_config_str(ps->config, "fsg")); break; case PROP_ALLPHONE_FILE: - g_value_set_string(value, cmd_ln_str_r(ps->config, "-allphone")); 
+ g_value_set_string(value, ps_config_str(ps->config, "allphone")); break; case PROP_KWS_FILE: - g_value_set_string(value, cmd_ln_str_r(ps->config, "-kws")); + g_value_set_string(value, ps_config_str(ps->config, "kws")); break; case PROP_JSGF_FILE: - g_value_set_string(value, cmd_ln_str_r(ps->config, "-jsgf")); + g_value_set_string(value, ps_config_str(ps->config, "jsgf")); break; case PROP_FWDFLAT: - g_value_set_boolean(value, cmd_ln_boolean_r(ps->config, "-fwdflat")); + g_value_set_boolean(value, ps_config_bool(ps->config, "fwdflat")); break; case PROP_BESTPATH: - g_value_set_boolean(value, cmd_ln_boolean_r(ps->config, "-bestpath")); + g_value_set_boolean(value, ps_config_bool(ps->config, "bestpath")); break; case PROP_LATDIR: g_value_set_string(value, ps->latdir); break; case PROP_MAXHMMPF: - g_value_set_int(value, cmd_ln_int32_r(ps->config, "-maxhmmpf")); + g_value_set_int(value, ps_config_int(ps->config, "maxhmmpf")); break; case PROP_MAXWPF: - g_value_set_int(value, cmd_ln_int32_r(ps->config, "-maxwpf")); + g_value_set_int(value, ps_config_int(ps->config, "maxwpf")); break; case PROP_BEAM: - g_value_set_double(value, cmd_ln_float_r(ps->config, "-beam")); + g_value_set_double(value, ps_config_float(ps->config, "beam")); break; case PROP_PBEAM: - g_value_set_double(value, cmd_ln_float_r(ps->config, "-pbeam")); + g_value_set_double(value, ps_config_float(ps->config, "pbeam")); break; case PROP_WBEAM: - g_value_set_double(value, cmd_ln_float_r(ps->config, "-wbeam")); + g_value_set_double(value, ps_config_float(ps->config, "wbeam")); break; case PROP_DSRATIO: - g_value_set_int(value, cmd_ln_int32_r(ps->config, "-ds")); + g_value_set_int(value, ps_config_int(ps->config, "ds")); break; default: G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); @@ -593,7 +594,7 @@ gst_pocketsphinx_finalize(GObject * gobject) GstPocketSphinx *ps = GST_POCKETSPHINX(gobject); ps_free(ps->ps); - cmd_ln_free_r(ps->config); + ps_config_free(ps->config); g_free(ps->last_result); 
g_free(ps->latdir); @@ -609,7 +610,7 @@ gst_pocketsphinx_init(GstPocketSphinx * ps) gst_pad_new_from_static_template(&src_factory, "src"); /* Parse default command-line options. */ - ps->config = cmd_ln_parse_r(NULL, ps_args(), default_argc, default_argv, FALSE); + ps->config = ps_config_init(NULL); ps_default_search_args(ps->config); /* Set up pads. */ @@ -671,6 +672,7 @@ gst_pocketsphinx_chain(GstPad * pad, GstObject *parent, GstBuffer * buffer) GstMapInfo info; gboolean in_speech; + (void)pad; ps = GST_POCKETSPHINX(parent); /* Start an utterance for the first buffer we get */ @@ -777,8 +779,9 @@ static void gst_pocketsphinx_log(void *user_data, err_lvl_t lvl, const char *fmt, ...) { static const int gst_level[ERR_MAX] = {GST_LEVEL_DEBUG, GST_LEVEL_INFO, - GST_LEVEL_INFO, GST_LEVEL_WARNING, GST_LEVEL_ERROR, GST_LEVEL_ERROR}; + GST_LEVEL_WARNING, GST_LEVEL_ERROR, GST_LEVEL_ERROR}; + (void)user_data; va_list ap; va_start(ap, fmt); gst_debug_log_valist(pocketsphinx_debug, gst_level[lvl], "", "", 0, NULL, fmt, ap); @@ -791,6 +794,7 @@ plugin_init(GstPlugin * plugin) { err_set_callback(gst_pocketsphinx_log, NULL); + err_set_loglevel(ERR_INFO); if (!gst_element_register(plugin, "pocketsphinx", GST_RANK_NONE, GST_TYPE_POCKETSPHINX)) diff --git a/gst/gstpocketsphinx.h b/gst/gstpocketsphinx.h index 31b9555e8..224cfda1b 100644 --- a/gst/gstpocketsphinx.h +++ b/gst/gstpocketsphinx.h @@ -65,7 +65,7 @@ struct _GstPocketSphinx GstPad *sinkpad, *srcpad; ps_decoder_t *ps; - cmd_ln_t *config; + ps_config_t *config; gchar *latdir; /**< Output directory for word lattices. 
*/ From 870f53893f158e03225eecafd6deb313335e6435 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 28 Sep 2022 22:19:16 -0400 Subject: [PATCH 4/7] feat: add option to build gstreamer --- CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6979e74bf..64b0c53be 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -111,6 +111,11 @@ else() install(DIRECTORY include TYPE INCLUDE) install(DIRECTORY ${CMAKE_BINARY_DIR}/include TYPE INCLUDE) install(FILES ${CMAKE_BINARY_DIR}/pocketsphinx.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) + + option(BUILD_GSTREAMER "Build GStreamer plugin" OFF) + if(BUILD_GSTREAMER) + add_subdirectory(gst) + endif() endif() # Can print this at the end, just to know what it was From 8b1eb6c447a205e92769955f593d847fc9d94c9d Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Wed, 28 Sep 2022 22:19:35 -0400 Subject: [PATCH 5/7] docs: add comment for making livedemo.py work without install --- gst/livedemo.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gst/livedemo.py b/gst/livedemo.py index e73cf8be8..872800998 100644 --- a/gst/livedemo.py +++ b/gst/livedemo.py @@ -50,7 +50,8 @@ def init_gui(self): def init_gst(self): """Initialize the speech components""" self.pipeline = gst.parse_launch('autoaudiosrc ! audioconvert ! audioresample ' - + '! pocketsphinx ! fakesink') + '! pocketsphinx ! fakesink') +# '! pocketsphinx hmm=../model/en-us/en-us lm=../model/en-us/en-us.lm.bin dict=../model/en-us/cmudict-en-us.dict ! 
fakesink')
         bus = self.pipeline.get_bus()
         bus.add_signal_watch()
         bus.connect('message::element', self.element_message)

From 1729e585b0392fa2d79a556279412affc19ee329 Mon Sep 17 00:00:00 2001
From: David Huggins-Daines
Date: Wed, 28 Sep 2022 22:28:32 -0400
Subject: [PATCH 6/7] build: install GStreamer plugin (hopefully to the right place)

---
 gst/CMakeLists.txt | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/gst/CMakeLists.txt b/gst/CMakeLists.txt
index bb69452ac..952484911 100644
--- a/gst/CMakeLists.txt
+++ b/gst/CMakeLists.txt
@@ -1,6 +1,6 @@
 find_package(PkgConfig REQUIRED)
 pkg_check_modules(GOBJECT gobject-2.0 REQUIRED)
-pkg_check_modules(GSTREAMER gstreamer-1.0 gstreamer-base-1.0 gstreamer-plugins-base-1.0)
+pkg_check_modules(GSTREAMER REQUIRED gstreamer-1.0 gstreamer-base-1.0)
 add_library(gstpocketsphinx SHARED gstpocketsphinx.c)
 set_property(TARGET pocketsphinx PROPERTY POSITION_INDEPENDENT_CODE on)
 target_link_libraries(gstpocketsphinx PUBLIC
@@ -17,3 +17,5 @@ target_include_directories(
   gstpocketsphinx INTERFACE ${CMAKE_BINARY_DIR}/include
   gstpocketsphinx PUBLIC ${GSTREAMER_INCLUDE_DIRS} ${GOBJECT_INCLUDE_DIRS}
   )
+message("Installing GStreamer plugin to ${CMAKE_INSTALL_FULL_LIBDIR}/gstreamer-1.0")
+install(TARGETS gstpocketsphinx LIBRARY DESTINATION ${CMAKE_INSTALL_FULL_LIBDIR}/gstreamer-1.0)

From adf20a36da90d92a7d3bafc2bd25c5b6c49a9296 Mon Sep 17 00:00:00 2001
From: David Huggins-Daines
Date: Wed, 28 Sep 2022 23:40:30 -0400
Subject: [PATCH 7/7] feat: update gst plugin for new endpointer

---
 gst/gstpocketsphinx.c | 86 +++++++++++++++++++++----------------------
 gst/gstpocketsphinx.h |  9 +++--
 2 files changed, 48 insertions(+), 47 deletions(-)

diff --git a/gst/gstpocketsphinx.c b/gst/gstpocketsphinx.c
index 0906987df..b3497f60e 100644
--- a/gst/gstpocketsphinx.c
+++ b/gst/gstpocketsphinx.c
@@ -608,6 +608,7 @@ gst_pocketsphinx_init(GstPocketSphinx * ps)
         gst_pad_new_from_static_template(&sink_factory, "sink");
     ps->srcpad =
gst_pad_new_from_static_template(&src_factory, "src"); + ps->adapter = gst_adapter_new(); /* Parse default command-line options. */ ps->config = ps_config_init(NULL); @@ -641,6 +642,15 @@ gst_pocketsphinx_change_state(GstElement *element, GstStateChange transition) ("Failed to initialize PocketSphinx")); return GST_STATE_CHANGE_FAILURE; } + ps->ep = ps_endpointer_init(0, 0.0, 0, + ps_config_int(ps->config, "samprate"), 0); + if (ps->ep == NULL) { + GST_ELEMENT_ERROR(GST_ELEMENT(ps), LIBRARY, INIT, + ("Failed to initialize PocketSphinx endpointer"), + ("Failed to initialize PocketSphinx endpointer")); + return GST_STATE_CHANGE_FAILURE; + } + ps->frame_size = ps_endpointer_frame_size(ps->ep) * 2; break; case GST_STATE_CHANGE_READY_TO_NULL: ps_free(ps->ps); @@ -669,52 +679,45 @@ static GstFlowReturn gst_pocketsphinx_chain(GstPad * pad, GstObject *parent, GstBuffer * buffer) { GstPocketSphinx *ps; - GstMapInfo info; - gboolean in_speech; (void)pad; ps = GST_POCKETSPHINX(parent); - /* Start an utterance for the first buffer we get */ - if (!ps->listening_started) { - ps->listening_started = TRUE; - ps->speech_started = FALSE; - ps_start_utt(ps->ps); - } - - gst_buffer_map (buffer, &info, GST_MAP_READ); - ps_process_raw(ps->ps, - (short*) info.data, - info.size / sizeof(short), - FALSE, FALSE); - gst_buffer_unmap (buffer, &info); - - in_speech = ps_get_in_speech(ps->ps); - if (in_speech && !ps->speech_started) { - ps->speech_started = TRUE; - } - if (!in_speech && ps->speech_started) { - gst_pocketsphinx_finalize_utt(ps); - } else if (ps->last_result_time == 0 - /* Get a partial result every now and then, see if it is different. */ - /* Check every 100 milliseconds. 
*/
-               || (GST_BUFFER_TIMESTAMP(buffer) - ps->last_result_time) > 100*10*1000) {
-        int32 score;
-        char const *hyp;
-
-        hyp = ps_get_hyp(ps->ps, &score);
-        ps->last_result_time = GST_BUFFER_TIMESTAMP(buffer);
-        if (hyp && strlen(hyp) > 0) {
-            if (ps->last_result == NULL || 0 != strcmp(ps->last_result, hyp)) {
-                g_free(ps->last_result);
-                ps->last_result = g_strdup(hyp);
-                gst_pocketsphinx_post_message(ps, FALSE, ps->last_result_time,
-                                              ps_get_prob(ps->ps), hyp);
+    /* gst_adapter_push() takes ownership of the buffer, so read its timestamp first. */
+    GstClockTime timestamp = GST_BUFFER_TIMESTAMP(buffer);
+    gst_adapter_push(ps->adapter, buffer);
+    while (gst_adapter_available(ps->adapter) >= ps->frame_size) {
+        const guint *data = gst_adapter_map(ps->adapter, ps->frame_size);
+        int prev_in_speech = ps_endpointer_in_speech(ps->ep);
+        const int16 *speech = ps_endpointer_process(ps->ep, (int16 *)data);
+        if (speech != NULL) {
+            if (!prev_in_speech)
+                ps_start_utt(ps->ps);
+            ps_process_raw(ps->ps, speech, ps->frame_size / 2, FALSE, FALSE);
+            if (!ps_endpointer_in_speech(ps->ep)) {
+                gst_pocketsphinx_finalize_utt(ps);
+            } else if (ps->last_result_time == 0
+                       /* Poll for a partial result, but only post it when it has changed. */
+                       /* Buffer timestamps are in nanoseconds: poll at most every 100 ms. */
+                       || (timestamp - ps->last_result_time) > 100 * GST_MSECOND) {
+                int32 score;
+                char const *hyp;
+
+                hyp = ps_get_hyp(ps->ps, &score);
+                ps->last_result_time = timestamp;
+                if (hyp && strlen(hyp) > 0) {
+                    if (ps->last_result == NULL || 0 != strcmp(ps->last_result, hyp)) {
+                        g_free(ps->last_result);
+                        ps->last_result = g_strdup(hyp);
+                        gst_pocketsphinx_post_message(ps, FALSE, ps->last_result_time,
+                                                      ps_get_prob(ps->ps), hyp);
+                    }
+                }
             }
         }
-    }
-
-    gst_buffer_unref(buffer);
+        gst_adapter_unmap(ps->adapter);
+        gst_adapter_flush(ps->adapter, ps->frame_size);
+    }
 
     return GST_FLOW_OK;
 }
@@ -727,11 +730,8 @@ gst_pocketsphinx_finalize_utt(GstPocketSphinx *ps)
     int32 score;
 
     hyp = NULL;
-    if (!ps->listening_started)
-        return;
 
     ps_end_utt(ps->ps);
-    ps->listening_started = FALSE;
 
     hyp = ps_get_hyp(ps->ps, &score);
     if (hyp) {
diff --git a/gst/gstpocketsphinx.h b/gst/gstpocketsphinx.h
index 224cfda1b..cb2a664ae 100644
--- a/gst/gstpocketsphinx.h
+++ b/gst/gstpocketsphinx.h
@@ -40,6 +40,7 @@
 #define __GST_POCKETSPHINX_H__
 
 #include <gst/gst.h>
+#include <gst/base/gstadapter.h>
 #include <pocketsphinx.h>
 
 G_BEGIN_DECLS
@@ -61,16 +62,16 @@ typedef struct _GstPocketSphinxClass GstPocketSphinxClass;
 struct _GstPocketSphinx
 {
     GstElement element;
-
+    GstAdapter *adapter;
     GstPad *sinkpad, *srcpad;
 
     ps_decoder_t *ps;
+    ps_endpointer_t *ep;
     ps_config_t *config;
 
-    gchar *latdir;                 /**< Output directory for word lattices. */
+    size_t frame_size;
 
-    gboolean speech_started;
-    gboolean listening_started;
+    gchar *latdir;                 /**< Output directory for word lattices. */
     gint uttno;
 
     GstClockTime last_result_time; /**< Timestamp of last partial result. */