语音识别

基于kaldi的iOS语音识别(本地)+04+自定义解码器插件

2019-01-27  本文已影响0人  长风浮云

iOS在线识别:https://www.jianshu.com/u/3c2a0bd52ebc

因为GStreamer的处理几乎都是在一个“黑盒”里面进行的,它有自己的一套标准,也就是一个个插件。我们要用到GStreamer,就需要把解码器那部分也做成一个GStreamer插件,加入到管道(pipeline)中,这样就可以让GStreamer自己去处理数据传输和识别解码。

接下来我们就说说该项目最难的一部分了。

因为有些内容涉及到GStreamer自己的东西,比如制作模板要求和规范,我这里就不展开阐述了,也不是本内容的重点,这里只会讲解解码器在插件中的使用。

插件模板

插件名称kaldidecoder

初始化

/* Static pad templates describing the capabilities of the element's
 * input (sink) and output (src) pads.
 *
 * sink: always present; accepts raw audio, signed 16-bit little-endian,
 *       one channel, with any sample rate in the range [1, MAX].
 * src:  always present; produces raw text in utf8 format.
 */
static GstStaticPadTemplate sink_template =
GST_STATIC_PAD_TEMPLATE("sink",
    GST_PAD_SINK,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS(
        "audio/x-raw, "
        "format = (string) S16LE, "
        "channels = (int) 1, "
        "rate = (int) [ 1, MAX ]"));

static GstStaticPadTemplate src_template =
GST_STATIC_PAD_TEMPLATE("src",
    GST_PAD_SRC,
    GST_PAD_ALWAYS,
    GST_STATIC_CAPS("text/x-raw, format= { utf8 }"));

/* Signal ids returned by g_signal_new(), indexed by the signal enum whose
 * last member is LAST_SIGNAL. */
static guint gst_kaldidecoder_signals[LAST_SIGNAL];

/* Alias so that the rest of the file can refer to the parent class pointer
 * generated by G_DEFINE_TYPE simply as `parent_class`. */
#define gst_kaldidecoder_parent_class parent_class
/* Defines the Gstkaldidecoder GObject type, deriving from GstElement. */
G_DEFINE_TYPE(Gstkaldidecoder, gst_kaldidecoder,
              GST_TYPE_ELEMENT);

static void gst_ kaldidecoder_load_phone_syms(Gst kaldidecoder * filter,
                                                        const GValue * value);

static void gst_kaldidecoder_load_word_syms(Gst kaldidecoder * filter,
                                                       const GValue * value);

static void gst_ kaldidecoder_load_model(Gst kaldidecoder * filter,
                                                   const GValue * value);

static void gst_ kaldidecoder_load_fst(Gst kaldidecoder * filter,
                                                 const GValue * value);

static void gst_ kaldidecoder_load_lm_fst(Gst kaldidecoder * filter,
                                                    const GValue * value);

static void gst_ kaldidecoder_load_big_lm(Gst kaldidecoder * filter,
                                                    const GValue * value);

static void gst_ kaldidecoder_load_word_boundary_info(Gst kaldidecoder * filter,
                                                                const GValue * value);


static void gst_ kaldidecoder_set_property(GObject * object,
                                                     guint prop_id,
                                                     const GValue * value,
                                                     GParamSpec * pspec);

static void gst_ kaldidecoder_get_property(GObject * object,
                                                     guint prop_id,
                                                     GValue * value,
                                                     GParamSpec * pspec);

static gboolean gst_ kaldidecoder_sink_event(GstPad * pad,
                                                       GstObject * parent,
                                                       GstEvent * event);

static GstFlowReturn gst_ kaldidecoder_chain(GstPad * pad,
                                                       GstObject * parent,
                                                       GstBuffer * buf);

static GstStateChangeReturn gst_ kaldidecoder_change_state(
    GstElement *element, GstStateChange transition);

static gboolean gst_ kaldidecoder_query(GstPad *pad, GstObject * parent, GstQuery * query);

static void gst_ kaldidecoder_finalize(GObject * object);

这些pad模板都需要在_class_init方法里面通过gst_element_class_add_pad_template()注册。
_class_init:

/* GObject vmethod implementations */

/* Initialize the kaldidecoder's class.
 *
 * Installs the GObject property accessors and finalizer, overrides the
 * element's change_state vmethod, registers the three result signals,
 * sets the element's descriptive details and adds the static src/sink
 * pad templates to the element class.
 */
static void gst_kaldidecoder_class_init(
    GstkaldidecoderClass * klass) {
  GObjectClass *gobject_class;
  GstElementClass *gstelement_class;

  /* View the same class structure through both ancestor class types. */
  gobject_class = (GObjectClass *) klass;
  gstelement_class = (GstElementClass *) klass;

  gobject_class->set_property = gst_kaldidecoder_set_property;
  gobject_class->get_property = gst_kaldidecoder_get_property;
  gobject_class->finalize = gst_kaldidecoder_finalize;

  gstelement_class->change_state = gst_kaldidecoder_change_state;

  /* Property registration; the arguments and the remaining installs are
   * elided in this excerpt. */
  g_object_class_install_property(
      ...
  );
  ...

  /* The three result signals below share the same shape: run-last,
   * no return value, and a single string argument. The class handler
   * slot of each is the like-named member of GstkaldidecoderClass. */
  gst_kaldidecoder_signals[PARTIAL_RESULT_SIGNAL] = g_signal_new(
      "partial-result", G_TYPE_FROM_CLASS(klass), G_SIGNAL_RUN_LAST,
      G_STRUCT_OFFSET(GstkaldidecoderClass, partial_result),
      NULL,
      NULL, kaldi_marshal_VOID__STRING, G_TYPE_NONE, 1,
      G_TYPE_STRING);

  gst_kaldidecoder_signals[FINAL_RESULT_SIGNAL] = g_signal_new(
      "final-result", G_TYPE_FROM_CLASS(klass), G_SIGNAL_RUN_LAST,
      G_STRUCT_OFFSET(GstkaldidecoderClass, final_result),
      NULL,
      NULL, kaldi_marshal_VOID__STRING, G_TYPE_NONE, 1,
      G_TYPE_STRING);

  gst_kaldidecoder_signals[FULL_FINAL_RESULT_SIGNAL] = g_signal_new(
      "full-final-result", G_TYPE_FROM_CLASS(klass), G_SIGNAL_RUN_LAST,
      G_STRUCT_OFFSET(GstkaldidecoderClass, full_final_result),
      NULL,
      NULL, kaldi_marshal_VOID__STRING, G_TYPE_NONE, 1,
      G_TYPE_STRING);

  /* Element details: long name, classification, description, author. */
  gst_element_class_set_details_simple(
      gstelement_class, "KaldiDecoder", "Speech/Audio",
      "Convert speech to text", "changfengfuyun");

  /* Register the static pad templates with the element class. */
  gst_element_class_add_pad_template(gstelement_class,
                                     gst_static_pad_template_get(&src_template));

  gst_element_class_add_pad_template(
      gstelement_class, gst_static_pad_template_get(&sink_template));
}

定义好插件各部分的代码之后,我们还需要一个_init()方法作为插件的入口。

/* Entry point to initialize the plug-in.
 *
 * Initializes the debug category used for filtering this plug-in's log
 * messages, then registers the "kaldidecoder" element factory with rank
 * GST_RANK_NONE.
 *
 * kaldidecoder: the plug-in being initialized.
 *
 * Returns the result of gst_element_register().
 */
static gboolean kaldidecoder_init(
    GstPlugin * kaldidecoder) {
  /* debug category for filtering log messages
   *
   * exchange the string 'Template kaldidecoder' with your description
   */
  GST_DEBUG_CATEGORY_INIT(gst_kaldidecoder_debug,
                          "kaldidecoder", 0,
                          "Template kaldidecoder");

  return gst_element_register(kaldidecoder,
                              "kaldidecoder", GST_RANK_NONE,
                              GST_TYPE_KALDIDECODER);
}

至此,就完成了解码器模板的初始化

指定pads

pads是数据进出元素的端口,这使得它们在元素创建过程中成为非常重要的项。在模板代码中,我们已经看到了静态pad模板如何负责将pad模板注册到元素类中。在这里,我们将看到如何创建实际的元素,如何使用_event()函数来配置特定的格式,以及如何注册函数来让数据流经元素。

创建pad:

/* Initialize a new element instance.
 *
 * Instantiates the sink and src pads from their static templates, sets
 * the sink pad's callback functions (event, chain, query), adds both
 * pads to the element, and initializes the instance structure. Parts of
 * the body are elided in this excerpt.
 */
static void gst_kaldidecoder_init(
    Gstkaldidecoder * filter) {
  ...
  /* NOTE(review): this NULL is overwritten by the assignment right below. */
  filter->sinkpad = NULL;

  /* Sink pad: receives events, buffers (chain) and queries. */
  filter->sinkpad = gst_pad_new_from_static_template(&sink_template, "sink");
  gst_pad_set_event_function(
      filter->sinkpad,
      GST_DEBUG_FUNCPTR(gst_kaldidecoder_sink_event));
  gst_pad_set_chain_function(
      filter->sinkpad, GST_DEBUG_FUNCPTR(gst_kaldidecoder_chain));
  gst_pad_set_query_function(
      filter->sinkpad, GST_DEBUG_FUNCPTR(gst_kaldidecoder_query));
  gst_pad_use_fixed_caps(filter->sinkpad);
  gst_element_add_pad(GST_ELEMENT(filter), filter->sinkpad);

  /* Src pad: no callbacks are installed on it here. */
  filter->srcpad = gst_pad_new_from_static_template(&src_template, "src");
  gst_pad_use_fixed_caps(filter->srcpad);
  gst_element_add_pad(GST_ELEMENT(filter), filter->srcpad);

  // decoder-related initialization
  ...
}

这里也是每次生成元件的时候都会调用的方法

_chain方法:

/* Chain function for the sink pad.
 *
 * Hands each incoming buffer to the element's audio source unless the
 * element is in silent mode, then releases the buffer. If the audio
 * source has not been allocated yet, an element error is posted and
 * GST_FLOW_NOT_NEGOTIATED is returned instead of GST_FLOW_OK.
 */
static GstFlowReturn gst_kaldidecoder_chain(GstPad * pad,
                                            GstObject * parent,
                                            GstBuffer * buf) {
  Gstkaldidecoder *decoder = GST_KALDIDECODER(parent);
  GstFlowReturn flow = GST_FLOW_OK;

  if (G_UNLIKELY(!decoder->audio_source)) {
    /* Nothing to feed the audio into yet: report and fail the flow. */
    GST_ELEMENT_ERROR(decoder, CORE, NEGOTIATION, (NULL),
                      ("decoder wasn't allocated before chain function"));
    flow = GST_FLOW_NOT_NEGOTIATED;
  } else if (!decoder->silent) {
    decoder->audio_source->PushBuffer(buf);
  }

  /* The buffer is released on every path. */
  gst_buffer_unref(buf);
  return flow;
}

_event方法:

/* Handles events arriving on the sink pad.
 *
 * SEGMENT: marks the element as decoding and starts the decoding task
 *          on the src pad.
 * CAPS:    accepted without further action.
 * EOS:     while decoding, flags the audio source as ended; otherwise
 *          pushes a fresh EOS event out of the src pad.
 * other:   forwarded to gst_pad_event_default().
 *
 * The event function owns the event it receives, so every branch that
 * does not pass the event on must unref it; gst_pad_event_default()
 * takes over ownership in the default branch.
 *
 * Returns TRUE for the events handled here, otherwise the result of the
 * default handler.
 */
static gboolean gst_kaldidecoder_sink_event(GstPad * pad,
                                            GstObject * parent,
                                            GstEvent * event) {
  gboolean ret;
  Gstkaldidecoder *filter;

  filter = GST_KALDIDECODER(parent);

  GST_DEBUG_OBJECT(filter, "Handling %s event", GST_EVENT_TYPE_NAME(event));

  switch (GST_EVENT_TYPE(event)) {
    case GST_EVENT_SEGMENT: {
      GST_DEBUG_OBJECT(filter, "Starting decoding task");
      filter->decoding = true;
      gst_pad_start_task(filter->srcpad,
                         (GstTaskFunction) gst_kaldidecoder_loop,
                         filter, NULL);

      GST_DEBUG_OBJECT(filter, "Started decoding task");
      gst_event_unref(event);
      ret = TRUE;
      break;
    }
    case GST_EVENT_CAPS: {
      gst_event_unref(event);
      ret = TRUE;
      break;
    }
    case GST_EVENT_EOS: {
      /* end-of-stream, we should close down all stream leftovers here */
      GST_DEBUG_OBJECT(filter, "EOS received");
      if (filter->decoding) {
        filter->audio_source->SetEnded(true);
      } else {
        GST_DEBUG_OBJECT(filter, "EOS received while not decoding, pushing EOS out");
        gst_pad_push_event(filter->srcpad, gst_event_new_eos());
      }
      /* The incoming EOS is consumed here, not forwarded. */
      gst_event_unref(event);
      ret = TRUE;
      break;
    }
    default:
      ret = gst_pad_event_default(pad, parent, event);
      break;
  }
  return ret;
}

_query方法:

/* GstElement vmethod implementations */

/* Query handler for the sink pad.
 *
 * For a CAPS query, answers with raw S16LE mono audio at the element's
 * sample rate. On the first such query the feature pipeline info is
 * built from the feature config, and the sample rate is read from the
 * PLP options when the feature type is "plp", otherwise from the MFCC
 * options. Every other query type goes to gst_pad_query_default().
 */
static gboolean
gst_kaldidecoder_query (GstPad *pad, GstObject * parent, GstQuery * query) {
  Gstkaldidecoder *decoder = GST_KALDIDECODER(parent);

  if (GST_QUERY_TYPE (query) != GST_QUERY_CAPS) {
    return gst_pad_query_default (pad, parent, query);
  }

  if (decoder->feature_info == NULL) {
    /* Lazily create the feature info and derive the sample rate from it. */
    decoder->feature_info = new OnlineNnet2FeaturePipelineInfo(*(decoder->feature_config));
    const bool uses_plp =
        strcmp((decoder->feature_config->feature_type).c_str(), "plp") == 0;
    decoder->sample_rate = uses_plp
        ? (int) decoder->feature_info->plp_opts.frame_opts.samp_freq
        : (int) decoder->feature_info->mfcc_opts.frame_opts.samp_freq;
  }

  GstCaps *answer = gst_caps_new_simple ("audio/x-raw",
        "format", G_TYPE_STRING, "S16LE",
        "rate", G_TYPE_INT, decoder->sample_rate,
        "channels", G_TYPE_INT, 1, NULL);
  GST_DEBUG_OBJECT (decoder, "Setting caps query result: %" GST_PTR_FORMAT, answer);
  gst_query_set_caps_result (query, answer);
  /* Drop our reference once the result has been stored in the query. */
  gst_caps_unref (answer);

  return TRUE;
}

接下来讲解kaldi的解码。

上一篇下一篇

猜你喜欢

热点阅读