基于kaldi的iOS语音识别(本地)+04+自定义解码器插件
2019-01-27 本文已影响0人
长风浮云
iOS在线识别:https://www.jianshu.com/u/3c2a0bd52ebc
GStreamer的处理几乎都是在一个“黑盒”里面完成的,它有自己的一套标准:所有功能都以插件的形式提供。既然我们要用到GStreamer,就需要把解码器那部分也做成一个GStreamer插件,加入到管道(pipeline)中,这样就让GStreamer自己去处理传输和识别解码。
接下来我们就说说该项目最难的一部分了。
因为有些内容涉及到GStreamer自己的东西,比如制作模板要求和规范,我这里就不展开阐述了,也不是本内容的重点,这里只会讲解解码器在插件中的使用。
插件模板
插件名称kaldidecoder
初始化
/* Static pad templates: the capabilities of the element's input and output.
 *
 * sink_template - always-present sink pad named "sink"; accepts raw audio
 *                 in S16LE format, 1 channel, any rate in [1, MAX].
 * src_template  - always-present source pad named "src"; produces
 *                 text/x-raw in utf8 format.
 */
static GstStaticPadTemplate sink_template =
GST_STATIC_PAD_TEMPLATE("sink",
GST_PAD_SINK,
GST_PAD_ALWAYS,
GST_STATIC_CAPS(
"audio/x-raw, "
"format = (string) S16LE, "
"channels = (int) 1, "
"rate = (int) [ 1, MAX ]"));
static GstStaticPadTemplate src_template =
GST_STATIC_PAD_TEMPLATE("src",
GST_PAD_SRC,
GST_PAD_ALWAYS,
GST_STATIC_CAPS("text/x-raw, format= { utf8 }"));
static guint gst_ kaldidecoder_signals[LAST_SIGNAL];
#define gst_ kaldidecoder_parent_class parent_class
G_DEFINE_TYPE(Gst kaldidecoder, gst_ kaldidecoder,
GST_TYPE_ELEMENT);
static void gst_ kaldidecoder_load_phone_syms(Gst kaldidecoder * filter,
const GValue * value);
static void gst_kaldidecoder_load_word_syms(Gst kaldidecoder * filter,
const GValue * value);
static void gst_ kaldidecoder_load_model(Gst kaldidecoder * filter,
const GValue * value);
static void gst_ kaldidecoder_load_fst(Gst kaldidecoder * filter,
const GValue * value);
static void gst_ kaldidecoder_load_lm_fst(Gst kaldidecoder * filter,
const GValue * value);
static void gst_ kaldidecoder_load_big_lm(Gst kaldidecoder * filter,
const GValue * value);
static void gst_ kaldidecoder_load_word_boundary_info(Gst kaldidecoder * filter,
const GValue * value);
static void gst_ kaldidecoder_set_property(GObject * object,
guint prop_id,
const GValue * value,
GParamSpec * pspec);
static void gst_ kaldidecoder_get_property(GObject * object,
guint prop_id,
GValue * value,
GParamSpec * pspec);
static gboolean gst_ kaldidecoder_sink_event(GstPad * pad,
GstObject * parent,
GstEvent * event);
static GstFlowReturn gst_ kaldidecoder_chain(GstPad * pad,
GstObject * parent,
GstBuffer * buf);
static GstStateChangeReturn gst_ kaldidecoder_change_state(
GstElement *element, GstStateChange transition);
static gboolean gst_ kaldidecoder_query(GstPad *pad, GstObject * parent, GstQuery * query);
static void gst_ kaldidecoder_finalize(GObject * object);
这些pad模板都需要在_class_init方法里面通过gst_element_class_add_pad_template()注册。
_class_init:
/* GObject vmethod implementations */
/* Class initializer for the kaldidecoder element.
 *
 * Installs the GObject set_property/get_property/finalize handlers and the
 * GstElement change_state handler, creates the three result signals, sets
 * the element details and adds the static src and sink pad templates to the
 * element class.
 *
 * The property installation code is elided ("...") in this excerpt.
 */
static void gst_kaldidecoder_class_init(
GstkaldidecoderClass * klass) {
GObjectClass *gobject_class;
GstElementClass *gstelement_class;
/* The same class structure viewed as its GObject and GstElement parents. */
gobject_class = (GObjectClass *) klass;
gstelement_class = (GstElementClass *) klass;
gobject_class->set_property = gst_kaldidecoder_set_property;
gobject_class->get_property = gst_kaldidecoder_get_property;
gobject_class->finalize = gst_kaldidecoder_finalize;
gstelement_class->change_state = gst_kaldidecoder_change_state;
g_object_class_install_property(
...
);
...
/* Result signals: each is G_SIGNAL_RUN_LAST, returns G_TYPE_NONE and
 * carries one G_TYPE_STRING argument. The class handler offset points at
 * the matching member of GstkaldidecoderClass. */
gst_kaldidecoder_signals[PARTIAL_RESULT_SIGNAL] = g_signal_new(
"partial-result", G_TYPE_FROM_CLASS(klass), G_SIGNAL_RUN_LAST,
G_STRUCT_OFFSET(GstkaldidecoderClass, partial_result),
NULL,
NULL, kaldi_marshal_VOID__STRING, G_TYPE_NONE, 1,
G_TYPE_STRING);
gst_kaldidecoder_signals[FINAL_RESULT_SIGNAL] = g_signal_new(
"final-result", G_TYPE_FROM_CLASS(klass), G_SIGNAL_RUN_LAST,
G_STRUCT_OFFSET(GstkaldidecoderClass, final_result),
NULL,
NULL, kaldi_marshal_VOID__STRING, G_TYPE_NONE, 1,
G_TYPE_STRING);
gst_kaldidecoder_signals[FULL_FINAL_RESULT_SIGNAL] = g_signal_new(
"full-final-result", G_TYPE_FROM_CLASS(klass), G_SIGNAL_RUN_LAST,
G_STRUCT_OFFSET(GstkaldidecoderClass, full_final_result),
NULL,
NULL, kaldi_marshal_VOID__STRING, G_TYPE_NONE, 1,
G_TYPE_STRING);
/* Element details: long name, classification, description, author. */
gst_element_class_set_details_simple(
gstelement_class, "KaldiDecoder", "Speech/Audio",
"Convert speech to text", "changfengfuyun");
/* Add both static pad templates to the element class. */
gst_element_class_add_pad_template(gstelement_class,
gst_static_pad_template_get(&src_template));
gst_element_class_add_pad_template(
gstelement_class, gst_static_pad_template_get(&sink_template));
}
定义好插件各个部分的代码之后,我们还需要有一个_init()方法,作为插件的入口来注册元素。
/* Plugin entry point.
 *
 * Initializes the debug category used for this plugin's log messages and
 * registers the "kaldidecoder" element factory with the given plugin.
 *
 * kaldidecoder: the plugin being initialized.
 *
 * Returns the result of gst_element_register().
 */
static gboolean kaldidecoder_init(
    GstPlugin * kaldidecoder) {
  /* debug category for filtering log messages
   *
   * exchange the string 'Template kaldidecoder' with your description
   */
  GST_DEBUG_CATEGORY_INIT(gst_kaldidecoder_debug,
                          "kaldidecoder", 0,
                          "Template kaldidecoder");

  /* The plugin handle is the first argument; the original text was missing
   * the comma that separates it from the element name. */
  return gst_element_register(kaldidecoder,
                              "kaldidecoder", GST_RANK_NONE,
                              GST_TYPE_KALDIDECODER);
}
至此,就完成了解码器模板的初始化
指定pads
pads是数据进出元素的端口,因此它们在元素创建过程中非常重要。在模板代码中,我们已经看到了静态pad模板是如何被注册到元素类中的。在这里,我们将看到如何创建实际的元素,如何使用_event()函数来配置特定的格式,以及如何注册函数来让数据流经元素。
创建pad:
/* Instance initializer for a new kaldidecoder element.
 *
 * Creates the sink and src pads from their static templates, installs the
 * event, chain and query callbacks on the sink pad, and adds both pads to
 * the element. Other instance setup is elided ("...") in this excerpt.
 */
static void gst_kaldidecoder_init(
Gstkaldidecoder * filter) {
...
/* NOTE(review): this NULL assignment is overwritten by the next line. */
filter->sinkpad = NULL;
filter->sinkpad = gst_pad_new_from_static_template(&sink_template, "sink");
/* Sink pad callbacks: events, incoming buffers and queries. */
gst_pad_set_event_function(
filter->sinkpad,
GST_DEBUG_FUNCPTR(gst_kaldidecoder_sink_event));
gst_pad_set_chain_function(
filter->sinkpad, GST_DEBUG_FUNCPTR(gst_kaldidecoder_chain));
gst_pad_set_query_function(
filter->sinkpad, GST_DEBUG_FUNCPTR(gst_kaldidecoder_query));
gst_pad_use_fixed_caps(filter->sinkpad);
gst_element_add_pad(GST_ELEMENT(filter), filter->sinkpad);
/* The src pad gets no callbacks here; it is only created and added. */
filter->srcpad = gst_pad_new_from_static_template(&src_template, "src");
gst_pad_use_fixed_caps(filter->srcpad);
gst_element_add_pad(GST_ELEMENT(filter), filter->srcpad);
// decoder-related initialization
...
}
这里也是每次生成元件的时候都会调用的方法
_chain方法:
- 用于接收和处理sinkpad上的输入数据。
/* Chain function for the sink pad: handles one incoming buffer.
 *
 * If the element has no audio source yet, an element error is posted and
 * GST_FLOW_NOT_NEGOTIATED is returned. Otherwise the buffer is pushed into
 * the audio source (skipped when the element is silent) and GST_FLOW_OK is
 * returned. The buffer is unreffed on every path.
 */
static GstFlowReturn gst_kaldidecoder_chain(GstPad * pad,
                                            GstObject * parent,
                                            GstBuffer * buf) {
  Gstkaldidecoder *self = GST_KALDIDECODER(parent);

  /* Guard: nothing can be decoded without an audio source. */
  if (G_UNLIKELY(!self->audio_source)) {
    GST_ELEMENT_ERROR(self, CORE, NEGOTIATION, (NULL),
                      ("decoder wasn't allocated before chain function"));
    gst_buffer_unref(buf);
    return GST_FLOW_NOT_NEGOTIATED;
  }

  if (!self->silent) {
    self->audio_source->PushBuffer(buf);
  }

  gst_buffer_unref(buf);
  return GST_FLOW_OK;
}
_event方法:
- 该方法通知你在传输数据流中发生的特殊事件(如caps, end-of-stream, newsegment, tags等)。
/* Sink pad event handler.
 *
 * SEGMENT: marks the element as decoding and starts the decoding task on
 *          the src pad.
 * CAPS:    accepted without further action.
 * EOS:     while decoding, tells the audio source that input has ended;
 *          otherwise pushes a new EOS event out of the src pad.
 * other:   forwarded to gst_pad_event_default().
 *
 * The event function owns the event it is given, so each branch that does
 * not pass the event on releases it with gst_event_unref().
 *
 * Returns TRUE for the events handled here, otherwise the result of the
 * default handler.
 */
static gboolean gst_kaldidecoder_sink_event(GstPad * pad,
                                            GstObject * parent,
                                            GstEvent * event) {
  gboolean ret;
  Gstkaldidecoder *filter;

  filter = GST_KALDIDECODER(parent);

  GST_DEBUG_OBJECT(filter, "Handling %s event", GST_EVENT_TYPE_NAME(event));

  switch (GST_EVENT_TYPE(event)) {
    case GST_EVENT_SEGMENT: {
      GST_DEBUG_OBJECT(filter, "Starting decoding task");
      filter->decoding = true;
      gst_pad_start_task(filter->srcpad,
                         (GstTaskFunction) gst_kaldidecoder_loop,
                         filter, NULL);
      GST_DEBUG_OBJECT(filter, "Started decoding task");
      gst_event_unref(event);
      ret = TRUE;
      break;
    }
    case GST_EVENT_CAPS: {
      gst_event_unref(event);
      ret = TRUE;
      break;
    }
    case GST_EVENT_EOS: {
      /* end-of-stream, we should close down all stream leftovers here */
      GST_DEBUG_OBJECT(filter, "EOS received");
      if (filter->decoding) {
        filter->audio_source->SetEnded(true);
      } else {
        GST_DEBUG_OBJECT(filter, "EOS received while not decoding, pushing EOS out");
        gst_pad_push_event(filter->srcpad, gst_event_new_eos());
      }
      /* The incoming EOS is not forwarded on either path, so drop it. */
      gst_event_unref(event);
      ret = TRUE;
      break;
    }
    default:
      /* The default handler takes over the event. */
      ret = gst_pad_event_default(pad, parent, event);
      break;
  }
  return ret;
}
_query方法:
- 处理元件收到的查询(query),并给出必须回复的内容。
/* Sink pad query handler.
 *
 * For a caps query, answers with audio/x-raw caps: S16LE, 1 channel, at the
 * element's sample rate. On the first caps query the feature info object is
 * created from the feature config, and the sample rate is read from its PLP
 * options when the feature type is "plp", otherwise from its MFCC options.
 * Every other query type goes to gst_pad_query_default().
 *
 * Returns TRUE for caps queries, otherwise the default handler's result.
 */
static gboolean
gst_kaldidecoder_query (GstPad *pad, GstObject * parent, GstQuery * query) {
  Gstkaldidecoder *self = GST_KALDIDECODER(parent);

  /* Only caps queries are answered here. */
  if (GST_QUERY_TYPE (query) != GST_QUERY_CAPS)
    return gst_pad_query_default (pad, parent, query);

  /* Build the feature info once and cache the sample rate it implies. */
  if (self->feature_info == NULL) {
    self->feature_info = new OnlineNnet2FeaturePipelineInfo(*(self->feature_config));

    const gboolean uses_plp =
        (strcmp((self->feature_config->feature_type).c_str(), "plp") == 0);
    if (uses_plp) {
      self->sample_rate = (int) self->feature_info->plp_opts.frame_opts.samp_freq;
    } else {
      self->sample_rate = (int) self->feature_info->mfcc_opts.frame_opts.samp_freq;
    }
  }

  GstCaps *answer = gst_caps_new_simple ("audio/x-raw",
                                         "format", G_TYPE_STRING, "S16LE",
                                         "rate", G_TYPE_INT, self->sample_rate,
                                         "channels", G_TYPE_INT, 1, NULL);

  GST_DEBUG_OBJECT (self, "Setting caps query result: %" GST_PTR_FORMAT, answer);
  gst_query_set_caps_result (query, answer);
  /* Drop our own reference once the result has been set on the query. */
  gst_caps_unref (answer);

  return TRUE;
}
接下来讲解kaldi的解码。