freeswitch-asr处理流程

freeswitch-asr处理流程

在深入分析 mrcp 与 freeswitch 之间的对接之前,我们先了解 freeswitch 自身的 asr 是如何处理的。

asr处理核心

freeswitch 源码现在已经将asr处理流程进行隔离。

freeswitch-设定了基于asr的 interface

我们先定位到freeswitch-源码: switch_core_asr.c


/*!
  \brief Open an ASR handle backed by the named ASR module.

  module_name may be given as "name" or "name:param". When a ':' is present
  the text before it selects the module and the text after it is copied into
  ah->param.

  \param ah          handle to initialise; must not be NULL (asserted)
  \param module_name ASR module name, optionally followed by ":param"
  \param codec       passed through unchanged to the module's asr_open
  \param rate        stored in ah->rate, ah->samplerate and ah->native_rate, and passed to asr_open
  \param dest        passed through unchanged to the module's asr_open
  \param flags       initial flags, copied into ah->flags and passed to asr_open;
                     NULL is treated as SWITCH_ASR_FLAG_NONE
  \param pool        memory pool to use; when NULL a new pool is created and
                     SWITCH_ASR_FLAG_FREE_POOL is set on the handle
  \return SWITCH_STATUS_GENERR when module_name is NULL or no such ASR module
          is found, the pool-creation status if creating a pool fails,
          otherwise the status returned by the module's asr_open
*/
SWITCH_DECLARE(switch_status_t) switch_core_asr_open(switch_asr_handle_t *ah,
													 const char *module_name,
													 const char *codec, int rate, const char *dest, switch_asr_flag_t *flags, switch_memory_pool_t *pool)
{
	switch_status_t status;
	switch_asr_flag_t fallback_flags = SWITCH_ASR_FLAG_NONE;
	char buf[256] = "";
	char *param = NULL;

	/* Validate the arguments before anything is dereferenced. */
	switch_assert(ah != NULL);

	if (module_name == NULL) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Missing ASR module name!\n");
		return SWITCH_STATUS_GENERR;
	}

	if (flags == NULL) {
		/* Keep the pointer valid for both the copy below and the module's asr_open. */
		flags = &fallback_flags;
	}

	/* Split "name:param" using a local copy so the caller's string is not modified. */
	if (strchr(module_name, ':')) {
		/* NOTE(review): switch_set_string presumably bounds the copy by sizeof(buf) - confirm.
		   If the ':' does not survive the copy, module_name is left pointing at the original string. */
		switch_set_string(buf, module_name);
		if ((param = strchr(buf, ':'))) {
			*param++ = '\0';
			module_name = buf;
		}
	}

	if ((ah->asr_interface = switch_loadable_module_get_asr_interface(module_name)) == 0) {
		switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Invalid ASR module [%s]!\n", module_name);
		return SWITCH_STATUS_GENERR;
	}

	ah->flags = *flags;

	if (pool) {
		ah->memory_pool = pool;
	} else {
		if ((status = switch_core_new_memory_pool(&ah->memory_pool)) != SWITCH_STATUS_SUCCESS) {
			UNPROTECT_INTERFACE(ah->asr_interface);
			return status;
		}
		/* Mark the pool as created here rather than supplied by the caller. */
		switch_set_flag(ah, SWITCH_ASR_FLAG_FREE_POOL);
	}

	/* param and module_name may point into the stack buffer, so duplicate them into the pool. */
	if (param) {
		ah->param = switch_core_strdup(ah->memory_pool, param);
	}
	ah->rate = rate;
	ah->name = switch_core_strdup(ah->memory_pool, module_name);
	ah->samplerate = rate;
	ah->native_rate = rate;

	/* Hand over to the module-specific implementation. */
	status = ah->asr_interface->asr_open(ah, codec, rate, dest, flags);

	if (status != SWITCH_STATUS_SUCCESS) {
		/* NOTE(review): a pool created above is not released on this path -
		   confirm whether callers are expected to clean up after a failed open. */
		UNPROTECT_INTERFACE(ah->asr_interface);
	}

	return status;

}

我们从 switch_core_asr_open 能分析出 freeswitch 是如何设计 asr 流程的。

如何支持各类asr处理,用户自定义处理,那么肯定需要类似interface概念来动态区分。

果然,函数 switch_core_asr_open 的第一步就是解析 module_name,并根据 module name 获取用户此次使用的 asr interface:

ah->asr_interface = switch_loadable_module_get_asr_interface(module_name)

设置对应参数:

然后就是调用对应 module 的 asr_open

这就是freeswitch 源码中一贯使用的方式,使用指针来实现类似面向对象的接口,从而来实现多态,从而抽象代码,动态根据传入的参数执行代码。

所有实现asr_interface 的对象(结构体),一定是实现了对应的方法,如此处的: asr_interface->asr_open

以及:对应的



那么 我们可以猜测,unimrcp模块,实现对应的mrcp协议,那么一定还需要实现asr_interface 接口对应的函数

asr_open

asr_load_grammar

asr_unload_grammar

asr_close

等等。

查找调用

找到asr核心处理流程,那么我们反向查找何处调用了core_asr 模块的方法。

switch_ivr_detect_speech_init


处理完asr_open, 紧接着就是 speech_callback


media bug回调 刚好继续调用 core_asr 模块的 asr_feed.

由此可知,asr_feed 的作用就是把语音音频实时送入 asr 处理模块。

然后执行: switch_core_asr_check_results 查看 asr结果。


/*
 * Worker thread that turns ASR results into DETECTED_SPEECH events for a session.
 *
 * obj is a struct speech_thread_handle. The thread creates the handle's
 * condition variable and mutex, holds a read lock on the session for its
 * whole lifetime, and then loops: wait on the condition, poll the ASR handle
 * for results, and queue an event describing them to the session. A final
 * "closed" event is emitted before the thread exits. Always returns NULL.
 */
static void *SWITCH_THREAD_FUNC speech_thread(switch_thread_t *thread, void *obj)
{
	struct speech_thread_handle *sth = (struct speech_thread_handle *) obj;
	switch_channel_t *channel = switch_core_session_get_channel(sth->session);
	switch_asr_flag_t flags = SWITCH_ASR_FLAG_NONE;
	switch_status_t status;
	switch_event_t *event;

	/* The synchronisation objects are allocated from the handle's pool. */
	switch_thread_cond_create(&sth->cond, sth->pool);
	switch_mutex_init(&sth->mutex, SWITCH_MUTEX_NESTED, sth->pool);

	/* Without the session read lock the thread cannot run; report not-ready and exit. */
	if (switch_core_session_read_lock(sth->session) != SWITCH_STATUS_SUCCESS) {
		sth->ready = 0;
		return NULL;
	}

	switch_mutex_lock(sth->mutex);

	sth->ready = 1;

	/* Run until the channel goes down or the ASR handle is flagged closed. */
	while (switch_channel_up_nosig(channel) && !switch_test_flag(sth->ah, SWITCH_ASR_FLAG_CLOSED)) {
		char *xmlstr = NULL;
		switch_event_t *headers = NULL;

		/* Sleep until signalled. NOTE(review): the signaller is not in this block -
		   presumably the code that feeds audio to the ASR handle; confirm. */
		switch_thread_cond_wait(sth->cond, sth->mutex);

		/* State may have changed while waiting, so re-check before touching the ASR handle. */
		if (switch_channel_down_nosig(channel) || switch_test_flag(sth->ah, SWITCH_ASR_FLAG_CLOSED)) {
			break;
		}

		if (switch_core_asr_check_results(sth->ah, &flags) == SWITCH_STATUS_SUCCESS) {

			status = switch_core_asr_get_results(sth->ah, &xmlstr, &flags);

			/* Only SUCCESS, BREAK and MORE_DATA are handled below; any other status ends the thread. */
			if (status != SWITCH_STATUS_SUCCESS && status != SWITCH_STATUS_BREAK && status != SWITCH_STATUS_MORE_DATA) {
				goto done;
			} else {
				/* Try to fetch extra information for this result, the return value doesn't really matter here - it's just optional data. */
				switch_core_asr_get_result_headers(sth->ah, &headers, &flags);
			}

			/* Optional: when the channel variable asr_intercept_dtmf is true, convert
			   the text of a final result into queued DTMF digits. */
			if (status == SWITCH_STATUS_SUCCESS && switch_true(switch_channel_get_variable(channel, "asr_intercept_dtmf"))) {
				const char *p;

				/* Start scanning right after the "<input>" tag (7 characters long). */
				if ((p = switch_stristr("<input>", xmlstr))) {
					p += 7;
				}

				/* Walk the text up to the next '<'; p is NULL (loop skipped) if no "<input>" was found. */
				while (p && *p) {
					char c;

					if (*p == '<') {
						break;
					}

					/* Map the spoken words for '#' and '*' to their digits; anything else is taken one character at a time. */
					if (!strncasecmp(p, "pound", 5)) {
						c = '#';
						p += 5;
					} else if (!strncasecmp(p, "hash", 4)) {
						c = '#';
						p += 4;
					} else if (!strncasecmp(p, "star", 4)) {
						c = '*';
						p += 4;
					} else if (!strncasecmp(p, "asterisk", 8)) {
						c = '*';
						p += 8;
					} else {
						c = *p;
						p++;
					}

					/* Characters that are not valid DTMF digits are silently skipped. */
					if (is_dtmf(c)) {
						switch_dtmf_t dtmf = {0};
						dtmf.digit = c;
						dtmf.duration = switch_core_default_dtmf_duration(0);
						dtmf.source = SWITCH_DTMF_INBAND_AUDIO;
						switch_log_printf(SWITCH_CHANNEL_CHANNEL_LOG(channel), SWITCH_LOG_DEBUG, "Queue speech detected dtmf %c\n", c);
						switch_channel_queue_dtmf(channel, &dtmf);
					}

				}
				/* Detection is resumed whether or not any digits were queued. */
				switch_ivr_resume_detect_speech(sth->session);
			}

			/* Build the event; Speech-Type depends on the result status:
			   SUCCESS -> "detected-speech", MORE_DATA -> "detected-partial-speech",
			   otherwise (BREAK) -> "begin-speaking" with no body. */
			if (switch_event_create(&event, SWITCH_EVENT_DETECTED_SPEECH) == SWITCH_STATUS_SUCCESS) {
				if (status == SWITCH_STATUS_SUCCESS) {
					switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "Speech-Type", "detected-speech");

					if (headers) {
						switch_event_merge(event, headers);
					}

					switch_event_add_body(event, "%s", xmlstr);
				} else if (status == SWITCH_STATUS_MORE_DATA) {
					switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "Speech-Type", "detected-partial-speech");

					if (headers) {
						switch_event_merge(event, headers);
					}

					switch_event_add_body(event, "%s", xmlstr);
				} else {
					switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "Speech-Type", "begin-speaking");
				}

				/* With FIRE_EVENTS set, a copy carrying channel data is also fired. */
				if (switch_test_flag(sth->ah, SWITCH_ASR_FLAG_FIRE_EVENTS)) {
					switch_event_t *dup;

					if (switch_event_dup(&dup, event) == SWITCH_STATUS_SUCCESS) {
						switch_channel_event_set_data(channel, dup);
						switch_event_fire(&dup);
					}

				}

				/* Deliver to the session's event queue; if that fails, tag the event and fire it instead. */
				if (switch_core_session_queue_event(sth->session, &event) != SWITCH_STATUS_SUCCESS) {
					switch_log_printf(SWITCH_CHANNEL_CHANNEL_LOG(channel), SWITCH_LOG_ERROR, "Event queue failed!\n");
					switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "delivery-failure", "true");
					switch_event_fire(&event);
				}
			}

			/* Per-iteration cleanup of the result text and the optional headers. */
			switch_safe_free(xmlstr);

			if (headers) {
				switch_event_destroy(&headers);
			}
		}
	}
  done:

	/* Reached on normal loop exit and on a result error: announce that detection has
	   closed, using the same fire/queue/fallback sequence as for result events. */
	if (switch_event_create(&event, SWITCH_EVENT_DETECTED_SPEECH) == SWITCH_STATUS_SUCCESS) {
		switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "Speech-Type", "closed");
		if (switch_test_flag(sth->ah, SWITCH_ASR_FLAG_FIRE_EVENTS)) {
			switch_event_t *dup;

			if (switch_event_dup(&dup, event) == SWITCH_STATUS_SUCCESS) {
				switch_channel_event_set_data(channel, dup);
				switch_event_fire(&dup);
			}

		}

		if (switch_core_session_queue_event(sth->session, &event) != SWITCH_STATUS_SUCCESS) {
			switch_log_printf(SWITCH_CHANNEL_CHANNEL_LOG(channel), SWITCH_LOG_ERROR, "Event queue failed!\n");
			switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "delivery-failure", "true");
			switch_event_fire(&event);
		}
	}

	/* Release in reverse order of acquisition: handle mutex first, then the session read lock. */
	switch_mutex_unlock(sth->mutex);
	switch_core_session_rwunlock(sth->session);

	return NULL;
}


switch_ivr_play_and_detect_speech


	/* wait for result if not done */
	if (!state.done) {
		switch_ivr_detect_speech_start_input_timers(session);
		switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_INFO, "(%s) WAITING FOR RESULT\n", switch_channel_get_name(channel));
		while (!state.done && switch_channel_ready(channel)) {
			status = switch_ivr_sleep(session, input_timeout, SWITCH_FALSE, args);

			if (args->dmachine && switch_ivr_dmachine_last_ping(args->dmachine) != SWITCH_STATUS_SUCCESS) {
				state.done |= PLAY_AND_DETECT_DONE;
				goto done;
			}

			if (status != SWITCH_STATUS_BREAK && status != SWITCH_STATUS_SUCCESS) {
				status = SWITCH_STATUS_FALSE;
				goto done;
			}
		}
	}