您的位置:首页 > 其它

Speex 之回声消除

2013-01-05 17:15 363 查看
为什么需要声学回声消除呢?在一般的VOIP软件或视频会议系统中,假设我们只有A和B两个人在通话,首先,A的声音传给B,B然后用喇叭放出来,而这时B的MIC呢则会采集到喇叭放出来的声音,然后传回给A,如果这个传输的过程中时延足够大,A就会听到一个和自己刚才说过的话一样的声音,这就是回声,声学回声消除器的作用就是在B端对B采集到的声音进行处理,把采集到声音包含的A的声音去掉再传给A,这样,A就不会听到自己说过的话了。

声学回声消除的原理我就不说了,这在网上有很多文档,网上缺少的是实现,所以,我在这把一个开源的声学回声消除器介绍一下,希望对有些人有用,如果有人知道怎么把这个消除器用到基于实时流的VOIP软件中,希望能一起分享一下。

这个声学回声消除器是一个著名的音频编解码器speex中的一部分,1.1.9版本后的回声消除器才起作用,以前版本的都不行,我用的也是这个版本,测试表明,用同一个模拟文件,它的效果比INTEL IPP库4.1版中的声学回声消除器还要好。

先说编译。首先,从www.speex.org上下载speex1.1.9的源代码,解压,打开speex\win32\libspeex中的libspeex.dsw,这个工作区里有两个工程,一个是 libspeex,另一个是libspeex_dynamic。然后,将libspeex中的mdf.c文件添加到工程libspeex中,编译即可。

以下是我根据文档封装的一个类,里面有一个测试程序: //file name: speexEC.h

#ifndef SPEEX_EC_H

#define SPEEX_EC_H

#include <stdio.h>

#include <stdlib.h>

#include "speex/speex_echo.h"

#include "speex/speex_preprocess.h"

class CSpeexEC

{

public:

CSpeexEC();

~CSpeexEC();

void Init(int frame_size=160, int filter_length=1280, int sampling_rate=8000);

void DoAEC(short *mic, short *ref, short *out);

protected:

void Reset();

private:

bool 
     m_bHasInit;

SpeexEchoState*   m_pState;

    SpeexPreprocessState* m_pPreprocessorState;

int      m_nFrameSize;

int      m_nFilterLen;

int      m_nSampleRate;

float*      m_pfNoise;

};

#endif

//file name: speexEC.cpp

#include "SpeexEC.h"

// Creates an uninitialized canceller: no Speex state is allocated yet and the
// stored parameters hold the defaults (160-sample frames, 1280-tap filter,
// 8000 Hz). Init() must be called before DoAEC() has any effect.
CSpeexEC::CSpeexEC()
    : m_bHasInit(false),
      m_pState(NULL),
      m_pPreprocessorState(NULL),
      m_nFrameSize(160),
      m_nFilterLen(160*8),
      m_nSampleRate(8000),
      m_pfNoise(NULL)
{
}

// Tears the object down by delegating all cleanup to Reset().
CSpeexEC::~CSpeexEC()
{
    this->Reset();
}

// (Re)initializes the echo canceller and the preprocessor.
//
// frame_size     forwarded to speex_echo_state_init and speex_preprocess_state_init
// filter_length  forwarded to speex_echo_state_init
// sampling_rate  forwarded to speex_preprocess_state_init
//
// Any previous state is released first. If any argument is not positive, all
// three are replaced by the defaults 160 / 160*8 / 8000. The object is marked
// initialized only when both Speex states were actually created; otherwise
// everything is released again and DoAEC() stays a no-op.
void CSpeexEC::Init(int frame_size, int filter_length, int sampling_rate)
{
    Reset();

    if (frame_size<=0 || filter_length<=0 || sampling_rate<=0)
    {
        m_nFrameSize  = 160;
        m_nFilterLen  = 160*8;
        m_nSampleRate = 8000;
    }
    else
    {
        m_nFrameSize  = frame_size;
        m_nFilterLen  = filter_length;
        m_nSampleRate = sampling_rate;
    }

    m_pState = speex_echo_state_init(m_nFrameSize, m_nFilterLen);
    m_pPreprocessorState = speex_preprocess_state_init(m_nFrameSize, m_nSampleRate);

    // Do not report success with a missing state: DoAEC() would pass NULL to Speex.
    if (m_pState == NULL || m_pPreprocessorState == NULL)
    {
        Reset();
        return;
    }

    // Buffer handed from speex_echo_cancel to speex_preprocess; frame size + 1
    // entries as in the original code. Zeroed so it never holds indeterminate values.
    m_pfNoise = new float[m_nFrameSize+1];
    for (int i = 0; i < m_nFrameSize+1; ++i)
        m_pfNoise[i] = 0.0f;

    m_bHasInit = true;
}

void CSpeexEC::Reset()

{

if (m_pState != NULL)

{

  speex_echo_state_destroy(m_pState);

  m_pState = NULL;

}

if (m_pPreprocessorState != NULL)

{

  speex_preprocess_state_destroy(m_pPreprocessorState);

  m_pPreprocessorState = NULL;

}

if (m_pfNoise != NULL)

{

  delete []m_pfNoise;

  m_pfNoise = NULL;

}

m_bHasInit = false;

}

void CSpeexEC:DoAEC(short* mic, short* ref, short* out)

{

if (!m_bHasInit)

  return;

speex_echo_cancel(m_pState, mic, ref, out, m_pfNoise);

    speex_preprocess(m_pPreprocessorState, (__int16 *)out, m_pfNoise);

   

}

可以看出,这个回声消除器类很简单,只要初始化一下就可以调用了。但是,要注意的是,传给回声消除器的两个声音信号,必须同步得非常的好,就是说,在B端,接收到A说的话以后,要把这些话音数据传给回声消除器做参考,然后再传给声卡,声卡再放出来,这有一段延时,这时,B再采集,然后传给回声消除器,与那个参考数据比较,从采集到的数据中把频域和参考数据相同的部分消除掉。如果传给消除器的两个信号同步得不好,即两个信号找不到频域相同的部分,就没有办法进行消除了。

测试程序:

#define NN 160

// File-based test driver: reads NN-sample frames of 16-bit PCM from mic.pcm
// and ref.pcm, runs them through CSpeexEC and writes the result to echo.pcm.
// Returns 0 on success, 1 if a file cannot be opened or a write fails.
int main()
{
    FILE* ref_fd = fopen ("ref.pcm", "rb");  // reference signal, i.e. the sound to be cancelled
    FILE* mic_fd = fopen ("mic.pcm",  "rb"); // microphone capture, contains the echo
    FILE* out_fd = fopen ("echo.pcm", "wb"); // output with the echo removed

    if (ref_fd == NULL || mic_fd == NULL || out_fd == NULL)
    {
        fprintf(stderr, "cannot open ref.pcm / mic.pcm / echo.pcm\n");
        if (ref_fd != NULL) fclose(ref_fd);
        if (mic_fd != NULL) fclose(mic_fd);
        if (out_fd != NULL) fclose(out_fd);
        return 1;
    }

    short ref[NN], mic[NN];
    short out[NN] = {0};
    int status = 0;

    CSpeexEC ec;
    ec.Init(NN);  // frame size must match the buffers above; other parameters keep their defaults

    // Only complete frames are processed; a trailing partial mic frame is dropped
    // instead of being mixed with stale samples from the previous iteration.
    while (fread(mic, sizeof(short), NN, mic_fd) == (size_t)NN)
    {
        // If the reference file is shorter than the capture, pad with silence.
        size_t got = fread(ref, sizeof(short), NN, ref_fd);
        for (size_t i = got; i < (size_t)NN; ++i)
            ref[i] = 0;

        ec.DoAEC(mic, ref, out);

        if (fwrite(out, sizeof(short), NN, out_fd) != (size_t)NN)
        {
            fprintf(stderr, "write to echo.pcm failed\n");
            status = 1;
            break;
        }
    }

    fclose(ref_fd);
    fclose(mic_fd);
    fclose(out_fd);

    return status;
}

  以上的程序是用文件来模拟回声和MIC,但在实时流中是大不一样的,在一般的VOIP软件中,接收对方的声音并传到声卡中播放是在一个线程中进行的,而采集本地的声音并传送到对方又是在另一个线程中进行的,而声学回声消除器在对采集到的声音进行回声消除的同时,还需要播放线程中的数据作为参考,而要同步这两个线程中的数据是非常困难的,因为稍稍有些不同步,声学回声消除器中的自适应滤波器就会发散,不但消除不了回声,还会破坏原始采集到的声音,使被破坏的声音难以分辨。我做过好多尝试,始终无法用软件来实现对这两个线程中的数据进行同步,导致实现失败,希望有经验的网友们一起分享一下这方面的经验。

示例代码:

Sample code

This section shows sample code for encoding and decoding speech using the Speex API. The commands can be used to encode and decode a file by calling:

% sampleenc in_file.sw | sampledec out_file.sw

where both files are raw (no header) files encoded at 16 bits per sample (in the machine natural endianness).

sampleenc.c

sampleenc takes a raw 16 bits/sample file, encodes it and outputs a Speex stream to stdout. Note that the packing used is NOT compatible with that of speexenc/speexdec.

#include <speex/speex.h>

#include <stdio.h>

#define FRAME_SIZE 160

/*
 * sampleenc: encodes the raw 16-bit PCM file named by argv[1] with the Speex
 * narrowband encoder (quality 8) and writes the result to stdout.
 *
 * Output layout, per frame: the byte count as a native `int`, followed by
 * that many bytes of Speex data.
 *
 * Returns 0 on success, 1 on a usage error or if the input cannot be opened.
 */
int main(int argc, char **argv)
{
    char *inFile;
    FILE *fin;
    short in[FRAME_SIZE];
    float input[FRAME_SIZE];
    char cbits[200];
    int nbBytes;
    void *state;
    SpeexBits bits;
    int i, tmp;

    if (argc < 2)
    {
        fprintf(stderr, "usage: sampleenc in_file.sw\n");
        return 1;
    }

    inFile = argv[1];
    /* "rb": the input is binary PCM; text mode would corrupt it on some platforms. */
    fin = fopen(inFile, "rb");
    if (fin == NULL)
    {
        perror(inFile);
        return 1;
    }

    state = speex_encoder_init(&speex_nb_mode);

    tmp=8;
    speex_encoder_ctl(state, SPEEX_SET_QUALITY, &tmp);

    speex_bits_init(&bits);

    /* Stop on the first incomplete frame. Testing the item count (rather than
       feof) also ends the loop on a read error instead of spinning forever. */
    while (fread(in, sizeof(short), FRAME_SIZE, fin) == FRAME_SIZE)
    {
        /* The encoder is fed floats, so widen each 16-bit sample. */
        for (i=0;i<FRAME_SIZE;i++)
            input[i]=in[i];

        speex_bits_reset(&bits);
        speex_encode(state, input, &bits);
        nbBytes = speex_bits_write(&bits, cbits, 200);

        fwrite(&nbBytes, sizeof(int), 1, stdout);
        fwrite(cbits, 1, nbBytes, stdout);
    }

    speex_encoder_destroy(state);
    speex_bits_destroy(&bits);
    fclose(fin);

    return 0;
}

sampledec.c

sampledec reads a Speex stream from stdin, decodes it and outputs it to a raw 16 bits/sample file. Note that the packing used is NOT compatible with that of speexenc/speexdec.

#include <speex/speex.h>

#include <stdio.h>

#define FRAME_SIZE 160

/*
 * sampledec: reads the stream produced by sampleenc from stdin (per frame: a
 * native `int` byte count followed by that many bytes), decodes it with the
 * Speex narrowband decoder and writes raw 16-bit PCM to the file named by
 * argv[1].
 *
 * Returns 0 on success, 1 on a usage error, if the output cannot be opened,
 * or if the input stream is malformed or truncated.
 */
int main(int argc, char **argv)
{
    char *outFile;
    FILE *fout;
    short out[FRAME_SIZE];
    float output[FRAME_SIZE];
    char cbits[200];
    int nbBytes;
    void *state;
    SpeexBits bits;
    int i, tmp;
    float sample;
    int status = 0;

    if (argc < 2)
    {
        fprintf(stderr, "usage: sampledec out_file.sw\n");
        return 1;
    }

    outFile = argv[1];
    /* "wb": the output is binary PCM; text mode would corrupt it on some platforms. */
    fout = fopen(outFile, "wb");
    if (fout == NULL)
    {
        perror(outFile);
        return 1;
    }

    state = speex_decoder_init(&speex_nb_mode);

    tmp=1;
    speex_decoder_ctl(state, SPEEX_SET_ENH, &tmp);

    speex_bits_init(&bits);

    while (1)
    {
        /* A failed read of the length prefix is the normal end of the stream. */
        if (fread(&nbBytes, sizeof(int), 1, stdin) != 1)
            break;

        fprintf (stderr, "nbBytes: %d\n", nbBytes);

        /* The length comes from untrusted input: reject anything that does not
           fit in cbits before using it as a read size. */
        if (nbBytes < 0 || nbBytes > (int)sizeof(cbits))
        {
            fprintf(stderr, "invalid frame size: %d\n", nbBytes);
            status = 1;
            break;
        }

        if (fread(cbits, 1, nbBytes, stdin) != (size_t)nbBytes)
        {
            fprintf(stderr, "truncated frame\n");
            status = 1;
            break;
        }

        speex_bits_read_from(&bits, cbits, nbBytes);
        speex_decode(state, &bits, output);

        /* Clamp before narrowing: converting an out-of-range float to short is undefined. */
        for (i=0;i<FRAME_SIZE;i++)
        {
            sample = output[i];
            if (sample > 32767.0f)
                sample = 32767.0f;
            else if (sample < -32768.0f)
                sample = -32768.0f;
            out[i] = (short)sample;
        }

        fwrite(out, sizeof(short), FRAME_SIZE, fout);
    }

    speex_decoder_destroy(state);
    speex_bits_destroy(&bits);
    fclose(fout);

    return status;
}

 

 

开源 H323 协议中封装的使用参考代码:

#include <ptlib.h>

#ifdef __GNUC__

#pragma implementation "speexcodec.h"

#endif

#include "speexcodec.h"

#include "h323caps.h"

#include "h245.h"

#include "rtp.h"

extern "C" {

#include "speex/libspeex/speex.h"

};

// Replace every later use of `new` in this file with PNEW.
// NOTE(review): PNEW is presumably the allocation macro supplied by <ptlib.h> — confirm.
#define new PNEW

// Identification triple (country code, extension, manufacturer code) for the
// "Xiph" capability variants.
// NOTE(review): presumably the T.35 identifier of an H.245 non-standard
// capability — confirm against speexcodec.h.
#define XIPH_COUNTRY_CODE       0xB5  // (181) Country code for United States

#define XIPH_T35EXTENSION       0

#define XIPH_MANUFACTURER_CODE  0x0026 // Allocated by Delta Inc

// Same triple for the "equivalence" capability variants.
#define EQUIVALENCE_COUNTRY_CODE       9  // Country code for Australia

#define EQUIVALENCE_T35EXTENSION       0

#define EQUIVALENCE_MANUFACTURER_CODE  61 // Allocated by Australian Communications Authority, Oct 2000

// Number of audio samples in one codec frame.
#define SAMPLES_PER_FRAME        160

// Common prefix of all capability names; joined to the suffixes below by
// string-literal concatenation, e.g. "SpeexNarrow-8k{sw}".
#define SPEEX_BASE_NAME "Speex"

// Capability names for narrowband modes 2..6.
// NOTE(review): the numeric suffix looks like the nominal bit rate — confirm.
#define SPEEX_NARROW2_H323_NAME    SPEEX_BASE_NAME "Narrow-5.95k{sw}"

#define SPEEX_NARROW3_H323_NAME    SPEEX_BASE_NAME "Narrow-8k{sw}"

#define SPEEX_NARROW4_H323_NAME    SPEEX_BASE_NAME "Narrow-11k{sw}"

#define SPEEX_NARROW5_H323_NAME    SPEEX_BASE_NAME "Narrow-15k{sw}"

#define SPEEX_NARROW6_H323_NAME    SPEEX_BASE_NAME "Narrow-18.2k{sw}"

// Register one capability class per name (H323_REGISTER_CAPABILITY is defined
// outside this file).
H323_REGISTER_CAPABILITY(SpeexNarrow2AudioCapability, SPEEX_NARROW2_H323_NAME);

H323_REGISTER_CAPABILITY(SpeexNarrow3AudioCapability, SPEEX_NARROW3_H323_NAME);

H323_REGISTER_CAPABILITY(SpeexNarrow4AudioCapability, SPEEX_NARROW4_H323_NAME);

H323_REGISTER_CAPABILITY(SpeexNarrow5AudioCapability, SPEEX_NARROW5_H323_NAME);

H323_REGISTER_CAPABILITY(SpeexNarrow6AudioCapability, SPEEX_NARROW6_H323_NAME);

// Names for the "(Xiph)" variants of the same five modes.
#define XIPH_SPEEX_NARROW2_H323_NAME    SPEEX_BASE_NAME "Narrow-5.95k(Xiph){sw}"

#define XIPH_SPEEX_NARROW3_H323_NAME    SPEEX_BASE_NAME "Narrow-8k(Xiph){sw}"

#define XIPH_SPEEX_NARROW4_H323_NAME    SPEEX_BASE_NAME "Narrow-11k(Xiph){sw}"

#define XIPH_SPEEX_NARROW5_H323_NAME    SPEEX_BASE_NAME "Narrow-15k(Xiph){sw}"

#define XIPH_SPEEX_NARROW6_H323_NAME    SPEEX_BASE_NAME "Narrow-18.2k(Xiph){sw}"

// Register the "(Xiph)" capability classes.
H323_REGISTER_CAPABILITY(XiphSpeexNarrow2AudioCapability, XIPH_SPEEX_NARROW2_H323_NAME);

H323_REGISTER_CAPABILITY(XiphSpeexNarrow3AudioCapability, XIPH_SPEEX_NARROW3_H323_NAME);

H323_REGISTER_CAPABILITY(XiphSpeexNarrow4AudioCapability, XIPH_SPEEX_NARROW4_H323_NAME);

H323_REGISTER_CAPABILITY(XiphSpeexNarrow5AudioCapability, XIPH_SPEEX_NARROW5_H323_NAME);

H323_REGISTER_CAPABILITY(XiphSpeexNarrow6AudioCapability, XIPH_SPEEX_NARROW6_H323_NAME);

/////////////////////////////////////////////////////////////////////////

static int Speex_Bits_Per_Second(int mode) {

    void *tmp_coder_state;

    int bitrate;

    tmp_coder_state = speex_encoder_init(&speex_nb_mode);

    speex_encoder_ctl(tmp_coder_state, SPEEX_SET_QUALITY, &mode);

    speex_encoder_ctl(tmp_coder_state, SPEEX_GET_BITRATE, &bitrate);

    speex_encoder_destroy(tmp_coder_state);

    return bitrate;

}

static int Speex_Bytes_Per_Frame(int mode) {

    int bits_per_frame = Speex_Bits_Per_Second(mode) / 50; // (20ms frame size)

    return ((bits_per_frame+7)/8); // round up

}

OpalMediaFormat const OpalSpeexNarrow_5k95(OPAL_SPEEX_NARROW_5k95,

                                           OpalMediaFormat::DefaultAudioSessionID,

               
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息