
live555 Source Code Analysis: H264 Data Handling

2013-07-31 23:20
Let's now analyze the H264-specific handling in live555. It mainly covers reading data from the file and splitting it into frames (NAL units), then fragmenting those frames; all of this happens before a frame is handed to the RTP sink. We pick up from the MultiFramedRTPSink::packFrame function discussed in the previous article ("RTP Packetization and Sending").

void MultiFramedRTPSink::packFrame() {
  if (fOutBuf->haveOverflowData()) {
    ...
  } else {
    ...

    // Get the next frame from the source:
    fSource->getNextFrame(fOutBuf->curPtr(), fOutBuf->totalBytesAvailable(),
                          afterGettingFrame, this, ourHandleClosure, this);
  }
}


getNextFrame is a non-virtual function defined in FramedSource: it obtains the next frame from the source and then invokes the callback afterGettingFrame. afterGettingFrame is declared static, because in C++ a non-static member function cannot be used as a plain function-pointer callback. But why use a callback at all? live555 runs a single-threaded event loop, so a read must not block: getNextFrame returns immediately, and the callback fires once the data has actually been produced.

Note that for H264 the fSource above is not a MPEGVideoStreamFramer, because H264VideoRTPSink::continuePlaying() replaces the value of fSource.

Boolean H264VideoRTPSink::continuePlaying() {
  // First, check whether we have a 'fragmenter' class set up yet.
  // If not, create it now:
  if (fOurFragmenter == NULL) {
    // Create a helper H264FUAFragmenter, which packetizes H264 into RTP per RFC 3984:
    fOurFragmenter = new H264FUAFragmenter(envir(), fSource, OutPacketBuffer::maxSize,
                                           ourMaxPacketSize() - 12/*RTP hdr size*/);
    fSource = fOurFragmenter;
  }

  // Then call the parent class's implementation:
  return MultiFramedRTPSink::continuePlaying();
}


fSource now points to an H264FUAFragmenter, the class that fragments H264 into RTP packets per RFC 3984. Note that this implementation puts at most one NAL unit into each RTP packet; the aggregation-packet modes are not implemented. Its inheritance chain is H264FUAFragmenter -> FramedFilter -> FramedSource. Clearly this is a filter, wrapping the MPEGVideoStreamFramer object; the sketch below illustrates the pattern it follows.
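To make the filter pattern concrete, here is a minimal sketch of a pass-through FramedFilter, written against the interfaces shown in this article. The class itself is made up for illustration; it is not live555 code. It shows the static-callback "trampoline" that every filter in the chain uses:

#include "FramedFilter.hh"

// Hypothetical pass-through filter, for illustration only.
class MyPassThroughFilter: public FramedFilter {
public:
  static MyPassThroughFilter* createNew(UsageEnvironment& env, FramedSource* inputSource) {
    return new MyPassThroughFilter(env, inputSource);
  }

protected:
  MyPassThroughFilter(UsageEnvironment& env, FramedSource* inputSource)
    : FramedFilter(env, inputSource) {}

private:
  virtual void doGetNextFrame() {
    // Ask the wrapped source to fill our reader's buffer directly, naming our
    // static trampoline as the completion callback:
    fInputSource->getNextFrame(fTo, fMaxSize, afterGettingFrame, this,
                               FramedSource::handleClosure, this);
  }

  static void afterGettingFrame(void* clientData, unsigned frameSize,
                                unsigned numTruncatedBytes,
                                struct timeval presentationTime,
                                unsigned durationInMicroseconds) {
    // The trampoline: recover the object pointer from clientData...
    MyPassThroughFilter* filter = (MyPassThroughFilter*)clientData;
    // ...record the frame parameters (a real filter would transform the data here)...
    filter->fFrameSize = frameSize;
    filter->fNumTruncatedBytes = numTruncatedBytes;
    filter->fPresentationTime = presentationTime;
    filter->fDurationInMicroseconds = durationInMicroseconds;
    // ...and hand the frame on to whoever called our getNextFrame():
    FramedSource::afterGetting(filter);
  }
};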

First, let's look at the implementation of getNextFrame.

void FramedSource::getNextFrame(unsigned char* to, unsigned maxSize,
                                afterGettingFunc* afterGettingFunc,
                                void* afterGettingClientData,
                                onCloseFunc* onCloseFunc,
                                void* onCloseClientData) {
  // Make sure we're not already being read:
  if (fIsCurrentlyAwaitingData) {
    envir() << "FramedSource[" << this << "]::getNextFrame(): attempting to read more than once at the same time!\n";
    envir().internalError();
  }

  fTo = to;             // destination buffer address
  fMaxSize = maxSize;   // maximum buffer length
  fNumTruncatedBytes = 0; // by default; could be changed by doGetNextFrame()
  fDurationInMicroseconds = 0; // by default; could be changed by doGetNextFrame()
  fAfterGettingFunc = afterGettingFunc;             // invoked once a frame has been fetched
  fAfterGettingClientData = afterGettingClientData; // here, a pointer to the MultiFramedRTPSink
  fOnCloseFunc = onCloseFunc;
  fOnCloseClientData = onCloseClientData;
  fIsCurrentlyAwaitingData = True;

  doGetNextFrame();
}


The function above mainly initializes member variables: the fetched frame is to be stored at the address in fTo, after which fAfterGettingFunc is invoked; if the file has been read to the end, fOnCloseFunc is invoked instead. The real work happens in doGetNextFrame, which is declared as a pure virtual function in FramedSource and must be implemented by each subclass.

Now let's look at the H264FUAFragmenter implementation of doGetNextFrame.

void H264FUAFragmenter::doGetNextFrame() {
  if (fNumValidDataBytes == 1) {
    // We have no NAL unit data currently in the buffer.  Read a new one:
    fInputSource->getNextFrame(&fInputBuffer[1], fInputBufferSize - 1,
                               afterGettingFrame, this,
                               FramedSource::handleClosure, this);
  } else {
    // We have NAL unit data in the buffer.  There are three cases to consider:
    // 1. There is a new NAL unit in the buffer, and it's small enough to deliver
    //    to the RTP sink (as is).
    // 2. There is a new NAL unit in the buffer, but it's too large to deliver to
    //    the RTP sink in its entirety.  Deliver the first fragment of this data,
    //    as a FU-A packet, with one extra preceding header byte.
    // 3. There is a NAL unit in the buffer, and we've already delivered some
    //    fragment(s) of this.  Deliver the next fragment of this data,
    //    as a FU-A packet, with two extra preceding header bytes.

    if (fMaxSize < fMaxOutputPacketSize) { // shouldn't happen
      envir() << "H264FUAFragmenter::doGetNextFrame(): fMaxSize ("
              << fMaxSize << ") is smaller than expected\n";
    } else {
      fMaxSize = fMaxOutputPacketSize;
    }

    fLastFragmentCompletedNALUnit = True; // by default
    if (fCurDataOffset == 1) { // case 1 or 2
      if (fNumValidDataBytes - 1 <= fMaxSize) { // case 1
        // Case 1: deliver the entire NAL unit as-is:
        memmove(fTo, &fInputBuffer[1], fNumValidDataBytes - 1);
        fFrameSize = fNumValidDataBytes - 1;
        fCurDataOffset = fNumValidDataBytes;
      } else { // case 2
        // Case 2: deliver the first fragment of the NAL unit.
        // We need to send the NAL unit data as FU-A packets.  Deliver the first
        // packet now.  Note that we add FU indicator and FU header bytes to the front
        // of the packet (reusing the existing NAL header byte for the FU header).
        fInputBuffer[0] = (fInputBuffer[1] & 0xE0) | 28; // FU indicator
        fInputBuffer[1] = 0x80 | (fInputBuffer[1] & 0x1F); // FU header (with S bit); reuses the NAL header byte
        memmove(fTo, fInputBuffer, fMaxSize);
        fFrameSize = fMaxSize;
        fCurDataOffset += fMaxSize - 1;
        fLastFragmentCompletedNALUnit = False;
      }
    } else { // case 3
      // Case 3: deliver a subsequent (non-first) fragment.
      // We are sending this NAL unit data as FU-A packets.  We've already sent the
      // first packet (fragment).  Now, send the next fragment.  Note that we add
      // FU indicator and FU header bytes to the front.  (We reuse these bytes that
      // we already sent for the first fragment, but clear the S bit, and add the E
      // bit if this is the last fragment.)
      fInputBuffer[fCurDataOffset-2] = fInputBuffer[0]; // FU indicator
      fInputBuffer[fCurDataOffset-1] = fInputBuffer[1]&~0x80; // FU header (no S bit)
      unsigned numBytesToSend = 2 + fNumValidDataBytes - fCurDataOffset;
      if (numBytesToSend > fMaxSize) {
        // We can't send all of the remaining data this time:
        numBytesToSend = fMaxSize;
        fLastFragmentCompletedNALUnit = False;
      } else {
        // This is the last fragment, so set the E bit in the FU header:
        fInputBuffer[fCurDataOffset-1] |= 0x40; // set the E bit in the FU header
        fNumTruncatedBytes = fSaveNumTruncatedBytes;
      }
      memmove(fTo, &fInputBuffer[fCurDataOffset-2], numBytesToSend);
      fFrameSize = numBytesToSend;
      fCurDataOffset += numBytesToSend - 2;
    }

    if (fCurDataOffset >= fNumValidDataBytes) {
      // We're done with this data.  Reset the pointers for receiving new data:
      fNumValidDataBytes = fCurDataOffset = 1;
    }

    // Complete delivery to the client:
    FramedSource::afterGetting(this);
  }
}


On its first execution, H264FUAFragmenter::doGetNextFrame takes the first branch and asks its input source for a new frame, which ends up in MPEGVideoStreamFramer::doGetNextFrame; we'll analyze the frame acquisition itself shortly. First, let's look at what happens after the frame has been fetched, in afterGettingFrame.

void H264FUAFragmenter::afterGettingFrame(void* clientData, unsigned frameSize,
                                          unsigned numTruncatedBytes,
                                          struct timeval presentationTime,
                                          unsigned durationInMicroseconds) {
  H264FUAFragmenter* fragmenter = (H264FUAFragmenter*)clientData;
  fragmenter->afterGettingFrame1(frameSize, numTruncatedBytes, presentationTime,
                                 durationInMicroseconds);
}


Nothing much to say here; next, the afterGettingFrame1 function.

void H264FUAFragmenter::afterGettingFrame1(unsigned frameSize,
                                           unsigned numTruncatedBytes,
                                           struct timeval presentationTime,
                                           unsigned durationInMicroseconds) {
  fNumValidDataBytes += frameSize;      // record the length of the frame just read
  fSaveNumTruncatedBytes = numTruncatedBytes;
  fPresentationTime = presentationTime;
  fDurationInMicroseconds = durationInMicroseconds;

  // Deliver data to the client:
  doGetNextFrame();
}


The code above first records a few values into member variables. fNumValidDataBytes matters: it is the length of the frame just read, plus 1 (byte fInputBuffer[0] is reserved so the FU indicator can be prepended). It then calls H264FUAFragmenter::doGetNextFrame() again, this time entering the second branch. This mutually recursive calling pattern can easily be confusing.

The second branch of H264FUAFragmenter::doGetNextFrame handles RTP fragmentation of H264, packetized according to RFC 3984. You may notice that the previous article, "RTP Packetization and Sending", also contained fragmentation code (in MultiFramedRTPSink::packFrame), which simply splits a frame at the MTU boundary. So why does H264 define its own RTP packetization scheme? A plausible reason is that a FU-A packet carries the original NAL type plus explicit start (S) and end (E) bits, so a receiver can reassemble NAL units, and resynchronize after packet loss, at the NALU level rather than relying on transport-layer fragmentation. The worked example below shows the header arithmetic.
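As a standalone worked example of the FU-A byte arithmetic used in case 2 and case 3 above (a sketch, not live555 code): take an IDR slice whose NAL header byte is 0x65, i.e. forbidden_zero_bit = 0, nal_ref_idc = 3, nal_unit_type = 5.

#include <cassert>

int main() {
  unsigned char nalHeader = 0x65; // example IDR slice NAL header: 0 11 00101

  unsigned char fuIndicator = (nalHeader & 0xE0) | 28;   // keep F and NRI bits, type = 28 (FU-A)
  unsigned char fuHeaderS   = 0x80 | (nalHeader & 0x1F); // first fragment: S bit + original nal_unit_type
  unsigned char fuHeaderMid = fuHeaderS & ~0x80;         // middle fragments: S bit cleared
  unsigned char fuHeaderE   = fuHeaderMid | 0x40;        // last fragment: E bit set

  assert(fuIndicator == 0x7C);
  assert(fuHeaderS   == 0x85);
  assert(fuHeaderMid == 0x05);
  assert(fuHeaderE   == 0x45);
  return 0;
}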

At the end of H264FUAFragmenter::doGetNextFrame(), FramedSource::afterGetting is called.

void FramedSource::afterGetting(FramedSource* source) {
  source->fIsCurrentlyAwaitingData = False; // the data has been fetched; no longer waiting
  // indicates that we can be read again
  // Note that this needs to be done here, in case the "fAfterFunc"
  // called below tries to read another frame (which it usually will)

  // Hand off to the callback for further processing:
  if (source->fAfterGettingFunc != NULL) {
    (*(source->fAfterGettingFunc))(source->fAfterGettingClientData,
                                   source->fFrameSize, source->fNumTruncatedBytes,
                                   source->fPresentationTime,
                                   source->fDurationInMicroseconds);
  }
}


The code above simply invokes the callback passed down through FramedSource::getNextFrame, which here is MultiFramedRTPSink::afterGettingFrame; that path was already analyzed in the previous article, "RTP Packetization and Sending".

Now let's look at how MPEGVideoStreamFramer::doGetNextFrame acquires a frame. The inheritance chain is H264VideoStreamFramer -> MPEGVideoStreamFramer -> FramedFilter -> FramedSource. The presence of FramedFilter in the chain tells us that H264VideoStreamFramer wraps another source: the byte-stream source that reads the file. A sketch of how this chain is typically wired up follows; doGetNextFrame itself is first implemented in MPEGVideoStreamFramer.
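For reference, here is roughly how that chain gets assembled in a live555 file server, modeled on H264VideoFileServerMediaSubsession from the same era of the code base (treat the exact signatures as indicative rather than authoritative):

FramedSource* H264VideoFileServerMediaSubsession
::createNewStreamSource(unsigned /*clientSessionId*/, unsigned& estBitrate) {
  estBitrate = 500; // kbps, estimate

  // The leaf source: a plain byte stream read from the *.264 file:
  ByteStreamFileSource* fileSource = ByteStreamFileSource::createNew(envir(), fFileName);
  if (fileSource == NULL) return NULL;

  // Wrap it in a framer, which carves the byte stream into NAL units:
  return H264VideoStreamFramer::createNew(envir(), fileSource);
}

With the chain wired up this way, we can return to doGetNextFrame: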

void MPEGVideoStreamFramer::doGetNextFrame() {
  fParser->registerReadInterest(fTo, fMaxSize); // register the destination buffer with the parser
  continueReadProcessing();                     // go on to read the data
}


Here MPEGVideoStreamFramer::fParser is a pointer of type MPEGVideoStreamParser, acting as the syntax parser. Next, the continueReadProcessing function.

void MPEGVideoStreamFramer::continueReadProcessing() {
  unsigned acquiredFrameSize = fParser->parse(); // parse the file's syntax (i.e. demux)
  if (acquiredFrameSize > 0) {
    // We were able to acquire a frame from the input.
    // It has already been copied to the reader's space.
    fFrameSize = acquiredFrameSize;
    fNumTruncatedBytes = fParser->numTruncatedBytes();

    // "fPresentationTime" should have already been computed.

    // Compute "fDurationInMicroseconds" now:
    fDurationInMicroseconds
      = (fFrameRate == 0.0 || ((int)fPictureCount) < 0) ? 0
      : (unsigned)((fPictureCount*1000000)/fFrameRate);
#ifdef DEBUG
    fprintf(stderr, "%d bytes @%u.%06d, fDurationInMicroseconds: %d ((%d*1000000)/%f)\n", acquiredFrameSize, fPresentationTime.tv_sec, fPresentationTime.tv_usec, fDurationInMicroseconds, fPictureCount, fFrameRate);
#endif
    fPictureCount = 0;

    // Call our own 'after getting' function.  Because we're not a 'leaf'
    // source, we can call this directly, without risking infinite recursion.
    afterGetting(this);
  } else {
    // We were unable to parse a complete frame from the input, because:
    // - we had to read more data from the source stream, or
    // - the source stream has ended.
  }
}


The function first calls MPEGVideoStreamParser::parse, which extracts one complete frame and copies it into fTo (fTo is the buffer inside OutPacketBuffer); the parse step necessarily also covers reading data from the file. Pay attention to the fNumTruncatedBytes variable: if fNumTruncatedBytes > 0, the actual frame length exceeded the capacity of fTo and data was lost, in which case you should consider enlarging the buffer (see the sketch below). Once a frame has been acquired, afterGetting handles the follow-up work.
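If you do hit truncation, the usual remedy is to raise the OutPacketBuffer::maxSize global before any sinks or framers allocate their buffers. A sketch, assuming a build that exposes this global as the versions discussed here do; 600000 is an arbitrary example value, not an official recommendation:

#include "MediaSink.hh"

// Call once, early, before creating RTP sinks / framers, so their internal
// buffers are allocated against the larger limit:
static void raiseOutputBufferLimit() {
  OutPacketBuffer::maxSize = 600000; // bytes; size this to your largest expected NAL unit
}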

First, the parse function. parse is a pure virtual function declared in MPEGVideoStreamParser and implemented in the subclass H264VideoStreamParser. Its job is to separate individual frames out of the file's byte stream; for H264 that means individual NAL units. The *.264 file format is very simple: NAL units are stored back to back, each preceded by a 0x00000001 start code (interior NAL units may also be separated by the 3-byte code 0x000001). The standalone scanner sketched below makes this layout easy to see.
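Here is a small self-contained scanner (independent of live555, minimal error handling) that walks a *.264 file and prints each NAL unit's header fields:

#include <cstdio>
#include <vector>

int main(int argc, char** argv) {
  if (argc < 2) { std::fprintf(stderr, "usage: %s file.264\n", argv[0]); return 1; }
  std::FILE* f = std::fopen(argv[1], "rb");
  if (f == NULL) { std::perror("fopen"); return 1; }

  std::vector<unsigned char> buf;
  int c;
  while ((c = std::fgetc(f)) != EOF) buf.push_back((unsigned char)c);
  std::fclose(f);

  size_t i = 0;
  const size_t n = buf.size();
  while (i + 2 < n) {
    size_t hdr; // position of the NAL header byte, just past the start code
    if (i + 3 < n && buf[i] == 0 && buf[i+1] == 0 && buf[i+2] == 0 && buf[i+3] == 1)
      hdr = i + 4; // 4-byte start code: 00 00 00 01
    else if (buf[i] == 0 && buf[i+1] == 0 && buf[i+2] == 1)
      hdr = i + 3; // 3-byte start code: 00 00 01
    else { ++i; continue; }

    if (hdr < n) {
      std::printf("offset %zu: nal_ref_idc=%u nal_unit_type=%u\n",
                  hdr, (unsigned)((buf[hdr] & 0x60) >> 5), (unsigned)(buf[hdr] & 0x1F));
    }
    i = hdr;
  }
  return 0;
}

Now, the parse() implementation itself: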

unsigned H264VideoStreamParser::parse() {
  try {
    // First find the start code and skip over it.  The stream must start with
    // a 0x00000001; subsequent NAL units may be separated by 0x000001 (3 bytes) instead:
    if (!fHaveSeenFirstStartCode) {
      // Skip over any input bytes that precede the first 0x00000001:
      u_int32_t first4Bytes;
      while ((first4Bytes = test4Bytes()) != 0x00000001) {
        get1Byte(); setParseState(); // ensures that we progress over bad data
      }
      skipBytes(4); // skip this initial code

      setParseState();
      fHaveSeenFirstStartCode = True; // from now on
    }

    if (fOutputStartCodeSize > 0) {
      // Include a start code in the output (i.e. copy it to the destination buffer too):
      save4Bytes(0x00000001);
    }

    // Then save everything up until the next 0x00000001 (4 bytes) or 0x000001 (3 bytes), or we hit EOF.
    // Also make note of the first byte, because it contains the "nal_unit_type":
    u_int8_t firstByte;
    if (haveSeenEOF()) {
      // We hit EOF the last time that we tried to parse this data,
      // so we know that the remaining unparsed data forms a complete NAL unit;
      // simply save whatever is left:
      unsigned remainingDataSize = totNumValidBytes() - curOffset();
      if (remainingDataSize == 0) (void)get1Byte(); // forces another read, which will cause EOF to get handled for real this time
#ifdef DEBUG
      fprintf(stderr, "This NAL unit (%d bytes) ends with EOF\n", remainingDataSize);
#endif
      if (remainingDataSize == 0) return 0;
      firstByte = get1Byte(); // save the first byte; it indicates the NAL unit's type
      saveByte(firstByte);

      while (--remainingDataSize > 0) {
        saveByte(get1Byte());
      }
    } else {
      u_int32_t next4Bytes = test4Bytes();
      firstByte = next4Bytes>>24; // save the first byte
      // Save everything up to the next start code:
      while (next4Bytes != 0x00000001 && (next4Bytes&0xFFFFFF00) != 0x00000100) {
        // We save at least some of "next4Bytes".
        if ((unsigned)(next4Bytes&0xFF) > 1) { // usually 4 bytes at a time; byte-by-byte testing is only needed near a possible start code
          // Common case: 0x00000001 or 0x000001 definitely doesn't begin anywhere in "next4Bytes", so we save all of it:
          save4Bytes(next4Bytes);
          skipBytes(4);
        } else {
          // Save the first byte, and continue testing the rest:
          saveByte(next4Bytes>>24);
          skipBytes(1);
        }
        next4Bytes = test4Bytes();
      }
      // Assert: next4Bytes starts with 0x00000001 or 0x000001, and we've saved all previous bytes (forming a complete NAL unit).
      // Skip over these remaining bytes, up until the start of the next NAL unit:
      if (next4Bytes == 0x00000001) {
        skipBytes(4);
      } else {
        skipBytes(3);
      }
    }

    u_int8_t nal_ref_idc = (firstByte&0x60)>>5;
    u_int8_t nal_unit_type = firstByte&0x1F;
#ifdef DEBUG
    fprintf(stderr, "Parsed %d-byte NAL-unit (nal_ref_idc: %d, nal_unit_type: %d (\"%s\"))\n",
            curFrameSize()-fOutputStartCodeSize, nal_ref_idc, nal_unit_type, nal_unit_type_description[nal_unit_type]);
#endif

    // Now analyze further, depending on the NAL unit's type:
    switch (nal_unit_type) {
    case 6: { // Supplemental enhancement information (SEI)
      analyze_sei_data();
      // Later, perhaps adjust "fPresentationTime" if we saw a "pic_timing" SEI payload??? #####
      break;
    }
    case 7: { // Sequence parameter set (SPS)
      // Save a copy of the SPS into the H264VideoStreamFramer (the PPS below is saved
      // likewise), in case the downstream object wants to see it; the SPS may also
      // carry frame rate information:
      usingSource()->saveCopyOfSPS(fStartOfFrame + fOutputStartCodeSize, fTo - fStartOfFrame - fOutputStartCodeSize);

      // Parse this NAL unit to check whether frame rate information is present:
      unsigned num_units_in_tick, time_scale, fixed_frame_rate_flag;
      analyze_seq_parameter_set_data(num_units_in_tick, time_scale, fixed_frame_rate_flag);
      if (time_scale > 0 && num_units_in_tick > 0) {
        usingSource()->fFrameRate = time_scale/(2.0*num_units_in_tick); // the SPS carried frame rate information
#ifdef DEBUG
        fprintf(stderr, "Set frame rate to %f fps\n", usingSource()->fFrameRate);
        if (fixed_frame_rate_flag == 0) {
          fprintf(stderr, "\tWARNING: \"fixed_frame_rate_flag\" was not set\n");
        }
#endif
      } else { // no frame rate in the SPS; fall back to the source's default frame rate
#ifdef DEBUG
        fprintf(stderr, "\tThis \"Sequence Parameter Set\" NAL unit contained no frame rate information, so we use a default frame rate of %f fps\n", usingSource()->fFrameRate);
#endif
      }
      break;
    }
    case 8: { // Picture parameter set (PPS)
      // Save a copy of this NAL unit, in case the downstream object wants to see it:
      usingSource()->saveCopyOfPPS(fStartOfFrame + fOutputStartCodeSize, fTo - fStartOfFrame - fOutputStartCodeSize);
    }
    }

    usingSource()->setPresentationTime(); // set the current presentation time
#ifdef DEBUG
    unsigned long secs = (unsigned long)usingSource()->fPresentationTime.tv_sec;
    unsigned uSecs = (unsigned)usingSource()->fPresentationTime.tv_usec;
    fprintf(stderr, "\tPresentation time: %lu.%06u\n", secs, uSecs);
#endif

    // If this NAL unit is a VCL NAL unit (i.e. one carrying video data), we also
    // scan the start of the next NAL unit, to determine whether this NAL unit
    // ends the current 'access unit' (roughly, one picture).  We need this
    // information to figure out when to increment "fPresentationTime".
    // (RTP streamers also need to know this in order to figure out whether or not to set the "M" bit.)
    Boolean thisNALUnitEndsAccessUnit = False; // until we learn otherwise
    if (haveSeenEOF()) {
      // There is no next NAL unit, so we assume that this one ends the current 'access unit':
      thisNALUnitEndsAccessUnit = True;
    } else {
      Boolean const isVCL = nal_unit_type <= 5 && nal_unit_type > 0; // Would need to include type 20 for SVC and MVC #####
      if (isVCL) {
        u_int32_t first4BytesOfNextNALUnit = test4Bytes();
        u_int8_t firstByteOfNextNALUnit = first4BytesOfNextNALUnit>>24;
        u_int8_t next_nal_ref_idc = (firstByteOfNextNALUnit&0x60)>>5;
        u_int8_t next_nal_unit_type = firstByteOfNextNALUnit&0x1F;
        if (next_nal_unit_type >= 6) {
          // The next NAL unit is not a VCL; therefore, we assume that this NAL unit ends the current 'access unit':
#ifdef DEBUG
          fprintf(stderr, "\t(The next NAL unit is not a VCL)\n");
#endif
          thisNALUnitEndsAccessUnit = True;
        } else {
          // The next NAL unit is also a VCL.  We need to examine it a little to figure out if it belongs to a different 'access unit'.
          // (We use many of the criteria described in section 7.4.1.2.4 of the H.264 specification.)
          Boolean IdrPicFlag = nal_unit_type == 5;
          Boolean next_IdrPicFlag = next_nal_unit_type == 5;
          if (next_IdrPicFlag != IdrPicFlag) {
            // IdrPicFlag differs in value
#ifdef DEBUG
            fprintf(stderr, "\t(IdrPicFlag differs in value)\n");
#endif
            thisNALUnitEndsAccessUnit = True;
          } else if (next_nal_ref_idc != nal_ref_idc && next_nal_ref_idc*nal_ref_idc == 0) {
            // nal_ref_idc differs in value with one of the nal_ref_idc values being equal to 0
#ifdef DEBUG
            fprintf(stderr, "\t(nal_ref_idc differs in value with one of the nal_ref_idc values being equal to 0)\n");
#endif
            thisNALUnitEndsAccessUnit = True;
          } else if ((nal_unit_type == 1 || nal_unit_type == 2 || nal_unit_type == 5)
                     && (next_nal_unit_type == 1 || next_nal_unit_type == 2 || next_nal_unit_type == 5)) {
            // Both this and the next NAL units begin with a "slice_header".
            // Parse this (for each), to get parameters that we can compare:

            // Current NAL unit's "slice_header":
            unsigned frame_num, pic_parameter_set_id, idr_pic_id;
            Boolean field_pic_flag, bottom_field_flag;
            analyze_slice_header(fStartOfFrame + fOutputStartCodeSize, fTo, nal_unit_type,
                                 frame_num, pic_parameter_set_id, idr_pic_id, field_pic_flag, bottom_field_flag);

            // Next NAL unit's "slice_header":
#ifdef DEBUG
            fprintf(stderr, "    Next NAL unit's slice_header:\n");
#endif
            u_int8_t next_slice_header[NUM_NEXT_SLICE_HEADER_BYTES_TO_ANALYZE];
            testBytes(next_slice_header, sizeof next_slice_header);
            unsigned next_frame_num, next_pic_parameter_set_id, next_idr_pic_id;
            Boolean next_field_pic_flag, next_bottom_field_flag;
            analyze_slice_header(next_slice_header, &next_slice_header[sizeof next_slice_header], next_nal_unit_type,
                                 next_frame_num, next_pic_parameter_set_id, next_idr_pic_id, next_field_pic_flag, next_bottom_field_flag);

            if (next_frame_num != frame_num) {
              // frame_num differs in value
#ifdef DEBUG
              fprintf(stderr, "\t(frame_num differs in value)\n");
#endif
              thisNALUnitEndsAccessUnit = True;
            } else if (next_pic_parameter_set_id != pic_parameter_set_id) {
              // pic_parameter_set_id differs in value
#ifdef DEBUG
              fprintf(stderr, "\t(pic_parameter_set_id differs in value)\n");
#endif
              thisNALUnitEndsAccessUnit = True;
            } else if (next_field_pic_flag != field_pic_flag) {
              // field_pic_flag differs in value
#ifdef DEBUG
              fprintf(stderr, "\t(field_pic_flag differs in value)\n");
#endif
              thisNALUnitEndsAccessUnit = True;
            } else if (next_bottom_field_flag != bottom_field_flag) {
              // bottom_field_flag differs in value
#ifdef DEBUG
              fprintf(stderr, "\t(bottom_field_flag differs in value)\n");
#endif
              thisNALUnitEndsAccessUnit = True;
            } else if (next_IdrPicFlag == 1 && next_idr_pic_id != idr_pic_id) {
              // IdrPicFlag is equal to 1 for both and idr_pic_id differs in value
              // Note: We already know that IdrPicFlag is the same for both.
#ifdef DEBUG
              fprintf(stderr, "\t(IdrPicFlag is equal to 1 for both and idr_pic_id differs in value)\n");
#endif
              thisNALUnitEndsAccessUnit = True;
            }
          }
        }
      }
    }

    if (thisNALUnitEndsAccessUnit) {
#ifdef DEBUG
      fprintf(stderr, "*****This NAL unit ends the current access unit*****\n");
#endif
      usingSource()->fPictureEndMarker = True; // this sets the "M" flag used later when packing RTP
      ++usingSource()->fPictureCount;

      // The presentation time only advances once the next NAL unit no longer
      // belongs to the current 'access unit'.
      // Note that the presentation time for the next NAL unit will be different:
      struct timeval& nextPT = usingSource()->fNextPresentationTime; // alias (note: a reference)
      nextPT = usingSource()->fPresentationTime;
      double nextFraction = nextPT.tv_usec/1000000.0 + 1/usingSource()->fFrameRate;
      unsigned nextSecsIncrement = (long)nextFraction;
      nextPT.tv_sec += (long)nextSecsIncrement;
      nextPT.tv_usec = (long)((nextFraction - nextSecsIncrement)*1000000);
    }
    setParseState();

    return curFrameSize();
  } catch (int /*e*/) {
#ifdef DEBUG
    fprintf(stderr, "H264VideoStreamParser::parse() EXCEPTION (This is normal behavior - *not* an error)\n");
#endif
    return 0; // the parsing got interrupted
  }
}


Besides extracting frames, H264VideoStreamParser::parse() also interprets some of the parameters inside the NAL units: SPS and PPS NAL units are copied into the H264VideoStreamFramer. An "access unit" can be understood here as one picture; it may consist of several NAL units, which obviously must all carry the same timestamp. In practice, an access unit often contains just a single NAL unit, which keeps things simple. The end-of-access-unit analysis follows section 7.4.1.2.4 of the H.264 specification. The snippet below walks the presentation-time arithmetic with concrete numbers.
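As a concrete check of the nextPT update shown in parse() (a standalone sketch, not live555 code): at fFrameRate = 25.0, each access unit advances the clock by 1/25 s = 40 ms.

#include <cstdio>
#include <sys/time.h>

int main() {
  double frameRate = 25.0;
  struct timeval pt = { 10, 0 }; // current presentation time: 10.000000 s

  // Same arithmetic as in parse() above:
  double nextFraction = pt.tv_usec / 1000000.0 + 1 / frameRate;      // 0.0 + 0.04
  unsigned nextSecsIncrement = (long)nextFraction;                   // 0
  pt.tv_sec += (long)nextSecsIncrement;                              // still 10
  pt.tv_usec = (long)((nextFraction - nextSecsIncrement) * 1000000); // 40000 us

  std::printf("next presentation time: %ld.%06ld\n",
              (long)pt.tv_sec, (long)pt.tv_usec); // prints 10.040000
  return 0;
}

Note how all NAL units inside one access unit reuse the same fPresentationTime; only when the access unit ends does nextPT move forward by one frame period.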