最近開始協助同事撰寫Audio相關程式, 為了避免以後老人痴呆忘記, 所以還是在這裡多少註記一番, 一方面造福後人, 一方面能夠讓自己有個提醒.
正文開始前, 先來簡單說一下什麼叫做PCM什麼叫做AAC吧, 其實網路上文件很多, 講解也很多, 我在這裡就很簡單的帶過, 如果想要理解更詳細的, 可以直接Google一下, 應該會有更多資源能夠供你參考; 但本人這主要是先讓大家有一個很粗淺的概念, 所以會講得較為通俗一些, 一樣, 如果有任何錯誤, 歡迎指教.
PCM : 脈波編碼變調 , 簡單來說就是一種類比訊號數位化的方式, 那什麼是類比訊號? 你可以把類比訊號想像成是一種連續的訊號, 比如說電壓或聲音, 在你張口喊啊~~~~~~~~這樣的時候, 不管你啊幾秒, 這就是一個連續的訊息, 而類比轉數位的意思就是將你的啊啊啊啊啊啊啊加工, 讓它變成只有0和1的非連續訊息.
AAC : 進階音訊編碼, 英文是 Advanced Audio Coding, 有時候記這種名詞的時候, 用英文記其實會比中文來得好記多了; 言歸正傳, AAC是一種高壓縮比的有損音訊壓縮編碼演算法, 傳輸的編碼速率最高為320 kbit/s, 並且支援44.1kHz/24bit, 看不懂沒關係, 這裡沒有要教大家變成音訊專家, 簡單提就好; 而AAC也是目前Airpods/ Airpods pro 等產品的主要編碼, 其主流程度可想而知, 而其他編碼依照品質來說會有像是LDAC> aptX> AAC> SBC .., 一樣, 有興趣的玩家朋友們, 也歡迎自行了解一下.
接下來, 我們來寫寫程式吧 !
extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
}
//#Step1: Init.
// Registers all available muxers, demuxers and codecs with libavformat/
// libavcodec; required once before any other FFmpeg call on pre-4.0 releases
// (it is deprecated and a no-op from FFmpeg 4.0 onward).
// NOTE(review): as written this call sits at file scope, which is not valid
// C++ — presumably it belongs in application init (e.g. OnInitDialog); verify.
av_register_all();
void CFFmpegAudioDlg::OnBnClickedPcmtoaac()
{
// PCM to AAV
// Init parameter
AVFormatContext* pFormatContext = NULL;
AVOutputFormat* pFmt = NULL;
AVStream* pAudioStream = NULL;
AVCodecContext* pCodecContext = NULL;
AVCodec* pCodec = NULL;
uint8_t* FrameBuffer = NULL;
AVFrame* pFrame = NULL;
AVPacket Packet = { 0 };
int nGOTFrame = 0;
int res = 0;
int nSize = 0;
// Read Raw PCM data from file
FILE* hFile = NULL;
// Audio frame number
int nFrameNumber = 1000;
const char* OutputFileName = "OutputAAC.aac";
if ((hFile = fopen("PCMFile.pcm", "rb")) == NULL) //C4996
{
AppendText(L"The file 'crt_fopen.c' was not opened\r\n");
return;
}
else
AppendText(L"The file 'crt_fopen.c' was opened\r\n");
//#Step2: Allocate memory and select format.
int nMethodFlag = 0;
switch (nMethodFlag)
{
case 0:
{
// can be used to free the context and everything allocated by the framework within it.
pFormatContext = avformat_alloc_context();
if (!pFormatContext) {
AppendText(L"Could not allocate avformat_alloc_context()\r\n");
return;
}
// Initialize AVOutputFormat
pFmt = av_guess_format(NULL, OutputFileName, NULL);
if (!pFmt) {
AppendText(L"Could not find suitable output format for file.\r\n");
return;
}
pFormatContext->oformat = pFmt;
}break;
case 1:
default:
{
avformat_alloc_output_context2(&pFormatContext, NULL, NULL, OutputFileName);
pFmt = pFormatContext->oformat;
}break;
}
//#Step3: Open output URL.
if (avio_open(&pFormatContext->pb, OutputFileName, AVIO_FLAG_READ_WRITE) < 0) {
AppendText(L"Failed to open output file!\r\n");
return;
}
pAudioStream = avformat_new_stream(pFormatContext, 0);
if (NULL == pAudioStream) {
return;
}
pCodecContext = pAudioStream->codec;
pCodecContext->codec_id = pFmt->audio_codec;
pCodecContext->codec_type = AVMEDIA_TYPE_AUDIO;
pCodecContext->sample_fmt = AV_SAMPLE_FMT_S16;
/*
enum AVSampleFormat {
AV_SAMPLE_FMT_NONE = -1,
AV_SAMPLE_FMT_U8, ///< unsigned 8 bits
AV_SAMPLE_FMT_S16, ///< signed 16 bits
AV_SAMPLE_FMT_S32, ///< signed 32 bits
AV_SAMPLE_FMT_FLT, ///< float
AV_SAMPLE_FMT_DBL, ///< double
AV_SAMPLE_FMT_U8P, ///< unsigned 8 bits, planar
AV_SAMPLE_FMT_S16P, ///< signed 16 bits, planar
AV_SAMPLE_FMT_S32P, ///< signed 32 bits, planar
AV_SAMPLE_FMT_FLTP, ///< float, planar
AV_SAMPLE_FMT_DBLP, ///< double, planar
AV_SAMPLE_FMT_NB ///< Number of sample formats. DO NOT USE if linking dynamically
};
*/
pCodecContext->sample_rate = 44100;
pCodecContext->channel_layout = AV_CH_LAYOUT_STEREO;
pCodecContext->channels = av_get_channel_layout_nb_channels(pCodecContext->channel_layout);
pCodecContext->bit_rate = 64000;
// Print informations on input and output streams
av_dump_format(pFormatContext, 0, OutputFileName, 1);
pCodec = avcodec_find_encoder(pCodecContext->codec_id);
if (!pCodec) {
AppendText(L"Can not find encoder!\r\n");
return;
}
if (avcodec_open2(pCodecContext, pCodec, NULL) < 0) {
AppendText(L"Failed to open encoder!\r\n");
return;
}
pFrame = av_frame_alloc();
pFrame->nb_samples = pCodecContext->frame_size;
pFrame->format = pCodecContext->sample_fmt;
nSize = av_samples_get_buffer_size
(
NULL,
pCodecContext->channels,
pCodecContext->frame_size,
pCodecContext->sample_fmt,
1
);
AppendText(L"Buffer size :%d \r\n", nSize);
FrameBuffer = (uint8_t*)av_malloc(nSize);
avcodec_fill_audio_frame
(
pFrame ,
pCodecContext->channels ,
pCodecContext->sample_fmt ,
(const uint8_t*)FrameBuffer ,
nSize ,
1
);
// Write header
avformat_write_header(pFormatContext, NULL);
av_new_packet(&Packet, nSize);
int i;
for (i = 0; i < nFrameNumber; i++){
// Read PCM
if (fread(FrameBuffer, 1, nSize, hFile) <= 0) {
return;
}
else
if (feof(hFile))break;
// PCM Data
pFrame->data[0] = FrameBuffer;
pFrame->pts = i * 100;
nGOTFrame = 0;
// Encode
res = avcodec_encode_audio2(pCodecContext, &Packet, pFrame, &nGOTFrame);
if (res < 0) {
AppendText(L"Failed to encode!\r\n");
return;
}
if (nGOTFrame == 1) {
AppendText(L"[ %d ] Succeed to encode 1 frame, Size : %5d \r\n", i+1 ,Packet.size);
Packet.stream_index = pAudioStream->index;
res = av_write_frame(pFormatContext, &Packet);
av_free_packet(&Packet);
}
}
// Flush encoder
res = Flush_Encoder(pFormatContext, 0);
if (res < 0) {
AppendText(L"Flushing encoder failed.\r\n");
return;
}
// Write Trailer
av_write_trailer(pFormatContext);
// Clean
if (pAudioStream) {
avcodec_close(pAudioStream->codec);
av_free(pFrame);
av_free(FrameBuffer);
}
avio_close(pFormatContext->pb);
avformat_free_context(pFormatContext);
fclose(hFile);
AppendText(L"Total encode frame :%d, down.\r\n", i);
return;
}
int CFFmpegAudioDlg::Flush_Encoder(AVFormatContext* FormatContext, UINT32 Index)
{
// Drains frames still buffered inside the encoder of stream `Index` by
// feeding it NULL frames until it stops producing packets, writing each
// packet to the muxer. Returns 0 on success, <0 on the first error.
int res = 0;
int nGOTFrame = 0;
AVPacket AVEncodePacket = { 0 };
// Codecs without CODEC_CAP_DELAY never buffer frames — nothing to flush.
if (!(FormatContext->streams[Index]->codec->codec->capabilities & CODEC_CAP_DELAY))
{
return 0;
}
// BUGFIX: the loop condition was `while (0)`, so the encoder was never
// actually drained and any buffered trailing frames were silently lost.
while (1) {
AVEncodePacket.data = NULL;
AVEncodePacket.size = 0;
av_init_packet(&AVEncodePacket);
// A NULL frame signals "flush": emit whatever the encoder has buffered.
res = avcodec_encode_audio2
(
FormatContext->streams[Index]->codec,
&AVEncodePacket,
NULL,
&nGOTFrame
);
if (res < 0)
break;
if (!nGOTFrame) {
// Encoder fully drained.
res = 0;
break;
}
AppendText(L"Flush Encoder> Succeed encode 1 frame!\tSize:%5d\r\n", AVEncodePacket.size);
res = av_write_frame(FormatContext, &AVEncodePacket);
// BUGFIX: release the packet each iteration (it leaked before); the
// pointless av_frame_free(NULL) no-op was removed.
av_free_packet(&AVEncodePacket);
if (res < 0)break;
}
return res;
}