最近開始協助同事撰寫Audio相關程式, 為了避免以後老人痴呆忘記, 所以還是在這裡多少註記一番, 一方面造福後人, 一方面能夠讓自己有個提醒.
正文開始前, 先來簡單說一下什麼叫做PCM什麼叫做AAC吧, 其實網路上文件很多, 講解也很多, 我在這裡就很簡單的帶過, 如果想要理解更詳細的, 可以直接Google一下, 應該會有更多資源能夠供你參考; 但本人這主要是先讓大家有一個很粗淺的概念, 所以會講得較為通俗一些, 一樣, 如果有任何錯誤, 歡迎指教.
PCM : 脈波編碼變調 , 簡單來說就是一種類比訊號數位化的方式, 那什麼是類比訊號? 你可以把類比訊號想像成是一種連續的訊號, 比如說電壓或聲音, 在你張口喊啊~~~~~~~~這樣的時候, 不管你啊幾秒, 這就是一個連續的訊息, 而類比轉數位的意思就是將你的啊啊啊啊啊啊啊加工, 讓它變成只有0和1的非連續訊息.
AAC : 進階音訊編碼, 英文是 Advanced Audio Coding, 有時候記這種名詞的時候, 用英文記其實會比中文來得好記多了; 言歸正傳, AAC是一種高壓縮比的有損音訊壓縮編碼演算法, 傳輸的編碼速率最高為320 kbit/s, 並且支援44.1kHz/24bit, 看不懂沒關係, 這裡沒有要教大家變成音訊專家, 簡單提就好; 而AAC也是目前Airpods/ Airpods pro 等產品的主要編碼, 其主流程度可想而知, 而其他編碼依照品質來說會有像是LDAC> aptX> AAC> SBC .., 一樣, 有興趣的玩家朋友們, 也歡迎自行了解一下.
接下來, 我們來寫寫程式吧 !
extern "C"
{
#include "libavcodec/avcodec.h"
#include "libavformat/avformat.h"
}
//#Step1: Init.
// Registers all available muxers, demuxers and codecs with libavformat/
// libavcodec; required once before any other FFmpeg call on pre-4.0 releases
// (it is deprecated and a no-op from FFmpeg 4.0 onward).
// NOTE(review): as written this call sits at file scope, which is not valid
// C++ — presumably it belongs in application init (e.g. OnInitDialog); verify.
av_register_all();
void CFFmpegAudioDlg::OnBnClickedPcmtoaac()
{
// PCM to AAV
// Init parameter
AVFormatContext* pFormatContext = NULL;
AVOutputFormat* pFmt = NULL;
AVStream* pAudioStream = NULL;
AVCodecContext* pCodecContext = NULL;
AVCodec* pCodec = NULL;
uint8_t* FrameBuffer = NULL;
AVFrame* pFrame = NULL;
AVPacket Packet = { 0 };
int nGOTFrame = 0;
int res = 0;
int nSize = 0;
// Read Raw PCM data from file
FILE* hFile = NULL;
// Audio frame number
int nFrameNumber = 1000;
const char* OutputFileName = "OutputAAC.aac";
if ((hFile = fopen("PCMFile.pcm", "rb")) == NULL) //C4996
{
AppendText(L"The file 'crt_fopen.c' was not opened\r\n");
return;
}
else
AppendText(L"The file 'crt_fopen.c' was opened\r\n");
//#Step2: Allocate memory and select format.
int nMethodFlag = 0;
switch (nMethodFlag)
{
case 0:
{
// can be used to free the context and everything allocated by the framework within it.
pFormatContext = avformat_alloc_context();
if (!pFormatContext) {
AppendText(L"Could not allocate avformat_alloc_context()\r\n");
return;
}
// Initialize AVOutputFormat
pFmt = av_guess_format(NULL, OutputFileName, NULL);
if (!pFmt) {
AppendText(L"Could not find suitable output format for file.\r\n");
return;
}
pFormatContext->oformat = pFmt;
}break;
case 1:
default:
{
avformat_alloc_output_context2(&pFormatContext, NULL, NULL, OutputFileName);
pFmt = pFormatContext->oformat;
}break;
}
//#Step3: Open output URL.
if (avio_open(&pFormatContext->pb, OutputFileName, AVIO_FLAG_READ_WRITE) < 0) {
AppendText(L"Failed to open output file!\r\n");
return;
}
pAudioStream = avformat_new_stream(pFormatContext, 0);
if (NULL == pAudioStream) {
return;
}
pCodecContext = pAudioStream->codec;
pCodecContext->codec_id = pFmt->audio_codec;
pCodecContext->codec_type = AVMEDIA_TYPE_AUDIO;
pCodecContext->sample_fmt = AV_SAMPLE_FMT_S16;
/*
enum AVSampleFormat {
AV_SAMPLE_FMT_NONE = -1,
AV_SAMPLE_FMT_U8, ///< unsigned 8 bits
AV_SAMPLE_FMT_S16, ///< signed 16 bits
AV_SAMPLE_FMT_S32, ///< signed 32 bits
AV_SAMPLE_FMT_FLT, ///< float
AV_SAMPLE_FMT_DBL, ///< double
AV_SAMPLE_FMT_U8P, ///< unsigned 8 bits, planar
AV_SAMPLE_FMT_S16P, ///< signed 16 bits, planar
AV_SAMPLE_FMT_S32P, ///< signed 32 bits, planar
AV_SAMPLE_FMT_FLTP, ///< float, planar
AV_SAMPLE_FMT_DBLP, ///< double, planar
AV_SAMPLE_FMT_NB ///< Number of sample formats. DO NOT USE if linking dynamically
};
*/
pCodecContext->sample_rate = 44100;
pCodecContext->channel_layout = AV_CH_LAYOUT_STEREO;
pCodecContext->channels = av_get_channel_layout_nb_channels(pCodecContext->channel_layout);
pCodecContext->bit_rate = 64000;
// Print informations on input and output streams
av_dump_format(pFormatContext, 0, OutputFileName, 1);
pCodec = avcodec_find_encoder(pCodecContext->codec_id);
if (!pCodec) {
AppendText(L"Can not find encoder!\r\n");
return;
}
if (avcodec_open2(pCodecContext, pCodec, NULL) < 0) {
AppendText(L"Failed to open encoder!\r\n");
return;
}
pFrame = av_frame_alloc();
pFrame->nb_samples = pCodecContext->frame_size;
pFrame->format = pCodecContext->sample_fmt;
nSize = av_samples_get_buffer_size
(
NULL,
pCodecContext->channels,
pCodecContext->frame_size,
pCodecContext->sample_fmt,
1
);
AppendText(L"Buffer size :%d \r\n", nSize);
FrameBuffer = (uint8_t*)av_malloc(nSize);
avcodec_fill_audio_frame
(
pFrame ,
pCodecContext->channels ,
pCodecContext->sample_fmt ,
(const uint8_t*)FrameBuffer ,
nSize ,
1
);
// Write header
avformat_write_header(pFormatContext, NULL);
av_new_packet(&Packet, nSize);
int i;
for (i = 0; i < nFrameNumber; i++){
// Read PCM
if (fread(FrameBuffer, 1, nSize, hFile) <= 0) {
return;
}
else
if (feof(hFile))break;
// PCM Data
pFrame->data[0] = FrameBuffer;
pFrame->pts = i * 100;
nGOTFrame = 0;
// Encode
res = avcodec_encode_audio2(pCodecContext, &Packet, pFrame, &nGOTFrame);
if (res < 0) {
AppendText(L"Failed to encode!\r\n");
return;
}
if (nGOTFrame == 1) {
AppendText(L"[ %d ] Succeed to encode 1 frame, Size : %5d \r\n", i+1 ,Packet.size);
Packet.stream_index = pAudioStream->index;
res = av_write_frame(pFormatContext, &Packet);
av_free_packet(&Packet);
}
}
// Flush encoder
res = Flush_Encoder(pFormatContext, 0);
if (res < 0) {
AppendText(L"Flushing encoder failed.\r\n");
return;
}
// Write Trailer
av_write_trailer(pFormatContext);
// Clean
if (pAudioStream) {
avcodec_close(pAudioStream->codec);
av_free(pFrame);
av_free(FrameBuffer);
}
avio_close(pFormatContext->pb);
avformat_free_context(pFormatContext);
fclose(hFile);
AppendText(L"Total encode frame :%d, down.\r\n", i);
return;
}
int CFFmpegAudioDlg::Flush_Encoder(AVFormatContext* FormatContext, UINT32 Index)
{
// Drains frames still buffered inside the encoder of stream `Index` by
// feeding it NULL frames until it stops producing packets, writing each
// packet to the muxer. Returns 0 on success, <0 on the first error.
int res = 0;
int nGOTFrame = 0;
AVPacket AVEncodePacket = { 0 };
// Codecs without CODEC_CAP_DELAY never buffer frames — nothing to flush.
if (!(FormatContext->streams[Index]->codec->codec->capabilities & CODEC_CAP_DELAY))
{
return 0;
}
// BUGFIX: the loop condition was `while (0)`, so the encoder was never
// actually drained and any buffered trailing frames were silently lost.
while (1) {
AVEncodePacket.data = NULL;
AVEncodePacket.size = 0;
av_init_packet(&AVEncodePacket);
// A NULL frame signals "flush": emit whatever the encoder has buffered.
res = avcodec_encode_audio2
(
FormatContext->streams[Index]->codec,
&AVEncodePacket,
NULL,
&nGOTFrame
);
if (res < 0)
break;
if (!nGOTFrame) {
// Encoder fully drained.
res = 0;
break;
}
AppendText(L"Flush Encoder> Succeed encode 1 frame!\tSize:%5d\r\n", AVEncodePacket.size);
res = av_write_frame(FormatContext, &AVEncodePacket);
// BUGFIX: release the packet each iteration (it leaked before); the
// pointless av_frame_free(NULL) no-op was removed.
av_free_packet(&AVEncodePacket);
if (res < 0)break;
}
return res;
}