ffmpeg编码,webm容器中的Opus声音不起作用

时间:2017-07-02 17:29:04

标签: audio ffmpeg opus

我正在尝试使用VP8和Opus编码将音频和视频编码为webm文件。它几乎可以工作。 (我使用FFmpeg 3.3.2)

我可以创建一个唯一的视频webm文件并在VLC,FFPlay中播放并将其上传到YouTube(以及所有作品)。如果我将Opus声音添加到文件中,它仍可在VLC中使用,但在FFPlay或youtube上,在youtube上,声音变得只是“滴答声”。

如果我只将Opus音频编码为webm文件,我会遇到同样的问题;它只适用于VLC。但是,如果我只将Opus音频编码为 ogg容器,它可以在任何地方使用,我甚至可以使用FFmpeg将ogg文件与仅视频webm文件合并,并生成带有音频和视频的完整工作webm文件

所以在我看来,只有当我使用我的代码将Opus编码为webm容器时,它才会在大多数播放器和YouTube中运行。我需要它在youtube上工作。

以下是opus仅限webm编码的代码(您可以使用定义切换ogg / webm):https://pastebin.com/jyQ4s3tB

#include <algorithm>
#include <iterator>

extern "C"
{

//#define OGG

#include "libavcodec/avcodec.h"
#include "libavdevice/avdevice.h"
#include "libavfilter/avfilter.h"
#include "libavformat/avformat.h"
#include "libavutil/avutil.h"
#include "libavutil/imgutils.h"
#include "libswscale/swscale.h"
#include "libswresample/swresample.h"

    enum InfoCodes
    {
        ENCODED_VIDEO,
        ENCODED_AUDIO,
        ENCODED_AUDIO_AND_VIDEO,
        NOT_ENOUGH_AUDIO_DATA,
    };

    enum ErrorCodes
    {
        RES_NOT_MUL_OF_TWO = -1,
        ERROR_FINDING_VID_CODEC = -2,
        ERROR_CONTEXT_CREATION = -3,
        ERROR_CONTEXT_ALLOCATING = -4,
        ERROR_OPENING_VID_CODEC = -5,
        ERROR_OPENING_FILE = -6,
        ERROR_ALLOCATING_FRAME = -7,
        ERROR_ALLOCATING_PIC_BUF = -8,
        ERROR_ENCODING_FRAME_SEND = -9,
        ERROR_ENCODING_FRAME_RECEIVE = -10,
        ERROR_FINDING_AUD_CODEC = -11,
        ERROR_OPENING_AUD_CODEC = -12,
        ERROR_INIT_RESMPL_CONTEXT = -13,
        ERROR_ENCODING_SAMPLES_SEND = -14,
        ERROR_ENCODING_SAMPLES_RECEIVE = -15,
        ERROR_WRITING_HEADER = -16,
        ERROR_INIT_AUDIO_RESPAMLER = -17,
    };

    AVCodecID aud_codec_comp_id = AV_CODEC_ID_OPUS;
    AVSampleFormat sample_fmt_comp = AV_SAMPLE_FMT_FLT;

    AVCodecID aud_codec_id;
    AVSampleFormat sample_fmt;

#ifndef OGG
    char* compressed_cont = "webm";
#endif
#ifdef OGG
    char* compressed_cont = "ogg";
#endif

    AVCodec *aud_codec = NULL;
    AVCodecContext *aud_codec_context = NULL;
    AVFormatContext *outctx;
    AVStream *audio_st;
    AVFrame *aud_frame;
    SwrContext *audio_swr_ctx;

    int vid_frame_counter, aud_frame_counter;
    int vid_width, vid_height;

    char* concat(const char *s1, const char *s2)
    {
        char *result = (char*)malloc(strlen(s1) + strlen(s2) + 1);

        strcpy(result, s1);
        strcat(result, s2);

        return result;
    }

    int setup_audio_codec()
    {
        aud_codec_id = aud_codec_comp_id;
        sample_fmt = sample_fmt_comp;

        // Fixup audio codec
        if (aud_codec == NULL)
        {
            aud_codec = avcodec_find_encoder(aud_codec_id);
            avcodec_register(aud_codec);
        }

        if (!aud_codec)
            return ERROR_FINDING_AUD_CODEC;

        return 0;
    }

    int initialize_audio_stream(AVFormatContext *local_outctx, int sample_rate, int per_frame_audio_samples, int audio_bitrate)
    {
        aud_codec_context = avcodec_alloc_context3(aud_codec);
        if (!aud_codec_context)
            return ERROR_CONTEXT_CREATION;

        aud_codec_context->bit_rate = audio_bitrate;
        aud_codec_context->sample_rate = sample_rate;
        aud_codec_context->sample_fmt = sample_fmt;
        aud_codec_context->channel_layout = AV_CH_LAYOUT_STEREO;
        aud_codec_context->channels = av_get_channel_layout_nb_channels(aud_codec_context->channel_layout);
        //aud_codec_context->profile = FF_PROFILE_AAC_MAIN;

        aud_codec_context->codec = aud_codec;
        aud_codec_context->codec_id = aud_codec_id;

        AVRational time_base;
        time_base.num = per_frame_audio_samples;
        time_base.den = aud_codec_context->sample_rate;
        aud_codec_context->time_base = time_base;

        int ret = avcodec_open2(aud_codec_context, aud_codec, NULL);

        if (ret < 0)
            return ERROR_OPENING_AUD_CODEC;

        local_outctx->audio_codec = aud_codec;
        local_outctx->audio_codec_id = aud_codec_id;

        audio_st = avformat_new_stream(local_outctx, aud_codec);

        audio_st->codecpar->bit_rate = aud_codec_context->bit_rate;
        audio_st->codecpar->sample_rate = aud_codec_context->sample_rate;
        audio_st->codecpar->channels = aud_codec_context->channels;
        audio_st->codecpar->channel_layout = aud_codec_context->channel_layout;
        audio_st->codecpar->codec_id = aud_codec_context->codec_id;
        audio_st->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
        audio_st->codecpar->format = aud_codec_context->sample_fmt;
        audio_st->codecpar->frame_size = aud_codec_context->frame_size;
        audio_st->codecpar->block_align = aud_codec_context->block_align;
        audio_st->codecpar->initial_padding = aud_codec_context->initial_padding;
        audio_st->codecpar->extradata = aud_codec_context->extradata;
        audio_st->codecpar->extradata_size = aud_codec_context->extradata_size;

        aud_frame = av_frame_alloc();
        aud_frame->nb_samples = aud_codec_context->frame_size;
        aud_frame->format = aud_codec_context->sample_fmt;
        aud_frame->channel_layout = aud_codec_context->channel_layout;
        aud_frame->sample_rate = aud_codec_context->sample_rate;

        int buffer_size;
        if (aud_codec_context->frame_size == 0)
        {
            buffer_size = per_frame_audio_samples * 2 * 4;
            aud_frame->nb_samples = per_frame_audio_samples;
        }
        else
        {
            buffer_size = av_samples_get_buffer_size(NULL, aud_codec_context->channels, aud_codec_context->frame_size,
                aud_codec_context->sample_fmt, 0);
        }

        if (av_sample_fmt_is_planar(sample_fmt))
            ret = av_frame_get_buffer(aud_frame, buffer_size / 2);
        else
            ret = av_frame_get_buffer(aud_frame, buffer_size);

        if (!aud_frame || ret < 0)
            return ERROR_ALLOCATING_FRAME;

        aud_frame_counter = 0;

        return 0;
    }

    int initialize_audio_only_encoding(int sample_rate, int per_frame_audio_samples, int audio_bitrate, const char *filename)
    {
        int ret;

        avcodec_register_all();
        av_register_all();

        outctx = avformat_alloc_context();

        char* with_dot = concat(filename, ".");
        char* full_filename = concat(with_dot, compressed_cont);

        ret = avformat_alloc_output_context2(&outctx, NULL, compressed_cont, full_filename);

        free(with_dot);

        if (ret < 0)
        {
            free(full_filename);
            return ERROR_CONTEXT_CREATION;
        }

        ret = setup_audio_codec();
        if (ret < 0)
            return ret;

        // Setup Audio
        ret = initialize_audio_stream(outctx, sample_rate, per_frame_audio_samples, audio_bitrate);
        if (ret < 0)
            return ret;

        av_dump_format(outctx, 0, full_filename, 1);

        if (!(outctx->oformat->flags & AVFMT_NOFILE))
        {
            if (avio_open(&outctx->pb, full_filename, AVIO_FLAG_WRITE) < 0)
            {
                free(full_filename);
                return ERROR_OPENING_FILE;
            }
        }

        free(full_filename);

        ret = avformat_write_header(outctx, NULL);
        if (ret < 0)
            return ERROR_WRITING_HEADER;

        return 0;
    }

    int write_interleaved_audio_frame(float_t *aud_sample)
    {
        int ret;

        aud_frame->data[0] = (uint8_t*)aud_sample;
        aud_frame->extended_data[0] = (uint8_t*)aud_sample;

        aud_frame->pts = aud_frame_counter++;

        ret = avcodec_send_frame(aud_codec_context, aud_frame);

        AVPacket pkt;
        av_init_packet(&pkt);
        pkt.data = NULL;
        pkt.size = 0;

        while (true)
        {
            ret = avcodec_receive_packet(aud_codec_context, &pkt);
            if (!ret)
            {
                av_packet_rescale_ts(&pkt, aud_codec_context->time_base, audio_st->time_base);

                pkt.stream_index = audio_st->index;

                av_interleaved_write_frame(outctx, &pkt);

                av_packet_unref(&pkt);
            }
            if (ret == AVERROR(EAGAIN))
                break;
            else if (ret < 0)
                return ERROR_ENCODING_SAMPLES_RECEIVE;
            else
                break;
        }

        return ENCODED_AUDIO;
    }

    int write_audio_frame(float_t *aud_sample)
    {
        int ret;
        aud_frame->data[0] = (uint8_t*)aud_sample;
        aud_frame->extended_data[0] = (uint8_t*)aud_sample;

        aud_frame->pts = aud_frame_counter++;

        ret = avcodec_send_frame(aud_codec_context, aud_frame);
        if (ret < 0)
            return ERROR_ENCODING_FRAME_SEND;

        AVPacket pkt;
        av_init_packet(&pkt);
        pkt.data = NULL;
        pkt.size = 0;

        fflush(stdout);

        while (true)
        {
            ret = avcodec_receive_packet(aud_codec_context, &pkt);
            if (!ret)
                if (pkt.pts != AV_NOPTS_VALUE)
                    pkt.pts = av_rescale_q(pkt.pts, aud_codec_context->time_base, audio_st->time_base);
            if (pkt.dts != AV_NOPTS_VALUE)
                pkt.dts = av_rescale_q(pkt.dts, aud_codec_context->time_base, audio_st->time_base);
            {

                av_write_frame(outctx, &pkt);
                av_packet_unref(&pkt);
            }
            if (ret == AVERROR(EAGAIN))
                break;
            else if (ret < 0)
                return ERROR_ENCODING_FRAME_RECEIVE;
            else
                break;
        }

        return ENCODED_AUDIO;
    }

    int finish_audio_encoding()
    {
        AVPacket pkt;
        av_init_packet(&pkt);
        pkt.data = NULL;
        pkt.size = 0;

        fflush(stdout);

        int ret = avcodec_send_frame(aud_codec_context, NULL);
        if (ret < 0)
            return ERROR_ENCODING_FRAME_SEND;

        while (true)
        {
            ret = avcodec_receive_packet(aud_codec_context, &pkt);
            if (!ret)
            {
                if (pkt.pts != AV_NOPTS_VALUE)
                    pkt.pts = av_rescale_q(pkt.pts, aud_codec_context->time_base, audio_st->time_base);
                if (pkt.dts != AV_NOPTS_VALUE)
                    pkt.dts = av_rescale_q(pkt.dts, aud_codec_context->time_base, audio_st->time_base);

                av_write_frame(outctx, &pkt);
                av_packet_unref(&pkt);
            }
            if (ret == -AVERROR(AVERROR_EOF))
                break;
            else if (ret < 0)
                return ERROR_ENCODING_FRAME_RECEIVE;
        }

        av_write_trailer(outctx);

        return 0;
    }

    void cleanup()
    {
        if (aud_frame)
        {
            av_frame_free(&aud_frame);
        }
        if (outctx)
        {
            for (int i = 0; i < outctx->nb_streams; i++)
                av_freep(&outctx->streams[i]);

            avio_close(outctx->pb);
            av_free(outctx);
        }

        if (aud_codec_context)
        {
            avcodec_close(aud_codec_context);
            av_free(aud_codec_context);
        }
    }

    void fill_samples(float_t *dst, int nb_samples, int nb_channels, int sample_rate, float_t *t)
    {
        int i, j;
        float_t tincr = 1.0 / sample_rate;
        const float_t c = 2 * M_PI * 440.0;

        for (i = 0; i < nb_samples; i++) {
            *dst = sin(c * *t);
            for (j = 1; j < nb_channels; j++)
                dst[j] = dst[0];
            dst += nb_channels;
            *t += tincr;
        }
    }

    int main()
    {
        int sec = 5;
        int frame_rate = 30;
        float t = 0, tincr = 0, tincr2 = 0;

        int src_samples_linesize;
        int src_nb_samples = 960;
        int src_channels = 2;
        int sample_rate = 48000;

        uint8_t **src_data = NULL;

        int ret;

        initialize_audio_only_encoding(48000, src_nb_samples, 192000, "sound_FLT_960");

        ret = av_samples_alloc_array_and_samples(&src_data, &src_samples_linesize, src_channels,
            src_nb_samples, AV_SAMPLE_FMT_FLT, 0);

        for (size_t i = 0; i < frame_rate * sec; i++)
        {
                fill_samples((float *)src_data[0], src_nb_samples, src_channels, sample_rate, &t);
                write_interleaved_audio_frame((float *)src_data[0]);
        }

        finish_audio_encoding();

        cleanup();

        return 0;
    }
}

还有一些文件:

的webm音频文件(仅限VLC): https://drive.google.com/file/d/0B16rIXjPXJCqcU5HVllIYW1iODg/view?usp=sharing

有效的ogg音频文件: https://drive.google.com/file/d/0B16rIXjPXJCqMUZhbW0tTDFjT1E/view?usp=sharing

仅适用于VLC的视频和音频文件:https://drive.google.com/file/d/0B16rIXjPXJCqX3pEN3B0QVlrekU/view?usp=sharing

如果在FFPlay中播放ogg文件,则表示“aq = 30kb”,但如果我播放webm音频文件,则会获得“aq = 0kb”。所以这似乎也不对。

有什么想法吗?提前谢谢!

编辑所以我只需将VP8和Opus编码到ogg容器中,然后将其重命名为.webm并将其上传到YouTube即可。我实际上并不知道ogg里面可能有视频。我真的不知道它如何影响编码和东西......我可以上传原始的ogg文件和视频,它也适用于YouTube。但我去webm的全部原因是它拥有的许可(https://www.webmproject.org/license/)......所以我现在有点困惑。

我需要了解“容器”在上下文中的含义以及更改扩展意味着什么。

任何评论都可以对此表示赞赏!

0 个答案:

没有答案