Remux audio if possible when transcoding.
Addresses #11712, #12674, #12945, #13084, #13346.
This commit is contained in:
parent
c0235d4cc2
commit
9a52f4e3ff
7 changed files with 163 additions and 16 deletions
|
@ -24,4 +24,6 @@ public interface Muxer {
|
|||
void writeSampleData(int trackIndex, @NonNull ByteBuffer byteBuf, @NonNull MediaCodec.BufferInfo bufferInfo) throws IOException;
|
||||
|
||||
void release();
|
||||
|
||||
/**
 * Reports whether this muxer can accept audio samples that are copied from the input
 * without being re-encoded. The converter passes this value on as its
 * "allow skip transcode" switch when creating the audio track converter.
 *
 * @return {@code true} if audio remuxing is supported, {@code false} otherwise
 */
boolean supportsAudioRemux();
|
||||
}
|
||||
|
|
|
@ -51,4 +51,9 @@ final class AndroidMuxer implements Muxer {
|
|||
public void release() {
|
||||
// Delegates to the wrapped muxer's own release().
muxer.release();
|
||||
}
|
||||
|
||||
/**
 * Always returns {@code false}: this muxer does not advertise support for audio remuxing.
 */
@Override
|
||||
public boolean supportsAudioRemux() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
package org.thoughtcrime.securesms.video.videoconverter;
|
||||
|
||||
import android.annotation.SuppressLint;
|
||||
import android.media.MediaCodec;
|
||||
import android.media.MediaCodecInfo;
|
||||
import android.media.MediaExtractor;
|
||||
|
@ -27,7 +28,8 @@ final class AudioTrackConverter {
|
|||
private static final String OUTPUT_AUDIO_MIME_TYPE = VideoConstants.AUDIO_MIME_TYPE; // Advanced Audio Coding
|
||||
private static final int OUTPUT_AUDIO_AAC_PROFILE = MediaCodecInfo.CodecProfileLevel.AACObjectLC; //MediaCodecInfo.CodecProfileLevel.AACObjectHE;
|
||||
|
||||
private static final int TIMEOUT_USEC = 10000;
|
||||
private static final int SAMPLE_BUFFER_SIZE = 16 * 1024;
|
||||
private static final int TIMEOUT_USEC = 10000;
|
||||
|
||||
private final long mTimeFrom;
|
||||
private final long mTimeTo;
|
||||
|
@ -39,6 +41,10 @@ final class AudioTrackConverter {
|
|||
private final MediaCodec mAudioDecoder;
|
||||
private final MediaCodec mAudioEncoder;
|
||||
|
||||
private final boolean skipTrancode;
|
||||
private final ByteBuffer instanceSampleBuffer = ByteBuffer.allocateDirect(SAMPLE_BUFFER_SIZE);
|
||||
private final MediaCodec.BufferInfo instanceBufferInfo = new MediaCodec.BufferInfo();
|
||||
|
||||
private final ByteBuffer[] mAudioDecoderInputBuffers;
|
||||
private ByteBuffer[] mAudioDecoderOutputBuffers;
|
||||
private final ByteBuffer[] mAudioEncoderInputBuffers;
|
||||
|
@ -68,7 +74,8 @@ final class AudioTrackConverter {
|
|||
final @NonNull MediaInput input,
|
||||
final long timeFrom,
|
||||
final long timeTo,
|
||||
final int audioBitrate) throws IOException {
|
||||
final int audioBitrate,
|
||||
final boolean allowSkipTranscode) throws IOException {
|
||||
|
||||
final MediaExtractor audioExtractor = input.createExtractor();
|
||||
final int audioInputTrack = getAndSelectAudioTrackIndex(audioExtractor);
|
||||
|
@ -76,7 +83,7 @@ final class AudioTrackConverter {
|
|||
audioExtractor.release();
|
||||
return null;
|
||||
}
|
||||
return new AudioTrackConverter(audioExtractor, audioInputTrack, timeFrom, timeTo, audioBitrate);
|
||||
return new AudioTrackConverter(audioExtractor, audioInputTrack, timeFrom, timeTo, audioBitrate, allowSkipTranscode);
|
||||
}
|
||||
|
||||
private AudioTrackConverter(
|
||||
|
@ -84,7 +91,8 @@ final class AudioTrackConverter {
|
|||
final int audioInputTrack,
|
||||
long timeFrom,
|
||||
long timeTo,
|
||||
int audioBitrate) throws IOException {
|
||||
int audioBitrate,
|
||||
final boolean allowSkipTranscode) throws IOException {
|
||||
|
||||
mTimeFrom = timeFrom;
|
||||
mTimeTo = timeTo;
|
||||
|
@ -102,6 +110,13 @@ final class AudioTrackConverter {
|
|||
final MediaFormat inputAudioFormat = mAudioExtractor.getTrackFormat(audioInputTrack);
|
||||
mInputDuration = inputAudioFormat.containsKey(MediaFormat.KEY_DURATION) ? inputAudioFormat.getLong(MediaFormat.KEY_DURATION) : 0;
|
||||
|
||||
skipTrancode = allowSkipTranscode && formatCanSkipTranscode(inputAudioFormat, audioBitrate);
|
||||
if (skipTrancode) {
|
||||
mEncoderOutputAudioFormat = inputAudioFormat;
|
||||
}
|
||||
|
||||
if (VERBOSE) Log.d(TAG, "audio skipping transcoding: " + skipTrancode);
|
||||
|
||||
final MediaFormat outputAudioFormat =
|
||||
MediaFormat.createAudioFormat(
|
||||
OUTPUT_AUDIO_MIME_TYPE,
|
||||
|
@ -109,7 +124,7 @@ final class AudioTrackConverter {
|
|||
inputAudioFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT));
|
||||
outputAudioFormat.setInteger(MediaFormat.KEY_BIT_RATE, audioBitrate);
|
||||
outputAudioFormat.setInteger(MediaFormat.KEY_AAC_PROFILE, OUTPUT_AUDIO_AAC_PROFILE);
|
||||
outputAudioFormat.setInteger(MediaFormat.KEY_MAX_INPUT_SIZE, 16 * 1024);
|
||||
outputAudioFormat.setInteger(MediaFormat.KEY_MAX_INPUT_SIZE, SAMPLE_BUFFER_SIZE);
|
||||
|
||||
// Create a MediaCodec for the desired codec, then configure it as an encoder with
|
||||
// our desired properties. Request a Surface to use for input.
|
||||
|
@ -135,9 +150,11 @@ final class AudioTrackConverter {
|
|||
if (mEncoderOutputAudioFormat != null) {
|
||||
Log.d(TAG, "muxer: adding audio track.");
|
||||
if (!mEncoderOutputAudioFormat.containsKey(MediaFormat.KEY_BIT_RATE)) {
|
||||
Log.d(TAG, "muxer: fixed MediaFormat to add bitrate.");
|
||||
mEncoderOutputAudioFormat.setInteger(MediaFormat.KEY_BIT_RATE, mAudioBitrate);
|
||||
}
|
||||
if (!mEncoderOutputAudioFormat.containsKey(MediaFormat.KEY_AAC_PROFILE)) {
|
||||
Log.d(TAG, "muxer: fixed MediaFormat to add AAC profile.");
|
||||
mEncoderOutputAudioFormat.setInteger(MediaFormat.KEY_AAC_PROFILE, OUTPUT_AUDIO_AAC_PROFILE);
|
||||
}
|
||||
mOutputAudioTrack = muxer.addTrack(mEncoderOutputAudioFormat);
|
||||
|
@ -145,6 +162,12 @@ final class AudioTrackConverter {
|
|||
}
|
||||
|
||||
void step() throws IOException {
|
||||
|
||||
if (skipTrancode && mEncoderOutputAudioFormat != null) {
|
||||
extractAndRemux();
|
||||
return;
|
||||
}
|
||||
|
||||
// Extract audio from file and feed to decoder.
|
||||
// Do not extract audio if we have determined the output format but we are not yet
|
||||
// ready to mux the frames.
|
||||
|
@ -164,7 +187,8 @@ final class AudioTrackConverter {
|
|||
Log.d(TAG, "audio extractor: returned buffer of size " + size);
|
||||
Log.d(TAG, "audio extractor: returned buffer for time " + presentationTime);
|
||||
}
|
||||
mAudioExtractorDone = size < 0 || (mTimeTo > 0 && presentationTime > mTimeTo * 1000);
|
||||
mAudioExtractorDone = isAudioExtractorDone(size, presentationTime);
|
||||
|
||||
if (mAudioExtractorDone) {
|
||||
if (VERBOSE) Log.d(TAG, "audio extractor: EOS");
|
||||
mAudioDecoder.queueInputBuffer(
|
||||
|
@ -388,6 +412,47 @@ final class AudioTrackConverter {
|
|||
Preconditions.checkState("no frame should be pending", -1 == mPendingAudioDecoderOutputBufferIndex);
|
||||
}
|
||||
|
||||
@SuppressLint("WrongConstant") // flags extracted from sample by MediaExtractor should be safe for MediaCodec.BufferInfo
|
||||
private void extractAndRemux() throws IOException {
|
||||
if (mMuxer == null) {
|
||||
Log.d(TAG, "audio remuxer: tried to execute before muxer was ready");
|
||||
return;
|
||||
}
|
||||
int size = mAudioExtractor.readSampleData(instanceSampleBuffer, 0);
|
||||
long presentationTime = mAudioExtractor.getSampleTime();
|
||||
int sampleFlags = mAudioExtractor.getSampleFlags();
|
||||
if (VERBOSE) {
|
||||
Log.d(TAG, "audio extractor: returned buffer of size " + size);
|
||||
Log.d(TAG, "audio extractor: returned buffer for time " + presentationTime);
|
||||
Log.d(TAG, "audio extractor: returned buffer with flags " + Integer.toBinaryString(sampleFlags));
|
||||
}
|
||||
mAudioExtractorDone = isAudioExtractorDone(size, presentationTime);
|
||||
|
||||
if (mAudioExtractorDone) {
|
||||
if (VERBOSE) Log.d(TAG, "audio encoder: EOS");
|
||||
instanceBufferInfo.set(0, 0, presentationTime, MediaCodec.BUFFER_FLAG_END_OF_STREAM);
|
||||
mAudioEncoderDone = true;
|
||||
} else {
|
||||
instanceBufferInfo.set(0, size, presentationTime, sampleFlags);
|
||||
}
|
||||
|
||||
mMuxer.writeSampleData(mOutputAudioTrack, instanceSampleBuffer, instanceBufferInfo);
|
||||
|
||||
if (VERBOSE) {
|
||||
Log.d(TAG, "audio extractor: wrote sample at " + presentationTime);
|
||||
}
|
||||
|
||||
mAudioExtractor.advance();
|
||||
|
||||
mAudioExtractedFrameCount++;
|
||||
mAudioEncodedFrameCount++;
|
||||
mMuxingAudioPresentationTime = Math.max(mMuxingAudioPresentationTime, presentationTime);
|
||||
}
|
||||
|
||||
private boolean isAudioExtractorDone(int size, long presentationTime) {
|
||||
return presentationTime == -1 || size < 0 || (mTimeTo > 0 && presentationTime > mTimeTo * 1000);
|
||||
}
|
||||
|
||||
private static @NonNull
|
||||
MediaCodec createAudioDecoder(final @NonNull MediaFormat inputFormat) throws IOException {
|
||||
final MediaCodec decoder = MediaCodec.createDecoderByType(MediaConverter.getMimeTypeFor(inputFormat));
|
||||
|
@ -420,4 +485,23 @@ final class AudioTrackConverter {
|
|||
private static boolean isAudioFormat(final @NonNull MediaFormat format) {
|
||||
return MediaConverter.getMimeTypeFor(format).startsWith("audio/");
|
||||
}
|
||||
|
||||
/**
|
||||
* HE-AAC input bitstreams exhibit bad decoder behavior: the decoder's output buffer's presentation timestamp is way larger than the input sample's.
|
||||
* This mismatch propagates throughout the transcoding pipeline and results in slowed, distorted audio in the output file.
|
||||
* To sidestep this: AAC and its variants are a supported output codec, and HE-AAC bitrates are almost always lower than our target bitrate,
|
||||
* so we can pass through the input bitstream unaltered, relying on consumers of the output file to render HE-AAC correctly.
|
||||
*/
|
||||
private static boolean formatCanSkipTranscode(MediaFormat audioFormat, int desiredBitrate) {
|
||||
try {
|
||||
int inputBitrate = audioFormat.getInteger(MediaFormat.KEY_BIT_RATE);
|
||||
String inputMimeType = audioFormat.getString(MediaFormat.KEY_MIME);
|
||||
return OUTPUT_AUDIO_MIME_TYPE.equals(inputMimeType) && inputBitrate <= desiredBitrate;
|
||||
} catch (NullPointerException exception) {
|
||||
if (VERBOSE) {
|
||||
Log.d(TAG, "could not find bitrate in mediaFormat, can't skip transcoding.");
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -141,15 +141,15 @@ public final class MediaConverter {
|
|||
AudioTrackConverter audioTrackConverter = null;
|
||||
|
||||
try {
|
||||
muxer = mOutput.createMuxer();
|
||||
|
||||
videoTrackConverter = VideoTrackConverter.create(mInput, mTimeFrom, mTimeTo, mVideoResolution, mVideoBitrate, mVideoCodec);
|
||||
audioTrackConverter = AudioTrackConverter.create(mInput, mTimeFrom, mTimeTo, mAudioBitrate);
|
||||
audioTrackConverter = AudioTrackConverter.create(mInput, mTimeFrom, mTimeTo, mAudioBitrate, muxer.supportsAudioRemux());
|
||||
|
||||
if (videoTrackConverter == null && audioTrackConverter == null) {
|
||||
throw new EncodingException("No video and audio tracks");
|
||||
}
|
||||
|
||||
muxer = mOutput.createMuxer();
|
||||
|
||||
doExtractDecodeEditEncodeMux(
|
||||
videoTrackConverter,
|
||||
audioTrackConverter,
|
||||
|
|
|
@ -4,6 +4,7 @@ import android.util.SparseIntArray;
|
|||
|
||||
import org.mp4parser.boxes.iso14496.part1.objectdescriptors.AudioSpecificConfig;
|
||||
import org.mp4parser.boxes.iso14496.part1.objectdescriptors.DecoderConfigDescriptor;
|
||||
import org.mp4parser.boxes.iso14496.part1.objectdescriptors.DecoderSpecificInfo;
|
||||
import org.mp4parser.boxes.iso14496.part1.objectdescriptors.ESDescriptor;
|
||||
import org.mp4parser.boxes.iso14496.part1.objectdescriptors.SLConfigDescriptor;
|
||||
import org.mp4parser.boxes.iso14496.part12.SampleDescriptionBox;
|
||||
|
@ -16,6 +17,8 @@ import org.mp4parser.streaming.input.StreamingSampleImpl;
|
|||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
|
||||
import androidx.annotation.Nullable;
|
||||
|
||||
abstract class AacTrack extends AbstractStreamingTrack {
|
||||
|
||||
private static final SparseIntArray SAMPLING_FREQUENCY_INDEX_MAP = new SparseIntArray();
|
||||
|
@ -39,7 +42,7 @@ abstract class AacTrack extends AbstractStreamingTrack {
|
|||
|
||||
private int sampleRate;
|
||||
|
||||
AacTrack(long avgBitrate, long maxBitrate, int sampleRate, int channelCount, int aacProfile) {
|
||||
AacTrack(long avgBitrate, long maxBitrate, int sampleRate, int channelCount, int aacProfile, @Nullable DecoderSpecificInfo decoderSpecificInfo) {
|
||||
this.sampleRate = sampleRate;
|
||||
|
||||
final DefaultSampleFlagsTrackExtension defaultSampleFlagsTrackExtension = new DefaultSampleFlagsTrackExtension();
|
||||
|
@ -83,6 +86,10 @@ abstract class AacTrack extends AbstractStreamingTrack {
|
|||
audioSpecificConfig.setChannelConfiguration(channelCount);
|
||||
decoderConfigDescriptor.setAudioSpecificInfo(audioSpecificConfig);
|
||||
|
||||
if (decoderSpecificInfo != null) {
|
||||
decoderConfigDescriptor.setDecoderSpecificInfo(decoderSpecificInfo);
|
||||
}
|
||||
|
||||
descriptor.setDecoderConfigDescriptor(decoderConfigDescriptor);
|
||||
|
||||
esds.setEsDescriptor(descriptor);
|
||||
|
|
|
@ -4,9 +4,13 @@ import android.media.MediaCodec;
|
|||
import android.media.MediaFormat;
|
||||
|
||||
import androidx.annotation.NonNull;
|
||||
import androidx.annotation.Nullable;
|
||||
|
||||
import org.mp4parser.boxes.iso14496.part1.objectdescriptors.DecoderSpecificInfo;
|
||||
import org.mp4parser.streaming.StreamingTrack;
|
||||
import org.signal.core.util.logging.Log;
|
||||
import org.thoughtcrime.securesms.video.interfaces.Muxer;
|
||||
import org.thoughtcrime.securesms.video.videoconverter.utils.MediaCodecCompat;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
@ -16,7 +20,7 @@ import java.util.ArrayList;
|
|||
import java.util.List;
|
||||
|
||||
public final class StreamingMuxer implements Muxer {
|
||||
|
||||
private static final String TAG = Log.tag(StreamingMuxer.class);
|
||||
private final OutputStream outputStream;
|
||||
private final List<MediaCodecTrack> tracks = new ArrayList<>();
|
||||
private Mp4Writer mp4Writer;
|
||||
|
@ -55,7 +59,7 @@ public final class StreamingMuxer implements Muxer {
|
|||
tracks.add(new MediaCodecAvcTrack(format));
|
||||
break;
|
||||
case "audio/mp4a-latm":
|
||||
tracks.add(new MediaCodecAacTrack(format));
|
||||
tracks.add(MediaCodecAacTrack.create(format));
|
||||
break;
|
||||
case "video/hevc":
|
||||
tracks.add(new MediaCodecHevcTrack(format));
|
||||
|
@ -75,6 +79,11 @@ public final class StreamingMuxer implements Muxer {
|
|||
public void release() {
|
||||
// No-op: this implementation does nothing on release.
}
|
||||
|
||||
/**
 * Always returns {@code true}: this muxer advertises support for audio remuxing.
 */
@Override
|
||||
public boolean supportsAudioRemux() {
|
||||
return true;
|
||||
}
|
||||
|
||||
interface MediaCodecTrack {
|
||||
void writeSampleData(@NonNull ByteBuffer byteBuf, @NonNull MediaCodec.BufferInfo bufferInfo) throws IOException;
|
||||
|
||||
|
@ -123,10 +132,43 @@ public final class StreamingMuxer implements Muxer {
|
|||
|
||||
static class MediaCodecAacTrack extends AacTrack implements MediaCodecTrack {
|
||||
|
||||
MediaCodecAacTrack(@NonNull MediaFormat format) {
|
||||
super(format.getInteger(MediaFormat.KEY_BIT_RATE), format.getInteger(MediaFormat.KEY_BIT_RATE),
|
||||
format.getInteger(MediaFormat.KEY_SAMPLE_RATE), format.getInteger(MediaFormat.KEY_CHANNEL_COUNT),
|
||||
format.getInteger(MediaFormat.KEY_AAC_PROFILE));
|
||||
/**
 * Forwards every argument unchanged to the {@code AacTrack} constructor.
 * Private: instances are built through {@code create(MediaFormat)}.
 */
private MediaCodecAacTrack(long avgBitrate, long maxBitrate, int sampleRate, int channelCount, int aacProfile, @Nullable DecoderSpecificInfo decoderSpecificInfo) {
|
||||
super(avgBitrate, maxBitrate, sampleRate, channelCount, aacProfile, decoderSpecificInfo);
|
||||
}
|
||||
|
||||
public static MediaCodecAacTrack create(@NonNull MediaFormat format) {
|
||||
final int bitrate = format.getInteger(MediaFormat.KEY_BIT_RATE);
|
||||
final int maxBitrate;
|
||||
if (format.containsKey(MediaCodecCompat.MEDIA_FORMAT_KEY_MAX_BIT_RATE)) {
|
||||
maxBitrate = format.getInteger(MediaCodecCompat.MEDIA_FORMAT_KEY_MAX_BIT_RATE);
|
||||
} else {
|
||||
maxBitrate = bitrate;
|
||||
}
|
||||
|
||||
final DecoderSpecificInfo filledDecoderSpecificInfo;
|
||||
if (format.containsKey(MediaCodecCompat.MEDIA_FORMAT_KEY_MAX_BIT_RATE)) {
|
||||
final ByteBuffer csd = format.getByteBuffer(MediaCodecCompat.MEDIA_FORMAT_KEY_CODEC_SPECIFIC_DATA_0);
|
||||
|
||||
DecoderSpecificInfo decoderSpecificInfo = new DecoderSpecificInfo();
|
||||
boolean parseSuccess = false;
|
||||
try {
|
||||
decoderSpecificInfo.parseDetail(csd);
|
||||
parseSuccess = true;
|
||||
} catch (IOException e) {
|
||||
Log.w(TAG, "Could not parse AAC codec-specific data!", e);
|
||||
}
|
||||
if (parseSuccess) {
|
||||
filledDecoderSpecificInfo = decoderSpecificInfo;
|
||||
} else {
|
||||
filledDecoderSpecificInfo = null;
|
||||
}
|
||||
} else {
|
||||
filledDecoderSpecificInfo = null;
|
||||
}
|
||||
|
||||
return new MediaCodecAacTrack(bitrate, maxBitrate,
|
||||
format.getInteger(MediaFormat.KEY_SAMPLE_RATE), format.getInteger(MediaFormat.KEY_CHANNEL_COUNT),
|
||||
format.getInteger(MediaFormat.KEY_AAC_PROFILE), filledDecoderSpecificInfo);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -17,6 +17,13 @@ import java.io.IOException
|
|||
object MediaCodecCompat {
|
||||
private const val TAG = "MediaDataSourceCompat"
|
||||
|
||||
const val MEDIA_FORMAT_KEY_MAX_BIT_RATE = "max-bitrate"
|
||||
|
||||
// https://developer.android.com/reference/android/media/MediaCodec#CSD
|
||||
const val MEDIA_FORMAT_KEY_CODEC_SPECIFIC_DATA_0 = "csd-0"
|
||||
const val MEDIA_FORMAT_KEY_CODEC_SPECIFIC_DATA_1 = "csd-1"
|
||||
const val MEDIA_FORMAT_KEY_CODEC_SPECIFIC_DATA_2 = "csd-2"
|
||||
|
||||
@JvmStatic
|
||||
fun findDecoder(inputFormat: MediaFormat): Pair<MediaCodec, MediaFormat> {
|
||||
val codecs = MediaCodecList(MediaCodecList.REGULAR_CODECS)
|
||||
|
|
Loading…
Add table
Reference in a new issue