Remux audio if possible when transcoding.

Addresses #11712, #12674, #12945, #13084, #13346.
This commit is contained in:
Nicholas Tinsley 2024-01-25 13:48:38 -05:00
parent c0235d4cc2
commit 9a52f4e3ff
7 changed files with 163 additions and 16 deletions

View file

@ -24,4 +24,6 @@ public interface Muxer {
void writeSampleData(int trackIndex, @NonNull ByteBuffer byteBuf, @NonNull MediaCodec.BufferInfo bufferInfo) throws IOException;
void release();
/**
 * Reports whether this muxer can accept an audio track whose samples are copied
 * straight from the input instead of being produced by an encoder.
 *
 * @return {@code true} if already-encoded input audio samples may be written to this muxer unchanged
 */
boolean supportsAudioRemux();
}

View file

@ -51,4 +51,9 @@ final class AndroidMuxer implements Muxer {
public void release() {
muxer.release();
}
/**
 * This muxer never opts in to audio remuxing, so audio written to it always goes
 * through the decode/encode path.
 *
 * @return always {@code false}
 */
@Override
public boolean supportsAudioRemux() {
return false;
}
}

View file

@ -1,5 +1,6 @@
package org.thoughtcrime.securesms.video.videoconverter;
import android.annotation.SuppressLint;
import android.media.MediaCodec;
import android.media.MediaCodecInfo;
import android.media.MediaExtractor;
@ -27,7 +28,8 @@ final class AudioTrackConverter {
private static final String OUTPUT_AUDIO_MIME_TYPE = VideoConstants.AUDIO_MIME_TYPE; // Advanced Audio Coding
private static final int OUTPUT_AUDIO_AAC_PROFILE = MediaCodecInfo.CodecProfileLevel.AACObjectLC; //MediaCodecInfo.CodecProfileLevel.AACObjectHE;
private static final int TIMEOUT_USEC = 10000;
private static final int SAMPLE_BUFFER_SIZE = 16 * 1024;
private static final int TIMEOUT_USEC = 10000;
private final long mTimeFrom;
private final long mTimeTo;
@ -39,6 +41,10 @@ final class AudioTrackConverter {
private final MediaCodec mAudioDecoder;
private final MediaCodec mAudioEncoder;
private final boolean skipTrancode;
private final ByteBuffer instanceSampleBuffer = ByteBuffer.allocateDirect(SAMPLE_BUFFER_SIZE);
private final MediaCodec.BufferInfo instanceBufferInfo = new MediaCodec.BufferInfo();
private final ByteBuffer[] mAudioDecoderInputBuffers;
private ByteBuffer[] mAudioDecoderOutputBuffers;
private final ByteBuffer[] mAudioEncoderInputBuffers;
@ -68,7 +74,8 @@ final class AudioTrackConverter {
final @NonNull MediaInput input,
final long timeFrom,
final long timeTo,
final int audioBitrate) throws IOException {
final int audioBitrate,
final boolean allowSkipTranscode) throws IOException {
final MediaExtractor audioExtractor = input.createExtractor();
final int audioInputTrack = getAndSelectAudioTrackIndex(audioExtractor);
@ -76,7 +83,7 @@ final class AudioTrackConverter {
audioExtractor.release();
return null;
}
return new AudioTrackConverter(audioExtractor, audioInputTrack, timeFrom, timeTo, audioBitrate);
return new AudioTrackConverter(audioExtractor, audioInputTrack, timeFrom, timeTo, audioBitrate, allowSkipTranscode);
}
private AudioTrackConverter(
@ -84,7 +91,8 @@ final class AudioTrackConverter {
final int audioInputTrack,
long timeFrom,
long timeTo,
int audioBitrate) throws IOException {
int audioBitrate,
final boolean allowSkipTranscode) throws IOException {
mTimeFrom = timeFrom;
mTimeTo = timeTo;
@ -102,6 +110,13 @@ final class AudioTrackConverter {
final MediaFormat inputAudioFormat = mAudioExtractor.getTrackFormat(audioInputTrack);
mInputDuration = inputAudioFormat.containsKey(MediaFormat.KEY_DURATION) ? inputAudioFormat.getLong(MediaFormat.KEY_DURATION) : 0;
skipTrancode = allowSkipTranscode && formatCanSkipTranscode(inputAudioFormat, audioBitrate);
if (skipTrancode) {
mEncoderOutputAudioFormat = inputAudioFormat;
}
if (VERBOSE) Log.d(TAG, "audio skipping transcoding: " + skipTrancode);
final MediaFormat outputAudioFormat =
MediaFormat.createAudioFormat(
OUTPUT_AUDIO_MIME_TYPE,
@ -109,7 +124,7 @@ final class AudioTrackConverter {
inputAudioFormat.getInteger(MediaFormat.KEY_CHANNEL_COUNT));
outputAudioFormat.setInteger(MediaFormat.KEY_BIT_RATE, audioBitrate);
outputAudioFormat.setInteger(MediaFormat.KEY_AAC_PROFILE, OUTPUT_AUDIO_AAC_PROFILE);
outputAudioFormat.setInteger(MediaFormat.KEY_MAX_INPUT_SIZE, 16 * 1024);
outputAudioFormat.setInteger(MediaFormat.KEY_MAX_INPUT_SIZE, SAMPLE_BUFFER_SIZE);
// Create a MediaCodec for the desired codec, then configure it as an encoder with
// our desired properties. Request a Surface to use for input.
@ -135,9 +150,11 @@ final class AudioTrackConverter {
if (mEncoderOutputAudioFormat != null) {
Log.d(TAG, "muxer: adding audio track.");
if (!mEncoderOutputAudioFormat.containsKey(MediaFormat.KEY_BIT_RATE)) {
Log.d(TAG, "muxer: fixed MediaFormat to add bitrate.");
mEncoderOutputAudioFormat.setInteger(MediaFormat.KEY_BIT_RATE, mAudioBitrate);
}
if (!mEncoderOutputAudioFormat.containsKey(MediaFormat.KEY_AAC_PROFILE)) {
Log.d(TAG, "muxer: fixed MediaFormat to add AAC profile.");
mEncoderOutputAudioFormat.setInteger(MediaFormat.KEY_AAC_PROFILE, OUTPUT_AUDIO_AAC_PROFILE);
}
mOutputAudioTrack = muxer.addTrack(mEncoderOutputAudioFormat);
@ -145,6 +162,12 @@ final class AudioTrackConverter {
}
void step() throws IOException {
if (skipTrancode && mEncoderOutputAudioFormat != null) {
extractAndRemux();
return;
}
// Extract audio from file and feed to decoder.
// Do not extract audio if we have determined the output format but we are not yet
// ready to mux the frames.
@ -164,7 +187,8 @@ final class AudioTrackConverter {
Log.d(TAG, "audio extractor: returned buffer of size " + size);
Log.d(TAG, "audio extractor: returned buffer for time " + presentationTime);
}
mAudioExtractorDone = size < 0 || (mTimeTo > 0 && presentationTime > mTimeTo * 1000);
mAudioExtractorDone = isAudioExtractorDone(size, presentationTime);
if (mAudioExtractorDone) {
if (VERBOSE) Log.d(TAG, "audio extractor: EOS");
mAudioDecoder.queueInputBuffer(
@ -388,6 +412,47 @@ final class AudioTrackConverter {
Preconditions.checkState("no frame should be pending", -1 == mPendingAudioDecoderOutputBufferIndex);
}
/**
 * Pass-through path: reads the extractor's current sample into the reusable sample buffer
 * and hands it to the muxer without decoding or re-encoding it.
 * <p>
 * When the extractor is finished (see {@link #isAudioExtractorDone(int, long)}), an empty
 * buffer flagged end-of-stream is written instead and the encoder side is marked done as well.
 * Does nothing except log if no muxer has been set yet.
 *
 * @throws IOException if the muxer fails to write the sample
 */
@SuppressLint("WrongConstant") // flags extracted from sample by MediaExtractor should be safe for MediaCodec.BufferInfo
private void extractAndRemux() throws IOException {
  if (mMuxer == null) {
    Log.d(TAG, "audio remuxer: tried to execute before muxer was ready");
    return;
  }

  // Pull the current sample and its metadata from the extractor.
  final int  bytesRead      = mAudioExtractor.readSampleData(instanceSampleBuffer, 0);
  final long sampleTime     = mAudioExtractor.getSampleTime();
  final int  extractorFlags = mAudioExtractor.getSampleFlags();

  if (VERBOSE) {
    Log.d(TAG, "audio extractor: returned buffer of size " + bytesRead);
    Log.d(TAG, "audio extractor: returned buffer for time " + sampleTime);
    Log.d(TAG, "audio extractor: returned buffer with flags " + Integer.toBinaryString(extractorFlags));
  }

  final boolean reachedEnd = isAudioExtractorDone(bytesRead, sampleTime);
  mAudioExtractorDone = reachedEnd;

  if (reachedEnd && VERBOSE) Log.d(TAG, "audio encoder: EOS");

  // At end of stream an empty, EOS-flagged buffer is written; otherwise the sample is forwarded as read.
  instanceBufferInfo.set(0,
                         reachedEnd ? 0 : bytesRead,
                         sampleTime,
                         reachedEnd ? MediaCodec.BUFFER_FLAG_END_OF_STREAM : extractorFlags);
  if (reachedEnd) {
    // There is no encoder in this path, so the encoder side finishes together with the extractor.
    mAudioEncoderDone = true;
  }

  mMuxer.writeSampleData(mOutputAudioTrack, instanceSampleBuffer, instanceBufferInfo);
  if (VERBOSE) {
    Log.d(TAG, "audio extractor: wrote sample at " + sampleTime);
  }

  mAudioExtractor.advance();

  // Both counters advance together because every extracted sample is written as-is.
  mAudioExtractedFrameCount++;
  mAudioEncodedFrameCount++;
  mMuxingAudioPresentationTime = Math.max(mMuxingAudioPresentationTime, sampleTime);
}
/**
 * Tells whether the extractor has nothing more to contribute to the output.
 *
 * @param size             value returned by the extractor's last sample read
 * @param presentationTime timestamp the extractor reported for that sample
 * @return {@code true} if the extractor reported no sample (time of -1 or negative size),
 *         or if an end time is configured ({@code mTimeTo > 0}) and the sample lies past it
 */
private boolean isAudioExtractorDone(int size, long presentationTime) {
  if (presentationTime == -1 || size < 0) {
    return true;
  }
  // mTimeTo is scaled by 1000 before being compared against the sample timestamp.
  final boolean hasEndTime = mTimeTo > 0;
  return hasEndTime && presentationTime > mTimeTo * 1000;
}
private static @NonNull
MediaCodec createAudioDecoder(final @NonNull MediaFormat inputFormat) throws IOException {
final MediaCodec decoder = MediaCodec.createDecoderByType(MediaConverter.getMimeTypeFor(inputFormat));
@ -420,4 +485,23 @@ final class AudioTrackConverter {
/**
 * Checks whether the given track format describes audio, judged by its MIME type prefix.
 *
 * @param format track format to inspect
 * @return {@code true} if the format's MIME type starts with {@code "audio/"}
 */
private static boolean isAudioFormat(final @NonNull MediaFormat format) {
  final String mimeType = MediaConverter.getMimeTypeFor(format);
  return mimeType.startsWith("audio/");
}
/**
 * Decides whether the input audio track may be copied to the output unaltered instead of
 * being decoded and re-encoded.
 * <p>
 * HE-AAC input bitstreams exhibit bad decoder behavior: the decoder's output buffer's presentation timestamp is way larger than the input sample's.
 * This mismatch propagates throughout the transcoding pipeline and results in slowed, distorted audio in the output file.
 * To sidestep this: AAC and its variants are a supported output codec, and HE-AAC bitrates are almost always lower than our target bitrate,
 * so we can pass through the input bitstream unaltered, relying on consumers of the output file to render HE-AAC correctly.
 * <p>
 * Pass-through is allowed only when the input MIME type equals the output MIME type and the
 * input's declared bitrate does not exceed {@code desiredBitrate}. A format that declares no
 * bitrate is never eligible.
 *
 * @param audioFormat    format of the input audio track
 * @param desiredBitrate target output bitrate, compared directly against the input's {@link MediaFormat#KEY_BIT_RATE}
 * @return {@code true} if transcoding can be skipped for this input
 */
private static boolean formatCanSkipTranscode(MediaFormat audioFormat, int desiredBitrate) {
  // Test for the key explicitly rather than relying on getInteger() throwing for a missing
  // entry, so an unrelated NullPointerException is never mistaken for "no bitrate".
  if (audioFormat == null || !audioFormat.containsKey(MediaFormat.KEY_BIT_RATE)) {
    if (VERBOSE) {
      Log.d(TAG, "could not find bitrate in mediaFormat, can't skip transcoding.");
    }
    return false;
  }

  final int    inputBitrate  = audioFormat.getInteger(MediaFormat.KEY_BIT_RATE);
  // getString() yields null for a missing MIME key; equals() on the constant handles that safely.
  final String inputMimeType = audioFormat.getString(MediaFormat.KEY_MIME);
  return OUTPUT_AUDIO_MIME_TYPE.equals(inputMimeType) && inputBitrate <= desiredBitrate;
}
}

View file

@ -141,15 +141,15 @@ public final class MediaConverter {
AudioTrackConverter audioTrackConverter = null;
try {
muxer = mOutput.createMuxer();
videoTrackConverter = VideoTrackConverter.create(mInput, mTimeFrom, mTimeTo, mVideoResolution, mVideoBitrate, mVideoCodec);
audioTrackConverter = AudioTrackConverter.create(mInput, mTimeFrom, mTimeTo, mAudioBitrate);
audioTrackConverter = AudioTrackConverter.create(mInput, mTimeFrom, mTimeTo, mAudioBitrate, muxer.supportsAudioRemux());
if (videoTrackConverter == null && audioTrackConverter == null) {
throw new EncodingException("No video and audio tracks");
}
muxer = mOutput.createMuxer();
doExtractDecodeEditEncodeMux(
videoTrackConverter,
audioTrackConverter,

View file

@ -4,6 +4,7 @@ import android.util.SparseIntArray;
import org.mp4parser.boxes.iso14496.part1.objectdescriptors.AudioSpecificConfig;
import org.mp4parser.boxes.iso14496.part1.objectdescriptors.DecoderConfigDescriptor;
import org.mp4parser.boxes.iso14496.part1.objectdescriptors.DecoderSpecificInfo;
import org.mp4parser.boxes.iso14496.part1.objectdescriptors.ESDescriptor;
import org.mp4parser.boxes.iso14496.part1.objectdescriptors.SLConfigDescriptor;
import org.mp4parser.boxes.iso14496.part12.SampleDescriptionBox;
@ -16,6 +17,8 @@ import org.mp4parser.streaming.input.StreamingSampleImpl;
import java.io.IOException;
import java.nio.ByteBuffer;
import androidx.annotation.Nullable;
abstract class AacTrack extends AbstractStreamingTrack {
private static final SparseIntArray SAMPLING_FREQUENCY_INDEX_MAP = new SparseIntArray();
@ -39,7 +42,7 @@ abstract class AacTrack extends AbstractStreamingTrack {
private int sampleRate;
AacTrack(long avgBitrate, long maxBitrate, int sampleRate, int channelCount, int aacProfile) {
AacTrack(long avgBitrate, long maxBitrate, int sampleRate, int channelCount, int aacProfile, @Nullable DecoderSpecificInfo decoderSpecificInfo) {
this.sampleRate = sampleRate;
final DefaultSampleFlagsTrackExtension defaultSampleFlagsTrackExtension = new DefaultSampleFlagsTrackExtension();
@ -83,6 +86,10 @@ abstract class AacTrack extends AbstractStreamingTrack {
audioSpecificConfig.setChannelConfiguration(channelCount);
decoderConfigDescriptor.setAudioSpecificInfo(audioSpecificConfig);
if (decoderSpecificInfo != null) {
decoderConfigDescriptor.setDecoderSpecificInfo(decoderSpecificInfo);
}
descriptor.setDecoderConfigDescriptor(decoderConfigDescriptor);
esds.setEsDescriptor(descriptor);

View file

@ -4,9 +4,13 @@ import android.media.MediaCodec;
import android.media.MediaFormat;
import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
import org.mp4parser.boxes.iso14496.part1.objectdescriptors.DecoderSpecificInfo;
import org.mp4parser.streaming.StreamingTrack;
import org.signal.core.util.logging.Log;
import org.thoughtcrime.securesms.video.interfaces.Muxer;
import org.thoughtcrime.securesms.video.videoconverter.utils.MediaCodecCompat;
import java.io.IOException;
import java.io.OutputStream;
@ -16,7 +20,7 @@ import java.util.ArrayList;
import java.util.List;
public final class StreamingMuxer implements Muxer {
private static final String TAG = Log.tag(StreamingMuxer.class);
private final OutputStream outputStream;
private final List<MediaCodecTrack> tracks = new ArrayList<>();
private Mp4Writer mp4Writer;
@ -55,7 +59,7 @@ public final class StreamingMuxer implements Muxer {
tracks.add(new MediaCodecAvcTrack(format));
break;
case "audio/mp4a-latm":
tracks.add(new MediaCodecAacTrack(format));
tracks.add(MediaCodecAacTrack.create(format));
break;
case "video/hevc":
tracks.add(new MediaCodecHevcTrack(format));
@ -75,6 +79,11 @@ public final class StreamingMuxer implements Muxer {
public void release() {
}
/**
 * This muxer opts in to audio remuxing, i.e. it accepts an audio track whose
 * samples were copied from the input rather than produced by an encoder.
 *
 * @return always {@code true}
 */
@Override
public boolean supportsAudioRemux() {
return true;
}
interface MediaCodecTrack {
void writeSampleData(@NonNull ByteBuffer byteBuf, @NonNull MediaCodec.BufferInfo bufferInfo) throws IOException;
@ -123,10 +132,43 @@ public final class StreamingMuxer implements Muxer {
static class MediaCodecAacTrack extends AacTrack implements MediaCodecTrack {
MediaCodecAacTrack(@NonNull MediaFormat format) {
super(format.getInteger(MediaFormat.KEY_BIT_RATE), format.getInteger(MediaFormat.KEY_BIT_RATE),
format.getInteger(MediaFormat.KEY_SAMPLE_RATE), format.getInteger(MediaFormat.KEY_CHANNEL_COUNT),
format.getInteger(MediaFormat.KEY_AAC_PROFILE));
private MediaCodecAacTrack(long avgBitrate, long maxBitrate, int sampleRate, int channelCount, int aacProfile, @Nullable DecoderSpecificInfo decoderSpecificInfo) {
super(avgBitrate, maxBitrate, sampleRate, channelCount, aacProfile, decoderSpecificInfo);
}
/**
 * Builds an AAC track from a {@link MediaFormat}.
 * <p>
 * The format must carry bitrate, sample rate, channel count and AAC profile. The maximum
 * bitrate and the codec-specific data ("csd-0") are optional: a missing maximum bitrate
 * falls back to the average bitrate, and missing or unparseable codec-specific data results
 * in no decoder-specific info being passed to the track.
 *
 * @param format format describing the AAC stream
 * @return a track configured from {@code format}
 */
public static MediaCodecAacTrack create(@NonNull MediaFormat format) {
  final int bitrate = format.getInteger(MediaFormat.KEY_BIT_RATE);
  final int maxBitrate;
  if (format.containsKey(MediaCodecCompat.MEDIA_FORMAT_KEY_MAX_BIT_RATE)) {
    maxBitrate = format.getInteger(MediaCodecCompat.MEDIA_FORMAT_KEY_MAX_BIT_RATE);
  } else {
    maxBitrate = bitrate;
  }

  DecoderSpecificInfo filledDecoderSpecificInfo = null;
  // Guard on the key that is actually read: the presence of csd-0 is independent of whether
  // the format declares a max bitrate.
  if (format.containsKey(MediaCodecCompat.MEDIA_FORMAT_KEY_CODEC_SPECIFIC_DATA_0)) {
    final ByteBuffer csd = format.getByteBuffer(MediaCodecCompat.MEDIA_FORMAT_KEY_CODEC_SPECIFIC_DATA_0);
    if (csd != null) {
      final DecoderSpecificInfo decoderSpecificInfo = new DecoderSpecificInfo();
      try {
        decoderSpecificInfo.parseDetail(csd);
        // Only hand the info on once it parsed cleanly.
        filledDecoderSpecificInfo = decoderSpecificInfo;
      } catch (IOException e) {
        Log.w(TAG, "Could not parse AAC codec-specific data!", e);
      }
    }
  }

  return new MediaCodecAacTrack(bitrate, maxBitrate,
                                format.getInteger(MediaFormat.KEY_SAMPLE_RATE), format.getInteger(MediaFormat.KEY_CHANNEL_COUNT),
                                format.getInteger(MediaFormat.KEY_AAC_PROFILE), filledDecoderSpecificInfo);
}
@Override

View file

@ -17,6 +17,13 @@ import java.io.IOException
object MediaCodecCompat {
private const val TAG = "MediaDataSourceCompat"
// MediaFormat key for a track's maximum bitrate; callers should check for its presence before reading it.
const val MEDIA_FORMAT_KEY_MAX_BIT_RATE = "max-bitrate"
// MediaFormat keys for the codec-specific data buffers, as described at
// https://developer.android.com/reference/android/media/MediaCodec#CSD
const val MEDIA_FORMAT_KEY_CODEC_SPECIFIC_DATA_0 = "csd-0"
const val MEDIA_FORMAT_KEY_CODEC_SPECIFIC_DATA_1 = "csd-1"
const val MEDIA_FORMAT_KEY_CODEC_SPECIFIC_DATA_2 = "csd-2"
@JvmStatic
fun findDecoder(inputFormat: MediaFormat): Pair<MediaCodec, MediaFormat> {
val codecs = MediaCodecList(MediaCodecList.REGULAR_CODECS)