Skip to content

Commit a2096b6

Browse files
rohitjoins authored and microkatz committed
Merge pull request #162 from ittiam-systems:rtp-mp4a-latm
PiperOrigin-RevId: 482490230 (cherry picked from commit d21c948)
1 parent 3cb1d60 commit a2096b6

18 files changed

+481
-58
lines changed

RELEASENOTES.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,8 @@ Release notes
8787
* RTSP:
8888
* Add H263 fragmented packet handling
8989
([#119](https://github.com/androidx/media/pull/119)).
90+
* Add support for MP4A-LATM
91+
([#162](https://github.com/androidx/media/pull/162)).
9092
* IMA:
9193
* Add timeout for loading ad information to handle cases where the IMA SDK
9294
gets stuck loading an ad

libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtpPayloadFormat.java

Lines changed: 32 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -39,22 +39,23 @@
3939
@UnstableApi
4040
public final class RtpPayloadFormat {
4141

42-
private static final String RTP_MEDIA_AC3 = "AC3";
43-
private static final String RTP_MEDIA_AMR = "AMR";
44-
private static final String RTP_MEDIA_AMR_WB = "AMR-WB";
45-
private static final String RTP_MEDIA_MPEG4_GENERIC = "MPEG4-GENERIC";
46-
private static final String RTP_MEDIA_MPEG4_VIDEO = "MP4V-ES";
47-
private static final String RTP_MEDIA_H263_1998 = "H263-1998";
48-
private static final String RTP_MEDIA_H263_2000 = "H263-2000";
49-
private static final String RTP_MEDIA_H264 = "H264";
50-
private static final String RTP_MEDIA_H265 = "H265";
51-
private static final String RTP_MEDIA_OPUS = "OPUS";
52-
private static final String RTP_MEDIA_PCM_L8 = "L8";
53-
private static final String RTP_MEDIA_PCM_L16 = "L16";
54-
private static final String RTP_MEDIA_PCMA = "PCMA";
55-
private static final String RTP_MEDIA_PCMU = "PCMU";
56-
private static final String RTP_MEDIA_VP8 = "VP8";
57-
private static final String RTP_MEDIA_VP9 = "VP9";
42+
public static final String RTP_MEDIA_AC3 = "AC3";
43+
public static final String RTP_MEDIA_AMR = "AMR";
44+
public static final String RTP_MEDIA_AMR_WB = "AMR-WB";
45+
public static final String RTP_MEDIA_MPEG4_GENERIC = "MPEG4-GENERIC";
46+
public static final String RTP_MEDIA_MPEG4_LATM_AUDIO = "MP4A-LATM";
47+
public static final String RTP_MEDIA_MPEG4_VIDEO = "MP4V-ES";
48+
public static final String RTP_MEDIA_H263_1998 = "H263-1998";
49+
public static final String RTP_MEDIA_H263_2000 = "H263-2000";
50+
public static final String RTP_MEDIA_H264 = "H264";
51+
public static final String RTP_MEDIA_H265 = "H265";
52+
public static final String RTP_MEDIA_OPUS = "OPUS";
53+
public static final String RTP_MEDIA_PCM_L8 = "L8";
54+
public static final String RTP_MEDIA_PCM_L16 = "L16";
55+
public static final String RTP_MEDIA_PCMA = "PCMA";
56+
public static final String RTP_MEDIA_PCMU = "PCMU";
57+
public static final String RTP_MEDIA_VP8 = "VP8";
58+
public static final String RTP_MEDIA_VP9 = "VP9";
5859

5960
/** Returns whether the format of a {@link MediaDescription} is supported. */
6061
public static boolean isFormatSupported(MediaDescription mediaDescription) {
@@ -66,8 +67,9 @@ public static boolean isFormatSupported(MediaDescription mediaDescription) {
6667
case RTP_MEDIA_H263_2000:
6768
case RTP_MEDIA_H264:
6869
case RTP_MEDIA_H265:
69-
case RTP_MEDIA_MPEG4_VIDEO:
7070
case RTP_MEDIA_MPEG4_GENERIC:
71+
case RTP_MEDIA_MPEG4_LATM_AUDIO:
72+
case RTP_MEDIA_MPEG4_VIDEO:
7173
case RTP_MEDIA_OPUS:
7274
case RTP_MEDIA_PCM_L8:
7375
case RTP_MEDIA_PCM_L16:
@@ -97,6 +99,7 @@ public static String getMimeTypeFromRtpMediaType(String mediaType) {
9799
case RTP_MEDIA_AMR_WB:
98100
return MimeTypes.AUDIO_AMR_WB;
99101
case RTP_MEDIA_MPEG4_GENERIC:
102+
case RTP_MEDIA_MPEG4_LATM_AUDIO:
100103
return MimeTypes.AUDIO_AAC;
101104
case RTP_MEDIA_OPUS:
102105
return MimeTypes.AUDIO_OPUS;
@@ -142,6 +145,8 @@ public static String getMimeTypeFromRtpMediaType(String mediaType) {
142145
public final Format format;
143146
/** The format parameters, mapped from the SDP FMTP attribute (RFC2327 Page 22). */
144147
public final ImmutableMap<String, String> fmtpParameters;
148+
/** The RTP media encoding. */
149+
public final String mediaEncoding;
145150

146151
/**
147152
* Creates a new instance.
@@ -153,13 +158,19 @@ public static String getMimeTypeFromRtpMediaType(String mediaType) {
153158
* @param fmtpParameters The format parameters, from the SDP FMTP attribute (RFC2327 Page 22),
154159
* empty if unset. The keys and values are specified in the RFCs for specific formats. For
155160
* instance, RFC3640 Section 4.1 defines keys like profile-level-id and config.
161+
* @param mediaEncoding The RTP media encoding.
156162
*/
157163
public RtpPayloadFormat(
158-
Format format, int rtpPayloadType, int clockRate, Map<String, String> fmtpParameters) {
164+
Format format,
165+
int rtpPayloadType,
166+
int clockRate,
167+
Map<String, String> fmtpParameters,
168+
String mediaEncoding) {
159169
this.rtpPayloadType = rtpPayloadType;
160170
this.clockRate = clockRate;
161171
this.format = format;
162172
this.fmtpParameters = ImmutableMap.copyOf(fmtpParameters);
173+
this.mediaEncoding = mediaEncoding;
163174
}
164175

165176
@Override
@@ -174,7 +185,8 @@ public boolean equals(@Nullable Object o) {
174185
return rtpPayloadType == that.rtpPayloadType
175186
&& clockRate == that.clockRate
176187
&& format.equals(that.format)
177-
&& fmtpParameters.equals(that.fmtpParameters);
188+
&& fmtpParameters.equals(that.fmtpParameters)
189+
&& mediaEncoding.equals(that.mediaEncoding);
178190
}
179191

180192
@Override
@@ -184,6 +196,7 @@ public int hashCode() {
184196
result = 31 * result + clockRate;
185197
result = 31 * result + format.hashCode();
186198
result = 31 * result + fmtpParameters.hashCode();
199+
result = 31 * result + mediaEncoding.hashCode();
187200
return result;
188201
}
189202
}

libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/RtspMediaTrack.java

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,9 @@
3131
import androidx.media3.common.C;
3232
import androidx.media3.common.Format;
3333
import androidx.media3.common.MimeTypes;
34+
import androidx.media3.common.ParserException;
3435
import androidx.media3.common.util.CodecSpecificDataUtil;
36+
import androidx.media3.common.util.ParsableBitArray;
3537
import androidx.media3.common.util.UnstableApi;
3638
import androidx.media3.common.util.Util;
3739
import androidx.media3.extractor.AacUtil;
@@ -52,7 +54,8 @@
5254
private static final String PARAMETER_H265_SPROP_PPS = "sprop-pps";
5355
private static final String PARAMETER_H265_SPROP_VPS = "sprop-vps";
5456
private static final String PARAMETER_H265_SPROP_MAX_DON_DIFF = "sprop-max-don-diff";
55-
private static final String PARAMETER_MP4V_CONFIG = "config";
57+
private static final String PARAMETER_MP4A_CONFIG = "config";
58+
private static final String PARAMETER_MP4A_C_PRESENT = "cpresent";
5659

5760
/** Prefix for the RFC6381 codecs string for AAC formats. */
5861
private static final String AAC_CODECS_PREFIX = "mp4a.40.";
@@ -208,6 +211,23 @@ public int hashCode() {
208211
case MimeTypes.AUDIO_AAC:
209212
checkArgument(channelCount != C.INDEX_UNSET);
210213
checkArgument(!fmtpParameters.isEmpty());
214+
if (mediaEncoding.equals(RtpPayloadFormat.RTP_MEDIA_MPEG4_LATM_AUDIO)) {
215+
// cpresent is defined in RFC3016 Section 5.3. cpresent=0 means the config fmtp parameter
216+
// must exist.
217+
checkArgument(
218+
fmtpParameters.containsKey(PARAMETER_MP4A_C_PRESENT)
219+
&& fmtpParameters.get(PARAMETER_MP4A_C_PRESENT).equals("0"),
220+
"Only supports cpresent=0 in AAC audio.");
221+
@Nullable String config = fmtpParameters.get(PARAMETER_MP4A_CONFIG);
222+
checkNotNull(config, "AAC audio stream must include config fmtp parameter");
223+
// config is a hex string.
224+
checkArgument(config.length() % 2 == 0, "Malformat MPEG4 config: " + config);
225+
AacUtil.Config aacConfig = parseAacStreamMuxConfig(config);
226+
formatBuilder
227+
.setSampleRate(aacConfig.sampleRateHz)
228+
.setChannelCount(aacConfig.channelCount)
229+
.setCodecs(aacConfig.codecs);
230+
}
211231
processAacFmtpAttribute(formatBuilder, fmtpParameters, channelCount, clockRate);
212232
break;
213233
case MimeTypes.AUDIO_AMR_NB:
@@ -267,7 +287,8 @@ public int hashCode() {
267287
}
268288

269289
checkArgument(clockRate > 0);
270-
return new RtpPayloadFormat(formatBuilder.build(), rtpPayloadType, clockRate, fmtpParameters);
290+
return new RtpPayloadFormat(
291+
formatBuilder.build(), rtpPayloadType, clockRate, fmtpParameters, mediaEncoding);
271292
}
272293

273294
private static int inferChannelCount(int encodingParameter, String mimeType) {
@@ -300,9 +321,29 @@ private static void processAacFmtpAttribute(
300321
AacUtil.buildAacLcAudioSpecificConfig(sampleRate, channelCount)));
301322
}
302323

324+
/**
325+
* Returns the {@link AacUtil.Config} by parsing the MPEG4 Audio Stream Mux configuration.
326+
*
327+
*

fmtp attribute {@code config} includes the MPEG4 Audio Stream Mux configuration

328+
* (ISO/IEC14496-3, Chapter 1.7.3).
329+
*/
330+
private static AacUtil.Config parseAacStreamMuxConfig(String streamMuxConfig) {
331+
ParsableBitArray config = new ParsableBitArray(Util.getBytesFromHexString(streamMuxConfig));
332+
checkArgument(config.readBits(1) == 0, "Only supports audio mux version 0.");
333+
checkArgument(config.readBits(1) == 1, "Only supports allStreamsSameTimeFraming.");
334+
config.skipBits(6);
335+
checkArgument(config.readBits(4) == 0, "Only supports one program.");
336+
checkArgument(config.readBits(3) == 0, "Only supports one numLayer.");
337+
try {
338+
return AacUtil.parseAudioSpecificConfig(config, false);
339+
} catch (ParserException e) {
340+
throw new IllegalArgumentException(e);
341+
}
342+
}
343+
303344
private static void processMPEG4FmtpAttribute(
304345
Format.Builder formatBuilder, ImmutableMap<String, String> fmtpAttributes) {
305-
@Nullable String configInput = fmtpAttributes.get(PARAMETER_MP4V_CONFIG);
346+
@Nullable String configInput = fmtpAttributes.get(PARAMETER_MP4A_CONFIG);
306347
if (configInput != null) {
307348
byte[] configBuffer = Util.getBytesFromHexString(configInput);
308349
formatBuilder.setInitializationData(ImmutableList.of(configBuffer));

libraries/exoplayer_rtsp/src/main/java/androidx/media3/exoplayer/rtsp/reader/DefaultRtpPayloadReaderFactory.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,11 @@ public RtpPayloadReader createPayloadReader(RtpPayloadFormat payloadFormat) {
3535
case MimeTypes.AUDIO_AC3:
3636
return new RtpAc3Reader(payloadFormat);
3737
case MimeTypes.AUDIO_AAC:
38-
return new RtpAacReader(payloadFormat);
38+
if (payloadFormat.mediaEncoding.equals(RtpPayloadFormat.RTP_MEDIA_MPEG4_LATM_AUDIO)) {
39+
return new RtpMp4aReader(payloadFormat);
40+
} else {
41+
return new RtpAacReader(payloadFormat);
42+
}
3943
case MimeTypes.AUDIO_AMR_NB:
4044
case MimeTypes.AUDIO_AMR_WB:
4145
return new RtpAmrReader(payloadFormat);
Lines changed: 182 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
/*
2+
* Copyright 2022 The Android Open Source Project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
package androidx.media3.exoplayer.rtsp.reader;
17+
18+
import static androidx.media3.common.util.Assertions.checkArgument;
19+
import static androidx.media3.common.util.Assertions.checkNotNull;
20+
import static androidx.media3.common.util.Assertions.checkState;
21+
import static androidx.media3.common.util.Assertions.checkStateNotNull;
22+
import static androidx.media3.common.util.Util.castNonNull;
23+
import static androidx.media3.exoplayer.rtsp.reader.RtpReaderUtils.toSampleTimeUs;
24+
25+
import androidx.annotation.Nullable;
26+
import androidx.media3.common.C;
27+
import androidx.media3.common.ParserException;
28+
import androidx.media3.common.util.ParsableBitArray;
29+
import androidx.media3.common.util.ParsableByteArray;
30+
import androidx.media3.common.util.UnstableApi;
31+
import androidx.media3.common.util.Util;
32+
import androidx.media3.exoplayer.rtsp.RtpPacket;
33+
import androidx.media3.exoplayer.rtsp.RtpPayloadFormat;
34+
import androidx.media3.extractor.ExtractorOutput;
35+
import androidx.media3.extractor.TrackOutput;
36+
import com.google.common.collect.ImmutableMap;
37+
import org.checkerframework.checker.nullness.qual.MonotonicNonNull;
38+
39+
/**
40+
* Parses an MP4A-LATM byte stream carried on RTP packets, and extracts MP4A-LATM Access Units.
41+
*
42+
*

Refer to RFC3016 for more details. The LATM byte stream format is defined in ISO/IEC14496-3.

43+
*/
44+
@UnstableApi
45+
/* package */ final class RtpMp4aReader implements RtpPayloadReader {
46+
private static final String TAG = "RtpMp4aReader";
47+
48+
private static final String PARAMETER_MP4A_CONFIG = "config";
49+
50+
private final RtpPayloadFormat payloadFormat;
51+
private final int numberOfSubframes;
52+
private @MonotonicNonNull TrackOutput trackOutput;
53+
private long firstReceivedTimestamp;
54+
private int previousSequenceNumber;
55+
/** The combined size of a sample that is fragmented into multiple subFrames. */
56+
private int fragmentedSampleSizeBytes;
57+
58+
private long startTimeOffsetUs;
59+
private long fragmentedSampleTimeUs;
60+
61+
/**
62+
* Creates an instance.
63+
*
64+
* @throws IllegalArgumentException If {@link RtpPayloadFormat payloadFormat} is malformed.
65+
*/
66+
public RtpMp4aReader(RtpPayloadFormat payloadFormat) {
67+
this.payloadFormat = payloadFormat;
68+
try {
69+
numberOfSubframes = getNumOfSubframesFromMpeg4AudioConfig(payloadFormat.fmtpParameters);
70+
} catch (ParserException e) {
71+
throw new IllegalArgumentException(e);
72+
}
73+
firstReceivedTimestamp = C.TIME_UNSET;
74+
previousSequenceNumber = C.INDEX_UNSET;
75+
fragmentedSampleSizeBytes = 0;
76+
// The start time offset must be 0 until the first seek.
77+
startTimeOffsetUs = 0;
78+
fragmentedSampleTimeUs = C.TIME_UNSET;
79+
}
80+
81+
@Override
82+
public void createTracks(ExtractorOutput extractorOutput, int trackId) {
83+
trackOutput = extractorOutput.track(trackId, C.TRACK_TYPE_VIDEO);
84+
castNonNull(trackOutput).format(payloadFormat.format);
85+
}
86+
87+
@Override
88+
public void onReceivingFirstPacket(long timestamp, int sequenceNumber) {
89+
checkState(firstReceivedTimestamp == C.TIME_UNSET);
90+
firstReceivedTimestamp = timestamp;
91+
}
92+
93+
@Override
94+
public void consume(
95+
ParsableByteArray data, long timestamp, int sequenceNumber, boolean rtpMarker) {
96+
checkStateNotNull(trackOutput);
97+
98+
int expectedSequenceNumber = RtpPacket.getNextSequenceNumber(previousSequenceNumber);
99+
if (fragmentedSampleSizeBytes > 0 && expectedSequenceNumber < sequenceNumber) {
100+
outputSampleMetadataForFragmentedPackets();
101+
}
102+
103+
for (int subFrameIndex = 0; subFrameIndex < numberOfSubframes; subFrameIndex++) {
104+
int sampleLength = 0;
105+
// Implements PayloadLengthInfo() in ISO/IEC14496-3 Chapter 1.7.3.1, it only supports one
106+
// program and one layer. Each subframe starts with a variable length encoding.
107+
while (data.getPosition() < data.limit()) {
108+
int payloadMuxLength = data.readUnsignedByte();
109+
sampleLength += payloadMuxLength;
110+
if (payloadMuxLength != 0xff) {
111+
break;
112+
}
113+
}
114+
115+
trackOutput.sampleData(data, sampleLength);
116+
fragmentedSampleSizeBytes += sampleLength;
117+
}
118+
fragmentedSampleTimeUs =
119+
toSampleTimeUs(
120+
startTimeOffsetUs, timestamp, firstReceivedTimestamp, payloadFormat.clockRate);
121+
if (rtpMarker) {
122+
outputSampleMetadataForFragmentedPackets();
123+
}
124+
previousSequenceNumber = sequenceNumber;
125+
}
126+
127+
@Override
128+
public void seek(long nextRtpTimestamp, long timeUs) {
129+
firstReceivedTimestamp = nextRtpTimestamp;
130+
fragmentedSampleSizeBytes = 0;
131+
startTimeOffsetUs = timeUs;
132+
}
133+
134+
// Internal methods.
135+
136+
/**
137+
* Parses an MPEG-4 Audio Stream Mux configuration, as defined in ISO/IEC14496-3.
138+
*
139+
*

FMTP attribute {@code config} contains the MPEG-4 Audio Stream Mux configuration.

140+
*
141+
* @param fmtpAttributes The format parameters, mapped from the SDP FMTP attribute.
142+
* @return The number of subframes that is carried in each RTP packet.
143+
*/
144+
private static int getNumOfSubframesFromMpeg4AudioConfig(
145+
ImmutableMap<String, String> fmtpAttributes) throws ParserException {
146+
@Nullable String configInput = fmtpAttributes.get(PARAMETER_MP4A_CONFIG);
147+
int numberOfSubframes = 0;
148+
if (configInput != null && configInput.length() % 2 == 0) {
149+
byte[] configBuffer = Util.getBytesFromHexString(configInput);
150+
ParsableBitArray scratchBits = new ParsableBitArray(configBuffer);
151+
int audioMuxVersion = scratchBits.readBits(1);
152+
if (audioMuxVersion == 0) {
153+
checkArgument(scratchBits.readBits(1) == 1, "Only supports allStreamsSameTimeFraming.");
154+
numberOfSubframes = scratchBits.readBits(6);
155+
checkArgument(scratchBits.readBits(4) == 0, "Only suppors one program.");
156+
checkArgument(scratchBits.readBits(3) == 0, "Only suppors one layer.");
157+
} else {
158+
throw ParserException.createForMalformedDataOfUnknownType(
159+
"unsupported audio mux version: " + audioMuxVersion, null);
160+
}
161+
}
162+
// ISO/IEC14496-3 Chapter 1.7.3.2.3: The minimum value is 0 indicating 1 subframe.
163+
return numberOfSubframes + 1;
164+
}
165+
166+
/**
167+
* Outputs sample metadata.
168+
*
169+
*

Call this method only after receiving the end of an MPEG4 partition.

170+
*/
171+
private void outputSampleMetadataForFragmentedPackets() {
172+
checkNotNull(trackOutput)
173+
.sampleMetadata(
174+
fragmentedSampleTimeUs,
175+
C.BUFFER_FLAG_KEY_FRAME,
176+
fragmentedSampleSizeBytes,
177+
/* offset= */ 0,
178+
/* cryptoData= */ null);
179+
fragmentedSampleSizeBytes = 0;
180+
fragmentedSampleTimeUs = C.TIME_UNSET;
181+
}
182+
}

0 commit comments

Comments
 (0)