Skip to content

Commit 9b71f2a

Browse files
ychaparov, copybara-github
authored and committed
Mp4Extractor: allow zero length NAL units
Some videos include zero-length NAL units in their length-delimited MP4 samples. Empty NAL units are not spec-compliant (see ISO/IEC 14496-15 section 4.3.3.3), but other players are able to play these videos (with warnings or errors). With this change, we check how many bytes remain in the sample (track.sampleTable.sizes[sampleIndex] minus the bytes already read) before reading the first byte of the NAL unit together with its length field. PiperOrigin-RevId: 711720621
1 parent 682889f commit 9b71f2a

13 files changed

+1529
-14
lines changed

libraries/extractor/src/main/java/androidx/media3/extractor/mp4/Mp4Extractor.java

Lines changed: 24 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -894,38 +894,48 @@ private int readSample(ExtractorInput input, PositionHolder positionHolder) thro
894894
nalPrefixData[0] = 0;
895895
nalPrefixData[1] = 0;
896896
nalPrefixData[2] = 0;
897-
int nalUnitPrefixLength = track.track.nalUnitLengthFieldLength + 1;
898897
int nalUnitLengthFieldLengthDiff = 4 - track.track.nalUnitLengthFieldLength;
899898
// NAL units are length delimited, but the decoder requires start code delimited units.
900899
// Loop until we've written the sample to the track output, replacing length delimiters with
901900
// start codes as we encounter them.
902901
while (sampleBytesWritten < sampleSize) {
903902
if (sampleCurrentNalBytesRemaining == 0) {
903+
int nalUnitPrefixLength = track.track.nalUnitLengthFieldLength;
904+
boolean readNalType = false;
905+
if (!isSampleDependedOn
906+
&& nalUnitPrefixLength + 1
907+
<= track.sampleTable.sizes[sampleIndex] - sampleBytesRead) {
908+
// Parsing sample dependencies needs the first NAL unit byte. Read it in the same
909+
// readFully call that reads the NAL length. This ensures sampleBytesRead,
910+
// sampleBytesWritten and isSampleDependedOn remain in a consistent state if we have
911+
// read failures.
912+
nalUnitPrefixLength = track.track.nalUnitLengthFieldLength + 1;
913+
readNalType = true;
914+
}
904915
// Read the NAL length so that we know where we find the next one.
905-
// In the same readFully call, read the first payload byte in order to determine
906-
// sample dependencies. Do not attempt to peek the first payload byte because that might
907-
// fail, and we should keep sampleBytesRead, sampleBytesWritten, isSampleDependedOn in
908-
// a consistent state.
909916
input.readFully(nalPrefixData, nalUnitLengthFieldLengthDiff, nalUnitPrefixLength);
910917
sampleBytesRead += nalUnitPrefixLength;
911918
nalPrefix.setPosition(0);
912919
int nalLengthInt = nalPrefix.readInt();
913-
if (nalLengthInt < 1) {
920+
if (nalLengthInt < 0) {
914921
throw ParserException.createForMalformedContainer(
915922
"Invalid NAL length", /* cause= */ null);
916923
}
917-
sampleCurrentNalBytesRemaining = nalLengthInt - 1;
924+
sampleCurrentNalBytesRemaining = nalLengthInt - (readNalType ? 1 : 0);
918925
// Write a start code for the current NAL unit.
919926
nalStartCode.setPosition(0);
920927
trackOutput.sampleData(nalStartCode, 4);
921-
// Write the NAL unit type byte.
922-
trackOutput.sampleData(nalPrefix, 1);
923-
sampleBytesWritten += 5;
928+
sampleBytesWritten += 4;
924929
sampleSize += nalUnitLengthFieldLengthDiff;
925-
// If any NAL unit that's part of this sample can be depended on, treat the entire sample
926-
// as depended on.
927-
if (!isSampleDependedOn && NalUnitUtil.isH264NalUnitDependedOn(nalPrefixData[4])) {
928-
isSampleDependedOn = true;
930+
if (readNalType) {
931+
// Write the NAL unit type byte.
932+
trackOutput.sampleData(nalPrefix, 1);
933+
sampleBytesWritten += 1;
934+
// If any NAL unit that's part of this sample can be depended on, treat the entire
935+
// sample as depended on.
936+
if (NalUnitUtil.isH264NalUnitDependedOn(nalPrefixData[4])) {
937+
isSampleDependedOn = true;
938+
}
929939
}
930940
} else {
931941
// Write the payload of the NAL unit.

libraries/extractor/src/test/java/androidx/media3/extractor/mp4/Mp4ExtractorParameterizedTest.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,11 @@ public void mp4WithAuxiliaryTracksInterleavedWithPrimaryVideoTracks() throws Exc
239239
"media/mp4/sample_with_fake_auxiliary_tracks_interleaved_with_primary_video_tracks.mp4");
240240
}
241241

242+
@Test
243+
public void mp4SampleWithEmptyNalu() throws Exception {
244+
assertExtractorBehavior("media/mp4/sample_with_invalid_nalu.mp4");
245+
}
246+
242247
private void assertExtractorBehavior(String file) throws IOException {
243248
ExtractorAsserts.AssertionConfig.Builder assertionConfigBuilder =
244249
new ExtractorAsserts.AssertionConfig.Builder();
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
seekMap:
2+
isSeekable = true
3+
duration = 1001000
4+
getPosition(0) = [[timeUs=0, position=1266]]
5+
getPosition(1) = [[timeUs=0, position=1266]]
6+
getPosition(500500) = [[timeUs=0, position=1266]]
7+
getPosition(1001000) = [[timeUs=0, position=1266]]
8+
numberOfTracks = 1
9+
track 0:
10+
total output bytes = 89876
11+
sample count = 30
12+
track duration = 1001000
13+
format 0:
14+
id = 1
15+
containerMimeType = video/mp4
16+
sampleMimeType = video/avc
17+
codecs = avc1.64001F
18+
maxInputSize = 36722
19+
maxNumReorderSamples = 2
20+
width = 1080
21+
height = 720
22+
frameRate = 29.97
23+
colorInfo:
24+
lumaBitdepth = 8
25+
chromaBitdepth = 8
26+
metadata = entries=[TSSE: description=null: values=[Lavf60.16.100], Mp4Timestamp: creation time=0, modification time=0, timescale=1000]
27+
initializationData:
28+
data = length 29, hash 4746B5D9
29+
data = length 10, hash 7A0D0F2B
30+
sample 0:
31+
time = 0
32+
flags = 1
33+
data = length 36692, hash D216076E
34+
sample 1:
35+
time = 66733
36+
flags = 0
37+
data = length 5312, hash D45D3CA0
38+
sample 2:
39+
time = 33366
40+
flags = 0
41+
data = length 599, hash 1BE7812D
42+
sample 3:
43+
time = 200200
44+
flags = 0
45+
data = length 7735, hash 4490F110
46+
sample 4:
47+
time = 133466
48+
flags = 0
49+
data = length 987, hash 560B5036
50+
sample 5:
51+
time = 100100
52+
flags = 0
53+
data = length 673, hash ED7CD8C7
54+
sample 6:
55+
time = 166833
56+
flags = 0
57+
data = length 523, hash 3020DF50
58+
sample 7:
59+
time = 333666
60+
flags = 0
61+
data = length 6061, hash 736C72B2
62+
sample 8:
63+
time = 266933
64+
flags = 0
65+
data = length 992, hash FE132F23
66+
sample 9:
67+
time = 233566
68+
flags = 0
69+
data = length 623, hash 5B2C1816
70+
sample 10:
71+
time = 300300
72+
flags = 0
73+
data = length 421, hash 742E69C1
74+
sample 11:
75+
time = 433766
76+
flags = 0
77+
data = length 4899, hash F72F86A1
78+
sample 12:
79+
time = 400400
80+
flags = 0
81+
data = length 568, hash 519A8E50
82+
sample 13:
83+
time = 367033
84+
flags = 0
85+
data = length 620, hash 3990AA39
86+
sample 14:
87+
time = 567233
88+
flags = 0
89+
data = length 5450, hash F06EC4AA
90+
sample 15:
91+
time = 500500
92+
flags = 0
93+
data = length 1051, hash 92DFA63A
94+
sample 16:
95+
time = 467133
96+
flags = 0
97+
data = length 874, hash 69587FB4
98+
sample 17:
99+
time = 533866
100+
flags = 0
101+
data = length 781, hash 36BE495B
102+
sample 18:
103+
time = 700700
104+
flags = 0
105+
data = length 4725, hash AC0C8CD3
106+
sample 19:
107+
time = 633966
108+
flags = 0
109+
data = length 1022, hash 5D8BFF34
110+
sample 20:
111+
time = 600600
112+
flags = 0
113+
data = length 790, hash 99413A99
114+
sample 21:
115+
time = 667333
116+
flags = 0
117+
data = length 610, hash 5E129290
118+
sample 22:
119+
time = 834166
120+
flags = 0
121+
data = length 2751, hash 769974CB
122+
sample 23:
123+
time = 767433
124+
flags = 0
125+
data = length 745, hash B78A477A
126+
sample 24:
127+
time = 734066
128+
flags = 0
129+
data = length 621, hash CF741E7A
130+
sample 25:
131+
time = 800800
132+
flags = 0
133+
data = length 505, hash 1DB4894E
134+
sample 26:
135+
time = 967633
136+
flags = 0
137+
data = length 1268, hash C15348DC
138+
sample 27:
139+
time = 900900
140+
flags = 0
141+
data = length 880, hash C2DE85D0
142+
sample 28:
143+
time = 867533
144+
flags = 0
145+
data = length 530, hash C9641EB0
146+
sample 29:
147+
time = 934266
148+
flags = 536870912
149+
data = length 568, hash 4FE5C8EA
150+
tracksEnded = true
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
seekMap:
2+
isSeekable = true
3+
duration = 1001000
4+
getPosition(0) = [[timeUs=0, position=1266]]
5+
getPosition(1) = [[timeUs=0, position=1266]]
6+
getPosition(500500) = [[timeUs=0, position=1266]]
7+
getPosition(1001000) = [[timeUs=0, position=1266]]
8+
numberOfTracks = 1
9+
track 0:
10+
total output bytes = 89876
11+
sample count = 30
12+
track duration = 1001000
13+
format 0:
14+
id = 1
15+
containerMimeType = video/mp4
16+
sampleMimeType = video/avc
17+
codecs = avc1.64001F
18+
maxInputSize = 36722
19+
maxNumReorderSamples = 2
20+
width = 1080
21+
height = 720
22+
frameRate = 29.97
23+
colorInfo:
24+
lumaBitdepth = 8
25+
chromaBitdepth = 8
26+
metadata = entries=[TSSE: description=null: values=[Lavf60.16.100], Mp4Timestamp: creation time=0, modification time=0, timescale=1000]
27+
initializationData:
28+
data = length 29, hash 4746B5D9
29+
data = length 10, hash 7A0D0F2B
30+
sample 0:
31+
time = 0
32+
flags = 1
33+
data = length 36692, hash D216076E
34+
sample 1:
35+
time = 66733
36+
flags = 0
37+
data = length 5312, hash D45D3CA0
38+
sample 2:
39+
time = 33366
40+
flags = 0
41+
data = length 599, hash 1BE7812D
42+
sample 3:
43+
time = 200200
44+
flags = 0
45+
data = length 7735, hash 4490F110
46+
sample 4:
47+
time = 133466
48+
flags = 0
49+
data = length 987, hash 560B5036
50+
sample 5:
51+
time = 100100
52+
flags = 0
53+
data = length 673, hash ED7CD8C7
54+
sample 6:
55+
time = 166833
56+
flags = 0
57+
data = length 523, hash 3020DF50
58+
sample 7:
59+
time = 333666
60+
flags = 0
61+
data = length 6061, hash 736C72B2
62+
sample 8:
63+
time = 266933
64+
flags = 0
65+
data = length 992, hash FE132F23
66+
sample 9:
67+
time = 233566
68+
flags = 0
69+
data = length 623, hash 5B2C1816
70+
sample 10:
71+
time = 300300
72+
flags = 0
73+
data = length 421, hash 742E69C1
74+
sample 11:
75+
time = 433766
76+
flags = 0
77+
data = length 4899, hash F72F86A1
78+
sample 12:
79+
time = 400400
80+
flags = 0
81+
data = length 568, hash 519A8E50
82+
sample 13:
83+
time = 367033
84+
flags = 0
85+
data = length 620, hash 3990AA39
86+
sample 14:
87+
time = 567233
88+
flags = 0
89+
data = length 5450, hash F06EC4AA
90+
sample 15:
91+
time = 500500
92+
flags = 0
93+
data = length 1051, hash 92DFA63A
94+
sample 16:
95+
time = 467133
96+
flags = 0
97+
data = length 874, hash 69587FB4
98+
sample 17:
99+
time = 533866
100+
flags = 0
101+
data = length 781, hash 36BE495B
102+
sample 18:
103+
time = 700700
104+
flags = 0
105+
data = length 4725, hash AC0C8CD3
106+
sample 19:
107+
time = 633966
108+
flags = 0
109+
data = length 1022, hash 5D8BFF34
110+
sample 20:
111+
time = 600600
112+
flags = 0
113+
data = length 790, hash 99413A99
114+
sample 21:
115+
time = 667333
116+
flags = 0
117+
data = length 610, hash 5E129290
118+
sample 22:
119+
time = 834166
120+
flags = 0
121+
data = length 2751, hash 769974CB
122+
sample 23:
123+
time = 767433
124+
flags = 0
125+
data = length 745, hash B78A477A
126+
sample 24:
127+
time = 734066
128+
flags = 0
129+
data = length 621, hash CF741E7A
130+
sample 25:
131+
time = 800800
132+
flags = 0
133+
data = length 505, hash 1DB4894E
134+
sample 26:
135+
time = 967633
136+
flags = 0
137+
data = length 1268, hash C15348DC
138+
sample 27:
139+
time = 900900
140+
flags = 0
141+
data = length 880, hash C2DE85D0
142+
sample 28:
143+
time = 867533
144+
flags = 0
145+
data = length 530, hash C9641EB0
146+
sample 29:
147+
time = 934266
148+
flags = 536870912
149+
data = length 568, hash 4FE5C8EA
150+
tracksEnded = true

0 commit comments

Comments
 (0)