Skip to content

Commit 2f63d47

Browse files
committed
id3: fix reading/writing v1 and v1.1
Properly read and write ID3v1 and v1.1 tags according to the spec [0]. ID3v1 tags are no longer always read as v1.1, which previously resulted in wrong track numbers. Writing will use v1 by default, automatically upgrading to v1.1 if needed. I am not entirely sure what the WinAmp special-case was for, but the (also fixed) testcase still passes so ... probably still covered. [0] https://id3.org/ID3v1
1 parent 9e3ff54 commit 2f63d47

2 files changed

Lines changed: 80 additions & 12 deletions

File tree

mutagen/id3/_id3v1.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -99,17 +99,23 @@ def ParseID3v1(data, v2_version=4, known_frames=None):
9999
# wrote only the characters available - e.g. "1" or "" - into the
100100
# year field. To parse those, reduce the size of the year field.
101101
# Amazingly, "0s" works as a struct format string.
102-
unpack_fmt = "3s30s30s30s%ds29sBB" % (len(data) - 124)
102+
unpack_fmt = "3s30s30s30s%ds30sB" % (len(data) - 124)
103103

104104
try:
105-
tag, title, artist, album, year, comment, track, genre = unpack(
105+
tag, title, artist, album, year, comment, genre = unpack(
106106
unpack_fmt, data)
107107
except StructError:
108108
return None
109109

110110
if tag != b"TAG":
111111
return None
112112

113+
track = 0
114+
if comment[-2] == 0:
115+
# treat it as v1.1
116+
track = comment[-1]
117+
comment = comment[:-2]
118+
113119
def fix(data):
114120
return data.split(b"\x00")[0].strip().decode('latin1')
115121

@@ -149,10 +155,7 @@ def fix(data):
149155
frames["COMM"] = frame_class["COMM"](
150156
encoding=0, lang="eng", desc="ID3v1 Comment", text=comment)
151157

152-
# Don't read a track number if it looks like the comment was
153-
# padded with spaces instead of nulls (thanks, WinAmp).
154-
if (track and frame_class["TRCK"] and
155-
((track != 32) or (data[-3] == b'\x00'[0]))):
158+
if track and frame_class["TRCK"]:
156159
frames["TRCK"] = TRCK(encoding=0, text=str(track))
157160
if genre != 255 and frame_class["TCON"]:
158161
frames["TCON"] = TCON(encoding=0, text=str(genre))
@@ -173,18 +176,26 @@ def MakeID3v1(id3):
173176
v1[name] = text + (b"\x00" * (30 - len(text)))
174177

175178
if "COMM" in id3:
176-
cmnt = id3["COMM"].text[0].encode('latin1', 'replace')[:28]
179+
cmnt = id3["COMM"].text[0].encode('latin1', 'replace')[:30]
177180
else:
178181
cmnt = b""
179-
v1["comment"] = cmnt + (b"\x00" * (29 - len(cmnt)))
182+
v1["comment"] = cmnt + (b"\x00" * (30 - len(cmnt)))
180183

181184
if "TRCK" in id3:
182185
try:
183186
v1["track"] = bchr(+id3["TRCK"])
184187
except ValueError:
185-
v1["track"] = b"\x00"
188+
# nothing useful to add, keep it as v1
189+
v1["track"] = b""
190+
else:
191+
# make it v1.1
192+
v1["comment"] = v1["comment"][:-2]
193+
v1["track"] = b"\x00" + v1["track"]
186194
else:
187-
v1["track"] = b"\x00"
195+
v1["track"] = b""
196+
197+
if len(v1["comment"]) + len(v1["track"]) != 30:
198+
raise ValueError("comment/track field must be 30 bytes long")
188199

189200
if "TCON" in id3:
190201
try:

tests/test_id3.py

Lines changed: 59 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -627,7 +627,7 @@ def test_year(self):
627627
def test_v1_not_v11(self):
628628
self.id3["TRCK"] = TRCK(encoding=0, text="32")
629629
tag = MakeID3v1(self.id3)
630-
self.failUnless(32, ParseID3v1(tag)["TRCK"])
630+
self.assertEqual("32", ParseID3v1(tag)["TRCK"])
631631
del self.id3["TRCK"]
632632
tag = MakeID3v1(self.id3)
633633
tag = tag[:125] + b' ' + tag[-1:]
@@ -643,7 +643,7 @@ def test_nulls(self):
643643
self.assertEquals(b'qrst'.decode('latin1'), tags['TALB'])
644644

645645
def test_nonascii(self):
646-
s = u'TAG%(title)30s%(artist)30s%(album)30s%(year)4s%(cmt)29s\x03\x01'
646+
s = u'TAG%(title)30s%(artist)30s%(album)30s%(year)4s%(cmt)28s\x00\x03\x01'
647647
s = s % dict(artist=u'abcd\xe9fg', title=u'hijklmn\xf3p',
648648
album=u'qrst\xfcv', cmt=u'wxyz', year=u'1234')
649649
tags = ParseID3v1(s.encode("latin-1"))
@@ -690,6 +690,63 @@ def test_v1_genre(self):
690690
v1tag = MakeID3v1(tag)
691691
self.failUnlessEqual(ParseID3v1(v1tag)["TCON"].genres, ["Pop"])
692692

693+
def test_v1_comment(self):
694+
written = MakeID3v1({"COMM": COMM(encoding=0,
695+
text="abcdefghijklmnopqrstuvwxyzABCDEFG")})
696+
# truncated
697+
self.assertEqual(written[97:127], b"abcdefghijklmnopqrstuvwxyzABCD")
698+
699+
parsed = ParseID3v1(written)
700+
self.assertEqual(parsed["COMM"], "abcdefghijklmnopqrstuvwxyzABCD")
701+
self.assertFalse("TRCK" in parsed)
702+
703+
def test_v1_comment_pad(self):
704+
written = MakeID3v1({"COMM": COMM(encoding=0, text=" comment")})
705+
self.assertEqual(written[97:127], b" comment" + (b"\x00" * 20)) # padded
706+
707+
parsed = ParseID3v1(written)
708+
self.assertEqual(parsed["COMM"], "comment") # leading whitespace stripped
709+
self.assertFalse("TRCK" in parsed)
710+
711+
def test_v1_nocomment(self):
712+
written = MakeID3v1({})
713+
self.assertEqual(written[97:127], b"\x00" * 30) # all null bytes
714+
715+
parsed = ParseID3v1(written)
716+
self.assertFalse("COMM" in parsed)
717+
self.assertFalse("TRCK" in parsed)
718+
719+
def test_v11_comment(self):
720+
written = MakeID3v1({
721+
"COMM": COMM(encoding=0, text="abcdefghijklmnopqrstuvwxyzABCDEFG"),
722+
"TRCK": TRCK(encoding=0, text="5")
723+
})
724+
# truncated with separator and track
725+
self.assertEqual(written[97:127], b"abcdefghijklmnopqrstuvwxyzAB\x00\x05")
726+
727+
parsed = ParseID3v1(written)
728+
self.assertEqual(parsed["COMM"], "abcdefghijklmnopqrstuvwxyzAB")
729+
self.assertEqual(parsed["TRCK"], "5")
730+
731+
def test_v11_nocomment(self):
732+
written = MakeID3v1({"TRCK": TRCK(encoding=0, text="6")})
733+
self.assertEqual(written[97:127], (b"\x00" * 29) + b"\x06")
734+
735+
parsed = ParseID3v1(written)
736+
self.assertFalse("COMM" in parsed)
737+
self.assertEqual(parsed["TRCK"], "6")
738+
739+
def test_v11_invalid_track(self):
740+
written = MakeID3v1({
741+
"COMM": COMM(encoding=0, text="abcdefghijklmnopqrstuvwxyzABCDEFG"),
742+
"TRCK": TRCK(encoding=0, text="#7")
743+
})
744+
self.assertEqual(written[97:127], b"abcdefghijklmnopqrstuvwxyzABCD") # no track
745+
746+
parsed = ParseID3v1(written)
747+
self.assertEqual(parsed["COMM"], "abcdefghijklmnopqrstuvwxyzABCD")
748+
self.assertFalse("TRCK" in parsed)
749+
693750

694751
class TestWriteID3v1(TestCase):
695752

0 commit comments

Comments
 (0)