From 5337e2977f159c29e2b8af575e56866862af241b Mon Sep 17 00:00:00 2001
Date: Thu, 15 Jan 2026 11:09:07 +0100
Subject: [PATCH 1/6] extractor: Check for valid offsets extracting MP3
 performer tags

The parser could be tricked into reading out of bounds; add a check to
ensure this does not happen.

Closes: https://gitlab.gnome.org/GNOME/localsearch/-/issues/425
---
 src/extractor/tracker-extract-mp3.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/extractor/tracker-extract-mp3.c b/src/extractor/tracker-extract-mp3.c
index 0f13ad758..3d3abf8e8 100644
--- a/src/extractor/tracker-extract-mp3.c
+++ b/src/extractor/tracker-extract-mp3.c
@@ -1438,6 +1438,10 @@ extract_performers_tags (id3v2tag *tag, const gchar *data, guint pos, size_t csi
 		text_instrument = &data[pos];
 		text_instrument_len = id3v2_strlen (text_encode, text_instrument, csize - 1);
 		offset = text_instrument_len + id3v2_nul_size (text_encode);
+
+		if (pos + offset >= csize)
+			break;
+
 		text_performer = &data[pos + offset];
 
 		if (version == 2.4f) {
-- 
GitLab


From 2da6a9442f09b2d83e5c508a4161a6aa586c5598 Mon Sep 17 00:00:00 2001
Date: Thu, 15 Jan 2026 11:11:27 +0100
Subject: [PATCH 2/6] extractor: Bail out on 0-size frame for ID3v2.0 tags

The code handling ID3v2.3 and v2.4 tags already bailed out correctly. Do
the same for 2.0 tags, to avoid a possible NULL pointer dereference.

Closes: https://gitlab.gnome.org/GNOME/localsearch/-/issues/426
---
 src/extractor/tracker-extract-mp3.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/extractor/tracker-extract-mp3.c b/src/extractor/tracker-extract-mp3.c
index 3d3abf8e8..81150e6fb 100644
--- a/src/extractor/tracker-extract-mp3.c
+++ b/src/extractor/tracker-extract-mp3.c
@@ -2583,6 +2583,7 @@ parse_id3v20 (const gchar          *data,
 			break;
 		} else if (csize == 0) {
 			g_debug ("[v20] Content size was 0, moving to next frame");
+			continue;
 		}
 
 		/* Early versions do not have unsynch per frame */
-- 
GitLab


From 79f47309bad068ff0c19c1431abab6766edc687f Mon Sep 17 00:00:00 2001
Date: Thu, 15 Jan 2026 11:14:49 +0100
Subject: [PATCH 3/6] extractor: Check for buffer boundaries extracting MP3
 TXXX tags

This code could be tricked into reading out of bounds, add a check
to ensure this does not happen.

Closes: https://gitlab.gnome.org/GNOME/localsearch/-/issues/427
---
 src/extractor/tracker-extract-mp3.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/extractor/tracker-extract-mp3.c b/src/extractor/tracker-extract-mp3.c
index 81150e6fb..e469550b1 100644
--- a/src/extractor/tracker-extract-mp3.c
+++ b/src/extractor/tracker-extract-mp3.c
@@ -1485,8 +1485,12 @@ extract_txxx_tags (id3v2tag *tag, const gchar *data, guint pos, size_t csize, id
 	text_desc     = &data[pos + 4]; /* <text string according to encoding> $00 (00) */
 	text_desc_len = id3v2_strlen (text_encode, text_desc, csize - 4);
 
-	offset        = 4 + text_desc_len + id3v2_nul_size (text_encode);
-	text          = &data[pos + offset]; /* <full text string according to encoding> */
+	offset = 4 + text_desc_len + id3v2_nul_size (text_encode);
+
+	if (pos + offset >= csize)
+		return;
+
+	text = &data[pos + offset]; /* <full text string according to encoding> */
 
 	if (version == 2.3f) {
 		description = id3v2_text_to_utf8 (data[pos], &data[pos + 1], csize - 1, info);
-- 
GitLab


From efb4aa19ec8e4ec172457deea10ebb9a1a3147f1 Mon Sep 17 00:00:00 2001
Date: Thu, 15 Jan 2026 11:23:59 +0100
Subject: [PATCH 4/6] extractor: Minor code refactor

The APIC tag is the same for ID3v2.3 and 2.4 frames, refactor
its handling to a separate function, so the code is not repeated.
---
 src/extractor/tracker-extract-mp3.c | 82 +++++++++++++----------------
 1 file changed, 36 insertions(+), 46 deletions(-)

diff --git a/src/extractor/tracker-extract-mp3.c b/src/extractor/tracker-extract-mp3.c
index e469550b1..ff2a2fb75 100644
--- a/src/extractor/tracker-extract-mp3.c
+++ b/src/extractor/tracker-extract-mp3.c
@@ -1565,6 +1565,67 @@ extract_ufid_tags (id3v2tag *tag, const gchar *data, guint pos, size_t csize)
 	tag->mb_recording_id = identifier;
 }
 
+/*
+ * Extracts embedded cover art from an APIC frame, whose layout is the same
+ * in ID3v2.3 and ID3v2.4:
+ *
+ *   <text encoding> <MIME type> $00 <picture type> <description> $00 (00) <picture data>
+ *
+ * @data + @pos is the start of the frame content and @csize its length.
+ * Picture type 3 always replaces the stored media art, picture type 0 is
+ * only used while filedata->media_art_size is still 0. The stored pointers
+ * point into @data, nothing is copied. @tag, @info and @version are unused.
+ *
+ * Every offset is relative to @pos and checked against @csize before it is
+ * dereferenced, so truncated or malformed frames are ignored.
+ */
+static void
+extract_apic_tag (id3v2tag    *tag,
+                  const gchar *data,
+                  guint        pos,
+                  size_t       csize,
+                  id3tag      *info,
+                  MP3Data     *filedata,
+                  gfloat       version)
+{
+	gchar text_type;
+	const gchar *mime;
+	gchar pic_type;
+	const gchar *desc;
+	size_t offset;
+	size_t mime_len;
+
+	/* Nothing to parse; also keeps "csize - 1" below from wrapping around */
+	if (csize == 0)
+		return;
+
+	text_type = data[pos];
+	mime = &data[pos + 1];
+	mime_len = strnlen (mime, csize - 1);
+
+	/* Skip encoding byte, MIME type, its NUL terminator and picture type */
+	offset = 1 + mime_len + 1 + 1;
+
+	/* Unterminated MIME type, or no room left for picture type/description */
+	if (offset >= csize)
+		return;
+
+	pic_type = data[pos + offset - 1];
+	desc = &data[pos + offset];
+
+	if (pic_type == 3 || (pic_type == 0 && filedata->media_art_size == 0)) {
+		offset += id3v2_strlen (text_type, desc, csize - offset) + id3v2_nul_size (text_type);
+
+		/* The description ran up to the end of the frame, no picture data */
+		if (offset >= csize)
+			return;
+
+		filedata->media_art_data = &data[pos + offset];
+		filedata->media_art_size = csize - offset;
+		filedata->media_art_mime = mime;
+	}
+}
+
 static void
 get_id3v24_tags (id3v24frame           frame,
                  const gchar          *data,
@@ -1578,31 +1610,10 @@ get_id3v24_tags (id3v24frame           frame,
 	guint pos = 0;
 
 	switch (frame) {
-	case ID3V24_APIC: {
+	case ID3V24_APIC:
 		/* embedded image */
-		gchar text_type;
-		const gchar *mime;
-		gchar pic_type;
-		const gchar *desc;
-		guint offset;
-		gint mime_len;
-
-		text_type =  data[pos + 0];
-		mime      = &data[pos + 1];
-		mime_len  = strnlen (mime, csize - 1);
-		pic_type  =  data[pos + 1 + mime_len + 1];
-		desc      = &data[pos + 1 + mime_len + 1 + 1];
-
-		if (pic_type == 3 || (pic_type == 0 && filedata->media_art_size == 0)) {
-			offset = pos + 1 + mime_len + 2;
-			offset += id3v2_strlen (text_type, desc, csize - offset) + id3v2_nul_size (text_type);
-
-			filedata->media_art_data = &data[offset];
-			filedata->media_art_size = csize - offset;
-			filedata->media_art_mime = mime;
-		}
+		extract_apic_tag (tag, data, pos, csize, info, filedata, 2.4f);
 		break;
-	}
 
 	case ID3V24_COMM: {
 		gchar *word;
@@ -1786,31 +1797,10 @@ get_id3v23_tags (id3v24frame           frame,
 	guint pos = 0;
 
 	switch (frame) {
-	case ID3V24_APIC: {
+	case ID3V24_APIC:
 		/* embedded image */
-		gchar text_type;
-		const gchar *mime;
-		gchar pic_type;
-		const gchar *desc;
-		guint offset;
-		gint  mime_len;
-
-		text_type =  data[pos + 0];
-		mime      = &data[pos + 1];
-		mime_len  = strnlen (mime, csize - 1);
-		pic_type  =  data[pos + 1 + mime_len + 1];
-		desc      = &data[pos + 1 + mime_len + 1 + 1];
-
-		if (pic_type == 3 || (pic_type == 0 && filedata->media_art_size == 0)) {
-			offset = pos + 1 + mime_len + 2;
-			offset += id3v2_strlen (text_type, desc, csize - offset) + id3v2_nul_size (text_type);
-
-			filedata->media_art_data = &data[offset];
-			filedata->media_art_size = csize - offset;
-			filedata->media_art_mime = mime;
-		}
+		extract_apic_tag (tag, data, pos, csize, info, filedata, 2.3f);
 		break;
-	}
 
 	case ID3V24_COMM: {
 		gchar *word;
-- 
GitLab


From 9cc562cc126c408efb2a8220fcd67f006902412c Mon Sep 17 00:00:00 2001
Date: Thu, 15 Jan 2026 11:29:48 +0100
Subject: [PATCH 5/6] extractor: Refactor/fix handling of COMM tags

Refactor the code handling COMM tags for ID3v2.3 and v2.4, so they
share a common implementation. In doing that, this also fixes a
missing check for boundaries for ID3v2.3, which existed for v2.4.

Closes: https://gitlab.gnome.org/GNOME/localsearch/-/issues/428
---
 src/extractor/tracker-extract-mp3.c | 98 +++++++++++++----------------
 1 file changed, 44 insertions(+), 54 deletions(-)

diff --git a/src/extractor/tracker-extract-mp3.c b/src/extractor/tracker-extract-mp3.c
index ff2a2fb75..25d208b15 100644
--- a/src/extractor/tracker-extract-mp3.c
+++ b/src/extractor/tracker-extract-mp3.c
@@ -1597,6 +1597,62 @@ extract_apic_tag (id3v2tag    *tag,
 	}
 }
 
+/*
+ * Extracts the text of a COMM frame into tag->comment. The frame content
+ * is read as:
+ *
+ *   <text encoding> <3 bytes, not used here> <description> $00 (00) <comment text>
+ *
+ * @data + @pos is the start of the frame content and @csize its length.
+ * @version selects the text decoder: id3v2_text_to_utf8() for 2.3f,
+ * id3v24_text_to_utf8() otherwise. A non-empty result replaces (and frees)
+ * the previously stored comment, an empty one is discarded.
+ */
+static void
+extract_comm_tag (id3v2tag    *tag,
+                  const gchar *data,
+                  guint        pos,
+                  size_t       csize,
+                  id3tag      *info,
+                  gfloat       version)
+{
+	gchar *word;
+	gchar text_encode;
+	const gchar *text_desc;
+	const gchar *text;
+	guint offset;
+	gint text_desc_len;
+
+	/* Too short to hold the 4 byte header, "csize - 4" would wrap around */
+	if (csize <= 4)
+		return;
+
+	text_encode = data[pos]; /* $xx */
+	text_desc = &data[pos + 4]; /* <text string according to encoding> $00 (00) */
+	text_desc_len = id3v2_strlen (text_encode, text_desc, csize - 4);
+
+	offset = 4 + text_desc_len + id3v2_nul_size (text_encode);
+
+	/* No room left for the comment text itself */
+	if (offset >= csize)
+		return;
+
+	text = &data[pos + offset]; /* <full text string according to encoding> */
+
+	if (version == 2.3f)
+		word = id3v2_text_to_utf8 (text_encode, text, csize - offset, info);
+	else
+		word = id3v24_text_to_utf8 (text_encode, text, csize - offset, info);
+
+	if (!tracker_is_empty_string (word)) {
+		g_strstrip (word);
+		g_free (tag->comment);
+		tag->comment = word;
+	} else {
+		g_free (word);
+	}
+}
+
 static void
 get_id3v24_tags (id3v24frame           frame,
                  const gchar          *data,
@@ -1615,35 +1655,9 @@ get_id3v24_tags (id3v24frame           frame,
 		extract_apic_tag (tag, data, pos, csize, info, filedata, 2.4f);
 		break;
 
-	case ID3V24_COMM: {
-		gchar *word;
-		gchar text_encode;
-		const gchar *text_desc;
-		const gchar *text;
-		guint offset;
-		gint text_desc_len;
-
-		text_encode   =  data[pos + 0]; /* $xx */
-		text_desc     = &data[pos + 4]; /* <text string according to encoding> $00 (00) */
-		text_desc_len = id3v2_strlen (text_encode, text_desc, csize - 4);
-
-		offset        = 4 + text_desc_len + id3v2_nul_size (text_encode);
-		text          = &data[pos + offset]; /* <full text string according to encoding> */
-
-		if (offset >= csize)
-			break;
-
-		word = id3v24_text_to_utf8 (text_encode, text, csize - offset, info);
-
-		if (!tracker_is_empty_string (word)) {
-			g_strstrip (word);
-			g_free (tag->comment);
-			tag->comment = word;
-		} else {
-			g_free (word);
-		}
+	case ID3V24_COMM:
+		extract_comm_tag (tag, data, pos, csize, info, 2.4f);
 		break;
-	}
 
 	case ID3V24_TMCL: {
 		extract_performers_tags (tag, data, pos, csize, info, 2.4f);
@@ -1802,33 +1816,9 @@ get_id3v23_tags (id3v24frame           frame,
 		extract_apic_tag (tag, data, pos, csize, info, filedata, 2.3f);
 		break;
 
-	case ID3V24_COMM: {
-		gchar *word;
-		gchar text_encode;
-		const gchar *text_desc;
-		const gchar *text;
-		guint offset;
-		gint text_desc_len;
-
-		text_encode   =  data[pos + 0]; /* $xx */
-		text_desc     = &data[pos + 4]; /* <text string according to encoding> $00 (00) */
-		text_desc_len = id3v2_strlen (text_encode, text_desc, csize - 4);
-
-		offset        = 4 + text_desc_len + id3v2_nul_size (text_encode);
-		text          = &data[pos + offset]; /* <full text string according to encoding> */
-
-		word = id3v2_text_to_utf8 (text_encode, text, csize - offset, info);
-
-		if (!tracker_is_empty_string (word)) {
-			g_strstrip (word);
-			g_free (tag->comment);
-			tag->comment = word;
-		} else {
-			g_free (word);
-		}
-
+	case ID3V24_COMM:
+		extract_comm_tag (tag, data, pos, csize, info, 2.3f); /* v2.3 frame: keep the v2.3 text decoder */
+		break;
-	}
 
 	case ID3V24_IPLS: {
 		extract_performers_tags (tag, data, pos, csize, info, 2.3f);
-- 
GitLab


From 2897ca48b7ae79db7dcfe7e66cdd5d75cb641466 Mon Sep 17 00:00:00 2001
Date: Thu, 15 Jan 2026 13:07:43 +0100
Subject: [PATCH 6/6] extractor: Fix accounting of offsets within MP3 performer
 tags

We were moving two needles when parsing multiple performers: on one
hand the base position within the MP3 frame, and on the other the
offset accounted for locally within performers. This made us inadvertently
pass maximum allowed lengths that actually were past the data frame.

To avoid this confusion, keep the base position fixed as it happens
during parsing of other tags, and only move the local offset variable.
This fixes the invalid maximum lengths being passed.

Closes: https://gitlab.gnome.org/GNOME/localsearch/-/issues/429
---
 src/extractor/tracker-extract-mp3.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/extractor/tracker-extract-mp3.c b/src/extractor/tracker-extract-mp3.c
index 25d208b15..867e0b15f 100644
--- a/src/extractor/tracker-extract-mp3.c
+++ b/src/extractor/tracker-extract-mp3.c
@@ -1420,12 +1420,12 @@ static void
 extract_performers_tags (id3v2tag *tag, const gchar *data, guint pos, size_t csize, id3tag *info, gfloat version)
 {
 	gchar text_encode;
-	guint offset = 0;
+	size_t offset = 0;
 	GSList *performers;
 	gint n_performers = 0;
 
 	text_encode = data[pos];
-	pos += 1;
+	offset += 1;
 	performers = NULL;
 
 	while (pos + offset < csize) {
@@ -1435,9 +1435,9 @@ extract_performers_tags (id3v2tag *tag, const gchar *data, guint pos, size_t csi
 		gint text_performer_len;
 		gchar *performer = NULL;
 
-		text_instrument = &data[pos];
-		text_instrument_len = id3v2_strlen (text_encode, text_instrument, csize - 1);
-		offset = text_instrument_len + id3v2_nul_size (text_encode);
+		text_instrument = &data[pos + offset];
+		text_instrument_len = id3v2_strlen (text_encode, text_instrument, csize - offset);
+		offset += text_instrument_len + id3v2_nul_size (text_encode);
 
 		if (pos + offset >= csize)
 			break;
@@ -1454,7 +1454,7 @@ extract_performers_tags (id3v2tag *tag, const gchar *data, guint pos, size_t csi
 		n_performers += 1;
 
 		text_performer_len = id3v2_strlen (text_encode, text_performer, csize - offset);
-		pos += text_instrument_len + text_performer_len + 2*id3v2_nul_size (text_encode);
+		offset += text_performer_len + id3v2_nul_size (text_encode);
 	}
 
 	if (performers) {
-- 
GitLab

