From fa49377b0ab889cb704a8db647b2d1901cf64c7b Mon Sep 17 00:00:00 2001 From: Frank Schmirler Date: Tue, 16 Dec 2025 14:15:01 +0100 Subject: [PATCH 1/4] Fix decoding headers where the encoded part is embedded in a word. --- src/parser_utils.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/parser_utils.c b/src/parser_utils.c index a5cc6f09..18e83650 100644 --- a/src/parser_utils.c +++ b/src/parser_utils.c @@ -353,6 +353,17 @@ void fixupEncodedHeaderLine(char *buf, int buflen){ char *p = v; + /* + * https://www.ietf.org/rfc/rfc2047.txt says that + * + * "When displaying a particular header field that contains multiple + * 'encoded-word's, any 'linear-white-space' that separates a pair of + * adjacent 'encoded-word's is ignored." (6.2) + */ + + if(n_tokens > 0 && prev_encoded != 1) + strncat(puf, " ", sizeof(puf)-strlen(puf)-1); + do { memset(u, 0, sizeof(u)); @@ -368,10 +379,15 @@ void fixupEncodedHeaderLine(char *buf, int buflen){ */ int b64=0, qp=0; + prev_encoded = 0; memset(encoding, 0, sizeof(encoding)); r = strstr(p, "=?"); if(r){ + *r = '\0'; + strncat(puf, p, sizeof(puf)-strlen(puf)-1); + *r = '='; + p = r + 2; e = strchr(p, '?'); @@ -419,18 +435,6 @@ void fixupEncodedHeaderLine(char *buf, int buflen){ else if(qp == 1) decodeQP(u); - /* - * https://www.ietf.org/rfc/rfc2047.txt says that - * - * "When displaying a particular header field that contains multiple - * 'encoded-word's, any 'linear-white-space' that separates a pair of - * adjacent 'encoded-word's is ignored." (6.2) - */ - if(prev_encoded == 1 && (b64 == 1 || qp == 1)) {} - else if(n_tokens > 1){ - strncat(puf, " ", sizeof(puf)-strlen(puf)-1); - } - if(b64 == 1 || qp == 1){ prev_encoded = 1; need_encoding = 0; From 3d7e568586a911c4a339dc531435dbab14d2cbb7 Mon Sep 17 00:00:00 2001 From: Frank Schmirler Date: Thu, 8 Jan 2026 16:09:08 +0100 Subject: [PATCH 2/4] Fixed skipping whitespace between encoded words --- src/parser_utils.c | 23 +++++++++++++++++------ unit_tests/check_parser_utils.c | 4 ++-- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/src/parser_utils.c b/src/parser_utils.c index 18e83650..87edf41a 100644 --- a/src/parser_utils.c +++ b/src/parser_utils.c @@ -349,10 +349,6 @@ void fixupEncodedHeaderLine(char *buf, int buflen){ q = buf; do { - q = split_str(q, " ", v, sizeof(v)-1); - - char *p = v; - /* * https://www.ietf.org/rfc/rfc2047.txt says that * @@ -361,9 +357,24 @@ void fixupEncodedHeaderLine(char *buf, int buflen){ * adjacent 'encoded-word's is ignored." (6.2) */ - if(n_tokens > 0 && prev_encoded != 1) + if(prev_encoded == 1){ + r = strstr(q, "=?"); + if(r){ + s = q; + while(s < r && *s == ' ') + ++s; + if(s == r) + q = r; + } + } + else if(n_tokens > 0) strncat(puf, " ", sizeof(puf)-strlen(puf)-1); + q = split_str(q, " ", v, sizeof(v)-1); + + char *p = v; + prev_encoded = 0; + do { memset(u, 0, sizeof(u)); @@ -379,7 +390,6 @@ void fixupEncodedHeaderLine(char *buf, int buflen){ */ int b64=0, qp=0; - prev_encoded = 0; memset(encoding, 0, sizeof(encoding)); r = strstr(p, "=?"); @@ -455,6 +465,7 @@ void fixupEncodedHeaderLine(char *buf, int buflen){ } } else { + prev_encoded = 0; strncat(puf, u, sizeof(puf)-strlen(puf)-1); } diff --git a/unit_tests/check_parser_utils.c b/unit_tests/check_parser_utils.c index 5bfe7c34..802750ef 100644 --- a/unit_tests/check_parser_utils.c +++ b/unit_tests/check_parser_utils.c @@ -129,7 +129,7 @@ static void test_fixupEncodedHeaderLine(){ {"Re: [Bitbucket] Issue #627: ldap user can't login (jsuto/piler)", "Re: [Bitbucket] Issue #627: ldap user can't login (jsuto/piler)"}, {"=?iso-8859-2?Q?RE:_test.aaa.fu_z=F3na?=", "RE: test.aaa.fu zóna"}, {"=?iso-8859-2?Q?V=E1ltoz=E1s_az_IT_szervezetben_/_Personal_changes_in_the_?=", "Változás az IT szervezetben / Personal changes in the "}, - {"Re: AAAmil /29 UZ736363", "Re: AAAmil /29 UZ736363"}, + {"Re: AAAmil /29 UZ736363", "Re: AAAmil /29 UZ736363"}, {"=?UTF-8?Q?[JIRA]_Created:_(HUDSS-196)_T=C5=B1zfal_?=", "[JIRA] Created: (HUDSS-196) Tűzfal "}, {"=?iso-8859-2?Q?RE:_Baptista_Szeretetszolg=E1lat?=", "RE: Baptista Szeretetszolgálat"}, {"=?iso-8859-2?B?SXR0IGF6IE1OQiBuYWd5IGRvYuFzYTogaXNt6XQgYmVsZW55+mxuYWsgYSBoaXRlbGV66XNiZSAoMjAxNS4xMS4wMy4gLSBzakBhY3RzLmh1KQ==?=", "Itt az MNB nagy dobása: ismét belenyúlnak a hitelezésbe (2015.11.03. - sj@acts.hu)"}, @@ -143,7 +143,7 @@ static void test_fixupEncodedHeaderLine(){ {"=?UTF-8?Q?Megh=C3=ADv=C3=B3=20a=20Pulzus=20felm=C3=A9r=C3=A9sre=20/=20Inv?= =?UTF-8?Q?itation=20to=20the=20Pulse=20Survey?=", "Meghívó a Pulzus felmérésre / Invitation to the Pulse Survey"}, {"=?iso-8859-2?Q?vhost_l=E9trehoz=E1sa?=", "vhost létrehozása"}, {"Re: MAIL =?UTF-8?B?U1pPTEfDgUxUQVTDgVMgSElCQSAgIEdUUzogOTE1NDUyMQ==?=", "Re: MAIL SZOLGÁLTATÁS HIBA GTS: 9154521"}, - {"[spam???] Better Sex. Better Body. Better Life.", "[spam???] Better Sex. Better Body. Better Life."}, + {"[spam???] Better Sex. Better Body. Better Life.", "[spam???] Better Sex. Better Body. Better Life."}, {"1gy2tt. V3l4d. M5sk6nt", "1gy2tt. V3l4d. M5sk6nt"}, {"=?iso-8859-2?B?03Jp4XNpIG1lZ2xlcGV06XMsIG5pbmNzIHT2YmIgbWVudHPpZyBBbWVyaWthIHN64W3hcmEgKDIwMTUuMTEuMDYuIC0gc2pAYWN0cy5odSk=?=", "Óriási meglepetés, nincs több mentség Amerika számára (2015.11.06. - sj@acts.hu)"}, {"=?utf-8?B?Rlc6IEVtYWlsIGZvZ2Fkw6FzaSBoaWJh?=", "FW: Email fogadási hiba"}, From c46470817fb17ae2eacb73dfb3eb833bc9183e88 Mon Sep 17 00:00:00 2001 From: Frank Schmirler Date: Fri, 9 Jan 2026 14:51:11 +0100 Subject: [PATCH 3/4] Fixed missing spaces after encoded word --- src/parser_utils.c | 4 ++++ unit_tests/check_parser.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/parser_utils.c b/src/parser_utils.c index 87edf41a..1405c493 100644 --- a/src/parser_utils.c +++ b/src/parser_utils.c @@ -365,7 +365,11 @@ void fixupEncodedHeaderLine(char *buf, int buflen){ ++s; if(s == r) q = r; + else + strncat(puf, " ", sizeof(puf)-strlen(puf)-1); } + else + strncat(puf, " ", sizeof(puf)-strlen(puf)-1); } else if(n_tokens > 0) strncat(puf, " ", sizeof(puf)-strlen(puf)-1); diff --git a/unit_tests/check_parser.c b/unit_tests/check_parser.c index 6e7958ae..d18f308c 100644 --- a/unit_tests/check_parser.c +++ b/unit_tests/check_parser.c @@ -39,7 +39,7 @@ static void test_parser(struct config *cfg){ {"18-spam-html-encoding.eml", "", "a1 hitelcentrum kft Üveges szilvia a1hitelcentrum@t-online.hu a1hitelcentrum t online hu ", "t-online.hu", "postmaster postmaster@aaa.fu postmaster aaa fu ", "aaa.fu", "postmaster@aaa.fu postmaster aaa fu a1hitelcentrum@t-online.hu a1hitelcentrum t online hu ", "aaa.fu t-online.hu ", "", "TÁJÉKOZTATÁSVargay Péter", 0}, {"19-pdf-attachment-bad-mime.eml", "<20100213$2b62e942$9cc2b$sxm@61-186.reverse.ukhost4u.com>", "jennifer - billing department billing@limitedsoftwareworld.com billing limitedsoftwareworld com ", "limitedsoftwareworld.com", "", "", "100000 100000@aaa.fu 100000 aaa fu ", "aaa.fu ", "", "Billing Summary for 100000, Processed on 2010-02-13 17:01:03", 1}, {"20-pdf-attachment-bad-mime.eml", "<20100213$2b62e942$9cc2b$sxm@61-187.reverse.ukhost4u.com>", "jennifer - billing department billing@limitedsoftwareworld.com billing limitedsoftwareworld com ", "limitedsoftwareworld.com", "", "", "100000 100000@aaa.fu 100000 aaa fu ", "aaa.fu ", "", "Billing Summary for 100000, Processed on 2010-02-13 17:01:03", 1}, - {"21-register-tricky-urls.eml", "", "the register update-49363-08f0f768@list.theregister.co.uk 30cbee0b0f411fcf170416fb9f996c6f update 49363 08f0f768 list theregister co uk ", "list.theregister.co.uk", "", "", "hello@mail.aaa.fu hello mail aaa fu ", "mail.aaa.fu ", "", "[sp@m] Reg Headlines Friday July 20", 0}, + {"21-register-tricky-urls.eml", "", "the register update-49363-08f0f768@list.theregister.co.uk 30cbee0b0f411fcf170416fb9f996c6f update 49363 08f0f768 list theregister co uk ", "list.theregister.co.uk", "", "", "hello@mail.aaa.fu hello mail aaa fu ", "mail.aaa.fu ", "", "[sp@m] Reg Headlines Friday July 20", 0}, {"30-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "", "", "hello@acts.hu hello acts hu ", "acts.hu ", "", "RE: hxx-ajajajaja.com_ Aaagágyi és kia ttt_webstat hiba", 0}, {"31-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "", "", "hello@acts.hu hello acts hu ", "acts.hu ", "", "Re: stanhu \"domain not found\"-dal eldobja a @fohu-ra küldött leveleket...", 0}, {"32-subject.eml", "<3660278814815884@pongr-fabd8067e>", "aaapsi.hu info@aaapsi.hu info aaapsi hu ", "aaapsi.hu", "", "", "hello@acts.hu hello acts hu ", "acts.hu ", "", " www.ujsag.hu new virtual host reg. --> Aaaaaaaaa", 0}, From 73e4174f68444866acb40065d1ece76bd9974524 Mon Sep 17 00:00:00 2001 From: Frank Schmirler Date: Fri, 9 Jan 2026 14:57:17 +0100 Subject: [PATCH 4/4] Removed adding safeguard ';' to multiline MIME headers. --- src/parser_utils.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/parser_utils.c b/src/parser_utils.c index 1405c493..9aa44fc7 100644 --- a/src/parser_utils.c +++ b/src/parser_utils.c @@ -1103,11 +1103,6 @@ void fill_attachment_name_buf(struct parser_state *state, char *buf){ if(len + state->anamepos < SMALLBUFSIZE-3){ memcpy(&(state->attachment_name_buf[state->anamepos]), p, len); state->anamepos += len; - - // add a trailing separator semicolon to make sure there's separation - // with the next item - state->attachment_name_buf[state->anamepos] = ';'; - state->anamepos++; } }