Add a generic "any charset -> UTF-8" conversion path to Pine 4.63.

pine/filter.c    New filter gf_convert_charset_to_utf8(), which runs its
                 input through utf8_text() using the charset name passed
                 in f->opt.
pine/strings.c   conversion_table() selects that filter, with quality
                 CV_LOSSLESS_TRANSLATION, when no other translation was
                 found and the target charset is UTF-8.  rfc1522_decode()
                 converts already-decoded text the same way.
pine/pine.h      CV_LOSSLESS_TRANSLATION and the new filter's prototype.
pine/mailview.c  charset_editorial() returns TRUE immediately for a
                 lossless translation, before any disclaimer is formatted.
pine/pine.c      goodnight_gracey() does not free conv_table->table for
                 this filter: conversion_table() makes it an alias of
                 conv_table->from_charset, which is freed on its own.

NOTE(review): the indentation of the context and removed lines in this
copy was reconstructed and may not match the pristine 4.63 sources byte
for byte, so apply it with whitespace-insensitive matching (for example
GNU patch's -l option).

diff -up --exclude='*.o' pine-4.63-untouched/pine4.63/pine/filter.c pine4.63/pine/filter.c
--- pine-4.63-untouched/pine4.63/pine/filter.c	2005-04-08 04:00:56.000000000 +1000
+++ pine4.63/pine/filter.c	2005-08-02 17:01:48.000000000 +1000
@@ -2372,6 +2372,68 @@ gf_convert_utf8_charset(f, flg)
 
 
 /*
+ * This filter converts characters in one character set (the character
+ * set of a message, for example) to UTF-8. It handles conversions from
+ * the characters in all character sets supported by our Unicode library.
+ *
+ * The source charset name is read from f->opt on GF_RESET; while it is
+ * NULL, data is passed through unchanged.  Otherwise input is handed to
+ * utf8_text() in chunks of at most SMALLBUF_SIZE bytes.  A chunk that
+ * utf8_text() rejects produces no output, and a multi-byte sequence
+ * split across two chunks is not handled yet (see the TODO below).
+ */
+void
+gf_convert_charset_to_utf8(f, flg)
+    FILTER_S *f;
+    int       flg;
+{
+    static char *charset = NULL;
+    GF_INIT(f, f->next);
+
+    if(flg == GF_DATA){
+        register unsigned char c;
+        int sbat = 0;
+#define SMALLBUF_SIZE 256
+        unsigned char smallbuf[SMALLBUF_SIZE];
+
+        if(!charset){
+            while(GF_GETC(f, c)){
+                GF_PUTC(f->next, c);
+            }
+        }
+        else do {
+            SIZEDTEXT src, dst;
+
+            for(sbat = 0; sbat < SMALLBUF_SIZE && (GF_GETC(f, c));
+                smallbuf[sbat++] = c); /*loop*/
+            /* TODO: deal with when we split a sequence here... */
+
+            src.data = smallbuf;
+            src.size = sbat;
+            if (utf8_text(&src, charset, &dst, 0)){
+                int dbat;
+                for(dbat = 0; dbat < dst.size; ++dbat){
+                    GF_PUTC(f->next,dst.data[dbat]);
+                }
+
+                fs_give((void **) &dst.data);
+            }
+        } while (sbat == SMALLBUF_SIZE);
+
+        GF_END(f, f->next);
+    }
+    else if(flg == GF_EOD){
+        GF_FLUSH(f->next);
+        (*f->next->f)(f->next, GF_EOD);
+    }
+    else if(flg == GF_RESET){
+        dprint(9, (debugfile, "-- gf_reset convert_charset_to_utf8\n"));
+        charset = (f->opt) ? (char*) (f->opt) : NULL;
+
+    }
+}
+
+/*
  * RICHTEXT-TO-PLAINTEXT filter
  */
 
diff -up --exclude='*.o' pine-4.63-untouched/pine4.63/pine/mailview.c pine4.63/pine/mailview.c
--- pine-4.63-untouched/pine4.63/pine/mailview.c	2005-04-28 04:55:02.000000000 +1000
+++ pine4.63/pine/mailview.c	2005-08-02 16:47:14.000000000 +1000
@@ -2055,6 +2055,8 @@ charset_editorial(charset, msgno, handle
     int       i, n;
     HANDLE_S *h = NULL;
 
+    if (quality == CV_LOSSLESS_TRANSLATION) return TRUE;
+
     sprintf(buf, CHARSET_DISCLAIMER_1, charset ? charset : "US-ASCII");
     p = &buf[strlen(buf)];
 
diff -up --exclude='*.o' pine-4.63-untouched/pine4.63/pine/pine.c pine4.63/pine/pine.c
--- pine-4.63-untouched/pine4.63/pine/pine.c	2005-04-28 04:55:02.000000000 +1000
+++ pine4.63/pine/pine.c	2005-08-02 16:26:09.000000000 +1000
@@ -3367,8 +3367,9 @@ goodnight_gracey(pine_state, exit_val)
     if(pine_state->index_disp_format)
       fs_give((void **)&pine_state->index_disp_format);
     if(pine_state->conv_table){
-        if(pine_state->conv_table->table)
-          fs_give((void **) &pine_state->conv_table->table);
+        if(pine_state->conv_table->table &&
+           pine_state->conv_table->convert != gf_convert_charset_to_utf8)
+          fs_give((void **) &pine_state->conv_table->table);
 
         if(pine_state->conv_table->from_charset)
           fs_give((void **) &pine_state->conv_table->from_charset);
diff -up --exclude='*.o' pine-4.63-untouched/pine4.63/pine/pine.h pine4.63/pine/pine.h
--- pine-4.63-untouched/pine4.63/pine/pine.h	2005-04-16 08:07:15.000000000 +1000
+++ pine4.63/pine/pine.h	2005-08-02 16:13:41.000000000 +1000
@@ -3995,6 +3995,7 @@ typedef struct conversion_table {
                                         /* may be lost */
 #define CV_LOSES_SOME_LETTERS   4       /* Some special chars and */
                                         /* some letters may be lost */
+#define CV_LOSSLESS_TRANSLATION 5       /* Necessary but lossless */
 
 
 /*
@@ -4542,6 +4543,7 @@ void gf_euc_to_2022_jp PROTO((FILTER
 void       gf_flow_text_post_compose PROTO((FILTER_S *, int));
 void       gf_convert_8bit_charset PROTO((FILTER_S *, int));
 void       gf_convert_utf8_charset PROTO((FILTER_S *, int));
+void       gf_convert_charset_to_utf8 PROTO((FILTER_S *, int));
 void       gf_escape_filter PROTO((FILTER_S *, int));
 void       gf_control_filter PROTO((FILTER_S *, int));
 void      *gf_control_filter_opt PROTO((int *));
diff -up --exclude='*.o' pine-4.63-untouched/pine4.63/pine/strings.c pine4.63/pine/strings.c
--- pine-4.63-untouched/pine4.63/pine/strings.c	2005-04-16 08:07:17.000000000 +1000
+++ pine4.63/pine/strings.c	2005-08-02 16:43:01.000000000 +1000
@@ -3459,6 +3459,21 @@ rfc1522_decode(d, len, s, charset)
                         fs_give((void **) &dst.data);
                     }
                 }
+                else if(ct->convert == gf_convert_charset_to_utf8){
+                    SIZEDTEXT src,dst;
+                    /* determine length of source */
+                    for(src.data = rv, src.size = 0;
+                        (src.size < len) && src.data[src.size]; ++src.size);
+                    /* convert charset */
+                    if(utf8_text (&src, cs, &dst, 0)) {
+                        /* copy back only if text plus its NUL fit in len */
+                        if(dst.size < len) {
+                            memcpy(rv, dst.data, dst.size);
+                            rv[dst.size] = '\0';
+                        }
+                        fs_give((void **) &dst.data);
+                    }
+                }
             }
         }
     }
@@ -3803,7 +3818,8 @@ conversion_table(from_cs, to_cs)
      * and build a new one.
      */
     if(ct){
-        if(ct->table && (ct->convert != gf_convert_utf8_charset))
+        if(ct->table && (ct->convert != gf_convert_utf8_charset &&
+                         ct->convert != gf_convert_charset_to_utf8))
           fs_give((void **) &ct->table);
 
         if(ct->from_charset)
@@ -3952,6 +3968,15 @@ conversion_table(from_cs, to_cs)
                 break;
             }
         }
+
+        /* we can always convert to UTF-8 via Unicode */
+        if(ct->quality == CV_NO_TRANSLATE_POSSIBLE && to->type == CT_UTF8)
+        {
+            ct->quality = CV_LOSSLESS_TRANSLATION;
+            ct->table = ct->from_charset; /* alias; freed via from_charset */
+            ct->convert = gf_convert_charset_to_utf8;
+        }
+
     }
 
     return(ct);