diff --git a/lib/charset.c b/lib/charset.c
index 9130bba6..bbedb473 100644
--- a/lib/charset.c
+++ b/lib/charset.c
@@ -31,6 +31,58 @@
 #include "../config.h"
 #include "mbselib.h"
 
+
+/*
+ * Search table to translate the name from a CHRS: kludge to the
+ * current name. This table may include obsolete names (and we need
+ * them). The list is terminated by a NULL alias.
+ */
+struct _charalias charalias[] = {
+{(char *)"ASCII",       (char *)"US-ASCII"},
+{(char *)"VT100",       (char *)"US-ASCII"},
+{(char *)"AMIGA",       (char *)"CP437"},
+{(char *)"IBMPC",       (char *)"CP437"},
+{(char *)"PC-8",        (char *)"CP437"},
+{(char *)"CP850",       (char *)"CP437"},
+{(char *)"MACINTOSH",   (char *)"MAC"},
+{(char *)"ALT",         (char *)"CP866"},
+{(char *)"X-ALT",       (char *)"CP866"},
+{(char *)"X-CP866",     (char *)"CP866"},
+{(char *)"+7_FIDO",     (char *)"CP866"},
+{(char *)"KOI8-U",      (char *)"KOI8-R"},
+{(char *)"IBM-878",     (char *)"KOI8-R"},
+{(char *)"CP878",       (char *)"KOI8-R"},
+{(char *)"IBM-915",     (char *)"ISO-8859-1"},
+{(char *)"X-CP1251",    (char *)"CP1251"},
+{(char *)"GBK",         (char *)"CP936"},
+{(char *)"HZ-GB-2312",  (char *)"CP936"},
+{NULL, NULL}
+};
+
+
+/*
+ * Array of charset identifiers, terminated by an all-NULL entry.
+ * Order is important for reverse search from rfc -> ftn, best ftn
+ * kludge should be on top.
+ * NOTE(review): LATIN-5 is mapped to iso-8859-5 with lang "turks"; confirm.
+ */
+struct _charmap charmap[] = {
+{(char *)"LATIN-1 2", (char *)"iso-8859-1",  (char *)"LATIN1",    (char *)"ISO-8859-1",  (char *)"en_US"},
+{(char *)"CP437 2",   (char *)"iso-8859-1",  (char *)"CP437",     (char *)"ISO-8859-1",  (char *)"en_US"},
+{(char *)"CP865 2",   (char *)"iso-8859-1",  (char *)"CP865",     (char *)"ISO-8859-1",  (char *)"sv_SE"},
+{(char *)"MAC",       (char *)"Macintosh",   (char *)"MACINTOSH", (char *)"ISO-8859-1",  (char *)"en_US"},
+{(char *)"LATIN-2 2", (char *)"iso-8859-2",  (char *)"LATIN2",    (char *)"ISO-8859-2",  (char *)"cs_CZ"},
+{(char *)"CP852 2",   (char *)"iso-8859-2",  (char *)"CP852",     (char *)"ISO-8859-2",  (char *)"cs_CZ"},
+{(char *)"CP895 2",   (char *)"iso-8859-2",  (char *)"CP895",     (char *)"ISO-8859-2",  (char *)"cs_CZ"},
+{(char *)"LATIN-5 2", (char *)"iso-8859-5",  (char *)"LATIN5",    (char *)"ISO-8859-5",  (char *)"turks"},
+{(char *)"CP866 2",   (char *)"iso-8859-5",  (char *)"CP866",     (char *)"ISO-8859-5",  (char *)"ru_RU"},
+{(char *)"LATIN-9 2", (char *)"iso-8859-15", (char *)"LATIN9",    (char *)"ISO-8859-15", (char *)"en_US"},
+{(char *)"KOI8-R 2",  (char *)"koi8-r",      (char *)"KOI8-R",    (char *)"KOI8-R",      (char *)"ru_RU.koi8r"},
+{(char *)"CP936 2",   (char *)"hz-gb-2312",  (char *)"GB2312",    (char *)"GB2312",      (char *)"zh_CN.gbk"},
+{NULL, NULL, NULL, NULL, NULL}
+};
+
+
 
 #ifndef USE_EXPERIMENT
 #define BUF_APPEND(d,s) str_append(d,sizeof(d),s)
diff --git a/lib/mbselib.h b/lib/mbselib.h
index 3ddffd5d..5f457df6 100644
--- a/lib/mbselib.h
+++ b/lib/mbselib.h
@@ -449,6 +449,22 @@ struct icmp_filter {
 #define FTNC_MAXCHARS 12 /* Highest charset number */
 
 
+
+/* Alias table entry; charalias[] is terminated by a NULL alias. */
+extern struct _charalias {
+    char    *alias;         /* Name to search for                   */
+    char    *ftnkludge;     /* FTN kludge name used instead         */
+} charalias[];
+
+/* Charset table entry; charmap[] is terminated by a NULL ftnkludge. */
+extern struct _charmap {
+    char    *ftnkludge;     /* FTN kludge with level, as "CP437 2"  */
+    char    *rfcname;       /* Charset name on the rfc side         */
+    char    *ic_ftn;        /* Presumably iconv name, FTN side      */
+    char    *ic_rfc;        /* Presumably iconv name, rfc side      */
+    char    *lang;          /* Looks like a locale name -- confirm  */
+} charmap[];
+
 /*****************************************************************************
  *
  * Global typedefs.
diff --git a/mbnntp/commands.c b/mbnntp/commands.c
index 2b70abaf..dfee0ba9 100644
--- a/mbnntp/commands.c
+++ b/mbnntp/commands.c
@@ -60,6 +60,70 @@ static CharsetTable *charset_table_list;
 
 
 
+/*
+ * Look up an FTN charset kludge in charmap[], resolving obsolete
+ * names through charalias[] first.  Only the first word of ftnkludge
+ * is compared (case insensitive), so a trailing level number as in
+ * "CP437 2" is ignored.
+ *
+ * Returns the index into charmap[], or -1 if ftnkludge is NULL or
+ * no entry matches.
+ */
+int find_ftn_charset(char *ftnkludge)
+{
+    char    ftn[80], cmp[80], *p;
+    int     i;
+
+    if (ftnkludge == NULL) {
+        WriteError("find_ftn_charset(NULL)");
+        return -1;
+    }
+
+    Syslog('n', "find_ftn_charset(%s)", ftnkludge);
+
+    /*
+     * Keep the charset name only, cut off at the first space.
+     */
+    snprintf(ftn, sizeof(ftn), "%s", ftnkludge);
+    if ((p = strchr(ftn, ' ')) != NULL)
+        *p = '\0';
+
+    /*
+     * Translate an alias to the current kludge name.
+     */
+    for (i = 0; charalias[i].alias; i++) {
+        if (strcasecmp(ftn, charalias[i].alias) == 0)
+            break;
+    }
+
+    if (charalias[i].alias == NULL) {
+        Syslog('n', "no alias found");
+    } else {
+        Syslog('n', "found alias %s", charalias[i].ftnkludge);
+        snprintf(ftn, sizeof(ftn), "%s", charalias[i].ftnkludge);
+    }
+
+    /*
+     * Now search real entry
+     */
+    for (i = 0; charmap[i].ftnkludge; i++) {
+        snprintf(cmp, sizeof(cmp), "%s", charmap[i].ftnkludge);
+        if ((p = strchr(cmp, ' ')) != NULL)
+            *p = '\0';
+        if (strcasecmp(ftn, cmp) == 0)
+            break;
+    }
+
+    if (charmap[i].ftnkludge == NULL) {
+        WriteError("find_ftn_charset(%s) not found", ftnkludge);
+        return -1;
+    }
+
+    Syslog('n', "find_ftn_charset(%s) result %d", ftnkludge, i);
+    return i;
+}
+
+
 /*
  * Safe sending to the client with charset translation.
*/ @@ -125,7 +189,7 @@ void command_abhs(char *buf) { char *p, *cmd, *opt, *subj, *charset = NULL; unsigned int art = 0L; - int found; + int found, charindex; #ifndef USE_EXPERIMENT int i; #endif @@ -219,6 +283,25 @@ void command_abhs(char *buf) } } #endif + Syslog('n', "1 charset=\"%s\"", printable(charset, 0)); + + if (charset == NULL) { + if (msgs.Charset != FTNC_NONE) { + charset = xstrcpy(getrfcchrs(msgs.Charset)); + } else if (usercharset != FTNC_NONE) { + charset = xstrcpy(getrfcchrs(msgs.Charset)); + } else { + charset = xstrcpy((char *)"CP437"); + } + } + + Syslog('n', "2 charset=\"%s\"", printable(charset, 0)); + charindex = find_ftn_charset(charset); + + if (charindex != -1) { + Syslog('n', "setup iconv for %s to %s", charmap[charindex].ic_ftn, charmap[charindex].ic_rfc); + } + // We don't do translation to the users charset, the news reader must do that. // charset_set_in_out(getrfcchrs(msgs.Charset),getrfcchrs(usercharset)); @@ -236,8 +319,6 @@ void command_abhs(char *buf) if (strlen(Msg.Replyid)) send_nntp("References: %s", make_msgid(Msg.Replyid)); - Syslog('n', "charset=\"%s\"", MBSE_SS(charset)); - /* * Send RFC 2045 Multipurpose Internet Mail Extensions (MIME) header. * Order is: 1. Charset defined in the FTN message