ALT Linux repositórios
Group :: Sistema/Servidores
RPM: dictd
Main Changelog Spec Patches Sources Download Gear Bugs e FR Repocop
Patch: dict-1.9.15-alt-utf8.patch
Download
Download
--- dict.c.orig 2004-11-17 15:39:44 +0300
+++ dict.c 2005-09-22 01:35:02 +0400
@@ -25,6 +25,11 @@
#include "parse.h"
#include "md5.h"
#include <stdarg.h>
+#include <locale.h>
+#include <langinfo.h>
+#include <iconv.h>
+#include <wchar.h>
+#include <errno.h>
extern int yy_flex_debug;
lst_List dict_Servers;
@@ -32,6 +37,7 @@
FILE *dict_output;
#define BUFFERSIZE 2048
+#define UTFBUFFERSIZE (BUFFERSIZE * 8)
#define PIPESIZE 256
#define DEF_STRAT "."
#define DEF_DB "*"
@@ -98,6 +104,12 @@
#define EXST_INVALID_STRATEGY 40
#define EXST_CONNECTION_FAILED 41
+
+int utf8_mode = 1;
+
+iconv_t iconv_object = (iconv_t) -1 ;
+iconv_t iconv_reverse_object = (iconv_t) -1 ;
+
struct def {
lst_List data;
const char *word;
@@ -265,18 +277,148 @@
{
lst_List l = lst_create();
char line[BUFFERSIZE];
- int len;
+ static char utfline[BUFFERSIZE];
+ static char decodedline[UTFBUFFERSIZE];
+ int len;
+ size_t utflen;
+ size_t avail;
+
+ char *source;
+ char *destination;
+ size_t recoded;
+ wchar_t wide_char;
+
- while ((len = net_read(s, line, BUFFERSIZE - 1)) >= 0) {
- line [len] = 0;
+ while ((len = net_read(s, utfline, BUFFERSIZE - 1)) >= 0) {
+ utfline[len] = 0;
client_bytes += len;
- PRINTF(DBG_RAW,("* Text: %s\n",line));
- if (line[0] == '.' && line[1] == '\0') break;
- if (len >= 2 && line[0] == '.' && line[1] == '.')
- lst_append( l, xstrdup(line + 1) );
+ PRINTF(DBG_RAW,("* Text: %s\n",utfline));
+ if (utfline[0] == '.' && utfline[1] == '\0') break;
+ if( utf8_mode )
+ {
+ /* A line starting with ".." is stored without its first dot. */
+ if (len >= 2 && utfline[0] == '.' && utfline[1] == '.')
+ lst_append( l, xstrdup(utfline + 1) );
+ else
+ lst_append( l, xstrdup(utfline) );
+ }
else
- lst_append( l, xstrdup(line) );
+ {
+ /* Recode the line with iconv_object into decodedline. */
+ avail = UTFBUFFERSIZE;
+ source = utfline;
+ destination = decodedline;
+ recoded = 0;
+ utflen = len;
+
+ while( source < ( utfline + len ) )
+ {
+ recoded = iconv( iconv_object, &source, &utflen, &destination, &avail );
+ if( recoded == (size_t) -1 )
+ {
+ switch ( errno )
+ {
+ case EILSEQ:
+ case EINVAL:
+ {
+ /* iconv() stopped at source. The locale codeset is not UTF-8
+ * on this path, so mbtowc() cannot decode the sequence;
+ * decode the UTF-8 lead byte by hand instead. */
+ unsigned char lead = (unsigned char) *source;
+ char *fallback_start = destination;
+ int seq_len = 1;
+ int mb_len = 1;
+
+ if( ( lead & 0xE0 ) == 0xC0 )
+ seq_len = 2;
+ else if( ( lead & 0xF0 ) == 0xE0 )
+ seq_len = 3;
+ else if( ( lead & 0xF8 ) == 0xF0 )
+ seq_len = 4;
+
+ /* Consume only the continuation bytes that are really there. */
+ while( mb_len < seq_len && (size_t) mb_len < utflen &&
+ ( (unsigned char) source[mb_len] & 0xC0 ) == 0x80 )
+ mb_len++;
+
+ wide_char = 0;
+ if( seq_len == 2 && mb_len == 2 )
+ wide_char = ( ( lead & 0x1F ) << 6 ) |
+ ( (unsigned char) source[1] & 0x3F );
+
+ /* A replacement needs up to two bytes; give up if they do not fit. */
+ if( avail < 2 )
+ {
+ source = utfline + len;
+ break;
+ }
+
+ switch( wide_char )
+ {
+ case 0x00E4: // a umlaut small
+ *destination = 'a';
+ destination++;
+ *destination = ':';
+ break;
+ case 0x00C4: // a umlaut big
+ *destination = 'A';
+ destination++;
+ *destination = ':';
+ break;
+ case 0x00F6: // o umlaut small
+ *destination = 'o';
+ destination++;
+ *destination = ':';
+ break;
+ case 0x00D6: // o umlaut big
+ *destination = 'O';
+ destination++;
+ *destination = ':';
+ break;
+ case 0x00FC: // u umlaut small
+ *destination = 'u';
+ destination++;
+ *destination = ':';
+ break;
+ case 0x00DC: // u umlaut big
+ *destination = 'U';
+ destination++;
+ *destination = ':';
+ break;
+ case 0x00DF: // eszet small
+ *destination = 's';
+ destination++;
+ *destination = 's';
+ break;
+ default :
+ *destination = '?';
+ break;
+ }
+
+ destination++;
+
+ source += mb_len;
+ utflen -= mb_len;
+ avail -= (size_t) ( destination - fallback_start );
+ break;
+ }
+ default:
+ source = utfline + len;
+ break;
+ };
+ };
+ };
+ if( destination < decodedline + UTFBUFFERSIZE )
+ *destination = 0;
+ else
+ decodedline[UTFBUFFERSIZE-1] = 0;
+ if (len >= 2 && utfline[0] == '.' && utfline[1] == '.')
+ lst_append( l, xstrdup( decodedline + 1 ) );
+ else
+ lst_append( l, xstrdup( decodedline ) );
+
+ };
}
if (len < 0) {
client_close_pager();
@@ -689,9 +801,43 @@
PRINTF(DBG_PIPE,("* Sending %d commands (%d bytes)\n",count,len));
PRINTF(DBG_RAW,("* Send/%d: %s",c->command,buffer));
- pt = alloca(2*len);
- client_crlf(pt,buffer);
- net_write( cmd_reply.s, pt, strlen(pt) );
+
+ if( utf8_mode )
+ {
+ pt = alloca(2*len);
+ client_crlf(pt,buffer);
+ net_write( cmd_reply.s, pt, strlen(pt) );
+ }
+ else
+ {
+ char *decodedline = alloca( 8 * len );
+ size_t utflen = len;
+ size_t avail = 8 * len - 1;
+
+ char *source = buffer;
+ char *destination = decodedline;
+ size_t recoded = 0;
+
+ recoded = iconv( iconv_reverse_object, &source, &utflen, &destination, &avail );
+ if( recoded == -1 )
+ {
+ pt = alloca(2*len);
+ client_crlf(pt,buffer);
+ net_write( cmd_reply.s, pt, strlen(pt) );
+ }
+ else
+ {
+ if( destination < decodedline + 8 * len - 1 )
+ *destination = 0;
+ else
+ decodedline[8 * len - 1] = 0;
+
+ pt = alloca( 2 * strlen( decodedline ) );
+ client_crlf( pt, decodedline );
+ net_write( cmd_reply.s, pt, strlen(pt) );
+ };
+
+ };
} else {
PRINTF(DBG_PIPE,("* Sending nothing\n"));
PRINTF(DBG_RAW,("* Send/%d\n",c->command));
@@ -1161,6 +1307,7 @@
"-s --strategy <strategy> strategy for matching or defining",
"-c --config <file> specify configuration file",
"-C --nocorrect disable attempted spelling correction",
+ "-n --notranslate disable UTF-8 -> client encoding translation",
"-D --dbs show available databases",
"-S --strats show available search strategies",
"-H --serverhelp show server help",
@@ -1217,6 +1364,7 @@
{ "match", 0, 0, 'm' },
{ "strategy", 1, 0, 's' },
{ "nocorrect", 0, 0, 'C' },
+ { "notranslate",0, 0, 'n' },
{ "config", 1, 0, 'c' },
{ "dbs", 0, 0, 'D' },
{ "strats", 0, 0, 'S' },
@@ -1236,6 +1384,31 @@
{ 0, 0, 0, 0 }
};
+ /* Translate only when the locale is usable and its codeset is not UTF-8. */
+ if( !setlocale(LC_CTYPE, "") )
+ {
+ utf8_mode = 1;
+ }
+ else
+ {
+ utf8_mode = ( strcmp( nl_langinfo(CODESET), "UTF-8" ) == 0 );
+ };
+
+ if( !utf8_mode )
+ {
+ /* Both converters are required; if either open fails, release the other. */
+ iconv_object = iconv_open( nl_langinfo(CODESET), "UTF-8" );
+ iconv_reverse_object = iconv_open( "UTF-8", nl_langinfo(CODESET) );
+ if( iconv_object == (iconv_t) -1 || iconv_reverse_object == (iconv_t) -1 )
+ {
+ utf8_mode = 1;
+ if( iconv_object != (iconv_t) -1 )
+ iconv_close( iconv_object );
+ if( iconv_reverse_object != (iconv_t) -1 )
+ iconv_close( iconv_reverse_object );
+ };
+ };
+
dict_output = stdout;
maa_init(argv[0]);
@@ -1249,7 +1422,7 @@
dbg_register( DBG_URL, "url" );
while ((c = getopt_long( argc, argv,
- "h:p:d:i:Ims:DSHau:c:Ck:VLvrP:",
+ "h:p:d:i:Imns:DSHau:c:Ck:VLvrP:",
longopts, NULL )) != EOF)
{
switch (c) {
@@ -1259,6 +1432,16 @@
case 'i': database = optarg; function |= INFO; break;
case 'I': function |= SERVER; break;
case 'm': function = MATCH; break;
+ case 'n':
+ {
+ if( utf8_mode == 0 && iconv_object != (iconv_t) -1 )
+ iconv_close( iconv_object );
+
+ if( utf8_mode == 0 && iconv_reverse_object != (iconv_t) -1 )
+ iconv_close( iconv_reverse_object );
+ utf8_mode = 1;
+ }
+ break;
case 's': strategy = optarg; break;
case 'D': function |= DBS; break;
case 'S': function |= STRATS; break;
@@ -1549,6 +1732,12 @@
tim_get_system( "total" ),
client_bytes / tim_get_real( "total" ) );
}
+
+ if( utf8_mode == 0 && iconv_object != (iconv_t) -1 )
+ iconv_close( iconv_object );
+
+ if( utf8_mode == 0 && iconv_reverse_object != (iconv_t) -1 )
+ iconv_close( iconv_reverse_object );
return ex_status;
}