Makefile | 2 +- README.ALT.ru_RU.UTF-8 | 8 ++++++++ wordsplit.mlp | 12 ++++++++---- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index 294bd17..1481291 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ ### Configuration section # The laguages you're interested in, besides English -LANGUAGES=-DFRENCH #-DSPANISH -DITALIAN -DGERMAN -DPORTUGUESE -DJAPANESE +LANGUAGES=-DFRENCH -DRUSSIAN -DGERMAN #-DSPANISH -DITALIAN -DPORTUGUESE -DJAPANESE # How to invoke the C preprocessor CPP=gcc -E -P $(LANGUAGES) - diff --git a/README.ALT.ru_RU.UTF-8 b/README.ALT.ru_RU.UTF-8 new file mode 100644 index 0000000..9aa8b17 --- /dev/null +++ b/README.ALT.ru_RU.UTF-8 @@ -0,0 +1,8 @@ + + В данной версии добавлена поддержка русских кодировок (работает +для KOI8-R и CP1251). Unicode не работает, но и за спам не считается. +Проверено на достаточно большой базе. + +-- + + vsl@altlinux.ru, evg@altlinux.ru diff --git a/wordsplit.mlp b/wordsplit.mlp index 88f620f..a9086df 100644 --- a/wordsplit.mlp +++ b/wordsplit.mlp @@ -35,10 +35,10 @@ aaaaaaeceeeeiiii\ aaaaaaeceeeeiiii\ noooooouuuuypy" -let normalize s = - for i = 0 to String.length s - 1 do - s.[i] <- tbl.[Char.code s.[i]] - done +let normalize s = s +(* for i = 0 to String.length s - 1 do + s.[i] <- tbl.[Char.code s.[i]] + done *) let all_uppercase s = try @@ -84,6 +84,10 @@ let letter = [ '' '' '' '' '' '' '' #endif +#ifdef RUSSIAN + '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' + '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' +#endif #ifdef PORTUGUESE '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' '' ''