Sisyphus repositório
Última atualização: 1 outubro 2023 | SRPMs: 18631 | Visitas: 37792656
en ru br
ALT Linux repositórios
S:3.0.6-alt1
5.0: 3.0.1-alt3
4.1: 3.0.0-alt1
4.0: 2.4.8-alt1
3.0: 2.4.5-alt1

Group :: Sistema/Internacionalização
RPM: stardict

 Main   Changelog   Spec   Patches   Sources   Download   Gear   Bugs e FR  Repocop 

#!/bin/gawk -f

# Convert slovnyk dictionaries to stardict format.
# The input file is in csv format (separated by ,) with the word in
# the third position and the translation in the fourth position,
# e.g. - smth,smth,"word","translation"
# There are 3 notes about the conversion process:
# 1 - if the same word occurs more than once in the dictionary, the
# converter joins all translations of this word into one entry and
# separates them by ,
# 2 - all words (not translations) will be converted to lowercase
# 3 - you should run the script in a non-UTF8 environment, apart from
# LC_COLLATE for sorting and LC_CTYPE for lowercasing;
# this is because the length function returns the number of symbols
# and not bytes in UTF-8
# LC_ALL=C LC_COLLATE=be_BY.UTF8 LC_CTYPE=be_BY.UTF8 slovnyktodict.awk -v dictname=slovnyk_be-by_en-us slovnyk_be-by_en-us.csv
# will create slovnyk_be-by_en-us.{dict,idx,ifo} from slovnyk_be-by_en-us.csv
#
# you can send questions to alex.murugin@gmail.com

# This function (parse_csv) is in the public domain.
# For more information email LoranceStinson+csv@gmail.com.
# Or see http://lorance.freeshell.org/csv/
#
# Split one CSV record into fields.
#
# Parameters:
#   string  - the record text to parse.
#   csv     - array that receives the fields, indexed from 0.
#   sep     - field separator character.
#   quote   - character that opens and closes a quoted field.
#   escape  - character that, inside a quoted field, precedes a literal
#             quote or escape character.
#   newline - if non-empty, a quoted field may continue on the next input
#             record; the record is fetched with getline (which replaces
#             $0) and joined to the text using this string.
#   trim    - if true, spaces next to separators are stripped.
#   fields, pos, strtrim - local variables; callers do not pass them.
#
# Returns the number of fields stored in csv (0 for an empty string), or a
# negative code with the global csverr set to a message:
#   -1  the next record could not be read while inside a quoted field
#   -2  a quoted field has no closing quote
#   -3  a quoted field is not followed by a separator
function parse_csv(string,csv,sep,quote,escape,newline,trim, fields,pos,strtrim) {
    # Make sure there is something to parse.
    if (length(string) == 0) return 0;
    string = sep string; # The code below assumes ,FIELD.
    fields = 0; # The number of fields found thus far.
    while (length(string) > 0) {
        # Remove spaces after the separator if requested. Dropping the first
        # character leaves the space in the "separator" slot at position 1.
        if (trim && substr(string, 2, 1) == " ") {
            if (length(string) == 1) return fields;
            string = substr(string, 2);
            continue;
        }
        strtrim = 0; # Used to trim quotes off strings.
        # Handle a quoted field.
        if (substr(string, 2, 1) == quote) {
            pos = 2;
            do {
                pos++
                if (pos != length(string) &&
                    substr(string, pos, 1) == escape &&
                    (substr(string, pos + 1, 1) == quote ||
                     substr(string, pos + 1, 1) == escape)) {
                    # Remove escaped quote characters.
                    string = substr(string, 1, pos - 1) substr(string, pos + 1);
                } else if (substr(string, pos, 1) == quote) {
                    # Found the end of the string.
                    strtrim = 1;
                } else if (newline && pos >= length(string)) {
                    # Handle embedded newlines if requested.
                    # getline returns 0 at end of input and -1 on error; both
                    # mean there is no further text, so stop instead of
                    # re-appending the stale $0 (which could loop forever).
                    if ((getline) <= 0) {
                        csverr = "Unable to read the next line.";
                        return -1;
                    }
                    string = string newline $0;
                }
            } while (pos < length(string) && strtrim == 0)
            if (strtrim == 0) {
                csverr = "Missing end quote.";
                return -2;
            }
        } else {
            # Handle an empty field.
            if (length(string) == 1 || substr(string, 2, 1) == sep) {
                csv[fields] = "";
                fields++;
                if (length(string) == 1)
                    return fields;
                string = substr(string, 2);
                continue;
            }
            # Search for a separator. pos is relative to position 2, so the
            # field occupies positions 2..pos of string.
            pos = index(substr(string, 2), sep);
            # If there is no separator the rest of the string is a field.
            if (pos == 0) {
                csv[fields] = substr(string, 2);
                fields++;
                return fields;
            }
        }
        # Remove spaces before the next separator if requested: after the
        # closing quote of a quoted field, or at the end of an unquoted one.
        if (trim && pos != length(string) && substr(string, pos + strtrim, 1) == " ") {
            # The trim flag is reused as a counter here; it is at least 1
            # after the loop, so it still tests true on later passes.
            trim = strtrim
            # Count the number of spaces found.
            while (pos < length(string) && substr(string, pos + trim, 1) == " ") {
                trim++
            }
            # Remove them from the string.
            string = substr(string, 1, pos + strtrim - 1) substr(string, pos + trim);
            # Adjust pos with the trimmed spaces if a quoted string was not found.
            if (!strtrim) {
                pos -= trim;
            }
        }
        # Make sure we are at the end of the string or there is a separator.
        if ((pos != length(string) && substr(string, pos + 1, 1) != sep)) {
            csverr = "Missing separator.";
            return -3;
        }
        # Gather the field, leaving out the surrounding quotes if any.
        csv[fields] = substr(string, 2 + strtrim, pos - (1 + strtrim * 2));
        fields++;
        # Remove the field from the string for the next pass.
        string = substr(string, pos + 1);
    }
    return fields;
}
# get this function from dictgen.php (stardict-tools)
#
# Encode the low 32 bits of a non-negative integer as a 4-character string,
# most significant byte first. Each character is produced with
# sprintf("%c", value), so the result is 4 raw bytes only when gawk works
# in a single-byte locale.
#
# Parameters:
#   string - the number to encode (the name is historical).
#   packed, i - local variables; callers do not pass them.
#
# Returns the 4-character encoded string.
function stardict_nbo(string,    packed, i) {
    packed = "";
    for (i = 0; i < 4; i++) {
        # Peel off the lowest byte and put it in front of those already
        # emitted, so the highest byte ends up first.
        packed = sprintf("%c", and(string, 255)) packed;
        string = rshift(string, 8);
    }
    return packed;
}
# Start-up checks: require -v dictname=... and refuse to overwrite existing
# output files. ORS is cleared so that print emits exactly the given text
# with no record separator appended. ex is set before each exit because
# the END rule still runs after exit and uses it to stop early.
BEGIN {
    ORS = "";
    ex = 0;
    if (dictname == "") {
        print "Please specify dictionary name\n";
        ex = 1;
        exit 1;
    }
    # Single-quote the name for the shell; an embedded ' becomes '\''.
    shname = dictname;
    gsub(/'/, "'\\''", shname);
    # -e is the POSIX "file exists" test. The unary -a used before is not
    # portable, and a shell that rejects it makes this check always pass.
    # system() returns 0 when at least one of the three files exists.
    if (system("[ -e '" shname ".ifo' ] || [ -e '" shname ".idx' ] || [ -e '" shname ".dict' ]") == 0) {
        print "Some of destination files already exist\n";
        ex = 1;
        exit 1;
    }
}
# Per-record rule: a record that parses into exactly four CSV fields
# contributes its fourth field as a translation of its lower-cased third
# field. Further translations of the same word are appended after ", ".
# Records with any other field count (including parse errors) are ignored.
parse_csv($0, csv, ",", "\"", "\"", "\\n", 1) == 4 {
    csv[2] = tolower(csv[2]);
    dict[csv[2]] = (dict[csv[2]] ? dict[csv[2]] ", " : "") csv[3];
}
# Write the three output files <dictname>.idx, <dictname>.dict and
# <dictname>.ifo from the collected dict array.
END {
    # A failed start-up check in BEGIN still reaches END; stop here.
    if (ex == 1) { exit 1; }
    # orig[1..cnt] receives the words (indices of dict) in sorted order.
    cnt = asorti(dict, orig);
    if (cnt == 0) {
        print "Empty dictionary\n";
        exit 1;
    }
    # Build the file names once; a parenthesised or single-variable target
    # avoids the ambiguous "print > a b" concatenation form.
    idxfile = dictname ".idx";
    dictfile = dictname ".dict";
    ifofile = dictname ".ifo";
    pos = 0;
    for (z = 1; z <= cnt; ++z) {
        # length() counts characters, which equals bytes only in a
        # single-byte locale (see the note at the top of the file).
        len = length(dict[orig[z]]);
        # Index entry: word, a zero byte, then offset and size of the
        # translation inside the .dict file.
        print orig[z] sprintf("%c", 0) stardict_nbo(pos) stardict_nbo(len) > idxfile;
        print dict[orig[z]] > dictfile;
        pos = pos + len;
    }
    # Flush both files so the size reported by stat below is final.
    close(idxfile);
    close(dictfile);
    # Single-quote the name for the shell; an embedded ' becomes '\''.
    # "--" keeps a name that starts with "-" from being read as an option.
    shidx = idxfile;
    gsub(/'/, "'\\''", shidx);
    com = "stat --printf=\"%s\" -- '" shidx "'";
    if ((com | getline size) <= 0) {
        close(com);
        print "Unable to determine size of " idxfile "\n";
        exit 1;
    }
    close(com);
    print "StarDict's dict ifo file\nversion=2.4.2\nwordcount=" cnt "\nidxfilesize=" size "\nbookname=" dictname "\nsametypesequence=m\n" > ifofile;
    close(ifofile);
}
 
projeto & código: Vladimir Lettiev aka crux © 2004-2005, Andrew Avramenko aka liks © 2007-2008
mantenedor atual: Michael Shigorin
mantenedor da tradução: Fernando Martini aka fmartini © 2009