configs/lists/phraselists/badwords/weighted_dutch | 2 +- .../lists/phraselists/badwords/weighted_russian | 129 +++++++++++++++++ .../lists/phraselists/pornography/weighted_russian | 148 ++++++++++++++++++++ configs/lists/weightedphraselist.in | 3 +- data/scripts/systemv-init.in | 125 ++++++----------- doc/Makefile.am | 1 + src/NaughtyFilter.cpp | 14 +- src/contentscanners/clamav.cpp | 1 + src/downloadmanagers/fancy.cpp | 1 + 9 files changed, 335 insertions(+), 89 deletions(-) diff --git a/configs/lists/phraselists/badwords/weighted_dutch b/configs/lists/phraselists/badwords/weighted_dutch index a229936..d92f3dd 100644 --- a/configs/lists/phraselists/badwords/weighted_dutch +++ b/configs/lists/phraselists/badwords/weighted_dutch @@ -14,7 +14,7 @@ < hondelul ><50> #cock-of-a-dog (commonly used, also by kids) < kak ><10> #shit < klootzak ><50> #ass-hole -< kut ><50> #cunt/pussy +< kut ><5> #cunt/pussy < kutwijf ><90> #fucking bitch < lul ><50> #dick/cock < moederneuker ><75> #motherfucker diff --git a/configs/lists/phraselists/badwords/weighted_russian b/configs/lists/phraselists/badwords/weighted_russian new file mode 100644 index 0000000..31cf5a7 --- /dev/null +++ b/configs/lists/phraselists/badwords/weighted_russian @@ -0,0 +1,129 @@ +# +# Dutch Swear Words Weighted Phrases +# Taken from swif.zip from dansguardian website +# + +#listcategory: "Bad words (Russian)" +#<1> #CharSet Identifier for Troubleshootin + +< ебать ><75 +< блять ><50> +< хуй ><75> +< пиздец ><75> +< пизда ><50> +< ебаный ><65> +< ебнутый ><65> +< ебанутый ><65> +< ебнуть ><65> +< объебать ><65> +< объебаный ><65> +< отъебать> <65> +< заебал > <65> +< заебать > <65> +< поебать > <65> +< нахуй ><65> +< похуй ><65> +< хуйня ><65> +< хуевый ><65> +< охуенный ><65> +< охуел ><65> +< охуеть ><65> +< похуеть ><65> +< бля ><65> +< пизданутый ><65> +< пиздануть ><65> +< пизди ><65> +< пиздану ><65> +< пиздану ><65> +< отпизженный ><65> +< отпизженый ><65> +< отпиздить ><65> +< мудазвон ><30> +< мудак ><30> +< сука ><10> +# +# Dutch Swear Words Weighted Phrases +# Taken from swif.zip from dansguardian website +# + +#listcategory: "Bad words (Russian)" +#<1> #CharSet Identifier for Troubleshootin + +< ><75 +< ><50> +< ><75> +< ><75> +< ><50> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< > <65> +< > <65> +< > <65> +< > <65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><30> +< ><30> +< ><10> +# +# Dutch Swear Words Weighted Phrases +# Taken from swif.zip from dansguardian website +# + +#listcategory: "Bad words (Russian)" +#<1> #CharSet Identifier for Troubleshootin + +< ><75 +< ><50> +< ><75> +< ><75> +< ><50> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< > <65> +< > <65> +< > <65> +< > <65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><65> +< ><30> +< ><30> +< ><10> diff --git a/configs/lists/phraselists/pornography/weighted_russian b/configs/lists/phraselists/pornography/weighted_russian index 6998dc5..800af0e 100644 --- a/configs/lists/phraselists/pornography/weighted_russian +++ b/configs/lists/phraselists/pornography/weighted_russian @@ -4,6 +4,74 @@ # Russian-English bad words dictionary - http://0.viv.ru/cont/dict_mat/1.html #listcategory: "Pornography (Russian)" +#<1> #CharSet Identifier for Troubleshooting + +<><50> #prostitutes +<><5> #photo +<><40> #bust +<><40> #anal +<><40> #anal +< ><50> #anal sex +<120> #anal sex (first 'a' is latin) +< ><50> #oral sex +<120> #oral sex (first 'o' is latin) +<><40> #sex +<><40> #sex +< ><50> #sex chat +<120> #sex chat (first 'c' is latin) +< ><50> #sex forum +<120> #sex forum (first 'c' is latin) +<><5> #leisure +< ><40> #intimate service +<><2> #teen +<><2> #mature +<><40> #lesbi +<><40> #lesbian +<><40> #bizarre +<><20> #asian +< ><5> #all categories +<><40> # bdsm +<><10> #pregnant +< ><40> #large breasts +<><40> #genitalia +<><40> #genitalia +<><40> #groupsex +< ><40> #woman cum +<><5> #celebrity +< ><40> #mature women +<><40> #latinas +<><5> #amateurs +< ><5> #in public +<><5> #blacks +<><40> #pissing +<><10> #posing +<><40> #porno +< ><40> #porn stars +<><4> #pictures +<><40> #masturbate +< ><40> #hidden camera +< ><60> #sperm on face +<><40> #strapon +<><40> #slime +<><40> #fat +<><40> #fetish +< ><40> #free porno and erotics +< ><40> #the porno gallery +< ><40> #whores of Moscow +<><30> +<><30> +<><30> +<><20> +< ><60> +< ><70> +< ><70> +< ><70> +< ><50> +<><50> +< ><50> +< ><50> + +#<1> #CharSet Identifier for Troubleshooting <><50> #prostitutes <><5> #photo @@ -57,3 +125,83 @@ < ><40> #free porno and erotics < ><40> #the porno gallery < ><40> #whores of Moscow +<><30> +<><30> +<><30> +<><20> +< ><60> +< ><70> +< ><70> +< ><70> +< ><50> +<><50> +< ><50> +< ><50> + +#<1> #CharSet Identifier for Troubleshooting + +<проститутки><50> #prostitutes +<фото><5> #photo +<бюст><40> #bust +<анал><40> #anal +<анальный><40> #anal +<анальный секс><50> #anal sex +<120> #anal sex (first 'a' is latin) +<оральный секс><50> #oral sex +<120> #oral sex (first 'o' is latin) +<секса><40> #sex +<секс><40> #sex +<секс чат><50> #sex chat +<120> #sex chat (first 'c' is latin) +<секс форум><50> #sex forum +<120> #sex forum (first 'c' is latin) +<досуга><5> #leisure +<интим услуги><40> #intimate service +<молодые><2> #teen +<пожилые><2> #mature +<лесби><40> #lesbi +<лесбиянки><40> #lesbian +<извращения><40> #bizarre +<азиатки><20> #asian +<все категории><5> #all categories +<бдсм><40> # bdsm +<беременные><10> #pregnant +<большие сиськи><40> #large breasts +<гениталии><40> #genitalia +<генеталии><40> #genitalia +<групповуха><40> #groupsex +<женская кончина><40> #woman cum +<знаменитости><5> #celebrity +<зрелые женщины><40> #mature women +<латинас><40> #latinas +<любители><5> #amateurs +<на публике><5> #in public +<негры><5> #blacks +<писающие><40> #pissing +<позирующие><10> #posing +<порно><40> #porno +<порно звезды><40> #porn stars +<рисунки><4> #pictures +<самотык><40> #masturbate +<скрытой камерой><40> #hidden camera +<сперма на лице><60> #sperm on face +<страпон><40> #strapon +<тины><40> #slime +<толстушки><40> #fat +<фетиш><40> #fetish +<бесплатное порно и эротика><40> #free porno and erotics +<самые лучшие порно галереи отобранные в ручную><40> #the porno gallery +<шлюхи москвы><40> #whores of Moscow +<порнушка><30> +<порно><30> +<Порнуха><30> +<эротика><20> +<Частная порнушка><60> +<Супер порнушка><70> +<порно фото><70> +<порно видео><70> +<порно сайт><50> +<порносайт><50> +<ролики порно><50> +<Русское порно><50> + diff --git a/configs/lists/weightedphraselist.in b/configs/lists/weightedphraselist.in index 4ba44bd..c64a63f 100644 --- a/configs/lists/weightedphraselist.in +++ b/configs/lists/weightedphraselist.in @@ -69,6 +69,7 @@ .Include<@DGCONFDIR@/lists/phraselists/badwords/weighted_dutch> .Include<@DGCONFDIR@/lists/phraselists/badwords/weighted_french> .Include<@DGCONFDIR@/lists/phraselists/badwords/weighted_german> #ALPHA# +.Include<@DGCONFDIR@/lists/phraselists/badwords/weighted_russian> #ALPHA# .Include<@DGCONFDIR@/lists/phraselists/badwords/weighted_portuguese> #ALPHA# .Include<@DGCONFDIR@/lists/phraselists/badwords/weighted_spanish> #ALPHA# @@ -117,7 +118,7 @@ #.Include<@DGCONFDIR@/lists/phraselists/domainsforsale/weighted> #.Include<@DGCONFDIR@/lists/phraselists/idtheft/weighted> .Include<@DGCONFDIR@/lists/phraselists/malware/weighted> #BETA# -.Include<@DGCONFDIR@/lists/phraselists/proxies/weighted> +#.Include<@DGCONFDIR@/lists/phraselists/proxies/weighted> #.Include<@DGCONFDIR@/lists/phraselists/translation/weighted> #.Include<@DGCONFDIR@/lists/phraselists/upstreamfilter/weighted> .Include<@DGCONFDIR@/lists/phraselists/warezhacking/weighted> diff --git a/data/scripts/systemv-init.in b/data/scripts/systemv-init.in index 9f26f35..65325ca 100644 --- a/data/scripts/systemv-init.in +++ b/data/scripts/systemv-init.in @@ -2,7 +2,7 @@ # # Startup script for dansguardian # -# chkconfig: 35 92 8 +# chkconfig: - 92 8 # description: A web content filtering plugin for web \ # proxies, developed to filter using lists of \ # banned phrases, MIME types, filename \ @@ -17,107 +17,72 @@ # Should-Start: # Required-Stop: squid # Should-Stop: -# Default-Start: 3 5 +# Default-Start: none # Default-Stop: 0 1 2 6 # Short-Description: Dansguardian web content filter # Description: Dansguardian web content filter ### END INIT INFO # File includes changes by Thomas Jarosch -function wait_for_pid() -{ - local PID=$1 - local RET=0 - - if [ $PID -eq 0 ] ; then - return $RET - fi - - # give 60 secs then KILL - local COUNTDOWN=60 +. /etc/init.d/functions + +PROCESSNAME=dansguardian +PIDFILE=/var/run/$PROCESSNAME.pid +LOCKFILE=/var/lock/subsys/$PROCESSNAME + - while [ -d /proc/${PID} ] && [ $COUNTDOWN -gt 0 ] ; do - sleep 1 - COUNTDOWN=$[$COUNTDOWN-1] - done +if [ ! -f "@DGBINDIR@/$PROCESSNAME" ] || [ ! -f "@DGCONFDIR@/dansguardian.conf" ];then + exit 0 +fi - if [ -d /proc/${PID} ]; then - COMMAND=`ps h -o command ${PID}` - logger "dansguardian: timeout waiting for PID ${PID}: ${COMMAND}; sending SIGKILL" - kill -KILL $PID >/dev/null 2>&1 - RET=1 - fi - - return $RET +start() +{ + start_daemon --pidfile "$PIDFILE" --lockfile "$LOCKFILE" --expect-user root -- $PROCESSNAME + RETVAL=$? + return $RETVAL } -# See how we were called. +stop() +{ + stop_daemon --pidfile "$PIDFILE" --lockfile "$LOCKFILE" --expect-user dansguardian -- $PROCESSNAME + RETVAL=$? + return $RETVAL +} + +restart() +{ + stop + start +} case "$1" in start) - if [ -f @DGBINDIR@/dansguardian ] && - [ -f @DGCONFDIR@/dansguardian.conf ]; then - echo -n "Starting dansguardian: " - if @DGBINDIR@/dansguardian 2> /dev/null; then - echo -e "\\033[60G\c" - echo -e "[ \\033[1;32m\c" - echo -e "OK\c" - echo -e "\\033[0;39m\c" - echo " ]" - [ -d /var/lock/subsys ] && touch /var/lock/subsys/dansguardian - else - echo -e "\\033[60G\c" - echo -e "[ \\033[1;31m\c" - echo -e "FAILED\c" - echo -e "\\033[0;39m\c" - echo " ]" - fi - fi + start ;; stop) - echo -n "Shutting down dansguardian: " - WAITPID=0 - if [ -f @DGPIDDIR@/dansguardian.pid ] ; then - WAITPID=`cat @DGPIDDIR@/dansguardian.pid` - fi - if @DGBINDIR@/dansguardian -q 2> /dev/null; then - if wait_for_pid $WAITPID ; then - echo -e "\\033[60G\c" - echo -e "[ \\033[1;32m\c" - echo -e "OK\c" - echo -e "\\033[0;39m\c" - echo " ]" - else - echo -e "\\033[60G\c" - echo -e "[ \\033[1;31m\c" - echo -e "FAILED\c" - echo -e "\\033[0;39m\c" - echo " ]" - fi - /bin/rm -f @DGPIDDIR@/dansguardian.pid - /bin/rm -f /tmp/.dguardianipc - [ -d /var/lock/subsys ] && /bin/rm -f /var/lock/subsys/dansguardian - else - echo -e "\\033[60G\c" - echo -e "[ \\033[1;31m\c" - echo -e "FAILED\c" - echo -e "\\033[0;39m\c" - echo " ]" - fi + if [ -e "$LOCKFILE" ]; then + stop + fi ;; restart) - $0 stop - $0 start + restart ;; +condstop) + if [ -e "$LOCKFILE" ]; then + stop + fi + ;; status) - if [ -f @DGBINDIR@/dansguardian ]; then - @DGBINDIR@/dansguardian -s - fi + " $PROCESSNAME" -s ;; - +condrestart) + if [ -e "$LOCKFILE" ]; then + restart + fi + ;; *) - echo "Usage: $0 {start|stop|restart|status}" >&2 + echo "Usage: $0 {start|stop|restart|condstop|condrestart|status}" >&2 ;; esac diff --git a/doc/Makefile.am b/doc/Makefile.am index 0208006..aa817b8 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -1,3 +1,4 @@ +docdir = ${datadir}/doc/${PACKAGE} MAINTAINERCLEANFILES = Makefile.in dist_man_MANS = dansguardian.8 dist_doc_DATA = AuthPlugins ContentScanners DownloadManagers FAQ FAQ.html Plugins diff --git a/src/NaughtyFilter.cpp b/src/NaughtyFilter.cpp index bd27b54..d18b2be 100644 --- a/src/NaughtyFilter.cpp +++ b/src/NaughtyFilter.cpp @@ -206,16 +206,16 @@ void NaughtyFilter::checkme(DataBuffer *body, String &url, String &domain) #endif for (i = 0; i < hexdecodedlen; i++) { c = hexdecoded[i]; - if (c >= 'A' && c <= 'Z') { - c = 'a' + c - 'A'; - } - else if (c >= 192 && c <= 221) { // for accented chars - c += 32; // 224 + c - 192 - } else { +// if (c >= 'A' && c <= 'Z') { +// c = 'a' + c - 'A'; +// } +// else if (c >= 192 && c <= 221) { // for accented chars +// c += 32; // 224 + c - 192 +// } else { if (c == 13 || c == 9 || c == 10) { c = 32; // convert all whitespace to a space } - } +// } bodylc[i] = c; } } diff --git a/src/contentscanners/clamav.cpp b/src/contentscanners/clamav.cpp index 6bf0e44..9a75310 100644 --- a/src/contentscanners/clamav.cpp +++ b/src/contentscanners/clamav.cpp @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/src/downloadmanagers/fancy.cpp b/src/downloadmanagers/fancy.cpp index 2b953fc..6c9e99e 100644 --- a/src/downloadmanagers/fancy.cpp +++ b/src/downloadmanagers/fancy.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include