Sisyphus repository
Last updated: 21 October 2019 | SRPMs: 17510 | Visits: 15348932
ALT Linux repositories
S:2.3-alt5

Group :: Development/Kernel
RPM: kernel-source-ipt_netflow


kernel-source-ipt_netflow-1.8/.gear-rules:
tar: .
kernel-source-ipt_netflow-1.8/Makefile.in:
#

KVERSION = @KVERSION@
KDIR = @KDIR@
IPTABLES_CFLAGS = @IPTABLES_CFLAGS@
IPTABLES_MODULES = @IPTABLES_MODULES@

obj-m = ipt_NETFLOW.o

all: ipt_NETFLOW.ko libipt_NETFLOW.so
ipt_NETFLOW.ko: ipt_NETFLOW.c ipt_NETFLOW.h Makefile
	@echo Compiling for kernel $(KVERSION)
	make -C $(KDIR) M=$(CURDIR) modules
	@touch $@
minstall: | ipt_NETFLOW.ko
	make -C $(KDIR) M=$(CURDIR) modules_install INSTALL_MOD_PATH=$(DESTDIR)
mclean:
	make -C $(KDIR) M=$(CURDIR) clean
lclean:
	-rm -f *.so *_sh.o
clean: mclean lclean
	-rm -f *.so *.o modules.order

libipt_NETFLOW.so: libipt_NETFLOW.c Makefile
	gcc -O2 -Wall -Wunused $(IPTABLES_CFLAGS) -fPIC -o libipt_NETFLOW_sh.o -c libipt_NETFLOW.c
	gcc -shared -o libipt_NETFLOW.so libipt_NETFLOW_sh.o

linstall: | libipt_NETFLOW.so
	install -D libipt_NETFLOW.so $(DESTDIR)$(IPTABLES_MODULES)/libipt_NETFLOW.so

install: minstall linstall

Makefile: Makefile.in configure
	./configure --make

load: all
	insmod ipt_NETFLOW.ko active_timeout=5
	iptables -A OUTPUT -d 0/0 -j NETFLOW
	iptables -A INPUT -d 0/0 -j NETFLOW

unload:
	iptables -D OUTPUT -d 0/0 -j NETFLOW
	iptables -D INPUT -d 0/0 -j NETFLOW
	rmmod ipt_NETFLOW.ko
kernel-source-ipt_netflow-1.8/README:
ipt_NETFLOW linux 2.6 kernel module by <abc@telekom.ru> -- 11 Feb 2008

============================
= OBTAINING LATEST VERSION =
============================

$ git clone git://ipt-netflow.git.sourceforge.net/gitroot/ipt-netflow/ipt-netflow
$ cd ipt-netflow


================
= INSTALLATION =
================

1. Besides the kernel sources you will need iptables/netfilter sources matching your
installation, or just a fresh snapshot from: ftp://ftp.netfilter.org/pub/iptables/snapshot/
I have this: ftp://ftp.netfilter.org/pub/iptables/snapshot/iptables-1.3.7-20070329.tar.bz2
Unpack it somewhere and build it with make.

2. Run the ./configure script; it will create the Makefile.

3. make all install; depmod
This will install the kernel module and the iptables-specific library.

Troubleshooting:
1) Sometimes you will want to pass a specific compiler to make.
Example: make CC=gcc-3.3

2) Compile the module against kernel sources that have actually been built.
I.e. first compile the kernel and boot into it, and then compile the module.

3) For autoloading the module after reboot: set net.netflow.destination (or load
the module, if destination is set on load) after the interfaces are up, because the
module needs an exporting interface (usually lo) to establish the export connection.

4. After this point you should be able to load the module
and use the -j NETFLOW target in your iptables rules. See the next section.


===========
= RUNNING =
===========

1. You can load the module with insmod like this:
# insmod ipt_NETFLOW.ko destination=127.0.0.1:2055 debug=1

Or, if it is properly installed (make install; depmod), like this:
# modprobe ipt_NETFLOW destination=127.0.0.1:2055

You may pass options on the insmod/modprobe command line, or add
them to /etc/modules.conf or /etc/modprobe.conf like this:
options ipt_NETFLOW destination=127.0.0.1:2055

2. Statistics are in /proc/net/stat/ipt_netflow
To view slab statistics: grep ipt_netflow /proc/slabinfo

3. You can view and change parameters via sysctl, for example:
# sysctl -w net.netflow.hashsize=32768

4. Example of directing all traffic into the module:
# iptables -A FORWARD -j NETFLOW
# iptables -A INPUT -j NETFLOW
# iptables -A OUTPUT -j NETFLOW


===========
= OPTIONS =
===========

destination=127.0.0.1:2055
- the ip address and port to export netflow to.
You will see this connection in netstat like this:
udp 0 0 127.0.0.1:32772 127.0.0.1:2055 ESTABLISHED

destination=127.0.0.1:2055,192.0.0.1:2055
- mirror flows to two (or more) addresses;
separate the addresses with commas.

inactive_timeout=15
- export a flow after it has been inactive for 15 seconds. Default is 15.

active_timeout=1800
- export a flow after it has been active for 1800 seconds (30 minutes). Default is 1800.

debug=0
- debug level (none).

sndbuf=number
- size of the output socket buffer in bytes. Set a
higher value if you experience netflow packet drops (visible
in the statistics as the 'sock: fail' counter).
Default is the system default.

hashsize=number
- hash table size (number of buckets), used for performance tuning.
As a rule of thumb it should be about twice the number of flows
you usually have, but it doesn't have to be.
Default is a small value that depends on system memory.
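The sizing rule above can be applied at runtime through the net.netflow.hashsize sysctl shown in the RUNNING section. A minimal sketch, assuming the module is loaded; suggest_hashsize is a made-up helper name, not part of the package:

```shell
# Rule of thumb from above: hashsize should be roughly twice the number
# of flows you usually have. suggest_hashsize is an illustrative helper.
suggest_hashsize() {
	flows=$1                 # typical (or peak) number of active flows
	echo $(( flows * 2 ))
}
suggest_hashsize 83905           # e.g. using a peak value from the stat file
# then: sysctl -w net.netflow.hashsize=$(suggest_hashsize 83905)
```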

maxflows=2000000
- maximum number of flows to account; this limit exists to prevent DoS attacks.
Once the limit is reached, new flows are not accounted. Default is
2000000; zero means unlimited.

aggregation=string..
- a list of aggregation rules.

The buffer for the aggregation string is 1024 bytes, and sysctl limits it
to ~700 bytes, so don't write a lot there.
Rules are applied in definition order for each packet, so also
don't define too many of them.
Rules apply to both directions (dst and src).
Rules are tried until the first match, but netmask and port
aggregations are matched separately.
Delimit rules with commas.

Rules come in two kinds: netmask aggregation
and port aggregation:

a) Netmask aggregation example: 192.0.0.0/8=16
This means: strip addresses matching subnet 192.0.0.0/8 down to /16.

b) Port aggregation example: 80-89=80
This means: replace ports 80 through 89 with 80.

Full example:
aggregation=192.0.0.0/8=16,10.0.0.0/8=16,80-89=80,3128=80
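What a netmask rule does to a flow's addresses can be illustrated with plain shell arithmetic. This is a standalone sketch, not code from the module; strip_to_prefix is a hypothetical name:

```shell
# Hypothetical helper illustrating netmask aggregation: strip an IPv4
# address down to a given prefix length, as a rule like 192.0.0.0/8=16
# does (down to /16) for matching addresses inside the module.
strip_to_prefix() {
	ip=$1 bits=$2
	IFS=. read a b c d <<EOF
$ip
EOF
	# drop the host bits by shifting them out and back
	n=$(( ((a << 24 | b << 16 | c << 8 | d) >> (32 - bits)) << (32 - bits) ))
	echo "$(( (n >> 24) & 255 )).$(( (n >> 16) & 255 )).$(( (n >> 8) & 255 )).$(( n & 255 ))"
}
strip_to_prefix 192.0.2.7 16	# a flow to/from 192.0.2.7 is keyed as 192.0.0.0
```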

====================
= HOW TO READ STAT =
====================

Statistics are your friend for fine-tuning and understanding netflow module performance.

To see stat:
# cat /proc/net/stat/ipt_netflow

How to interpret the data:

> Flows: active 5187 (peak 83905 reached 0d0h1m ago, maxflows 2000000), mem 283K

active X: currently active flows in the memory cache.
- for optimum CPU performance it is recommended to set the hash table size to
twice the average of this value, or higher.
peak X reached Y ago: peak value of active flows.
mem XK: how many kilobytes of memory are currently taken by active flows.
- one active flow takes 56 bytes of memory.
- there is a system limit on cache size too.

> Hash: size 8192 (mem 32K), metric 1.0, 1.0, 1.0, 1.0. MemTraf: 1420 pkt, 364 K (pdu 0, 0).

Hash: size X: current hash size/limit.
- you can control this with the sysctl net.netflow.hashsize variable.
- increasing this value can significantly reduce CPU load.
- the default value is not optimal for performance.
- the optimal value is twice the average number of active flows.
mem XK: how much memory is occupied by the hash table.
- the hash table is fixed-size by nature, taking 4 bytes per entry.
metric X, X, X, X: how optimally your hash table is being used.
- a lower value means more optimal hash table use; the minimum is 1.0.
- this is a moving average (EWMA) of hash table accesses divided
by the match rate (searches / matches) over 4 seconds, and 1, 5, and 15 minutes.
A sort of hash table load average.
MemTraf: X pkt, X K: how much traffic is accounted in flows that are in memory.
- that is, flows residing in the internal hash table.
pdu X, X: how much traffic is in flows being prepared for export.
- it is already included in the aforementioned MemTraf total.

> Timeout: active 1800, inactive 15. Maxflows 2000000

Timeout: active X: how many seconds to wait before exporting an active flow.
- same as the sysctl net.netflow.active_timeout variable.
inactive X: how many seconds to wait before exporting an inactive flow.
- same as the sysctl net.netflow.inactive_timeout variable.
Maxflows 2000000: the maxflows limit.
- all flows above the maxflows limit are dropped.
- you can control the maxflows limit with the sysctl net.netflow.maxflows variable.

> Rate: 202448 bits/sec, 83 packets/sec; 1 min: 668463 bps, 930 pps; 5 min: 329039 bps, 483 pps

- Module throughput values for 1 second, 1 minute, and 5 minutes.

> cpu# stat: <search found new, trunc frag alloc maxflows>, sock: <ok fail cberr, bytes>, traffic: <pkt, bytes>, drop: <pkt, bytes>
> cpu0 stat: 980540 10473 180600, 0 0 0 0, sock: 4983 928 0, 7124 K, traffic: 188765, 14 MB, drop: 27863, 1142 K

cpu#: Total and per-CPU statistics:
stat: <search found new, trunc frag alloc maxflows>: internal stats:
search found new: hash table searched, found, and not-found counters.
trunc: how many truncated packets were ignored
- these are packets that probably don't have a valid IP header.
- accounted in the drop packets counter but not in drop bytes.
frag: how many fragmented packets were seen.
- the kernel always defragments the INPUT/OUTPUT chains for us.
- these packets are not ignored, but not reassembled either, so:
- if a fragment doesn't carry enough data (e.g. tcp ports), the field is considered zero.
alloc: how many cache memory allocations failed.
- such packets are ignored and accounted in the drop stat.
- consider adding system memory if this ever happens.
maxflows: how many packets were ignored because maxflows (maximum active flows) was reached.
- such packets are ignored and accounted in the drop stat.
- you can control the maxflows limit with the sysctl net.netflow.maxflows variable.

sock: <ok fail cberr, bytes>: export statistics:
ok: how many NetFlow PDUs were exported (i.e. UDP packets sent by the module).
fail: how many socket errors occurred (i.e. packets failed to be sent).
- such packets are dropped and their flow statistics are cumulatively accounted in the drop stat.
cberr: how many connection-refused ICMP errors we got from the export target.
- probably you have not launched collector software on the destination,
- or you specified a wrong destination address.
- flows lost this way cannot be accounted in the drop stat.
- these are ICMP errors, and would look like this in tcpdump:
05:04:09.281247 IP alice.19440 > bob.2055: UDP, length 120
05:04:09.281405 IP bob > alice: ICMP bob udp port 2055 unreachable, length 156
bytes: how many kilobytes of export data were successfully sent by the module.

traffic: <pkt, bytes>: how much traffic is accounted.
pkt, bytes: sum of packets/megabytes accounted by the module.
- flows that failed to be exported (on socket error) are accounted here too.

drop: <pkt, bytes>: how much traffic is not accounted.
pkt, bytes: sum of packets/kilobytes lost or dropped.
- reasons they are dropped and accounted here:
truncated/fragmented packets,
a packet for a new flow when memory allocation for it failed,
a packet for a new flow when maxflows is already reached,
all flows in export packets that got a socket error.

> sock0: 10.0.0.2:2055, sndbuf 106496, filled 0, peak 106848; err: sndbuf reached 928, other 0

sockX: per-destination stats:
X.X.X.X:Y: destination ip address and port.
- controlled by the sysctl net.netflow.destination variable.
sndbuf X: how much data the socket can hold in its buffers.
- controlled by the sysctl net.netflow.sndbuf variable.
- if you have packet drops due to sndbuf reached (error -11), increase this value.
filled X: how much data is in the socket buffers right now.
peak X: peak amount of data that has been in the socket buffers.
- you will want to keep it below the sndbuf value.
err: how many packets were dropped due to errors.
- all flows from them are accounted in the drop stat.
sndbuf reached X: how many packets were dropped because sndbuf was too small (error -11).
other X: how many were dropped due to other errors.
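When the 'sndbuf reached' counter grows, the send buffer can be raised through the net.netflow.sndbuf sysctl described above. The value here is only an illustrative starting point, not a recommendation from the module authors:

```shell
# Raise the export socket buffer; 1048576 (1 MB) is an example value only.
sysctl -w net.netflow.sndbuf=1048576
```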

> aggr0: ...
aggrX: aggregation rulesets.
- controlled by sysctl net.netflow.aggregation variable.

=========
= VOILA =
=========

kernel-source-ipt_netflow-1.8/README.promisc:
Hello,

If you wish to account traffic mirrored on a switch with the netflow module, you may follow this example:


Solution 1: General kernel patch.
Solution 2: Alternative w/o kernel patch.


**************
* Solution 1 *
**************

1. Patch your kernel with `raw_promisc.patch' to let the raw table see promiscuous traffic.

# cd /usr/src/linux
# patch -p1 < ~/ipt_netflow/raw_promisc.patch

Then recompile and reinstall patched kernel.

2. For example, suppose you mirror traffic on your Cisco switch to vlan 47:

# interface FastEthernet0/32
# description this port with vlan 47
# switchport trunk encapsulation dot1q
# switchport mode trunk
# !
# interface FastEthernet0/33
# port monitor FastEthernet0/32
# !

3. Enable promisc mode on the interface to actually see the packets:

# /sbin/ifconfig eth1 promisc

4. Configure vlan on your linux box:

# /sbin/vconfig add eth1 47
# /sbin/ifconfig eth1.47 up

5. Recompile the ipt_netflow module with #define RAW_PROMISC_HACK uncommented:

Find this line in ipt_NETFLOW.c (it should be line 7):

//#define RAW_PROMISC_HACK

And remove the two slashes at the beginning of the line, so it becomes:

#define RAW_PROMISC_HACK

Recompile the module:

# make clean all install

6. Load the ipt_netflow module:

# /sbin/modprobe ipt_NETFLOW hashsize=160000 destination=127.0.0.1:9800

7. Direct all packets from vlan 47 to the ipt_netflow module:

# /sbin/iptables -A PREROUTING -t raw -i eth1.47 -j NETFLOW

Voila.

PS. For Debian Squeeze instructions, look at raw_promisc_debian_squeeze6.patch


**************
* Solution 2 *
**************

By Anonymous.

> raw promisc hack is not needed
> there is a more elegant way to capture port mirrored traffic:
>
> 1. create a bridge of eth0 and dummy0
> 2. put eth0 to promisc
> 3. add a "-i br0 -j NETFLOW" rule to FORWARD (possibly also -j DROP after that)
>
> ...for some reason it works when ipt_netflow is attached to a bridge, but
> requires the promisc hack when attached to a real promisc interface.

Sometimes you may need to run:

# brctl setageing br0 0

for this scheme to work.

kernel-source-ipt_netflow-1.8/configure:
#!/bin/bash

PATH=$PATH:/bin:/usr/bin:/usr/sbin:/sbin:/usr/local/sbin

error() {
echo "! Error: $@"
exit 1
}

iptables_src_version() {
test "$IPTSRC" || return 1

echo -n "Checking iptables sources version: "
SRC="$IPTSRC/Makefile"
test -s "$SRC" || error "Please build iptables first."
VER=`sed -n 's/^\(IPTABLES_\)\?VERSION[ :]= \?//p' "$SRC"`
test "$VER" || error "Unknown version of iptables."
if [ "$VER" = "$IPTVER" ]; then
echo "$VER (ok)"
else
echo "$VER"
error "Source version ($VER) doesn't match binary ($IPTVER)"
fi
}

get_lib_dir() {
test -s "$1" && LIB=`sed -n 's/.*_LIB_DIR "\(.*\)"/\1/p' "$1"`
if [ "$LIB" ]; then
IPTLIB=$LIB
echo "$IPTLIB (from sources)"
return 0
fi
return 1
}

get_lib_from_bin() {
LIB=`strings $IPTBIN | grep ^/.*lib.*tables`
if [ "$LIB" ]; then
IPTLIB=$LIB
echo "$IPTLIB (from binary)"
return 0
fi
return 1
}

get_lib_from_lib() {
XLIB=`/usr/bin/ldd $IPTBIN | grep libxtables | sed -n 's!.* \(/[^ ]\+\).*!\1!p'`
test "$XLIB" || return 1
LIB=`strings $XLIB | grep ^/.*lib.*tables`
if [ "$LIB" ]; then
IPTLIB=$LIB
echo "$IPTLIB (from library)"
return 0
fi
return 1
}

iptables_inc() {
echo -n "Iptables include flags: "
if [ "$IPTINC" ]; then
IPTINC="-I$IPTINC"
echo "$IPTINC (user specified)"
elif [ "$PKGVER" ]; then
IPTINC="$PKGINC"
echo "$IPTINC (pkg-config)"
else
IPTINC="$IPTSRC/include"
IPTINC="-I$IPTINC"
echo "$IPTINC (from source)"
fi
}

iptables_modules() {
echo -n "Iptables module path: "
if [ "$IPTLIB" ]; then
echo "$IPTLIB (user specified)"
else
if [ "$PKGLIB" ]; then
IPTLIB="$PKGLIB"
echo "$IPTLIB (pkg-config)"
else
get_lib_dir "$IPTSRC/include/iptables.h" && return 0
get_lib_dir "$IPTSRC/include/xtables.h" && return 0
get_lib_dir "$IPTSRC/xtables/internal.h" && return 0
get_lib_from_bin && return 0
get_lib_from_lib && return 0
error "can not find, try setting it with --ipt-lib="
fi
fi
}

try_dir() {
if [ -d "$1/include" ]; then
echo "Found iptables sources at $1"
IPTSRC=$1
return 0
fi
return 1
}

try_dirg() {
try_dir "$1" && return 0
try_dir "$1.git" && return 0
}

try_dir2() {
test -d "$1" && try_dir `dirname $1` && return 0
}

iptables_find_version() {
echo -n "Iptables binary version: "
if [ "$IPTVER" ]; then
echo "$IPTVER (user specified)"
else
IPTVER=`$IPTBIN -V 2>/dev/null | sed -n s/iptables.v//p`
if [ "$IPTVER" ]; then
echo "$IPTVER (detected from $IPTBIN)"
return
else
echo "no iptables binary found"
fi
PKGVER=`pkg-config --modversion xtables 2>/dev/null`
if [ "$PKGVER" ]; then
IPTVER="$PKGVER"
echo "Xtables version: $IPTVER (detected from `which pkg-config`)"
return
fi
error "Can not find iptables version, try setting it with --ipt-ver="
fi
}

iptables_try_pkgconfig() {
if [ ! "$PKGVER" ]; then
TRYPKGVER=`pkg-config --modversion xtables 2>/dev/null`
echo -n "pkg-config for version $IPTVER exists: "
pkg-config --exact-version=$IPTVER xtables
if [ $? = 0 ]; then
echo "Yes"
PKGVER=$TRYPKGVER
else
echo "No (reported: $TRYPKGVER)"
fi
fi
if [ "$PKGVER" ]; then
PKGINC=`pkg-config --cflags xtables`
PKGLIB=`pkg-config --variable=xtlibdir xtables`
IPTCFLAGS="-DXTABLES"
else
# Newer versions of iptables should not have -I/kernel/include!
# So I assume that newer version will have correct pkg-config set up
# and if not, then it's older who need it.
IPTCFLAGS="-I$KDIR/include -DIPTABLES_VERSION=\\\\\"$IPTVER\\\\\""
fi
}

iptables_find_src() {
test "$IPTINC" && return 1
test "$PKGVER" && return 1

VER="iptables-$IPTVER"
if [ "$IPTSRC" ]; then
echo "User specified source directory: $IPTSRC"
try_dir $IPTSRC || error "Specified directory is not iptables source.."
else
echo "Searching for $VER sources.."
try_dir "./$VER" && return 0
try_dir "../$VER" && return 0
try_dir "/usr/src/$VER" && return 0
try_dirg "iptables" && return 0
try_dirg "../iptables" && return 0
try_dirg "/usr/src/iptables" && return 0
try_dir2 `locate $VER/extensions | head -1` && return 0
error "Can not find iptables source directory, try setting it with --ipt-src="
fi
}

show_help() {
echo "Possible options:"
echo " --ipt-ver=.. iptables version (ex.: 1.4.2)"
echo " --ipt-bin=.. iptables binary to use (ex.: /usr/sbin/iptables)"
echo " --ipt-src=.. directory for iptable source (ex.: ../iptables-1.4.2)"
echo " --ipt-lib=.. iptable modules path (ex.: /usr/libexec/xtables)"
echo " --ipt-inc=.. directory for iptable headers (ex.: /usr/include)"
echo " --kver=.. kernel version (ex.: 2.6.30-std-def-alt15)"
echo " --kdir=.. directory for kernel source (ex.: /usr/src/kernel)"
exit 0
}

for ac_option
do
case "$ac_option" in
-*=*) ac_optarg=`echo "$ac_option" | sed 's/[-_a-zA-Z0-9]*=//'` ;;
*) ac_optarg= ;;
esac

case "$ac_option" in
--ipt-bin=*) IPTBIN="$ac_optarg" ;;
--ipt-lib=*) IPTLIB="$ac_optarg" ;;
--ipt-src=*) IPTSRC="$ac_optarg" ;;
--ipt-ver=*) IPTVER="$ac_optarg" ;;
--ipt-inc=*) IPTINC="$ac_optarg" ;;
--kver=*) KVERSION="$ac_optarg" ;;
--kdir=*) KDIR="$ac_optarg" ;;
--make) echo called from make ;;
--help) show_help ;;
-*) echo Invalid option: $ac_option; exit 1 ;;
# *) ni="$ni $ac_option" ;;
esac
done

test "$KVERSION" || KVERSION=`uname -r`
echo Kernel version: $KVERSION

test "$KDIR" || KDIR=/lib/modules/$KVERSION/build
echo Kernel sources: $KDIR

test "$IPTBIN" || IPTBIN=`which iptables`

iptables_find_version #IPTVER
iptables_try_pkgconfig #try to configure from pkg-config
iptables_find_src #IPTSRC
iptables_src_version #check that IPTSRC match to IPTVER
iptables_inc #IPTINC
iptables_modules #IPTLIB

REPLACE="\
s!@KVERSION@!$KVERSION!;\
s!@KDIR@!$KDIR!;\
s!@IPTABLES_VERSION@!$IPTVER!;\
s!@IPTABLES_CFLAGS@!$IPTCFLAGS $IPTINC!;\
s!@IPTABLES_MODULES@!$IPTLIB!"

echo -n "Creating Makefile.. "
sed "$REPLACE" Makefile.in > Makefile
echo done.
echo
echo " Now run: make all install"
echo

kernel-source-ipt_netflow-1.8/ipt_NETFLOW.c:
/*
* This is NetFlow exporting module (NETFLOW target) for linux
* (c) 2008-2012 <abc@telekom.ru>
*
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

//#define RAW_PROMISC_HACK

#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <linux/seq_file.h>
#include <linux/random.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <linux/igmp.h>
#include <linux/inetdevice.h>
#include <linux/jhash.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/tcp.h>
#include <net/route.h>
#include <net/dst.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/version.h>
#include <asm/unaligned.h>
#include "ipt_NETFLOW.h"
#ifdef CONFIG_BRIDGE_NETFILTER
#include <linux/netfilter_bridge.h>
#endif
#ifdef CONFIG_SYSCTL
#include <linux/sysctl.h>
#endif

#ifndef NIPQUAD
#define NIPQUAD(addr) \
((unsigned char *)&addr)[0], \
((unsigned char *)&addr)[1], \
((unsigned char *)&addr)[2], \
((unsigned char *)&addr)[3]
#endif
#ifndef HIPQUAD
#if defined(__LITTLE_ENDIAN)
#define HIPQUAD(addr) \
((unsigned char *)&addr)[3], \
((unsigned char *)&addr)[2], \
((unsigned char *)&addr)[1], \
((unsigned char *)&addr)[0]
#elif defined(__BIG_ENDIAN)
#define HIPQUAD NIPQUAD
#else
#error "Please fix asm/byteorder.h"
#endif /* __LITTLE_ENDIAN */
#endif

#ifndef IPT_CONTINUE
#define IPT_CONTINUE XT_CONTINUE
#define ipt_target xt_target
#endif

#define IPT_NETFLOW_VERSION "1.8"

MODULE_LICENSE("GPL");
MODULE_AUTHOR("<abc@telekom.ru>");
MODULE_DESCRIPTION("iptables NETFLOW target module");
MODULE_VERSION(IPT_NETFLOW_VERSION);

#define DST_SIZE 256
static char destination_buf[DST_SIZE] = "127.0.0.1:2055";
static char *destination = destination_buf;
module_param(destination, charp, 0400);
MODULE_PARM_DESC(destination, "export destination ipaddress:port");

static int inactive_timeout = 15;
module_param(inactive_timeout, int, 0600);
MODULE_PARM_DESC(inactive_timeout, "inactive flows timeout in seconds");

static int active_timeout = 30 * 60;
module_param(active_timeout, int, 0600);
MODULE_PARM_DESC(active_timeout, "active flows timeout in seconds");

static int debug = 0;
module_param(debug, int, 0600);
MODULE_PARM_DESC(debug, "debug verbosity level");

static int sndbuf;
module_param(sndbuf, int, 0400);
MODULE_PARM_DESC(sndbuf, "udp socket SNDBUF size");

static int hashsize;
module_param(hashsize, int, 0400);
MODULE_PARM_DESC(hashsize, "hash table size");

static int maxflows = 2000000;
module_param(maxflows, int, 0600);
MODULE_PARM_DESC(maxflows, "maximum number of flows");
static int peakflows = 0;
static unsigned long peakflows_at;

#define AGGR_SIZE 1024
static char aggregation_buf[AGGR_SIZE] = "";
static char *aggregation = aggregation_buf;
module_param(aggregation, charp, 0400);
MODULE_PARM_DESC(aggregation, "aggregation ruleset");

static DEFINE_PER_CPU(struct ipt_netflow_stat, ipt_netflow_stat);
static LIST_HEAD(usock_list);
static DEFINE_RWLOCK(sock_lock);

static unsigned int ipt_netflow_hash_rnd;
struct hlist_head *ipt_netflow_hash __read_mostly; /* hash table memory */
static unsigned int ipt_netflow_hash_size __read_mostly = 0; /* buckets */
static LIST_HEAD(ipt_netflow_list); /* all flows */
static LIST_HEAD(aggr_n_list);
static LIST_HEAD(aggr_p_list);
static DEFINE_RWLOCK(aggr_lock);
static struct kmem_cache *ipt_netflow_cachep __read_mostly; /* ipt_netflow memory */
static atomic_t ipt_netflow_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(ipt_netflow_lock); /* hash table lock */

static long long pdu_packets = 0, pdu_traf = 0;
static struct netflow5_pdu pdu;
static unsigned long pdu_ts_mod;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
static void netflow_work_fn(void *work);
static DECLARE_WORK(netflow_work, netflow_work_fn, NULL);
#else
static void netflow_work_fn(struct work_struct *work);
static DECLARE_DELAYED_WORK(netflow_work, netflow_work_fn);
#endif
static struct timer_list rate_timer;

#define TCP_FIN_RST 0x05

static long long sec_prate = 0, sec_brate = 0;
static long long min_prate = 0, min_brate = 0;
static long long min5_prate = 0, min5_brate = 0;
static unsigned int metric = 10, min15_metric = 10, min5_metric = 10, min_metric = 10; /* hash metrics */

static int set_hashsize(int new_size);
static void destination_removeall(void);
static int add_destinations(char *ptr);
static void aggregation_remove(struct list_head *list);
static int add_aggregation(char *ptr);
static void netflow_scan_and_export(int flush);

static inline __be32 bits2mask(int bits) {
return (bits? 0xffffffff << (32 - bits) : 0);
}

static inline int mask2bits(__be32 mask) {
int n;

for (n = 0; mask; n++)
mask = (mask << 1) & 0xffffffff;
return n;
}

/* under that lock worker is always stopped and not rescheduled,
* and we can call worker sub-functions manually */
static DEFINE_MUTEX(worker_lock);
static inline void __start_scan_worker(void)
{
schedule_delayed_work(&netflow_work, HZ / 10);
}

static inline void start_scan_worker(void)
{
__start_scan_worker();
mutex_unlock(&worker_lock);
}

/* we always stop scanner before write_lock(&sock_lock)
* to let it never hold that spin lock */
static inline void __stop_scan_worker(void)
{
cancel_delayed_work_sync(&netflow_work);
}

static inline void stop_scan_worker(void)
{
mutex_lock(&worker_lock);
__stop_scan_worker();
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
#define INIT_NET(x) x
#else
#define INIT_NET(x) init_net.x
#endif

#ifdef CONFIG_PROC_FS
/* procfs statistics /proc/net/stat/ipt_netflow */
static int nf_seq_show(struct seq_file *seq, void *v)
{
unsigned int nr_flows = atomic_read(&ipt_netflow_count);
int cpu;
unsigned long long searched = 0, found = 0, notfound = 0;
unsigned int truncated = 0, frags = 0, alloc_err = 0, maxflows_err = 0;
unsigned int sock_errors = 0, send_failed = 0, send_success = 0;
unsigned long long pkt_total = 0, traf_total = 0, exported_size = 0;
unsigned long long pkt_drop = 0, traf_drop = 0;
unsigned long long pkt_out = 0, traf_out = 0;
struct ipt_netflow_sock *usock;
struct netflow_aggr_n *aggr_n;
struct netflow_aggr_p *aggr_p;
int snum = 0;
int peak = (jiffies - peakflows_at) / HZ;

seq_printf(seq, "Flows: active %u (peak %u reached %ud%uh%um ago), mem %uK\n",
nr_flows,
peakflows,
peak / (60 * 60 * 24), (peak / (60 * 60)) % 24, (peak / 60) % 60,
(unsigned int)((nr_flows * sizeof(struct ipt_netflow)) >> 10));

for_each_present_cpu(cpu) {
struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu);

searched += st->searched;
found += st->found;
notfound += st->notfound;
truncated += st->truncated;
frags += st->frags;
alloc_err += st->alloc_err;
maxflows_err += st->maxflows_err;
send_success += st->send_success;
send_failed += st->send_failed;
sock_errors += st->sock_errors;
exported_size += st->exported_size;
pkt_total += st->pkt_total;
traf_total += st->traf_total;
pkt_drop += st->pkt_drop;
traf_drop += st->traf_drop;
pkt_out += st->pkt_out;
traf_out += st->traf_out;
}

#define FFLOAT(x, prec) (int)(x) / prec, (int)(x) % prec
seq_printf(seq, "Hash: size %u (mem %uK), metric %d.%d, %d.%d, %d.%d, %d.%d. MemTraf: %llu pkt, %llu K (pdu %llu, %llu).\n",
ipt_netflow_hash_size,
(unsigned int)((ipt_netflow_hash_size * sizeof(struct hlist_head)) >> 10),
FFLOAT(metric, 10),
FFLOAT(min_metric, 10),
FFLOAT(min5_metric, 10),
FFLOAT(min15_metric, 10),
pkt_total - pkt_out + pdu_packets,
(traf_total - traf_out + pdu_traf) >> 10,
pdu_packets,
pdu_traf);

seq_printf(seq, "Timeout: active %d, inactive %d. Maxflows %u\n",
active_timeout,
inactive_timeout,
maxflows);

seq_printf(seq, "Rate: %llu bits/sec, %llu packets/sec; Avg 1 min: %llu bps, %llu pps; 5 min: %llu bps, %llu pps\n",
sec_brate, sec_prate, min_brate, min_prate, min5_brate, min5_prate);

seq_printf(seq, "cpu# stat: <search found new, trunc frag alloc maxflows>, sock: <ok fail cberr, bytes>, traffic: <pkt, bytes>, drop: <pkt, bytes>\n");

seq_printf(seq, "Total stat: %6llu %6llu %6llu, %4u %4u %4u %4u, sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n",
(unsigned long long)searched,
(unsigned long long)found,
(unsigned long long)notfound,
truncated, frags, alloc_err, maxflows_err,
send_success, send_failed, sock_errors,
(unsigned long long)exported_size >> 10,
(unsigned long long)pkt_total, (unsigned long long)traf_total >> 20,
(unsigned long long)pkt_drop, (unsigned long long)traf_drop >> 10);

if (num_present_cpus() > 1) {
for_each_present_cpu(cpu) {
struct ipt_netflow_stat *st;

st = &per_cpu(ipt_netflow_stat, cpu);
seq_printf(seq, "cpu%u stat: %6llu %6llu %6llu, %4u %4u %4u %4u, sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n",
cpu,
(unsigned long long)st->searched,
(unsigned long long)st->found,
(unsigned long long)st->notfound,
st->truncated, st->frags, st->alloc_err, st->maxflows_err,
st->send_success, st->send_failed, st->sock_errors,
(unsigned long long)st->exported_size >> 10,
(unsigned long long)st->pkt_total, (unsigned long long)st->traf_total >> 20,
(unsigned long long)st->pkt_drop, (unsigned long long)st->traf_drop >> 10);
}
}

read_lock(&sock_lock);
list_for_each_entry(usock, &usock_list, list) {
struct sock *sk = usock->sock->sk;

seq_printf(seq, "sock%d: %u.%u.%u.%u:%u, sndbuf %u, filled %u, peak %u; err: sndbuf reached %u, other %u\n",
snum,
usock->ipaddr >> 24,
(usock->ipaddr >> 16) & 255,
(usock->ipaddr >> 8) & 255,
usock->ipaddr & 255,
usock->port,
sk->sk_sndbuf,
atomic_read(&sk->sk_wmem_alloc),
atomic_read(&usock->wmem_peak),
atomic_read(&usock->err_full),
atomic_read(&usock->err_other));
snum++;
}
read_unlock(&sock_lock);

read_lock_bh(&aggr_lock);
snum = 0;
list_for_each_entry(aggr_n, &aggr_n_list, list) {
seq_printf(seq, "aggr#%d net: match %u.%u.%u.%u/%d strip %d\n",
snum,
HIPQUAD(aggr_n->addr),
mask2bits(aggr_n->mask),
mask2bits(aggr_n->aggr_mask));
snum++;
}
snum = 0;
list_for_each_entry(aggr_p, &aggr_p_list, list) {
seq_printf(seq, "aggr#%d port: ports %u-%u replace %u\n",
snum,
aggr_p->port1,
aggr_p->port2,
aggr_p->aggr_port);
snum++;
}
read_unlock_bh(&aggr_lock);
return 0;
}

static int nf_seq_open(struct inode *inode, struct file *file)
{
return single_open(file, nf_seq_show, NULL);
}

static struct file_operations nf_seq_fops = {
.owner = THIS_MODULE,
.open = nf_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SYSCTL

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32)
#define BEFORE2632(x,y) x,y
#else /* since 2.6.32 */
#define BEFORE2632(x,y)
#endif

/* sysctl /proc/sys/net/netflow */
static int hsize_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
void __user *buffer, size_t *lenp, loff_t *fpos)
{
void *orig = ctl->data;
int ret, hsize;

if (write)
ctl->data = &hsize;
ret = proc_dointvec(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
if (write) {
ctl->data = orig;
if (hsize < 1)
return -EPERM;
return set_hashsize(hsize)?:ret;
} else
return ret;
}

static int sndbuf_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
void __user *buffer, size_t *lenp, loff_t *fpos)
{
int ret;
struct ipt_netflow_sock *usock;

read_lock(&sock_lock);
if (list_empty(&usock_list)) {
read_unlock(&sock_lock);
return -ENOENT;
}
usock = list_first_entry(&usock_list, struct ipt_netflow_sock, list);
sndbuf = usock->sock->sk->sk_sndbuf;
read_unlock(&sock_lock);

ctl->data = &sndbuf;
ret = proc_dointvec(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
if (!write)
return ret;
if (sndbuf < SOCK_MIN_SNDBUF)
sndbuf = SOCK_MIN_SNDBUF;
stop_scan_worker();
write_lock(&sock_lock);
list_for_each_entry(usock, &usock_list, list) {
usock->sock->sk->sk_sndbuf = sndbuf;
}
write_unlock(&sock_lock);
start_scan_worker();
return ret;
}

static int destination_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
void __user *buffer, size_t *lenp, loff_t *fpos)
{
int ret;

ret = proc_dostring(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
if (ret >= 0 && write) {
stop_scan_worker();
destination_removeall();
add_destinations(destination_buf);
start_scan_worker();
}
return ret;
}

static int aggregation_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
void __user *buffer, size_t *lenp, loff_t *fpos)
{
int ret;

if (debug > 1)
printk(KERN_INFO "aggregation_procctl (%d) %u %llu\n", write, (unsigned int)(*lenp), (unsigned long long)*fpos);
ret = proc_dostring(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
if (ret >= 0 && write) {
add_aggregation(aggregation_buf);
}
return ret;
}

static int flush_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
void __user *buffer, size_t *lenp, loff_t *fpos)
{
int ret;
int val;

val = 0;
ctl->data = &val;
ret = proc_dointvec(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);

if (!write)
return ret;

if (val > 0) {
printk(KERN_INFO "ipt_NETFLOW: forced flush\n");
stop_scan_worker();
netflow_scan_and_export(1);
start_scan_worker();
}

return ret;
}

static struct ctl_table_header *netflow_sysctl_header;

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
#define _CTL_NAME(x) .ctl_name = x,
#else
#define _CTL_NAME(x)
#endif
static struct ctl_table netflow_sysctl_table[] = {
{
_CTL_NAME(1)
.procname = "active_timeout",
.mode = 0644,
.data = &active_timeout,
.maxlen = sizeof(int),
.proc_handler = &proc_dointvec,
},
{
_CTL_NAME(2)
.procname = "inactive_timeout",
.mode = 0644,
.data = &inactive_timeout,
.maxlen = sizeof(int),
.proc_handler = &proc_dointvec,
},
{
_CTL_NAME(3)
.procname = "debug",
.mode = 0644,
.data = &debug,
.maxlen = sizeof(int),
.proc_handler = &proc_dointvec,
},
{
_CTL_NAME(4)
.procname = "hashsize",
.mode = 0644,
.data = &ipt_netflow_hash_size,
.maxlen = sizeof(int),
.proc_handler = &hsize_procctl,
},
{
_CTL_NAME(5)
.procname = "sndbuf",
.mode = 0644,
.maxlen = sizeof(int),
.proc_handler = &sndbuf_procctl,
},
{
_CTL_NAME(6)
.procname = "destination",
.mode = 0644,
.data = &destination_buf,
.maxlen = sizeof(destination_buf),
.proc_handler = &destination_procctl,
},
{
_CTL_NAME(7)
.procname = "aggregation",
.mode = 0644,
.data = &aggregation_buf,
.maxlen = sizeof(aggregation_buf),
.proc_handler = &aggregation_procctl,
},
{
_CTL_NAME(8)
.procname = "maxflows",
.mode = 0644,
.data = &maxflows,
.maxlen = sizeof(int),
.proc_handler = &proc_dointvec,
},
{
_CTL_NAME(9)
.procname = "flush",
.mode = 0644,
.maxlen = sizeof(int),
.proc_handler = &flush_procctl,
},
{ }
};

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
static struct ctl_table netflow_sysctl_root[] = {
{
_CTL_NAME(33)
.procname = "netflow",
.mode = 0555,
.child = netflow_sysctl_table,
},
{ }
};

static struct ctl_table netflow_net_table[] = {
{
.ctl_name = CTL_NET,
.procname = "net",
.mode = 0555,
.child = netflow_sysctl_root,
},
{ }
};
#else /* >= 2.6.25 */
static struct ctl_path netflow_sysctl_path[] = {
{
.procname = "net",
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33)
.ctl_name = CTL_NET
#endif
},
{ .procname = "netflow" },
{ }
};
#endif /* 2.6.25 */
#endif /* CONFIG_SYSCTL */

/* socket code */
static void sk_error_report(struct sock *sk)
{
/* clear connection refused errors if any */
write_lock_bh(&sk->sk_callback_lock);
if (debug > 1)
printk(KERN_INFO "NETFLOW: socket error <%d>\n", sk->sk_err);
sk->sk_err = 0;
NETFLOW_STAT_INC(sock_errors);
write_unlock_bh(&sk->sk_callback_lock);
return;
}

// returns the number of sends that succeeded, 0 if none
/* only called in scan worker path */
static int netflow_send_pdu(void *buffer, int len)
{
struct msghdr msg = { .msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL };
struct kvec iov = { buffer, len };
int retok = 0, ret;
int snum = 0;
struct ipt_netflow_sock *usock;

list_for_each_entry(usock, &usock_list, list) {
if (debug)
printk(KERN_INFO "netflow_send_pdu: sendmsg(%d, %d) [%u %u]\n",
snum,
len,
atomic_read(&usock->sock->sk->sk_wmem_alloc),
usock->sock->sk->sk_sndbuf);
ret = kernel_sendmsg(usock->sock, &msg, &iov, 1, (size_t)len);
if (ret < 0) {
char *suggestion = "";

NETFLOW_STAT_INC_ATOMIC(send_failed);
if (ret == -EAGAIN) {
atomic_inc(&usock->err_full);
suggestion = ": increase sndbuf!";
} else
atomic_inc(&usock->err_other);
printk(KERN_ERR "netflow_send_pdu[%d]: sendmsg error %d: data loss %llu pkt, %llu bytes%s\n",
snum, ret, pdu_packets, pdu_traf, suggestion);
} else {
unsigned int wmem = atomic_read(&usock->sock->sk->sk_wmem_alloc);
if (wmem > atomic_read(&usock->wmem_peak))
atomic_set(&usock->wmem_peak, wmem);
NETFLOW_STAT_INC_ATOMIC(send_success);
NETFLOW_STAT_ADD_ATOMIC(exported_size, ret);
retok++;
}
snum++;
}
return retok;
}

static void usock_free(struct ipt_netflow_sock *usock)
{
printk(KERN_INFO "netflow: remove destination %u.%u.%u.%u:%u (%p)\n",
HIPQUAD(usock->ipaddr),
usock->port,
usock->sock);
if (usock->sock)
sock_release(usock->sock);
usock->sock = NULL;
vfree(usock);
}

static void destination_removeall(void)
{
write_lock(&sock_lock);
while (!list_empty(&usock_list)) {
struct ipt_netflow_sock *usock;

usock = list_entry(usock_list.next, struct ipt_netflow_sock, list);
list_del(&usock->list);
write_unlock(&sock_lock);
usock_free(usock);
write_lock(&sock_lock);
}
write_unlock(&sock_lock);
}

static void add_usock(struct ipt_netflow_sock *usock)
{
struct ipt_netflow_sock *sk;

/* don't need empty sockets */
if (!usock->sock) {
usock_free(usock);
return;
}

write_lock(&sock_lock);
/* don't need duplicated sockets */
list_for_each_entry(sk, &usock_list, list) {
if (sk->ipaddr == usock->ipaddr &&
sk->port == usock->port) {
write_unlock(&sock_lock);
usock_free(usock);
return;
}
}
list_add_tail(&usock->list, &usock_list);
printk(KERN_INFO "netflow: added destination %u.%u.%u.%u:%u\n",
HIPQUAD(usock->ipaddr),
usock->port);
write_unlock(&sock_lock);
}

static struct socket *usock_alloc(__be32 ipaddr, unsigned short port)
{
struct sockaddr_in sin;
struct socket *sock;
int error;

if ((error = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
printk(KERN_ERR "netflow: sock_create_kern error %d\n", error);
return NULL;
}
sock->sk->sk_allocation = GFP_ATOMIC;
sock->sk->sk_prot->unhash(sock->sk); /* hidden from input */
sock->sk->sk_error_report = &sk_error_report; /* clear ECONNREFUSED */
if (sndbuf)
sock->sk->sk_sndbuf = sndbuf;
else
sndbuf = sock->sk->sk_sndbuf;
memset(&sin, 0, sizeof(sin));
sin.sin_family = AF_INET;
sin.sin_addr.s_addr = htonl(ipaddr);
sin.sin_port = htons(port);
if ((error = sock->ops->connect(sock, (struct sockaddr *)&sin,
sizeof(sin), 0)) < 0) {
printk(KERN_ERR "netflow: error connecting UDP socket %d\n", error);
sock_release(sock);
return NULL;
}
return sock;
}

#define SEPARATORS " ,;\t\n"
static int add_destinations(char *ptr)
{
while (ptr) {
unsigned char ip[4];
unsigned short port;

ptr += strspn(ptr, SEPARATORS);

if (sscanf(ptr, "%hhu.%hhu.%hhu.%hhu:%hu",
ip, ip + 1, ip + 2, ip + 3, &port) == 5) {
struct ipt_netflow_sock *usock;

if (!(usock = vmalloc(sizeof(*usock)))) {
printk(KERN_ERR "netflow: can't vmalloc socket\n");
return -ENOMEM;
}

memset(usock, 0, sizeof(*usock));
usock->ipaddr = ntohl(*(__be32 *)ip);
usock->port = port;
usock->sock = usock_alloc(usock->ipaddr, port);
atomic_set(&usock->wmem_peak, 0);
atomic_set(&usock->err_full, 0);
atomic_set(&usock->err_other, 0);
add_usock(usock);
} else
break;

ptr = strpbrk(ptr, SEPARATORS);
}
return 0;
}

static void aggregation_remove(struct list_head *list)
{
write_lock_bh(&aggr_lock);
while (!list_empty(list)) {
struct netflow_aggr_n *aggr; /* match netflow_aggr_p too */

aggr = list_entry(list->next, struct netflow_aggr_n, list);
list_del(&aggr->list);
write_unlock_bh(&aggr_lock);
vfree(aggr);
write_lock_bh(&aggr_lock);
}
write_unlock_bh(&aggr_lock);
}

static int add_aggregation(char *ptr)
{
struct netflow_aggr_n *aggr_n, *aggr, *tmp;
struct netflow_aggr_p *aggr_p;
LIST_HEAD(new_aggr_n_list);
LIST_HEAD(new_aggr_p_list);
LIST_HEAD(old_aggr_list);

while (ptr && *ptr) {
unsigned char ip[4];
unsigned int mask;
unsigned int port1, port2;
unsigned int aggr_to;

ptr += strspn(ptr, SEPARATORS);

if (sscanf(ptr, "%hhu.%hhu.%hhu.%hhu/%u=%u",
ip, ip + 1, ip + 2, ip + 3, &mask, &aggr_to) == 6) {

if (!(aggr_n = vmalloc(sizeof(*aggr_n)))) {
printk(KERN_ERR "netflow: can't vmalloc aggr\n");
return -ENOMEM;
}
memset(aggr_n, 0, sizeof(*aggr_n));

aggr_n->addr = ntohl(*(__be32 *)ip);
aggr_n->mask = bits2mask(mask);
aggr_n->aggr_mask = bits2mask(aggr_to);
aggr_n->prefix = mask;
printk(KERN_INFO "netflow: add aggregation [%u.%u.%u.%u/%u=%u]\n",
HIPQUAD(aggr_n->addr), mask, aggr_to);
list_add_tail(&aggr_n->list, &new_aggr_n_list);

} else if (sscanf(ptr, "%u-%u=%u", &port1, &port2, &aggr_to) == 3 ||
(sscanf(ptr, "%u=%u", &port2, &aggr_to) == 2 &&
((port1 = port2), 1))) { /* single port: treat as range port2-port2 */

if (!(aggr_p = vmalloc(sizeof(*aggr_p)))) {
printk(KERN_ERR "netflow: can't vmalloc aggr\n");
return -ENOMEM;
}
memset(aggr_p, 0, sizeof(*aggr_p));

aggr_p->port1 = port1;
aggr_p->port2 = port2;
aggr_p->aggr_port = aggr_to;
printk(KERN_INFO "netflow: add aggregation [%u-%u=%u]\n",
port1, port2, aggr_to);
list_add_tail(&aggr_p->list, &new_aggr_p_list);
} else {
printk(KERN_ERR "netflow: bad aggregation rule: %s (ignoring)\n", ptr);
break;
}

ptr = strpbrk(ptr, SEPARATORS);
}

/* swap lists */
write_lock_bh(&aggr_lock);
list_for_each_entry_safe(aggr, tmp, &aggr_n_list, list)
list_move(&aggr->list, &old_aggr_list);
list_for_each_entry_safe(aggr, tmp, &aggr_p_list, list)
list_move(&aggr->list, &old_aggr_list);

list_for_each_entry_safe(aggr, tmp, &new_aggr_n_list, list)
list_move_tail(&aggr->list, &aggr_n_list);
list_for_each_entry_safe(aggr, tmp, &new_aggr_p_list, list)
list_move_tail(&aggr->list, &aggr_p_list);
write_unlock_bh(&aggr_lock);
aggregation_remove(&old_aggr_list);
return 0;
}

static inline u_int32_t hash_netflow(const struct ipt_netflow_tuple *tuple)
{
/* tuple is rounded to u32s */
return jhash2((u32 *)tuple, NETFLOW_TUPLE_SIZE, ipt_netflow_hash_rnd) % ipt_netflow_hash_size;
}

static struct ipt_netflow *
ipt_netflow_find(const struct ipt_netflow_tuple *tuple, unsigned int hash)
{
struct ipt_netflow *nf;
struct hlist_node *pos;

hlist_for_each_entry(nf, pos, &ipt_netflow_hash[hash], hlist) {
if (ipt_netflow_tuple_equal(tuple, &nf->tuple) &&
nf->nr_bytes < FLOW_FULL_WATERMARK) {
NETFLOW_STAT_INC(found);
return nf;
}
NETFLOW_STAT_INC(searched);
}
NETFLOW_STAT_INC(notfound);
return NULL;
}

static struct hlist_head *alloc_hashtable(int size)
{
struct hlist_head *hash;

hash = vmalloc(sizeof(struct hlist_head) * size);
if (hash) {
int i;

for (i = 0; i < size; i++)
INIT_HLIST_HEAD(&hash[i]);
} else
printk(KERN_ERR "netflow: unable to vmalloc hash table.\n");

return hash;
}

static int set_hashsize(int new_size)
{
struct hlist_head *new_hash, *old_hash;
unsigned int hash;
struct ipt_netflow *nf;
int rnd;

printk(KERN_INFO "netflow: allocating new hash table %u -> %u buckets\n",
ipt_netflow_hash_size, new_size);
new_hash = alloc_hashtable(new_size);
if (!new_hash)
return -ENOMEM;

get_random_bytes(&rnd, 4);

/* rehash */
spin_lock_bh(&ipt_netflow_lock);
old_hash = ipt_netflow_hash;
ipt_netflow_hash = new_hash;
ipt_netflow_hash_size = new_size;
ipt_netflow_hash_rnd = rnd;
/* hash_netflow() is dependent on ipt_netflow_hash_* values */
list_for_each_entry(nf, &ipt_netflow_list, list) {
hash = hash_netflow(&nf->tuple);
/* hlist_add_head() rewrites this node's hlist pointers,
* so moving it to the new table without hlist_del() is safe */
hlist_add_head(&nf->hlist, &new_hash[hash]);
}
spin_unlock_bh(&ipt_netflow_lock);

vfree(old_hash);

return 0;
}

static struct ipt_netflow *
ipt_netflow_alloc(struct ipt_netflow_tuple *tuple)
{
struct ipt_netflow *nf;
long count;

nf = kmem_cache_alloc(ipt_netflow_cachep, GFP_ATOMIC);
if (!nf) {
printk(KERN_ERR "Can't allocate netflow.\n");
return NULL;
}

memset(nf, 0, sizeof(*nf));
nf->tuple = *tuple;

count = atomic_inc_return(&ipt_netflow_count);
if (count > peakflows) {
peakflows = count;
peakflows_at = jiffies;
}

return nf;
}

static void ipt_netflow_free(struct ipt_netflow *nf)
{
atomic_dec(&ipt_netflow_count);
kmem_cache_free(ipt_netflow_cachep, nf);
}

static struct ipt_netflow *
init_netflow(struct ipt_netflow_tuple *tuple,
struct sk_buff *skb, unsigned int hash)
{
struct ipt_netflow *nf;

nf = ipt_netflow_alloc(tuple);
if (!nf)
return NULL;

hlist_add_head(&nf->hlist, &ipt_netflow_hash[hash]);
list_add(&nf->list, &ipt_netflow_list);

return nf;
}

/* cook pdu, send, and clean */
/* only called in scan worker path */
static void netflow_export_pdu(void)
{
struct timeval tv;
int pdusize;

if (!pdu.nr_records)
return;

if (debug > 1)
printk(KERN_INFO "netflow_export_pdu with %d records\n", pdu.nr_records);
do_gettimeofday(&tv);

pdu.version = htons(5);
pdu.ts_uptime = htonl(jiffies_to_msecs(jiffies));
pdu.ts_usecs = htonl(tv.tv_sec);
pdu.ts_unsecs = htonl(tv.tv_usec);
//pdu.eng_type = 0;
//pdu.eng_id = 0;
//pdu.padding = 0;

pdusize = NETFLOW5_HEADER_SIZE + sizeof(struct netflow5_record) * pdu.nr_records;

/* especially fix nr_records before export */
pdu.nr_records = htons(pdu.nr_records);

if (netflow_send_pdu(&pdu, pdusize) == 0) {
/* not even one send succeeded, account stat for dropped packets */
NETFLOW_STAT_ADD_ATOMIC(pkt_drop, pdu_packets);
NETFLOW_STAT_ADD_ATOMIC(traf_drop, pdu_traf);
}

pdu.seq = htonl(ntohl(pdu.seq) + ntohs(pdu.nr_records));

pdu.nr_records = 0;
pdu_packets = 0;
pdu_traf = 0;
}

/* only called in scan worker path */
static void netflow_export_flow(struct ipt_netflow *nf)
{
struct netflow5_record *rec;

if (debug > 2)
printk(KERN_INFO "adding flow to export (%d)\n", pdu.nr_records);

pdu_packets += nf->nr_packets;
pdu_traf += nf->nr_bytes;
pdu_ts_mod = jiffies;
rec = &pdu.flow[pdu.nr_records++];

/* make V5 flow record */
rec->s_addr = nf->tuple.s_addr;
rec->d_addr = nf->tuple.d_addr;
//rec->nexthop = 0;
rec->i_ifc = htons(nf->tuple.i_ifc);
rec->o_ifc = htons(nf->o_ifc);
rec->nr_packets = htonl(nf->nr_packets);
rec->nr_octets = htonl(nf->nr_bytes);
rec->ts_first = htonl(jiffies_to_msecs(nf->ts_first));
rec->ts_last = htonl(jiffies_to_msecs(nf->ts_last));
rec->s_port = nf->tuple.s_port;
rec->d_port = nf->tuple.d_port;
//rec->reserved = 0;
rec->tcp_flags = nf->tcp_flags;
rec->protocol = nf->tuple.protocol;
rec->tos = nf->tuple.tos;
//rec->s_as = 0;
//rec->d_as = 0;
rec->s_mask = nf->s_mask;
rec->d_mask = nf->d_mask;
//rec->padding = 0;
ipt_netflow_free(nf);

if (pdu.nr_records == NETFLOW5_RECORDS_MAX)
netflow_export_pdu();
}

static inline int active_needs_export(struct ipt_netflow *nf, long a_timeout)
{
/* active too long, finishing, or carrying too many bytes */
return ((jiffies - nf->ts_first) > a_timeout) ||
(nf->tuple.protocol == IPPROTO_TCP &&
(nf->tcp_flags & TCP_FIN_RST) &&
(jiffies - nf->ts_last) > (1 * HZ)) ||
nf->nr_bytes >= FLOW_FULL_WATERMARK;
}

/* could be called with zero to flush cache and pdu */
/* this function is guaranteed to be called non-concurrently */
static void netflow_scan_and_export(int flush)
{
long i_timeout = inactive_timeout * HZ;
long a_timeout = active_timeout * HZ;

if (flush)
i_timeout = 0;

spin_lock_bh(&ipt_netflow_lock);
while (!list_empty(&ipt_netflow_list)) {
struct ipt_netflow *nf;

nf = list_entry(ipt_netflow_list.prev, struct ipt_netflow, list);
/* Note: i_timeout checked with >= to allow specifying zero timeout
* to purge all flows on module unload */
if (((jiffies - nf->ts_last) >= i_timeout) ||
active_needs_export(nf, a_timeout)) {
hlist_del(&nf->hlist);
list_del(&nf->list);
NETFLOW_STAT_ADD(pkt_out, nf->nr_packets);
NETFLOW_STAT_ADD(traf_out, nf->nr_bytes);
spin_unlock_bh(&ipt_netflow_lock);
netflow_export_flow(nf);
spin_lock_bh(&ipt_netflow_lock);
} else {
/* flows which need to be exported are always at the tail,
* so once we hit a non-exportable flow we can stop */
break;
}
}
spin_unlock_bh(&ipt_netflow_lock);

/* flush flows stored in pdu if there have been no new flows for too long */
/* Note: using >= to allow flow purge on zero timeout */
if ((jiffies - pdu_ts_mod) >= i_timeout)
netflow_export_pdu();
}

#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
static void netflow_work_fn(void *dummy)
#else
static void netflow_work_fn(struct work_struct *dummy)
#endif
{
netflow_scan_and_export(0);
__start_scan_worker();
}

#define RATESHIFT 2
#define SAMPLERATE (RATESHIFT*RATESHIFT)
#define NUMSAMPLES(minutes) (minutes * 60 / SAMPLERATE)
#define _A(v, m) (v) * (1024 * 2 / (NUMSAMPLES(m) + 1)) >> 10
// x * (1024 / y) >> 10 because we cannot directly divide a long long in the kernel
#define CALC_RATE(ewma, cur, minutes) ewma += _A(cur - ewma, minutes)
// calculate EWMA throughput rate for whole module
static void rate_timer_calc(unsigned long dummy)
{
static u64 old_pkt_total = 0;
static u64 old_traf_total = 0;
static u64 old_searched = 0;
static u64 old_found = 0;
static u64 old_notfound = 0;
u64 searched = 0;
u64 found = 0;
u64 notfound = 0;
unsigned int dsrch, dfnd, dnfnd;
u64 pkt_total = 0;
u64 traf_total = 0;
int cpu;

for_each_present_cpu(cpu) {
struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu);

pkt_total += st->pkt_total;
traf_total += st->traf_total;
searched += st->searched;
found += st->found;
notfound += st->notfound;
}

sec_prate = (pkt_total - old_pkt_total) >> RATESHIFT;
CALC_RATE(min5_prate, sec_prate, 5);
CALC_RATE(min_prate, sec_prate, 1);
old_pkt_total = pkt_total;

sec_brate = ((traf_total - old_traf_total) * 8) >> RATESHIFT;
CALC_RATE(min5_brate, sec_brate, 5);
CALC_RATE(min_brate, sec_brate, 1);
old_traf_total = traf_total;

dsrch = searched - old_searched;
dfnd = found - old_found;
dnfnd = notfound - old_notfound;
old_searched = searched;
old_found = found;
old_notfound = notfound;
/* if the hash wasn't accessed at all, keep the metric steady */
metric = (dfnd + dnfnd)? 10 * (dsrch + dfnd + dnfnd) / (dfnd + dnfnd) : metric;
CALC_RATE(min15_metric, (unsigned long long)metric, 15);
CALC_RATE(min5_metric, (unsigned long long)metric, 5);
CALC_RATE(min_metric, (unsigned long long)metric, 1);

mod_timer(&rate_timer, jiffies + (HZ * SAMPLERATE));
}

/* packet receiver */
static unsigned int netflow_target(
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
struct sk_buff **pskb,
#else
struct sk_buff *skb,
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
const struct net_device *if_in,
const struct net_device *if_out,
unsigned int hooknum,
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,17)
const struct xt_target *target,
#endif
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
const void *targinfo,
void *userinfo
#else
const void *targinfo
#endif
#else /* since 2.6.28 */
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
const struct xt_target_param *par
#else
const struct xt_action_param *par
#endif
#endif
)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
struct sk_buff *skb = *pskb;
#endif
struct iphdr _iph, *iph;
struct ipt_netflow_tuple tuple;
struct ipt_netflow *nf;
__u8 tcp_flags;
struct netflow_aggr_n *aggr_n;
struct netflow_aggr_p *aggr_p;
__u8 s_mask, d_mask;
unsigned int hash;

iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); //iph = ip_hdr(skb);

if (iph == NULL) {
NETFLOW_STAT_INC(truncated);
NETFLOW_STAT_INC(pkt_drop);
return IPT_CONTINUE;
}

tuple.s_addr = iph->saddr;
tuple.d_addr = iph->daddr;
tuple.s_port = 0;
tuple.d_port = 0;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
tuple.i_ifc = if_in? if_in->ifindex : -1;
#else
tuple.i_ifc = par->in? par->in->ifindex : -1;
#endif
tuple.protocol = iph->protocol;
tuple.tos = iph->tos;
tcp_flags = 0; /* Cisco sometimes reports TCP ACK flags for non-TCP packets; don't mimic that */
s_mask = 0;
d_mask = 0;

if (iph->frag_off & htons(IP_OFFSET))
NETFLOW_STAT_INC(frags);
else {
switch (tuple.protocol) {
case IPPROTO_TCP: {
struct tcphdr _hdr, *hp;

if ((hp = skb_header_pointer(skb, iph->ihl * 4, 14, &_hdr))) {
tuple.s_port = hp->source;
tuple.d_port = hp->dest;
tcp_flags = (u_int8_t)(ntohl(tcp_flag_word(hp)) >> 16);
}
break;
}
case IPPROTO_UDP: {
struct udphdr _hdr, *hp;

if ((hp = skb_header_pointer(skb, iph->ihl * 4, 4, &_hdr))) {
tuple.s_port = hp->source;
tuple.d_port = hp->dest;
}
break;
}
case IPPROTO_ICMP: {
struct icmphdr _hdr, *hp;

if ((hp = skb_header_pointer(skb, iph->ihl * 4, 2, &_hdr)))
tuple.d_port = (hp->type << 8) | hp->code;
break;
}
case IPPROTO_IGMP: {
struct igmphdr _hdr, *hp;

if ((hp = skb_header_pointer(skb, iph->ihl * 4, 1, &_hdr)))
tuple.d_port = hp->type;
}
break;
}
} /* not fragmented */

/* aggregate networks */
read_lock_bh(&aggr_lock);
list_for_each_entry(aggr_n, &aggr_n_list, list)
if ((ntohl(tuple.s_addr) & aggr_n->mask) == aggr_n->addr) {
tuple.s_addr &= htonl(aggr_n->aggr_mask);
s_mask = aggr_n->prefix;
break;
}
list_for_each_entry(aggr_n, &aggr_n_list, list)
if ((ntohl(tuple.d_addr) & aggr_n->mask) == aggr_n->addr) {
tuple.d_addr &= htonl(aggr_n->aggr_mask);
d_mask = aggr_n->prefix;
break;
}

/* aggregate ports */
list_for_each_entry(aggr_p, &aggr_p_list, list)
if (ntohs(tuple.s_port) >= aggr_p->port1 &&
ntohs(tuple.s_port) <= aggr_p->port2) {
tuple.s_port = htons(aggr_p->aggr_port);
break;
}

list_for_each_entry(aggr_p, &aggr_p_list, list)
if (ntohs(tuple.d_port) >= aggr_p->port1 &&
ntohs(tuple.d_port) <= aggr_p->port2) {
tuple.d_port = htons(aggr_p->aggr_port);
break;
}
read_unlock_bh(&aggr_lock);

hash = hash_netflow(&tuple);
spin_lock_bh(&ipt_netflow_lock);
/* record */
nf = ipt_netflow_find(&tuple, hash);
if (!nf) {
if (maxflows > 0 && atomic_read(&ipt_netflow_count) >= maxflows) {
/* DoS attack prevention: cap the flow count at maxflows */
NETFLOW_STAT_INC(maxflows_err);
NETFLOW_STAT_INC(pkt_drop);
NETFLOW_STAT_ADD(traf_drop, ntohs(iph->tot_len));
spin_unlock_bh(&ipt_netflow_lock);
return IPT_CONTINUE;
}

nf = init_netflow(&tuple, skb, hash);
if (!nf || IS_ERR(nf)) {
NETFLOW_STAT_INC(alloc_err);
NETFLOW_STAT_INC(pkt_drop);
NETFLOW_STAT_ADD(traf_drop, ntohs(iph->tot_len));
spin_unlock_bh(&ipt_netflow_lock);
return IPT_CONTINUE;
}

nf->ts_first = jiffies;
nf->tcp_flags = tcp_flags;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
nf->o_ifc = if_out? if_out->ifindex : -1;
#else
nf->o_ifc = par->out? par->out->ifindex : -1;
#endif
nf->s_mask = s_mask;
nf->d_mask = d_mask;

if (debug > 2)
printk(KERN_INFO "ipt_netflow: new (%u) %hd:%hd SRC=%u.%u.%u.%u:%u DST=%u.%u.%u.%u:%u\n",
atomic_read(&ipt_netflow_count),
tuple.i_ifc, nf->o_ifc,
NIPQUAD(tuple.s_addr), ntohs(tuple.s_port),
NIPQUAD(tuple.d_addr), ntohs(tuple.d_port));
} else {
/* ipt_netflow_list is sorted by access time:
* most recently accessed flows are at the head, old flows remain at the tail;
* bubble this flow up to the head */
list_move(&nf->list, &ipt_netflow_list);
}

nf->nr_packets++;
nf->nr_bytes += ntohs(iph->tot_len);
nf->ts_last = jiffies;
nf->tcp_flags |= tcp_flags;

NETFLOW_STAT_INC(pkt_total);
NETFLOW_STAT_ADD(traf_total, ntohs(iph->tot_len));

if (active_needs_export(nf, active_timeout * HZ)) {
/* this active flow is due for export, so
* bubble it to the tail */
list_move_tail(&nf->list, &ipt_netflow_list);

/* Blog: I thought about forcing the timer to wake up sooner if we have
* enough exportable flows, but in fact this doesn't make much sense,
* because it would only move flow data from one memory area to another
* (from our buffers to socket buffers, and socket buffers have
* limited size anyway). But yes, this is disputable. */
}

spin_unlock_bh(&ipt_netflow_lock);

return IPT_CONTINUE;
}

static struct ipt_target ipt_netflow_reg = {
.name = "NETFLOW",
.target = netflow_target,
.family = AF_INET,
#ifndef RAW_PROMISC_HACK
.table = "filter",
#ifndef NF_IP_LOCAL_IN /* 2.6.25 */
.hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
(1 << NF_INET_LOCAL_OUT),
#else
.hooks = (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) |
(1 << NF_IP_LOCAL_OUT),
#endif /* NF_IP_LOCAL_IN */
#else
.table = "raw",
#ifndef NF_IP_LOCAL_IN
.hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
(1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_PRE_ROUTING),
#else
.hooks = (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) |
(1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_PRE_ROUTING),
#endif /* NF_IP_LOCAL_IN */
#endif /* !RAW_PROMISC_HACK */
.me = THIS_MODULE
};

static int __init ipt_netflow_init(void)
{
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *proc_stat;
#endif

get_random_bytes(&ipt_netflow_hash_rnd, 4);

/* determine hash size (idea from nf_conntrack_core.c) */
if (!hashsize) {
hashsize = (((num_physpages << PAGE_SHIFT) / 16384)
/ sizeof(struct hlist_head));
if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
hashsize = 8192;
}
if (hashsize < 16)
hashsize = 16;
printk(KERN_INFO "ipt_netflow version %s (%u buckets)\n",
IPT_NETFLOW_VERSION, hashsize);

ipt_netflow_hash_size = hashsize;
ipt_netflow_hash = alloc_hashtable(ipt_netflow_hash_size);
if (!ipt_netflow_hash) {
printk(KERN_ERR "Unable to create ipt_netflow_hash\n");
goto err;
}

ipt_netflow_cachep = kmem_cache_create("ipt_netflow",
sizeof(struct ipt_netflow), 0,
0, NULL
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
, NULL
#endif
);
if (!ipt_netflow_cachep) {
printk(KERN_ERR "Unable to create ipt_netflow slab cache\n");
goto err_free_hash;
}

#ifdef CONFIG_PROC_FS
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
proc_stat = create_proc_entry("ipt_netflow", S_IRUGO, INIT_NET(proc_net_stat));
if (!proc_stat) {
printk(KERN_ERR "Unable to create /proc/net/stat/ipt_netflow entry\n");
goto err_free_netflow_slab;
}
proc_stat->proc_fops = &nf_seq_fops;
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30)
proc_stat->owner = THIS_MODULE;
#endif
printk(KERN_INFO "netflow: registered: /proc/net/stat/ipt_netflow\n");
#else /* >= 3.10 */
proc_stat = proc_create("ipt_netflow", S_IRUGO, INIT_NET(proc_net_stat), &nf_seq_fops);
if (!proc_stat) {
printk(KERN_ERR "Unable to create /proc/net/stat/ipt_netflow entry\n");
goto err_free_netflow_slab;
}
printk(KERN_INFO "netflow: registered: /proc/net/stat/ipt_netflow\n");
#endif /* LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) */
#endif /* CONFIG_PROC_FS */

#ifdef CONFIG_SYSCTL
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
netflow_sysctl_header = register_sysctl_table(netflow_net_table
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21)
, 0 /* insert_at_head */
#endif
);
#else /* 2.6.25 */
netflow_sysctl_header = register_sysctl_paths(netflow_sysctl_path, netflow_sysctl_table);
#endif
if (!netflow_sysctl_header) {
printk(KERN_ERR "netflow: can't register to sysctl\n");
goto err_free_proc_stat;
} else
printk(KERN_INFO "netflow: registered: sysctl net.netflow\n");
#endif

if (!destination)
destination = destination_buf;
if (destination != destination_buf) {
strlcpy(destination_buf, destination, sizeof(destination_buf));
destination = destination_buf;
}
if (add_destinations(destination) < 0)
goto err_free_sysctl;

if (!aggregation)
aggregation = aggregation_buf;
if (aggregation != aggregation_buf) {
strlcpy(aggregation_buf, aggregation, sizeof(aggregation_buf));
aggregation = aggregation_buf;
}
add_aggregation(aggregation);

__start_scan_worker();
setup_timer(&rate_timer, rate_timer_calc, 0);
mod_timer(&rate_timer, jiffies + (HZ * SAMPLERATE));

if (xt_register_target(&ipt_netflow_reg))
goto err_stop_timer;

peakflows_at = jiffies;

printk(KERN_INFO "ipt_netflow loaded.\n");
return 0;

err_stop_timer:
__stop_scan_worker();
del_timer_sync(&rate_timer);
destination_removeall();
aggregation_remove(&aggr_n_list);
aggregation_remove(&aggr_p_list);
err_free_sysctl:
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(netflow_sysctl_header);
err_free_proc_stat:
#endif
#ifdef CONFIG_PROC_FS
remove_proc_entry("ipt_netflow", INIT_NET(proc_net_stat));
err_free_netflow_slab:
#endif
kmem_cache_destroy(ipt_netflow_cachep);
err_free_hash:
vfree(ipt_netflow_hash);
err:
return -ENOMEM;
}

static void __exit ipt_netflow_fini(void)
{
printk(KERN_INFO "ipt_netflow unloading..\n");

#ifdef CONFIG_SYSCTL
unregister_sysctl_table(netflow_sysctl_header);
#endif
#ifdef CONFIG_PROC_FS
remove_proc_entry("ipt_netflow", INIT_NET(proc_net_stat));
#endif

xt_unregister_target(&ipt_netflow_reg);
__stop_scan_worker();
netflow_scan_and_export(1);
del_timer_sync(&rate_timer);

synchronize_sched();

destination_removeall();
aggregation_remove(&aggr_n_list);
aggregation_remove(&aggr_p_list);

kmem_cache_destroy(ipt_netflow_cachep);
vfree(ipt_netflow_hash);

printk(KERN_INFO "ipt_netflow unloaded.\n");
}

module_init(ipt_netflow_init);
module_exit(ipt_netflow_fini);

/* vim: set sw=8: */
kernel-source-ipt_netflow-1.8/ipt_NETFLOW.h
/*
* This file is part of NetFlow exporting module.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

#ifndef _IP_NETFLOW_H
#define _IP_NETFLOW_H

/*
* Some tech info:
* http://www.cisco.com/en/US/products/ps6601/prod_white_papers_list.html
* http://www.cisco.com/en/US/products/sw/netmgtsw/ps1964/products_implementation_design_guide09186a00800d6a11.html
*/

#define NETFLOW5_RECORDS_MAX 30

struct netflow5_record {
__be32 s_addr;
__be32 d_addr;
__be32 nexthop;
__be16 i_ifc;
__be16 o_ifc;
__be32 nr_packets;
__be32 nr_octets;
__be32 ts_first;
__be32 ts_last;
__be16 s_port;
__be16 d_port;
__u8 reserved;
__u8 tcp_flags;
__u8 protocol;
__u8 tos;
__be16 s_as;
__be16 d_as;
__u8 s_mask;
__u8 d_mask;
__u16 padding;
} __attribute__ ((packed));

/* NetFlow v5 packet */
struct netflow5_pdu {
__be16 version;
__be16 nr_records;
__be32 ts_uptime;
__be32 ts_usecs;
__be32 ts_unsecs;
__be32 seq;
__u8 eng_type;
__u8 eng_id;
__u16 padding;
struct netflow5_record flow[NETFLOW5_RECORDS_MAX];
} __attribute__ ((packed));
#define NETFLOW5_HEADER_SIZE (sizeof(struct netflow5_pdu) - NETFLOW5_RECORDS_MAX * sizeof(struct netflow5_record))

/* hashed data which identify unique flow */
struct ipt_netflow_tuple {
__be32 s_addr; // Network byte order
__be32 d_addr; // -"-
__be16 s_port; // -"-
__be16 d_port; // -"-
__be16 i_ifc; // Local byte order
__u8 protocol;
__u8 tos;
};
/* tuple size is rounded to u32s */
#define NETFLOW_TUPLE_SIZE (sizeof(struct ipt_netflow_tuple) / 4)

/* maximum bytes a flow can carry; once reached, the flow stops matching lookups and will be exported soon */
#define FLOW_FULL_WATERMARK 0xffefffff

/* flow entry */
struct ipt_netflow {
struct hlist_node hlist; // hashtable search chain
struct list_head list; // all flows chain

/* unique per flow data (hashed, NETFLOW_TUPLE_SIZE) */
struct ipt_netflow_tuple tuple;

/* volatile data */
__be16 o_ifc;
__u8 s_mask;
__u8 d_mask;

/* flow statistics */
u_int32_t nr_packets;
u_int32_t nr_bytes;
unsigned long ts_first;
unsigned long ts_last;
__u8 tcp_flags; /* `OR' of all tcp flags */
};

static inline int ipt_netflow_tuple_equal(const struct ipt_netflow_tuple *t1,
const struct ipt_netflow_tuple *t2)
{
return (!memcmp(t1, t2, sizeof(struct ipt_netflow_tuple)));
}

struct ipt_netflow_sock {
struct list_head list;
struct socket *sock;
__be32 ipaddr;
unsigned short port;
atomic_t wmem_peak; // sk_wmem_alloc peak value
atomic_t err_full; // socket filled error
atomic_t err_other; // other socket errors
};

struct netflow_aggr_n {
struct list_head list;
__u32 mask;
__u32 addr;
__u32 aggr_mask;
__u8 prefix;
};

struct netflow_aggr_p {
struct list_head list;
__u16 port1;
__u16 port2;
__u16 aggr_port;
};

#define NETFLOW_STAT_INC(count) (__get_cpu_var(ipt_netflow_stat).count++)
#define NETFLOW_STAT_ADD(count, val) (__get_cpu_var(ipt_netflow_stat).count += (unsigned long long)val)

#define NETFLOW_STAT_INC_ATOMIC(count) \
	do { \
		preempt_disable(); \
		(__get_cpu_var(ipt_netflow_stat).count++); \
		preempt_enable(); \
	} while (0)

#define NETFLOW_STAT_ADD_ATOMIC(count, val) \
	do { \
		preempt_disable(); \
		(__get_cpu_var(ipt_netflow_stat).count += (unsigned long long)val); \
		preempt_enable(); \
	} while (0)


/* statistics */
struct ipt_netflow_stat {
u64 searched; // hash stat
u64 found; // hash stat
u64 notfound; // hash stat
unsigned int truncated; // packets stat
unsigned int frags; // packets stat
unsigned int alloc_err; // failed to allocate flow mem
unsigned int maxflows_err; // maxflows reached
unsigned int send_success; // sendmsg() ok
unsigned int send_failed; // sendmsg() failed
unsigned int sock_errors; // socket error callback called (got icmp refused)
u64 exported_size; // netflow traffic itself
u64 pkt_total; // packets accounted total
u64 traf_total; // traffic accounted total
u64 pkt_drop; // packets not accounted total
u64 traf_drop; // traffic not accounted total
u64 pkt_out; // packets dropped due to memory shortage
u64 traf_out; // traffic dropped due to memory shortage
};

#ifndef list_first_entry
#define list_first_entry(ptr, type, member) \
list_entry((ptr)->next, type, member)
#endif

#endif
/* vim: set sw=8: */
kernel-source-ipt_netflow-1.8/kernel-source-ipt_netflow.spec:

Name: kernel-source-ipt_netflow
Version: 1.8
Release: alt2

Summary: Netflow iptables module for Linux kernel
License: GPL
Group: Development/Kernel
URL: http://sourceforge.net/projects/ipt-netflow/
Packager: Kernel Maintainer Team <kernel@packages.altlinux.org>

Source0: %name-%version.tar

BuildArch: noarch
BuildPreReq: rpm-build-kernel

%description
Ipt-netflow is a very fast and effective NetFlow exporting module for
the Linux kernel, designed for Linux routers under heavy network load.
It is a netfilter/iptables module adding support for the NETFLOW target.

%prep
%setup -q -c

%install
mkdir -p %kernel_srcdir
mv %name-%version ipt_netflow-%version
tar -cjf %kernel_srcdir/ipt_netflow-%version.tar.bz2 ipt_netflow-%version

%files
%attr(0644,root,root) %kernel_src/ipt_netflow-%version.tar.bz2

%changelog
* Wed Jul 17 2013 Anton V. Boyarshinov <boyarsh@altlinux.ru> 1.8-alt2
- support for kernel 3.10 added

* Tue May 07 2013 Anton V. Boyarshinov <boyarsh@altlinux.ru> 1.8-alt1
- build as separate module

kernel-source-ipt_netflow-1.8/libipt_NETFLOW.c:

/*
* iptables helper for NETFLOW target
* <abc@telekom.ru>
*
*
* This file is part of NetFlow exporting module.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <getopt.h>
#include <net/if.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>

#define __EXPORTED_HEADERS__
#ifdef XTABLES
#include <xtables.h>
#else
#include <iptables.h>
#endif

#ifdef XTABLES_VERSION_CODE // since 1.4.1
#define MOD140
#define iptables_target xtables_target
#endif

#ifdef iptables_target // only in 1.4.0
#define MOD140
#endif

#ifdef MOD140
#define ipt_entry_target xt_entry_target
#define register_target xtables_register_target
#define _IPT_ENTRY void
#define _IPT_IP void
#ifndef IPT_ALIGN
#define IPT_ALIGN XT_ALIGN
#endif
#else // before 1.3.x
#define _IPT_ENTRY struct ipt_entry
#define _IPT_IP struct ipt_ip
#endif

static struct option opts[] = {
{0}
};

static void help(void)
{
printf( "NETFLOW target\n");
}

static int parse(int c, char **argv, int invert, unsigned int *flags,
		const _IPT_ENTRY *entry,
		struct ipt_entry_target **targetinfo)
{
	return 1;
}

static void final_check(unsigned int flags)
{
}

static void save(const _IPT_IP *ip, const struct ipt_entry_target *match)
{
}

static void print(const _IPT_IP *ip,
const struct ipt_entry_target *target,
int numeric)
{
printf("NETFLOW ");
}

static struct iptables_target netflow = {
#ifdef MOD140
.family = AF_INET,
#endif
.next = NULL,
.name = "NETFLOW",
#ifdef XTABLES_VERSION
.version = XTABLES_VERSION,
#else
.version = IPTABLES_VERSION,
#endif
.size = IPT_ALIGN(0),
.userspacesize = IPT_ALIGN(0),
.help = &help,
.parse = &parse,
.final_check = &final_check,
.print = &print,
.save = &save,
.extra_opts = opts
};

#ifndef _init
#define _init __attribute__((constructor)) _INIT
#endif
void _init(void)
{
register_target(&netflow);
}
kernel-source-ipt_netflow-1.8/raw_promisc.patch:
This simple hack allows you to see promiscuous traffic in the raw table
of iptables. Of course, you will need to enable promiscuous mode on the
interface. Refer to README.promisc for details.

Example of how to catch the desired traffic:
iptables -A PREROUTING -t raw -i eth2 -j NETFLOW


--- linux-2.6.26/net/ipv4/ip_input.old.c 2008-07-14 01:51:29.000000000 +0400
+++ linux-2.6.26/net/ipv4/ip_input.c 2008-08-06 14:02:16.000000000 +0400
@@ -378,12 +378,6 @@
struct iphdr *iph;
u32 len;

- /* When the interface is in promisc. mode, drop all the crap
- * that it receives, do not try to analyse it.
- */
- if (skb->pkt_type == PACKET_OTHERHOST)
- goto drop;
-
IP_INC_STATS_BH(IPSTATS_MIB_INRECEIVES);

if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
kernel-source-ipt_netflow-1.8/raw_promisc_debian_squeeze6.patch:
A short manual and patch for Debian Squeeze,
suggested by Pavel Odintsov:

On Thu, Dec 27, 2012 at 07:46:30PM +0400, Pavel Odintsov wrote:
>
> Debian Squeeze promisc.
>
> cd /usr/src
> apt-get install -y dpkg-dev
> apt-get build-dep linux-image-2.6.32-5-amd64
> cd linux-2.6-2.6.32/
> apt-get source linux-image-2.6.32-5-amd64
>
> wget .... /root/raw_promisc_debian_squeeze6.patch
> patch -p1 < raw_promisc_debian_squeeze6.patch
> :
> debian/rules source
>
> :
> debian/rules binary
>

diff -rupN linux-2.6-2.6.32/net/ipv4/ip_input.c linux-2.6-2.6.32_promisc_raw//net/ipv4/ip_input.c
--- linux-2.6-2.6.32/net/ipv4/ip_input.c 2009-12-03 04:51:21.000000000 +0100
+++ linux-2.6-2.6.32_promisc_raw//net/ipv4/ip_input.c 2012-06-25 19:13:49.000000000 +0200
@@ -383,8 +383,8 @@ int ip_rcv(struct sk_buff *skb, struct n
/* When the interface is in promisc. mode, drop all the crap
* that it receives, do not try to analyse it.
*/
- if (skb->pkt_type == PACKET_OTHERHOST)
- goto drop;
+ //if (skb->pkt_type == PACKET_OTHERHOST)
+ // goto drop;


IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len);
 