(Logical change 1.3)

This commit is contained in:
osdl.org!shemminger 2004-04-15 20:56:59 +00:00
parent 86fdf0e47b
commit aba5acdfdb
137 changed files with 31144 additions and 0 deletions

2
Config
View File

@ -0,0 +1,2 @@
TC_CONFIG_DIFFSERV=n
TC_CONFIG_ATM=n

View File

@ -0,0 +1,77 @@
# Path to parent kernel include files directory
# Installation layout: DESTDIR is prepended to every install path used by
# the "install" target below.
DESTDIR=
SBINDIR=/sbin
CONFDIR=/etc/iproute2
DOCDIR=/usr/doc/iproute2
KERNEL_INCLUDE=/usr/src/linux/include
LIBC_INCLUDE=/usr/include
DEFINES= -DRESOLVE_HOSTNAMES
#options if you have a bind>=4.9.4 libresolv (or, maybe, glibc)
LDLIBS=-lresolv
ADDLIB=
#options if you compile with libc5, and without a bind>=4.9.4 libresolv
#LDLIBS=
#ADDLIB=inet_ntop.o inet_pton.o
#options for decnet
ADDLIB+=dnet_ntop.o dnet_pton.o
#options for ipx
ADDLIB+=ipx_ntop.o ipx_pton.o
# Add the include-glibc workaround headers (GLIBCFIX) when the libc headers
# under LIBC_INCLUDE contain socketbits.h together with net/if_packet.h, or
# contain bits/socket.h. The second test overrides the first when both match.
ifeq ($(LIBC_INCLUDE)/socketbits.h,$(wildcard $(LIBC_INCLUDE)/socketbits.h))
ifeq ($(LIBC_INCLUDE)/net/if_packet.h,$(wildcard $(LIBC_INCLUDE)/net/if_packet.h))
GLIBCFIX=-I../include-glibc -include ../include-glibc/glibc-bugs.h
endif
endif
ifeq ($(LIBC_INCLUDE)/bits/socket.h,$(wildcard $(LIBC_INCLUDE)/bits/socket.h))
GLIBCFIX=-I../include-glibc -I/usr/include/db3 -include ../include-glibc/glibc-bugs.h
endif
# Compiler settings; relative paths (../include, ../lib) are written from
# the point of view of the subdirectories listed in SUBDIRS.
CC = gcc
CCOPTS = -D_GNU_SOURCE -O2 -Wstrict-prototypes -Wall -g
CFLAGS = $(CCOPTS) $(GLIBCFIX) -I$(KERNEL_INCLUDE) -I../include $(DEFINES)
LDLIBS += -L../lib -lnetlink -lutil
SUBDIRS=lib ip tc misc
LIBNETLINK=../lib/libnetlink.a ../lib/libutil.a
# Default target: check the kernel headers, then run make in every
# directory of SUBDIRS, stopping at the first failure (set -e).
all: check-kernel
@set -e; \
for i in $(SUBDIRS); \
do $(MAKE) -C $$i; done
# Fail early unless KERNEL_INCLUDE is non-empty and linux/autoconf.h is
# readable beneath it.
check-kernel:
ifeq ($(KERNEL_INCLUDE),)
@echo "Please, set correct KERNEL_INCLUDE"; false
else
@set -e; \
if [ ! -r $(KERNEL_INCLUDE)/linux/autoconf.h ]; then \
echo "Please, compile the kernel first"; false; fi
endif
# Create the target directories, install the examples and run "install" in
# SUBDIRS and doc. Files from etc/iproute2 are copied into CONFDIR only when
# no file of that name exists there yet, so existing configuration is kept.
install: all
install -m 0755 -d $(DESTDIR)$(SBINDIR)
install -m 0755 -d $(DESTDIR)$(CONFDIR)
install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples
install -m 0755 -d $(DESTDIR)$(DOCDIR)/examples/diffserv
install -m 0644 README.iproute2+tc $(shell find examples -type f -maxdepth 1) $(DESTDIR)$(DOCDIR)/examples
install -m 0644 $(shell echo examples/diffserv/*) $(DESTDIR)$(DOCDIR)/examples/diffserv
@for i in $(SUBDIRS) doc; do $(MAKE) -C $$i install; done
@cd etc/iproute2; for i in *; do \
if [ ! -e $(DESTDIR)$(CONFDIR)/$$i ]; then \
echo install -m 0644 $$i $(DESTDIR)$(CONFDIR); \
install -m 0644 $$i $(DESTDIR)$(CONFDIR); fi; done
# Run "make clean" in every directory of SUBDIRS and in doc.
clean:
for i in $(SUBDIRS) doc; \
do $(MAKE) -C $$i clean; done
# Pass every variable defined above down to the sub-makes.
.EXPORT_ALL_VARIABLES:

View File

@ -0,0 +1,7 @@
File: tcp_diag.c
Status: desired for kernels < 2.4.17
not needed for kernels >= 2.4.17
Description: adds tcpdiag facility to kernel to accelerate ss utility
and pidentd
Side effects: none

View File

@ -0,0 +1,623 @@
/*
* tcp_diag.c Module for monitoring TCP sockets.
*
* Version: $
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/config.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/random.h>
#include <linux/cache.h>
#include <linux/init.h>
#include <net/icmp.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <linux/inet.h>
#include <linux/stddef.h>
#include "tcp_diag.h"
static struct sock *tcpnl;
/*
 * TCPDIAG_PUT - append one rtattr of type @attrtype with @attrlen payload
 * bytes to @skb and evaluate to a pointer to its (uninitialised) payload.
 *
 * The expansion jumps to a label named nlmsg_failure, which the calling
 * function must provide, when @skb has no room for the aligned attribute.
 *
 * @skb is evaluated exactly once and every argument is parenthesised, so
 * the macro is safe to use with arbitrary expressions.
 */
#define TCPDIAG_PUT(skb, attrtype, attrlen) \
({	struct sk_buff *_skb = (skb); \
	int _rtalen = RTA_LENGTH(attrlen); \
	struct rtattr *_rta; \
	if (skb_tailroom(_skb) < RTA_ALIGN(_rtalen)) \
		goto nlmsg_failure; \
	_rta = (void *)__skb_put(_skb, RTA_ALIGN(_rtalen)); \
	_rta->rta_type = (attrtype); \
	_rta->rta_len = _rtalen; \
	RTA_DATA(_rta); })
/*
 * tcpdiag_fill - append one TCPDIAG_GETSOCK message describing @sk to @skb.
 * @skb: netlink reply buffer being built
 * @sk:  socket to describe; when sk->state is TCP_TIME_WAIT the pointer is
 *       reinterpreted as a struct tcp_tw_bucket
 * @ext: extension bitmask; bit (TCPDIAG_MEMINFO-1) requests a
 *       TCPDIAG_MEMINFO attribute and bit (TCPDIAG_INFO-1) a TCPDIAG_INFO
 *       attribute (never emitted for TIME_WAIT entries)
 * @pid: passed to NLMSG_PUT for the netlink header
 * @seq: passed to NLMSG_PUT for the netlink header
 *
 * Returns skb->len on success.  When the buffer has no room, control
 * reaches nlmsg_failure, the skb is trimmed back to its length on entry
 * and -1 is returned.
 */
static int tcpdiag_fill(struct sk_buff *skb, struct sock *sk,
			int ext, u32 pid, u32 seq)
{
	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
	struct tcpdiagmsg *r;
	struct nlmsghdr *nlh;
	struct tcp_info *info = NULL;
	struct tcpdiag_meminfo *minfo = NULL;
	unsigned char *b = skb->tail;	/* start of this message, for rollback */

	nlh = NLMSG_PUT(skb, pid, seq, TCPDIAG_GETSOCK, sizeof(*r));
	r = NLMSG_DATA(nlh);

	/* Reserve the optional attributes now; they are filled in below. */
	if (sk->state != TCP_TIME_WAIT) {
		if (ext & (1<<(TCPDIAG_MEMINFO-1)))
			minfo = TCPDIAG_PUT(skb, TCPDIAG_MEMINFO, sizeof(*minfo));
		if (ext & (1<<(TCPDIAG_INFO-1)))
			info = TCPDIAG_PUT(skb, TCPDIAG_INFO, sizeof(*info));
	}

	r->tcpdiag_family = sk->family;
	r->tcpdiag_state = sk->state;
	r->tcpdiag_timer = 0;
	r->tcpdiag_retrans = 0;

	/*
	 * Clear the whole socket id first.  For IPv4 only word 0 of the
	 * source and destination addresses is written, and where a pointer
	 * is narrower than the cookie field the pointer store below does not
	 * cover all of it; without this the untouched words would carry
	 * stale skb memory out to user space.
	 */
	memset(&r->id, 0, sizeof(r->id));

	r->id.tcpdiag_sport = sk->sport;
	r->id.tcpdiag_dport = sk->dport;
	r->id.tcpdiag_src[0] = sk->rcv_saddr;
	r->id.tcpdiag_dst[0] = sk->daddr;
	r->id.tcpdiag_if = sk->bound_dev_if;
	/* The cookie is the socket address; tcpdiag_get_exact compares it. */
	*((struct sock **)&r->id.tcpdiag_cookie) = sk;

	if (r->tcpdiag_state == TCP_TIME_WAIT) {
		struct tcp_tw_bucket *tw = (struct tcp_tw_bucket*)sk;
		long tmo = tw->ttd - jiffies;

		if (tmo < 0)
			tmo = 0;

		/* Report the bucket's substate instead of TCP_TIME_WAIT. */
		r->tcpdiag_state = tw->substate;
		r->tcpdiag_timer = 3;
		r->tcpdiag_expires = (tmo*1000+HZ-1)/HZ;	/* jiffies -> ms, rounded up */
		r->tcpdiag_rqueue = 0;
		r->tcpdiag_wqueue = 0;
		r->tcpdiag_uid = 0;
		r->tcpdiag_inode = 0;
#ifdef CONFIG_IPV6
		if (r->tcpdiag_family == AF_INET6) {
			memcpy(r->id.tcpdiag_src, &tw->v6_rcv_saddr, 16);
			memcpy(r->id.tcpdiag_dst, &tw->v6_daddr, 16);
		}
#endif
		nlh->nlmsg_len = skb->tail - b;
		return skb->len;
	}

#ifdef CONFIG_IPV6
	if (r->tcpdiag_family == AF_INET6) {
		memcpy(r->id.tcpdiag_src, &sk->net_pinfo.af_inet6.rcv_saddr, 16);
		memcpy(r->id.tcpdiag_dst, &sk->net_pinfo.af_inet6.daddr, 16);
	}
#endif

/* Time left until the jiffies value @tmo, in milliseconds, rounded up. */
#define EXPIRES_IN_MS(tmo) ((((tmo) - jiffies) * 1000 + HZ - 1) / HZ)

	/* tcpdiag_timer: 1 = retransmit, 4 = zero-window probe, 2 = sk->timer. */
	if (tp->pending == TCP_TIME_RETRANS) {
		r->tcpdiag_timer = 1;
		r->tcpdiag_retrans = tp->retransmits;
		r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout);
	} else if (tp->pending == TCP_TIME_PROBE0) {
		r->tcpdiag_timer = 4;
		r->tcpdiag_retrans = tp->probes_out;
		r->tcpdiag_expires = EXPIRES_IN_MS(tp->timeout);
	} else if (timer_pending(&sk->timer)) {
		r->tcpdiag_timer = 2;
		r->tcpdiag_retrans = tp->probes_out;
		r->tcpdiag_expires = EXPIRES_IN_MS(sk->timer.expires);
	} else {
		r->tcpdiag_timer = 0;
		r->tcpdiag_expires = 0;
	}
#undef EXPIRES_IN_MS

	r->tcpdiag_rqueue = tp->rcv_nxt - tp->copied_seq;
	r->tcpdiag_wqueue = tp->write_seq - tp->snd_una;
	r->tcpdiag_uid = sock_i_uid(sk);
	r->tcpdiag_inode = sock_i_ino(sk);

	if (minfo) {
		minfo->tcpdiag_rmem = atomic_read(&sk->rmem_alloc);
		minfo->tcpdiag_wmem = sk->wmem_queued;
		minfo->tcpdiag_fmem = sk->forward_alloc;
		minfo->tcpdiag_tmem = atomic_read(&sk->wmem_alloc);
	}

	if (info) {
		u32 now = tcp_time_stamp;

		info->tcpi_state = sk->state;
		info->tcpi_ca_state = tp->ca_state;
		info->tcpi_retransmits = tp->retransmits;
		info->tcpi_probes = tp->probes_out;
		info->tcpi_backoff = tp->backoff;

		info->tcpi_options = 0;
		if (tp->tstamp_ok)
			info->tcpi_options |= TCPI_OPT_TIMESTAMPS;
		if (tp->sack_ok)
			info->tcpi_options |= TCPI_OPT_SACK;
		if (tp->wscale_ok) {
			info->tcpi_options |= TCPI_OPT_WSCALE;
			info->tcpi_snd_wscale = tp->snd_wscale;
			info->tcpi_rcv_wscale = tp->rcv_wscale;
		} else {
			info->tcpi_snd_wscale = 0;
			info->tcpi_rcv_wscale = 0;
		}
#ifdef CONFIG_INET_ECN
		if (tp->ecn_flags&TCP_ECN_OK)
			info->tcpi_options |= TCPI_OPT_ECN;
#endif

		/*
		 * Jiffies -> microseconds.
		 * NOTE(review): 1000000*x looks like it can wrap if these
		 * fields are 32-bit and hold large values - confirm the
		 * field types and value ranges.
		 */
		info->tcpi_rto = (1000000*tp->rto)/HZ;
		info->tcpi_ato = (1000000*tp->ack.ato)/HZ;
		info->tcpi_snd_mss = tp->mss_cache;
		info->tcpi_rcv_mss = tp->ack.rcv_mss;

		info->tcpi_unacked = tp->packets_out;
		info->tcpi_sacked = tp->sacked_out;
		info->tcpi_lost = tp->lost_out;
		info->tcpi_retrans = tp->retrans_out;
		info->tcpi_fackets = tp->fackets_out;

		/* Ages relative to tcp_time_stamp, converted to milliseconds. */
		info->tcpi_last_data_sent = ((now - tp->lsndtime)*1000)/HZ;
		info->tcpi_last_ack_sent = 0;
		info->tcpi_last_data_recv = ((now - tp->ack.lrcvtime)*1000)/HZ;
		info->tcpi_last_ack_recv = ((now - tp->rcv_tstamp)*1000)/HZ;

		info->tcpi_pmtu = tp->pmtu_cookie;
		info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
		/* srtt and mdev are shifted right by 3 and 2 after conversion. */
		info->tcpi_rtt = ((1000000*tp->srtt)/HZ)>>3;
		info->tcpi_rttvar = ((1000000*tp->mdev)/HZ)>>2;
		info->tcpi_snd_ssthresh = tp->snd_ssthresh;
		info->tcpi_snd_cwnd = tp->snd_cwnd;
		info->tcpi_advmss = tp->advmss;
		info->tcpi_reordering = tp->reordering;
	}

	nlh->nlmsg_len = skb->tail - b;
	return skb->len;

nlmsg_failure:
	/* Drop the partially built message. */
	skb_trim(skb, b - skb->data);
	return -1;
}
extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif);
#ifdef CONFIG_IPV6
extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
struct in6_addr *daddr, u16 dport,
int dif);
#endif
/*
 * tcpdiag_get_exact - answer a non-dump request for one specific socket.
 * @in_skb: skb carrying the request; its NETLINK_CB pid addresses the reply
 * @nlh:    netlink header of the request, payload is a struct tcpdiagreq
 *
 * Looks the socket up by the addresses and ports in the request, checks
 * the optional cookie, and unicasts a single tcpdiag_fill() record back.
 *
 * Returns 0 on success, -EINVAL for an unsupported address family,
 * -ENOENT when no socket matches, -ESTALE when a cookie was supplied and
 * does not match the socket, -ENOMEM when the reply skb cannot be
 * allocated, or a non-positive result of netlink_unicast().
 */
static int tcpdiag_get_exact(struct sk_buff *in_skb, struct nlmsghdr *nlh)
{
	struct tcpdiagreq *req = NLMSG_DATA(nlh);
	struct sk_buff *rep;
	struct sock *sk;
	int err;

	/*
	 * The lookup helpers receive the request's dst/dport as their first
	 * address/port pair and src/sport as the second.
	 */
	switch (req->tcpdiag_family) {
	case AF_INET:
		sk = tcp_v4_lookup(req->id.tcpdiag_dst[0], req->id.tcpdiag_dport,
				   req->id.tcpdiag_src[0], req->id.tcpdiag_sport,
				   req->id.tcpdiag_if);
		break;
#ifdef CONFIG_IPV6
	case AF_INET6:
		sk = tcp_v6_lookup((struct in6_addr*)req->id.tcpdiag_dst, req->id.tcpdiag_dport,
				   (struct in6_addr*)req->id.tcpdiag_src, req->id.tcpdiag_sport,
				   req->id.tcpdiag_if);
		break;
#endif
	default:
		return -EINVAL;
	}

	if (sk == NULL)
		return -ENOENT;

	/* A cookie other than NOCOOKIE/NOCOOKIE must equal the socket pointer. */
	if ((req->id.tcpdiag_cookie[0] != TCPDIAG_NOCOOKIE ||
	     req->id.tcpdiag_cookie[1] != TCPDIAG_NOCOOKIE) &&
	    sk != *((struct sock **)&req->id.tcpdiag_cookie[0])) {
		err = -ESTALE;
		goto release;
	}

	rep = alloc_skb(NLMSG_SPACE(sizeof(struct tcpdiagmsg)+
				    sizeof(struct tcpdiag_meminfo)+
				    sizeof(struct tcp_info)+64), GFP_KERNEL);
	if (!rep) {
		err = -ENOMEM;
		goto release;
	}

	/* The reply skb was sized for the largest record, so this cannot fail. */
	if (tcpdiag_fill(rep, sk, req->tcpdiag_ext,
			 NETLINK_CB(in_skb).pid,
			 nlh->nlmsg_seq) <= 0)
		BUG();

	err = netlink_unicast(tcpnl, rep, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
	if (err > 0)
		err = 0;

release:
	/* sk is never NULL here; drop the reference taken by the lookup. */
	if (sk->state == TCP_TIME_WAIT)
		tcp_tw_put((struct tcp_tw_bucket*)sk);
	else
		sock_put(sk);
	return err;
}
/*
 * bitstring_match - compare the leading @bits bits of two bit strings.
 * @a1:   first bit string, as an array of 32-bit words
 * @a2:   second bit string, as an array of 32-bit words
 * @bits: number of leading bits to compare
 *
 * Whole 32-bit words are compared with memcmp(); a trailing partial word
 * is compared under a mask that is built in host order and converted with
 * htonl().
 *
 * Returns 1 when the prefixes are equal, 0 otherwise.
 */
int bitstring_match(u32 *a1, u32 *a2, int bits)
{
	int full_words = bits >> 5;
	int tail_bits = bits & 0x1f;

	if (full_words && memcmp(a1, a2, full_words << 2))
		return 0;

	if (tail_bits) {
		__u32 tail_mask = htonl((0xffffffff) << (32 - tail_bits));
		__u32 diff = a1[full_words] ^ a2[full_words];

		if (diff & tail_mask)
			return 0;
	}

	return 1;
}
/*
 * Evaluate a TCPDIAG_BC_S_COND / TCPDIAG_BC_D_COND host condition, whose
 * struct tcpdiag_hostcond operand directly follows @op, against @sk.
 * Returns 1 when the condition holds and 0 otherwise.
 */
static int tcpdiag_hostcond_hit(struct tcpdiag_bc_op *op, struct sock *sk)
{
	struct tcpdiag_hostcond *cond = (struct tcpdiag_hostcond*)(op+1);
	int is_src = (op->code == TCPDIAG_BC_S_COND);
	u32 *addr;

	/* A port of -1 is a wildcard; any other port must match exactly. */
	if (cond->port != -1 &&
	    cond->port != (is_src ? sk->num : ntohs(sk->dport)))
		return 0;

	/* A zero-length prefix matches every address. */
	if (cond->prefix_len == 0)
		return 1;

	if (sk->family == AF_INET6) {
		if (is_src)
			addr = (u32*)&sk->net_pinfo.af_inet6.rcv_saddr;
		else
			addr = (u32*)&sk->net_pinfo.af_inet6.daddr;
	} else {
		if (is_src)
			addr = &sk->rcv_saddr;
		else
			addr = &sk->daddr;
	}

	if (bitstring_match(addr, cond->addr, cond->prefix_len))
		return 1;

	/*
	 * An AF_INET condition may still match an AF_INET6 socket whose
	 * address is 0:0:ffff:<v4>; compare against the last word then.
	 */
	if (sk->family == AF_INET6 && cond->family == AF_INET &&
	    addr[0] == 0 && addr[1] == 0 &&
	    addr[2] == __constant_htonl(0xffff) &&
	    bitstring_match(addr+3, cond->addr, cond->prefix_len))
		return 1;

	return 0;
}

/*
 * tcpdiag_bc_run - run a filter bytecode program against a socket.
 * @bc:  start of the program
 * @len: length of the program in bytes
 * @sk:  socket being tested
 *
 * Every op is evaluated to true or false; the program then advances by
 * op->yes bytes on true and by op->no bytes on false.
 *
 * Returns 1 when execution stops exactly at the end of the program
 * (len == 0), and 0 when a jump overshoots the end.
 */
int tcpdiag_bc_run(char *bc, int len, struct sock *sk)
{
	while (len > 0) {
		struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op*)bc;
		int matched = 1;
		int step;

		switch (op->code) {
		case TCPDIAG_BC_NOP:
			break;
		case TCPDIAG_BC_JMP:
			/* Unconditional jump: always take the "no" offset. */
			matched = 0;
			break;
		case TCPDIAG_BC_S_GE:
			matched = (sk->num >= op[1].no);
			break;
		case TCPDIAG_BC_S_LE:
			matched = (sk->num <= op[1].no);
			break;
		case TCPDIAG_BC_D_GE:
			matched = (ntohs(sk->dport) >= op[1].no);
			break;
		case TCPDIAG_BC_D_LE:
			matched = (ntohs(sk->dport) <= op[1].no);
			break;
		case TCPDIAG_BC_AUTO:
			matched = !(sk->userlocks&SOCK_BINDPORT_LOCK);
			break;
		case TCPDIAG_BC_S_COND:
		case TCPDIAG_BC_D_COND:
			matched = tcpdiag_hostcond_hit(op, sk);
			break;
		default:
			/* Any other opcode counts as true. */
			break;
		}

		step = matched ? op->yes : op->no;
		len -= step;
		bc += step;
	}

	return (len == 0);
}
/*
 * valid_cc - check that a jump target falls on an op boundary.
 * @bc:  start of the bytecode program
 * @len: length of the program in bytes
 * @cc:  number of bytes that remain in the program at the jump target
 *
 * Walks the program from its start along the op->yes offsets and reports
 * whether some op starts with exactly @cc bytes remaining.
 *
 * Returns 1 when such an op boundary exists, 0 otherwise (including when
 * an op with yes < 4 is met on the way).
 */
int valid_cc(char *bc, int len, int cc)
{
	struct tcpdiag_bc_op *op;

	for (; len >= 0; bc += op->yes, len -= op->yes) {
		/* Reached or passed the target: valid only on an exact hit. */
		if (cc >= len)
			return cc == len;

		op = (struct tcpdiag_bc_op*)bc;
		if (op->yes < 4)
			return 0;
	}

	return 0;
}
int tcpdiag_bc_audit(char *bytecode, int bytecode_len)
{
char *bc = bytecode;
int len = bytecode_len;
while (len > 0) {
struct tcpdiag_bc_op *op = (struct tcpdiag_bc_op*)bc;
//printk("BC: %d %d %d {%d} / %d\n", op->code, op->yes, op->no, op[1].no, len);
switch (op->code) {
case TCPDIAG_BC_AUTO:
case TCPDIAG_BC_S_COND:
case TCPDIAG_BC_D_COND:
case TCPDIAG_BC_S_GE:
case TCPDIAG_BC_S_LE:
case TCPDIAG_BC_D_GE:
case TCPDIAG_BC_D_LE:
if (op->yes < 4 || op->yes > len+4)
return -EINVAL;
case TCPDIAG_BC_JMP:
if (op->no < 4 || op->no > len+4)
return -EINVAL;
if (op->no < len &&
!valid_cc(bytecode, bytecode_len, len-op->no))
return -EINVAL;
break;
case TCPDIAG_BC_NOP:
if (op->yes < 4 || op->yes > len+4)
return -EINVAL;
break;
default:
return -EINVAL;
}
bc += op->yes;
len -= op->yes;
}
return len == 0 ? 0 : -EINVAL;
}
/*
 * tcpdiag_dump - netlink dump callback: append one record per matching
 * TCP socket to @skb.
 *
 * The walk is resumable.  Its position is kept in cb->args:
 *   args[0]  0 while the listening hash is being walked, 1 afterwards
 *   args[1]  index of the hash bucket to resume in
 *   args[2]  number of chain entries to skip in that bucket
 * Each call continues from there and stops at "done" as soon as
 * tcpdiag_fill() reports that @skb is full.
 *
 * Returns skb->len.
 */
int tcpdiag_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
int i, num;
int s_i, s_num;
struct tcpdiagreq *r = NLMSG_DATA(cb->nlh);
struct rtattr *bc = NULL;
/* Anything beyond the fixed request is treated as a filter bytecode attribute. */
if (cb->nlh->nlmsg_len > 4+NLMSG_SPACE(sizeof(struct tcpdiagreq)))
bc = (struct rtattr*)(r+1);
/* Restore the resume position saved by the previous call. */
s_i = cb->args[1];
s_num = num = cb->args[2];
/* Phase 0: the listening hash table. */
if (cb->args[0] == 0) {
if (!(r->tcpdiag_states&(TCPF_LISTEN|TCPF_SYN_RECV)))
goto skip_listen_ht;
tcp_listen_lock();
for (i = s_i; i < TCP_LHTABLE_SIZE; i++) {
struct sock *sk = tcp_listening_hash[i];
/* The skip count only applies to the bucket we resumed in. */
if (i > s_i)
s_num = 0;
for (sk = tcp_listening_hash[i], num = 0;
sk != NULL;
sk = sk->next, num++) {
if (num < s_num)
continue;
/* Skipped unless LISTEN was requested and no destination port filter is set. */
if (!(r->tcpdiag_states&TCPF_LISTEN) ||
r->id.tcpdiag_dport)
continue;
if (r->id.tcpdiag_sport != sk->sport && r->id.tcpdiag_sport)
continue;
if (bc && !tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), sk))
continue;
/* skb is full: save the position (i, num) and finish this call. */
if (tcpdiag_fill(skb, sk, r->tcpdiag_ext,
NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq) <= 0) {
tcp_listen_unlock();
goto done;
}
}
}
tcp_listen_unlock();
skip_listen_ht:
/* Listening table finished: switch to phase 1 and restart the cursor. */
cb->args[0] = 1;
s_i = num = s_num = 0;
}
/* Nothing but LISTEN/SYN_RECV requested: the remaining table is not walked. */
if (!(r->tcpdiag_states&~(TCPF_LISTEN|TCPF_SYN_RECV)))
return skb->len;
/* Phase 1: tcp_ehash, one bucket at a time under the bucket's read lock. */
for (i = s_i; i < tcp_ehash_size; i++) {
struct tcp_ehash_bucket *head = &tcp_ehash[i];
struct sock *sk;
if (i > s_i)
s_num = 0;
read_lock_bh(&head->lock);
for (sk = head->chain, num = 0;
sk != NULL;
sk = sk->next, num++) {
if (num < s_num)
continue;
if (!(r->tcpdiag_states&(1<<sk->state)))
continue;
if (r->id.tcpdiag_sport != sk->sport && r->id.tcpdiag_sport)
continue;
if (r->id.tcpdiag_dport != sk->dport && r->id.tcpdiag_dport)
continue;
if (bc && !tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), sk))
continue;
if (tcpdiag_fill(skb, sk, r->tcpdiag_ext,
NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq) <= 0) {
read_unlock_bh(&head->lock);
goto done;
}
}
/*
 * The chain at index i + tcp_ehash_size is walked under the same lock when
 * TIME_WAIT was requested.  num keeps counting on from the chain above, so
 * one skip count covers both chains of a bucket.
 * NOTE(review): the state filter tests sk->zapped here, presumably because
 * it overlays the time-wait bucket's substate - confirm against the
 * tcp_tw_bucket layout.
 */
if (r->tcpdiag_states&TCPF_TIME_WAIT) {
for (sk = tcp_ehash[i+tcp_ehash_size].chain;
sk != NULL;
sk = sk->next, num++) {
if (num < s_num)
continue;
if (!(r->tcpdiag_states&(1<<sk->zapped)))
continue;
if (r->id.tcpdiag_sport != sk->sport && r->id.tcpdiag_sport)
continue;
if (r->id.tcpdiag_dport != sk->dport && r->id.tcpdiag_dport)
continue;
if (bc && !tcpdiag_bc_run(RTA_DATA(bc), RTA_PAYLOAD(bc), sk))
continue;
if (tcpdiag_fill(skb, sk, r->tcpdiag_ext,
NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq) <= 0) {
read_unlock_bh(&head->lock);
goto done;
}
}
}
read_unlock_bh(&head->lock);
}
done:
/* Remember where to resume on the next call. */
cb->args[1] = i;
cb->args[2] = num;
return skb->len;
}
/*
 * tcpdiag_dump_done - completion callback handed to netlink_dump_start()
 * together with tcpdiag_dump().  It does nothing and always returns 0.
 */
static int tcpdiag_dump_done(struct netlink_callback *cb)
{
return 0;
}
/*
 * tcpdiag_rcv_msg - validate and dispatch one netlink request.
 * @skb: skb that carries the message
 * @nlh: header of the message inside @skb
 *
 * Messages without NLM_F_REQUEST are ignored (returns 0).  Only
 * TCPDIAG_GETSOCK is accepted, and the skb must be long enough to hold a
 * struct tcpdiagreq.  With NLM_F_DUMP an optional TCPDIAG_REQ_BYTECODE
 * attribute is audited and a dump is started; without it a single socket
 * is looked up by tcpdiag_get_exact().
 *
 * Returns -EINVAL for a malformed request, otherwise the result of
 * netlink_dump_start() or tcpdiag_get_exact().
 */
static __inline__ int
tcpdiag_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	if (!(nlh->nlmsg_flags&NLM_F_REQUEST))
		return 0;

	if (nlh->nlmsg_type != TCPDIAG_GETSOCK)
		return -EINVAL;

	if (NLMSG_LENGTH(sizeof(struct tcpdiagreq)) > skb->len)
		return -EINVAL;

	/* Exact lookup of one socket. */
	if (!(nlh->nlmsg_flags&NLM_F_DUMP))
		return tcpdiag_get_exact(skb, nlh);

	/* Dump request: check the trailing bytecode attribute, if there is one. */
	if (nlh->nlmsg_len > 4 + NLMSG_SPACE(sizeof(struct tcpdiagreq))) {
		struct rtattr *rta = (struct rtattr*)(NLMSG_DATA(nlh) + sizeof(struct tcpdiagreq));

		if (rta->rta_type != TCPDIAG_REQ_BYTECODE)
			return -EINVAL;
		if (rta->rta_len < 8)
			return -EINVAL;
		if (rta->rta_len > nlh->nlmsg_len - NLMSG_SPACE(sizeof(struct tcpdiagreq)))
			return -EINVAL;
		if (tcpdiag_bc_audit(RTA_DATA(rta), RTA_PAYLOAD(rta)))
			return -EINVAL;
	}

	return netlink_dump_start(tcpnl, skb, nlh,
				  tcpdiag_dump,
				  tcpdiag_dump_done);
}
/*
 * tcpdiag_rcv_skb - process the netlink message at the head of @skb.
 *
 * The skb is ignored when it is shorter than an aligned netlink header,
 * or when the header's nlmsg_len is smaller than the header itself or
 * larger than the skb.  Otherwise the message goes to tcpdiag_rcv_msg();
 * a non-zero result is reported to the sender with netlink_ack().
 */
extern __inline__ void tcpdiag_rcv_skb(struct sk_buff *skb)
{
	struct nlmsghdr *nlh;
	int err;

	if (skb->len < NLMSG_SPACE(0))
		return;

	nlh = (struct nlmsghdr *)skb->data;
	if (nlh->nlmsg_len < sizeof(*nlh) || skb->len < nlh->nlmsg_len)
		return;

	err = tcpdiag_rcv_msg(skb, nlh);
	if (err)
		netlink_ack(skb, nlh, err);
}
/*
 * tcpdiag_rcv - input callback registered with netlink_kernel_create().
 * @sk:  the kernel netlink socket whose receive queue has data
 * @len: unused
 *
 * Drains the receive queue: every skb is processed by tcpdiag_rcv_skb()
 * and then freed.
 */
static void tcpdiag_rcv(struct sock *sk, int len)
{
	for (;;) {
		struct sk_buff *skb = skb_dequeue(&sk->receive_queue);

		if (skb == NULL)
			break;

		tcpdiag_rcv_skb(skb);
		kfree_skb(skb);
	}
}
/*
 * tcpdiag_init - module entry point.
 *
 * Creates the NETLINK_TCPDIAG kernel socket with tcpdiag_rcv() as its
 * input handler and stores it in tcpnl.
 *
 * Returns 0 on success, -EBUSY when the socket cannot be created.
 */
static int __init tcpdiag_init(void)
{
	tcpnl = netlink_kernel_create(NETLINK_TCPDIAG, tcpdiag_rcv);

	return tcpnl == NULL ? -EBUSY : 0;
}
/*
 * tcpdiag_exit - module exit point.
 *
 * Logs a warning that unloading is poorly supported, then releases the
 * kernel netlink socket if one was created.
 */
static void __exit tcpdiag_exit(void)
{
	printk(KERN_INFO "Caution: unloading tcp_diag is not very well supported. Nothing to worry, but yet.\n");

	if (tcpnl == NULL)
		return;

	sock_release(tcpnl->socket);
}
module_init(tcpdiag_init);
module_exit(tcpdiag_exit);
/*
* Local variables:
* compile-command: "gcc -DMOPS -DMODULE -D__KERNEL__ -I../include -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -c tcp_diag.c"
* End:
*/

View File

@ -0,0 +1,46 @@
File: rt_cache_stat.dif
Apply to: kernel < 2.4.7
Status: recommended for kernels < 2.4.7.
already present in >= 2.4.7
Description: tracing efficiency of routing cache
Side effects: none
File: pidentd-3.0.12.dif
Apply to: pidentd-3.0.12 tree, e.g. from a Red Hat RPM
Status: highly recommended
Description: Patch to pidentd allowing to use tcpdiag facility and fixing
some bugs in original pident.
Side effects: none. Does not break anything, regardless of kernel version,
even if tcpdiag is absent.
Advice: not related to this patch but should be said yet.
Do NOT configure pidentd to use threads! Use option
"--without-threads" when doing "configure".
pidentd is typical example of application where
threading results in nothing but collapse of performance.
Apparently author learned thread programming and decided
to apply new knowledge to the first victim.
File: symbol_exports.dif
Apply to: kernel < 2.4.17
Status: desired for kernels < 2.4.17
not needed for kernels >= 2.4.17
Description: exports symbols required to load tcpdiag module
tcpdiag is builtin since 2.4.17, hence the exports
are redundant.
Side effects: none
File: af_unix.dif
Apply to: kernel
Status: recommended
Description: implements fragmented skbs for unix sockets, reducing
vm pressure for datagram sockets and adds to /proc/net/unix
columns allowing to monitor recv/send memory and identify
peer of connected sockets.
Side effects: "lsof" blames something about unix sockets.
Not a big loss, lsof is not able to tell anything more
clever than "can't identify protocol" for sockets anyway.
Note: the patch touches an area where one or two lines changed
several times during 2.4. It does not depend on those changes,
but unfortunately it may be rejected. It applies cleanly to
2.4.17.

View File

@ -0,0 +1,401 @@
diff -ur ../vger3-011229/linux/net/unix/af_unix.c linux/net/unix/af_unix.c
--- ../vger3-011229/linux/net/unix/af_unix.c Mon Dec 3 20:24:03 2001
+++ linux/net/unix/af_unix.c Sat Jan 5 04:30:19 2002
@@ -112,6 +112,7 @@
#include <asm/checksum.h>
int sysctl_unix_max_dgram_qlen = 10;
+int sysctl_unix_stream_pages = MAX_SKB_FRAGS;
unix_socket *unix_socket_table[UNIX_HASH_SIZE+1];
rwlock_t unix_table_lock = RW_LOCK_UNLOCKED;
@@ -1123,9 +1124,6 @@
struct scm_cookie scm;
memset(&scm, 0, sizeof(scm));
unix_detach_fds(&scm, skb);
-
- /* Alas, it calls VFS */
- /* So fscking what? fput() had been SMP-safe since the last Summer */
scm_destroy(&scm);
sock_wfree(skb);
}
@@ -1140,6 +1138,67 @@
scm->fp = NULL;
}
+int datagram_copy_fromiovec(struct iovec *iov, struct sk_buff *skb, int size)
+{
+ struct sock *sk;
+ struct sk_buff **tail, *skb1;
+ int copy = min_t(int, size, skb_tailroom(skb));
+
+ if (memcpy_fromiovec(skb_put(skb, copy), iov, copy))
+ goto do_fault;
+
+ if ((size -= copy) == 0)
+ return 0;
+
+ sk = skb->sk;
+ skb1 = skb;
+ tail = &skb_shinfo(skb)->frag_list;
+
+ do {
+ struct page *page;
+ int i = skb_shinfo(skb1)->nr_frags;
+
+ if (i == MAX_SKB_FRAGS) {
+ skb1 = alloc_skb(0, sk->allocation);
+ if (skb1 == NULL)
+ goto do_oom;
+ *tail = skb1;
+ tail = &skb1->next;
+ i = 0;
+ skb->truesize += skb1->truesize;
+ atomic_add(skb1->truesize, &sk->wmem_alloc);
+ }
+
+ page = alloc_pages(sk->allocation, 0);
+ if (page == NULL)
+ goto do_oom;
+
+ copy = min_t(int, size, PAGE_SIZE);
+ skb_shinfo(skb1)->nr_frags=i+1;
+ skb_shinfo(skb1)->frags[i].page = page;
+ skb_shinfo(skb1)->frags[i].page_offset = 0;
+ skb_shinfo(skb1)->frags[i].size = copy;
+
+ skb1->len += copy;
+ skb1->data_len += copy;
+ if (skb != skb1) {
+ skb->len += copy;
+ skb->data_len += copy;
+ }
+ skb->truesize += PAGE_SIZE;
+ atomic_add(PAGE_SIZE, &sk->wmem_alloc);
+ if (memcpy_fromiovec(page_address(page), iov, copy))
+ goto do_fault;
+ } while ((size -= copy) > 0);
+ return 0;
+
+do_oom:
+ return -ENOMEM;
+
+do_fault:
+ return -EFAULT;
+}
+
/*
* Send AF_UNIX data.
*/
@@ -1155,6 +1214,7 @@
unsigned hash;
struct sk_buff *skb;
long timeo;
+ int alloc;
err = -EOPNOTSUPP;
if (msg->msg_flags&MSG_OOB)
@@ -1178,10 +1238,14 @@
goto out;
err = -EMSGSIZE;
- if ((unsigned)len > sk->sndbuf - 32)
+ if ((unsigned)len > sk->sndbuf)
goto out;
- skb = sock_alloc_send_skb(sk, len, msg->msg_flags&MSG_DONTWAIT, &err);
+ alloc = len;
+ if (alloc > SKB_MAX_HEAD(0))
+ alloc = SKB_MAX_HEAD(0);
+
+ skb = sock_alloc_send_skb(sk, alloc, msg->msg_flags&MSG_DONTWAIT, &err);
if (skb==NULL)
goto out;
@@ -1190,7 +1254,7 @@
unix_attach_fds(scm, skb);
skb->h.raw = skb->data;
- err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
+ err = datagram_copy_fromiovec(msg->msg_iov, skb, len);
if (err)
goto out_free;
@@ -1275,74 +1339,57 @@
return err;
}
-
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int len,
struct scm_cookie *scm)
{
struct sock *sk = sock->sk;
unix_socket *other = NULL;
- struct sockaddr_un *sunaddr=msg->msg_name;
- int err,size;
struct sk_buff *skb;
+ int err;
int sent=0;
err = -EOPNOTSUPP;
if (msg->msg_flags&MSG_OOB)
goto out_err;
- if (msg->msg_namelen) {
- err = (sk->state==TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP);
+ err = -ENOTCONN;
+ other = unix_peer_get(sk);
+ if (!other)
goto out_err;
- } else {
- sunaddr = NULL;
- err = -ENOTCONN;
- other = unix_peer_get(sk);
- if (!other)
- goto out_err;
- }
if (sk->shutdown&SEND_SHUTDOWN)
goto pipe_err;
- while(sent < len)
- {
- /*
- * Optimisation for the fact that under 0.01% of X messages typically
- * need breaking up.
- */
+ while(sent < len) {
+ int size, alloc;
- size=len-sent;
+ size = len-sent;
/* Keep two messages in the pipe so it schedules better */
- if (size > sk->sndbuf/2 - 64)
- size = sk->sndbuf/2 - 64;
+ if (size > sk->sndbuf/2)
+ size = sk->sndbuf/2;
- if (size > SKB_MAX_ALLOC)
- size = SKB_MAX_ALLOC;
-
/*
* Grab a buffer
*/
-
- skb=sock_alloc_send_skb(sk,size,msg->msg_flags&MSG_DONTWAIT, &err);
+ alloc = size;
+
+ if (size > SKB_MAX_HEAD(0)) {
+ alloc = SKB_MAX_HEAD(0);
+ if (size > alloc + sysctl_unix_stream_pages*PAGE_SIZE)
+ size = alloc + sysctl_unix_stream_pages*PAGE_SIZE;
+ }
+
+ skb=sock_alloc_send_skb(sk,alloc,msg->msg_flags&MSG_DONTWAIT, &err);
if (skb==NULL)
goto out_err;
- /*
- * If you pass two values to the sock_alloc_send_skb
- * it tries to grab the large buffer with GFP_NOFS
- * (which can fail easily), and if it fails grab the
- * fallback size buffer which is under a page and will
- * succeed. [Alan]
- */
- size = min_t(int, size, skb_tailroom(skb));
-
memcpy(UNIXCREDS(skb), &scm->creds, sizeof(struct ucred));
if (scm->fp)
unix_attach_fds(scm, skb);
- if ((err = memcpy_fromiovec(skb_put(skb,size), msg->msg_iov, size)) != 0) {
+ if ((err = datagram_copy_fromiovec(msg->msg_iov, skb, size)) != 0) {
kfree_skb(skb);
goto out_err;
}
@@ -1418,13 +1465,10 @@
scm->creds = *UNIXCREDS(skb);
- if (!(flags & MSG_PEEK))
- {
+ if (!(flags & MSG_PEEK)) {
if (UNIXCB(skb).fp)
unix_detach_fds(scm, skb);
- }
- else
- {
+ } else {
/* It is questionable: on PEEK we could:
- do not return fds - good, but too simple 8)
- return fds, and do not return them on read (old strategy,
@@ -1483,13 +1527,10 @@
return timeo;
}
-
-
static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, int size,
int flags, struct scm_cookie *scm)
{
struct sock *sk = sock->sk;
- struct sockaddr_un *sunaddr=msg->msg_name;
int copied = 0;
int check_creds = 0;
int target;
@@ -1515,21 +1556,18 @@
down(&sk->protinfo.af_unix.readsem);
- do
- {
+ do {
int chunk;
struct sk_buff *skb;
skb=skb_dequeue(&sk->receive_queue);
- if (skb==NULL)
- {
+ if (skb==NULL) {
if (copied >= target)
break;
/*
* POSIX 1003.1g mandates this order.
*/
-
if ((err = sock_error(sk)) != 0)
break;
if (sk->shutdown & RCV_SHUTDOWN)
@@ -1551,60 +1589,44 @@
if (check_creds) {
/* Never glue messages from different writers */
- if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0) {
- skb_queue_head(&sk->receive_queue, skb);
- break;
- }
+ if (memcmp(UNIXCREDS(skb), &scm->creds, sizeof(scm->creds)) != 0)
+ goto out_put_back;
} else {
/* Copy credentials */
scm->creds = *UNIXCREDS(skb);
check_creds = 1;
}
- /* Copy address just once */
- if (sunaddr)
- {
- unix_copy_addr(msg, skb->sk);
- sunaddr = NULL;
- }
+ chunk = min_t(int, skb->len - sk->protinfo.af_unix.copied, size);
+ err = skb_copy_datagram_iovec(skb, sk->protinfo.af_unix.copied, msg->msg_iov, chunk);
+ if (err)
+ goto out_put_back;
- chunk = min_t(unsigned int, skb->len, size);
- if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
- skb_queue_head(&sk->receive_queue, skb);
- if (copied == 0)
- copied = -EFAULT;
- break;
- }
copied += chunk;
size -= chunk;
/* Mark read part of skb as used */
- if (!(flags & MSG_PEEK))
- {
- skb_pull(skb, chunk);
-
+ if (!(flags & MSG_PEEK)) {
if (UNIXCB(skb).fp)
unix_detach_fds(scm, skb);
/* put the skb back if we didn't use it up.. */
- if (skb->len)
- {
- skb_queue_head(&sk->receive_queue, skb);
- break;
- }
+ if ((sk->protinfo.af_unix.copied += chunk) < skb->len)
+ goto out_put_back;
+
+ sk->protinfo.af_unix.copied = 0;
kfree_skb(skb);
if (scm->fp)
break;
- }
- else
- {
+ } else {
/* It is questionable, see note in unix_dgram_recvmsg.
*/
if (UNIXCB(skb).fp)
scm->fp = scm_fp_dup(UNIXCB(skb).fp);
+out_put_back:
/* put message back and return */
skb_queue_head(&sk->receive_queue, skb);
break;
@@ -1676,10 +1698,12 @@
break;
}
+ down(&sk->protinfo.af_unix.readsem);
spin_lock(&sk->receive_queue.lock);
if((skb=skb_peek(&sk->receive_queue))!=NULL)
- amount=skb->len;
+ amount=skb->len - sk->protinfo.af_unix.copied;
spin_unlock(&sk->receive_queue.lock);
+ up(&sk->protinfo.af_unix.readsem);
err = put_user(amount, (int *)arg);
break;
}
@@ -1734,7 +1758,7 @@
int i;
unix_socket *s;
- len+= sprintf(buffer,"Num RefCount Protocol Flags Type St "
+ len+= sprintf(buffer,"Peer RcvQueue WMem Flags Type St "
"Inode Path\n");
read_lock(&unix_table_lock);
@@ -1742,10 +1766,10 @@
{
unix_state_rlock(s);
- len+=sprintf(buffer+len,"%p: %08X %08X %08X %04X %02X %5ld",
- s,
- atomic_read(&s->refcnt),
- 0,
+ len+=sprintf(buffer+len,"%08lX: %08X %08X %08X %04X %02X %5ld",
+ unix_peer(s) ? sock_i_ino(unix_peer(s)) : 0,
+ skb_queue_len(&s->receive_queue),
+ atomic_read(&s->wmem_alloc),
s->state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
s->type,
s->socket ?
diff -ur ../vger3-011229/linux/net/unix/sysctl_net_unix.c linux/net/unix/sysctl_net_unix.c
--- ../vger3-011229/linux/net/unix/sysctl_net_unix.c Tue Jan 30 21:20:16 2001
+++ linux/net/unix/sysctl_net_unix.c Sat Jan 5 04:10:58 2002
@@ -13,10 +13,14 @@
#include <linux/sysctl.h>
extern int sysctl_unix_max_dgram_qlen;
+extern int sysctl_unix_stream_pages;
ctl_table unix_table[] = {
{NET_UNIX_MAX_DGRAM_QLEN, "max_dgram_qlen",
&sysctl_unix_max_dgram_qlen, sizeof(int), 0600, NULL,
+ &proc_dointvec },
+ {NET_UNIX_STREAM_PAGES, "stream_pages",
+ &sysctl_unix_stream_pages, sizeof(int), 0600, NULL,
&proc_dointvec },
{0}
};

View File

@ -0,0 +1,270 @@
diff -ur ../pidentd-3.0.12-orig/src/k_linux.c ./src/k_linux.c
--- ../pidentd-3.0.12-orig/src/k_linux.c Sat Jan 12 00:44:05 2002
+++ ./src/k_linux.c Sat Nov 3 07:51:28 2001
@@ -26,12 +26,65 @@
#include "pidentd.h"
+#define NETLINK_TCPDIAG 4
+#define TCPDIAG_GETSOCK 18
+
+#include <linux/uio.h>
+#include <linux/netlink.h>
+
+/* Socket identity */
+struct tcpdiag_sockid
+{
+ __u16 tcpdiag_sport;
+ __u16 tcpdiag_dport;
+ __u32 tcpdiag_src[4];
+ __u32 tcpdiag_dst[4];
+ __u32 tcpdiag_if;
+ __u32 tcpdiag_cookie[2];
+#define TCPDIAG_NOCOOKIE (~0U)
+};
+
+/* Request structure */
+
+struct tcpdiagreq
+{
+ __u8 tcpdiag_family; /* Family of addresses. */
+ __u8 tcpdiag_src_len;
+ __u8 tcpdiag_dst_len;
+ __u8 tcpdiag_ext; /* Query extended information */
+
+ struct tcpdiag_sockid id;
+
+ __u32 tcpdiag_states; /* States to dump */
+ __u32 tcpdiag_dbs; /* Tables to dump (NI) */
+};
+
+struct tcpdiagmsg
+{
+ __u8 tcpdiag_family;
+ __u8 tcpdiag_state;
+ __u8 tcpdiag_timer;
+ __u8 tcpdiag_retrans;
+
+ struct tcpdiag_sockid id;
+
+ __u32 tcpdiag_expires;
+ __u32 tcpdiag_rqueue;
+ __u32 tcpdiag_wqueue;
+ __u32 tcpdiag_uid;
+ __u32 tcpdiag_inode;
+};
+
+
+int tcpdiag_fd = -1;
+
/*
** Make sure we are running on a supported OS version
*/
int
ka_init(void)
{
+ tcpdiag_fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_TCPDIAG);
return 0; /* We always succeed */
}
@@ -56,6 +109,144 @@
}
+
+int k_lookup_tcpdiag(struct kernel *kp)
+{
+ struct sockaddr_nl nladdr;
+ struct {
+ struct nlmsghdr nlh;
+ struct tcpdiagreq r;
+ } req;
+ struct msghdr msg;
+ char buf[8192];
+ struct iovec iov[1];
+ struct tcpdiagmsg *r;
+ static unsigned seqno = 123456;
+
+ memset(&nladdr, 0, sizeof(nladdr));
+ nladdr.nl_family = AF_NETLINK;
+
+ req.nlh.nlmsg_len = sizeof(req);
+ req.nlh.nlmsg_type = TCPDIAG_GETSOCK;
+ req.nlh.nlmsg_flags = NLM_F_REQUEST;
+ req.nlh.nlmsg_pid = 0;
+ req.nlh.nlmsg_seq = ++seqno;
+ memset(&req.r, 0, sizeof(req.r));
+ req.r.tcpdiag_family = AF_INET;
+ req.r.tcpdiag_states = ~0;
+
+ req.r.id.tcpdiag_dport = kp->remote.sin_port;
+ req.r.id.tcpdiag_sport = kp->local.sin_port;
+ req.r.id.tcpdiag_dst[0] = kp->remote.sin_addr.s_addr;
+ req.r.id.tcpdiag_src[0] = kp->local.sin_addr.s_addr;
+ req.r.id.tcpdiag_cookie[0] = TCPDIAG_NOCOOKIE;
+ req.r.id.tcpdiag_cookie[1] = TCPDIAG_NOCOOKIE;
+ kp->ruid = NO_UID;
+
+ iov[0] = (struct iovec){ &req, sizeof(req) };
+
+ msg = (struct msghdr) {
+ (void*)&nladdr, sizeof(nladdr),
+ iov, 1,
+ NULL, 0,
+ 0
+ };
+
+ if (sendmsg(tcpdiag_fd, &msg, 0) < 0) {
+ if (errno == ECONNREFUSED) {
+ close(tcpdiag_fd);
+ tcpdiag_fd = -1;
+ return 0;
+ }
+ syslog(LOG_ERR, "system error on tcpdiag sendmsg: %m");
+ return -1;
+ }
+
+ iov[0] = (struct iovec){ buf, sizeof(buf) };
+
+ while (1) {
+ int status;
+ struct nlmsghdr *h;
+
+ msg = (struct msghdr) {
+ (void*)&nladdr, sizeof(nladdr),
+ iov, 1,
+ NULL, 0,
+ 0
+ };
+
+ status = recvmsg(tcpdiag_fd, &msg, 0);
+
+ if (status < 0) {
+ if (errno == EINTR || errno == EAGAIN)
+ continue;
+ return -1;
+ }
+ if (status == 0) {
+ return -1;
+ }
+
+ h = (struct nlmsghdr*)buf;
+ while (NLMSG_OK(h, status)) {
+ int err;
+
+ if (/*h->nlmsg_pid != rth->local.nl_pid ||*/
+ h->nlmsg_seq != seqno)
+ goto skip_it;
+
+ if (h->nlmsg_type == NLMSG_DONE)
+ return -1;
+ if (h->nlmsg_type == NLMSG_ERROR) {
+ struct nlmsgerr *err = (struct nlmsgerr*)NLMSG_DATA(h);
+ if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
+ return -1;
+ } else {
+ errno = -err->error;
+ if (errno == ECONNREFUSED) {
+ close(tcpdiag_fd);
+ tcpdiag_fd = -1;
+ return 0;
+ }
+ if (errno != ENOENT)
+ syslog(LOG_ERR, "tcpdiag answers: %m");
+ }
+ return -1;
+ }
+
+ r = NLMSG_DATA(h);
+
+ /* Lookup _may_ return listening socket, if no
+ * better matches are found. */
+ if (r->id.tcpdiag_dport == kp->remote.sin_port &&
+ r->id.tcpdiag_dst[0] == kp->remote.sin_addr.s_addr) {
+ kp->ruid = r->tcpdiag_uid;
+ if (!r->tcpdiag_inode && !r->tcpdiag_uid) {
+ /* _NEVER_ return "root" for closed
+ * sockets. Otherwise people think
+ * that it is sysadmin who abuses their
+ * poor ircd. :-) */
+ syslog(LOG_NOTICE,
+ "Req for stale socket(%d) %d from %x/%d",
+ r->tcpdiag_state, ntohs(r->id.tcpdiag_sport),
+ r->id.tcpdiag_dst[0], ntohs(r->id.tcpdiag_dport));
+ return -1;
+ }
+ return 1;
+ }
+
+ return -1;
+
+skip_it:
+ h = NLMSG_NEXT(h, status);
+ }
+ if ((msg.msg_flags & MSG_TRUNC) || status) {
+ syslog(LOG_ERR, "truncated tcp_diag message");
+ return -1;
+ }
+ }
+}
+
+
int
ka_lookup(void *vp, struct kernel *kp)
{
@@ -64,16 +255,23 @@
long r_laddr, r_raddr, myladdr, myraddr;
int r_lport, r_rport, mylport, myrport;
int euid;
-
-
+
+ if (tcpdiag_fd >= 0) {
+ int res;
+ if ((res = k_lookup_tcpdiag(kp)) != 0)
+ return res;
+ syslog(LOG_ERR, "tcp_diag is not loaded, fallback to proc");
+ }
+
+
r_rport = ntohs(kp->remote.sin_port);
r_lport = ntohs(kp->local.sin_port);
r_raddr = kp->remote.sin_addr.s_addr;
r_laddr = kp->local.sin_addr.s_addr;
+ kp->ruid = NO_UID;
fp = (FILE *) vp;
- kp->ruid = NO_UID;
rewind(fp);
/* eat header */
@@ -82,13 +280,26 @@
while (fgets(buf, sizeof(buf)-1, fp) != NULL)
{
- if (sscanf(buf, "%*d: %lx:%x %lx:%x %*x %*x:%*x %*x:%*x %*x %d %*d %*d",
- &myladdr, &mylport, &myraddr, &myrport, &euid) == 5)
+ int state, ino;
+ if (sscanf(buf, "%*d: %x:%x %x:%x %x %*x:%*x %*x:%*x %*x %d %*d %u",
+ &myladdr, &mylport, &myraddr, &myrport,
+ &state, &euid, &ino) == 7)
{
if (myladdr == r_laddr && mylport == r_lport &&
myraddr == r_raddr && myrport == r_rport)
{
kp->euid = euid;
+ if (ino == 0 && euid == 0)
+ {
+ /* _NEVER_ return "root" for closed
+ * sockets. Otherwise people think
+ * that it is sysadmin who abuses their
+ * poor ircd. :-) */
+ syslog(LOG_NOTICE,
+ "Req for stale socket(%d) %d from %x/%d",
+ state, r_rport, r_raddr, r_lport);
+ return -1;
+ }
return 1;
}
}

View File

@ -0,0 +1,230 @@
--- linux/include/net/route.h.orig Tue Apr 17 07:25:48 2001
+++ linux/include/net/route.h Tue Jul 10 23:35:18 2001
@@ -14,6 +14,7 @@
* Alan Cox : Support for TCP parameters.
* Alexey Kuznetsov: Major changes for new routing code.
* Mike McLagan : Routing by source
+ * Robert Olsson : Added rt_cache statistics
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -90,6 +91,20 @@
__u32 o_packets;
__u32 i_bytes;
__u32 i_packets;
+};
+
+struct rt_cache_stat
+{
+ unsigned in_hit;
+ unsigned in_slow_tot;
+ unsigned in_slow_mc;
+ unsigned in_no_route;
+ unsigned in_brd;
+ unsigned in_martian_dst;
+ unsigned in_martian_src;
+ unsigned out_hit;
+ unsigned out_slow_tot;
+ unsigned out_slow_mc;
};
extern struct ip_rt_acct *ip_rt_acct;
--- linux/net/ipv4/route.c.orig Wed Mar 28 22:01:15 2001
+++ linux/net/ipv4/route.c Tue Jul 10 23:27:51 2001
@@ -52,6 +52,7 @@
* Tobias Ringstrom : Uninitialized res.type in ip_route_output_slow.
* Vladimir V. Ivanov : IP rule info (flowid) is really useful.
* Marc Boucher : routing by fwmark
+ * Robert Olsson : Added rt_cache statistics
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -201,6 +202,8 @@
static unsigned rt_hash_mask;
static int rt_hash_log;
+struct rt_cache_stat rt_cache_stat[NR_CPUS];
+
static int rt_intern_hash(unsigned hash, struct rtable * rth, struct rtable ** res);
static __inline__ unsigned rt_hash_code(u32 daddr, u32 saddr, u8 tos)
@@ -270,6 +273,44 @@
len = length;
return len;
}
+
+
+#ifdef CONFIG_PROC_FS
+static int rt_cache_stat_get_info(char *buffer, char **start, off_t offset, int length)
+{
+ int i, lcpu;
+ int len=0;
+ unsigned int dst_entries = atomic_read(&ipv4_dst_ops.entries);
+
+ for (lcpu=0; lcpu<smp_num_cpus; lcpu++) {
+ i = cpu_logical_map(lcpu);
+
+ len += sprintf(buffer+len, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ dst_entries,
+ rt_cache_stat[i].in_hit,
+ rt_cache_stat[i].in_slow_tot,
+ rt_cache_stat[i].in_slow_mc,
+ rt_cache_stat[i].in_no_route,
+ rt_cache_stat[i].in_brd,
+ rt_cache_stat[i].in_martian_dst,
+ rt_cache_stat[i].in_martian_src,
+
+ rt_cache_stat[i].out_hit,
+ rt_cache_stat[i].out_slow_tot,
+ rt_cache_stat[i].out_slow_mc
+ );
+ }
+ len -= offset;
+
+ if (len > length)
+ len = length;
+ if (len < 0)
+ len = 0;
+
+ *start = buffer + offset;
+ return len;
+}
+#endif
static __inline__ void rt_free(struct rtable *rt)
{
@@ -1163,6 +1204,8 @@
u32 spec_dst;
struct in_device *in_dev = in_dev_get(dev);
u32 itag = 0;
+ int cpu = smp_processor_id();
+
/* Primary sanity checks. */
@@ -1221,6 +1264,7 @@
if (!LOCAL_MCAST(daddr) && IN_DEV_MFORWARD(in_dev))
rth->u.dst.input = ip_mr_input;
#endif
+ rt_cache_stat[cpu].in_slow_mc++;
in_dev_put(in_dev);
hash = rt_hash_code(daddr, saddr^(dev->ifindex<<5), tos);
@@ -1259,6 +1303,7 @@
u32 spec_dst;
int err = -EINVAL;
int free_res = 0;
+ int cpu = smp_processor_id();
/*
* IP on this device is disabled.
@@ -1308,6 +1353,8 @@
}
free_res = 1;
+ rt_cache_stat[cpu].in_slow_tot++;
+
#ifdef CONFIG_IP_ROUTE_NAT
/* Policy is applied before mapping destination,
but rerouting after map should be made with old source.
@@ -1455,6 +1502,7 @@
}
flags |= RTCF_BROADCAST;
res.type = RTN_BROADCAST;
+ rt_cache_stat[cpu].in_brd++;
local_input:
rth = dst_alloc(&ipv4_dst_ops);
@@ -1498,6 +1546,7 @@
goto intern;
no_route:
+ rt_cache_stat[cpu].in_no_route++;
spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
res.type = RTN_UNREACHABLE;
goto local_input;
@@ -1506,6 +1555,7 @@
* Do not cache martian addresses: they should be logged (RFC1812)
*/
martian_destination:
+ rt_cache_stat[cpu].in_martian_dst++;
#ifdef CONFIG_IP_ROUTE_VERBOSE
if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit())
printk(KERN_WARNING "martian destination %u.%u.%u.%u from %u.%u.%u.%u, dev %s\n",
@@ -1520,6 +1570,8 @@
goto done;
martian_source:
+
+ rt_cache_stat[cpu].in_martian_src++;
#ifdef CONFIG_IP_ROUTE_VERBOSE
if (IN_DEV_LOG_MARTIANS(in_dev) && net_ratelimit()) {
/*
@@ -1550,6 +1602,7 @@
struct rtable * rth;
unsigned hash;
int iif = dev->ifindex;
+ int cpu = smp_processor_id();
tos &= IPTOS_RT_MASK;
hash = rt_hash_code(daddr, saddr^(iif<<5), tos);
@@ -1567,6 +1620,7 @@
rth->u.dst.lastuse = jiffies;
dst_hold(&rth->u.dst);
rth->u.dst.__use++;
+ rt_cache_stat[cpu].in_hit++;
read_unlock(&rt_hash_table[hash].lock);
skb->dst = (struct dst_entry*)rth;
return 0;
@@ -1621,6 +1675,7 @@
int free_res = 0;
int err;
u32 tos;
+ int cpu = smp_processor_id();
tos = oldkey->tos & (IPTOS_RT_MASK|RTO_ONLINK);
key.dst = oldkey->dst;
@@ -1847,14 +1902,18 @@
rth->u.dst.output=ip_output;
+ rt_cache_stat[cpu].out_slow_tot++;
+
if (flags&RTCF_LOCAL) {
rth->u.dst.input = ip_local_deliver;
rth->rt_spec_dst = key.dst;
}
if (flags&(RTCF_BROADCAST|RTCF_MULTICAST)) {
rth->rt_spec_dst = key.src;
- if (flags&RTCF_LOCAL && !(dev_out->flags&IFF_LOOPBACK))
+ if (flags&RTCF_LOCAL && !(dev_out->flags&IFF_LOOPBACK)) {
rth->u.dst.output = ip_mc_output;
+ rt_cache_stat[cpu].out_slow_mc++;
+ }
#ifdef CONFIG_IP_MROUTE
if (res.type == RTN_MULTICAST) {
struct in_device *in_dev = in_dev_get(dev_out);
@@ -1894,6 +1953,7 @@
{
unsigned hash;
struct rtable *rth;
+ int cpu = smp_processor_id();
hash = rt_hash_code(key->dst, key->src^(key->oif<<5), key->tos);
@@ -1912,6 +1972,7 @@
rth->u.dst.lastuse = jiffies;
dst_hold(&rth->u.dst);
rth->u.dst.__use++;
+ rt_cache_stat[cpu].out_hit++;
read_unlock_bh(&rt_hash_table[hash].lock);
*rp = rth;
return 0;
@@ -2339,6 +2400,7 @@
add_timer(&rt_periodic_timer);
proc_net_create ("rt_cache", 0, rt_cache_get_info);
+ proc_net_create ("rt_cache_stat", 0, rt_cache_stat_get_info);
#ifdef CONFIG_NET_CLS_ROUTE
create_proc_read_entry("net/rt_acct", 0, 0, ip_rt_acct_read, NULL);
#endif

View File

@ -0,0 +1,56 @@
diff -ur ../vger3-010830/linux/net/ipv6/tcp_ipv6.c linux/net/ipv6/tcp_ipv6.c
--- ../vger3-010830/linux/net/ipv6/tcp_ipv6.c Wed Jun 13 21:14:05 2001
+++ linux/net/ipv6/tcp_ipv6.c Fri Oct 12 06:59:07 2001
@@ -339,13 +339,18 @@
return tcp_v6_lookup_listener(daddr, hnum, dif);
}
-#define tcp_v6_lookup(sa, sp, da, dp, dif) \
-({ struct sock *___sk; \
- local_bh_disable(); \
- ___sk = __tcp_v6_lookup((sa),(sp),(da),ntohs(dp),(dif)); \
- local_bh_enable(); \
- ___sk; \
-})
+__inline__ struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
+ struct in6_addr *daddr, u16 dport,
+ int dif)
+{
+ struct sock *sk;
+
+ local_bh_disable();
+ sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
+ local_bh_enable();
+
+ return sk;
+}
/*
diff -ur ../vger3-010830/linux/net/netsyms.c linux/net/netsyms.c
--- ../vger3-010830/linux/net/netsyms.c Sun Aug 19 22:01:45 2001
+++ linux/net/netsyms.c Fri Oct 12 07:59:17 2001
@@ -72,6 +72,11 @@
extern int netdev_finish_unregister(struct net_device *dev);
+extern struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
+ struct in6_addr *daddr, u16 dport,
+ int dif);
+extern struct sock *tcp_v4_lookup(u32 saddr, u16 sport, u32 daddr, u16 dport, int dif);
+
#include <linux/rtnetlink.h>
#ifdef CONFIG_IPX_MODULE
@@ -284,7 +289,11 @@
EXPORT_SYMBOL(ndisc_mc_map);
EXPORT_SYMBOL(register_inet6addr_notifier);
EXPORT_SYMBOL(unregister_inet6addr_notifier);
+EXPORT_SYMBOL(tcp_v6_lookup);
#endif
+EXPORT_SYMBOL(tcp_v4_lookup);
+EXPORT_SYMBOL(tcp_timewait_cachep);
+EXPORT_SYMBOL(tcp_hashinfo);
#if defined (CONFIG_IPV6_MODULE) || defined (CONFIG_KHTTPD) || defined (CONFIG_KHTTPD_MODULE)
/* inet functions common to v4 and v6 */
EXPORT_SYMBOL(inet_release);

65
README
View File

@ -0,0 +1,65 @@
Primary FTP site is:
ftp://ftp.inr.ac.ru/ip-routing/
Mirrors are:
ftp://linux.wauug.org/pub/net
ftp://ftp.nc.ras.ru/pub/mirrors/ftp.inr.ac.ru/ip-routing/
ftp://ftp.gts.cz/MIRRORS/ftp.inr.ac.ru/
ftp://ftp.funet.fi/pub/mirrors/ftp.inr.ac.ru/ip-routing/ (STM1 to USA)
ftp://sunsite.icm.edu.pl/pub/Linux/iproute/
ftp://ftp.sunet.se/pub/Linux/ip-routing/
ftp://ftp.nvg.ntnu.no/pub/linux/ip-routing/
ftp://ftp.crc.ca/pub/systems/linux/ip-routing/
ftp://ftp.proxad.net/mirrors/ftp.inr.ac.ru/ip-routing/
ftp://donlug.dn.ua/pub/mirrors/ip-routing/
ftp://omni.rk.tusur.ru/mirrors/ftp.inr.ac.ru/ip-routing/
ftp://ftp.src.uchicago.edu/pub/linux/ip-routing/
http://www.asit.ro/ip-routing/
ftp://ftp.infoscience.co.jp/pub/linux/ip-routing/ (Japan)
ftp://ftp.sucs.swan.ac.uk/pub/mirrors/ftp.inr.ac.ru/ip-routing
http://mirror.schell.de/ftp.inr.ac.ru/ip-routing/ (Germany)
ftp://ftp.gin.cz/MIRRORS/ftp.inr.ac.ru/ip-routing
ftp://mirror.aarnet.edu.au/pub/ip-routing/ (Australia)
http://mirror.aarnet.edu.au/pub/ip-routing/ (Australia)
RPMs are available at:
ftp://omni.rk.tusur.ru/Tango/
ftp://ftp4.dgtu.donetsk.ua/pub/BlackCat/6.0/contrib/SRPMS/i[35]86/
How to compile this.
--------------------
1. Look at start of Makefile and set correct values for:
KERNEL_INCLUDE should point to correct linux kernel include directory.
Default (/usr/src/linux/include) is right as a rule.
ADDLIB should contain inet_* functions, if your libc contains
obsolete resolver library (<4.9.4) and you have no correct libresolv.
ADDLIB should also contain dnet_* functions if you don't have a
libdnet with support for them. If your libdnet does have support,
then comment out that line and uncomment the line to add -ldnet to
LDLIBS.
LDLIBS should be empty, if you have no libresolv.
2. make
Utilities "ip" and "rtmon" are in ip/ directory now,
"tc" is in tc/. That's all.
3. To make documentation, cd to doc/ directory , then
look at start of Makefile and set correct values for
PAGESIZE=a4 , ie: a4 , letter ... (string)
PAGESPERPAGE=2 , ie: 1 , 2 ... (numeric)
and make there. It assumes, that latex, dvips and psnup
are in your path.
Alexey Kuznetsov
kuznet@ms2.inr.ac.ru

View File

@ -0,0 +1,41 @@
Here are a few quick points about DECnet support...
o No name resolution is available as yet, all addresses must be
entered numerically.
o The neighbour cache may well list every entry as having the address
0.170. This is due to a problem that I need to sort out kernel side.
It is harmless (but don't try and use neigh add yet) just look in
/proc/net/decnet_neigh to see the real addresses for now.
o The rtnetlink support in the kernel is rather experimental; expect a
few odd things to happen for the next few DECnet kernel releases.
o Whilst you can use ip addr add to add more than one DECnet address to an
interface, don't expect addresses which are not the same as the
kernel's node address to work properly, i.e. you will break the DECnet
protocol if you do add anything other than the automatically generated
interface addresses to ethernet cards. This option is there for future
link layer support, where the device will have to be configured for
DECnet explicitly.
o The DECnet support is currently self contained. You do not need the
libdnet library to use it. In fact until I've sent the dnet_pton and
dnet_ntop functions to Patrick to add, you can't use libdnet.
o If you are not using the very latest 2.3.xx series kernels, don't
try and list DECnet routes if you've got IPv6 compiled into the
kernel. It will oops.
o My main reason for writing the DECnet support for iproute2 was to
check out the DECnet routing code, so the route get and
route show cache commands are likely to be the most debugged out of
all of them.
o If you find bugs in the DECnet support, please send them to me in the
first instance, and then I'll send Alexey a patch to fix it. IPv4/6
bugs should be sent to Alexey as before.
Steve Whitehouse <SteveW@ACM.org>

View File

@ -0,0 +1,119 @@
iproute2+tc*
It's the first release of Linux traffic control engine.
NOTES.
* csz scheduler is inoperational at the moment, and probably
never will be repaired but replaced with h-pfq scheduler.
* To use "fw" classifier you will need ipfwchains patch.
* No manual available. Ask me, if you have problems (only try to guess
answer yourself at first 8)).
Micro-manual how to start it the first time
-------------------------------------------
A. Attach CBQ to eth1:
tc qdisc add dev eth1 root handle 1: cbq bandwidth 10Mbit allot 1514 cell 8 \
avpkt 1000 mpu 64
B. Add root class:
tc class add dev eth1 parent 1:0 classid 1:1 cbq bandwidth 10Mbit rate 10Mbit \
allot 1514 cell 8 weight 1Mbit prio 8 maxburst 20 avpkt 1000
C. Add default interactive class:
tc class add dev eth1 parent 1:1 classid 1:2 cbq bandwidth 10Mbit rate 1Mbit \
allot 1514 cell 8 weight 100Kbit prio 3 maxburst 20 avpkt 1000 split 1:0 \
defmap c0
D. Add default class:
tc class add dev eth1 parent 1:1 classid 1:3 cbq bandwidth 10Mbit rate 8Mbit \
allot 1514 cell 8 weight 800Kbit prio 7 maxburst 20 avpkt 1000 split 1:0 \
defmap 3f
etc. etc. etc. Well, it is enough to start 8) The rest can be guessed 8)
Look also at more elaborated example, ready to start rsvpd,
in rsvp/cbqinit.eth1.
Terminology and advice about setting CBQ parameters may be found in Sally Floyd's
papers.
Pairs X:Y are class handles, X:0 are qdisc handles.
weight should be proportional to rate for leaf classes
(I chose it to be ten times less than the rate, but this is not necessary)
defmap is bitmap of logical priorities served by this class.
E. Other qdiscs are simpler. For example, let's attach TBF to class 1:2
tc qdisc add dev eth1 parent 1:2 tbf rate 64Kbit buffer 5Kb/8 limit 10Kb
F. Look at all that we created:
tc qdisc ls dev eth1
tc class ls dev eth1
G. Install "route" classifier on root of cbq and map destination from realm
1 to class 1:2
tc filter add dev eth1 parent 1:0 protocol ip prio 100 route to 1 classid 1:2
H. Assign routes to 10.11.12.0/24 to realm 1
ip route add 10.11.12.0/24 dev eth1 via whatever realm 1
etc. The same thing can be made with rules.
I still did not test ipchains, but they should work too.
Setup of rsvp and u32 classifiers is more hairy.
If you read the RSVP specs, you will easily understand how the rsvp classifier
works. As for u32... here is an example:
#! /bin/sh
TC=/home/root/tc
# Setup classifier root on eth1 root (it is cbq)
$TC filter add dev eth1 parent 1:0 prio 5 protocol ip u32
# Create hash table of 256 slots with ID 1:
$TC filter add dev eth1 parent 1:0 prio 5 handle 1: u32 divisor 256
# Add to 6th slot of hash table rule to select tcp/telnet to 193.233.7.75
# direct it to class 1:4 and prescribe to fall to best effort,
# if traffic violate TBF (32kbit,5K)
$TC filter add dev eth1 parent 1:0 prio 5 u32 ht 1:6: \
match ip dst 193.233.7.75 \
match tcp dst 0x17 0xffff \
flowid 1:4 \
police rate 32kbit buffer 5kb/8 mpu 64 mtu 1514 index 1
# Add to 1th slot of hash table rule to select icmp to 193.233.7.75
# direct it to class 1:4 and prescribe to fall to best effort,
# if traffic violate TBF (10kbit,5K)
$TC filter add dev eth1 parent 1:0 prio 5 u32 ht 1:: \
sample ip protocol 1 0xff \
match ip dst 193.233.7.75 \
flowid 1:4 \
police rate 10kbit buffer 5kb/8 mpu 64 mtu 1514 index 2
# Lookup hash table, if it is not fragmented frame
# Use protocol as hash key
$TC filter add dev eth1 parent 1:0 prio 5 handle ::1 u32 ht 800:: \
match ip nofrag \
offset mask 0x0F00 shift 6 \
hashkey mask 0x00ff0000 at 8 \
link 1:
Alexey Kuznetsov
kuznet@ms2.inr.ac.ru

168
RELNOTES
View File

@ -0,0 +1,168 @@
[020116]
! 1. Compile with rh-7.2
! 2. What the hell some people blame on socklen_t defined in unistd.h? Check.
* Kim Woelders <kim@woelders.dk>, various useful fixups: compilation
with old kernels, cross-compiling, "all" == "any" in prefix spec.
* Collected from my disk, cleaned and packed to directory iproute2/misc/
several utilities: ss, nstat, ifstat, rtacct, arpd and module tcp_diag.
Writing some docs. me.
* prepared patchlet for pidentd to use tcp_diag.
* David Miller: 64bit (and even worse 64bit kernel/32 bit user :-) fixes
to above. tcp_diag is merged to main tree.
* Alexandr D. Kanevskiy <kad@blackcatlinux.com>: various flaws in ss
* Alexandr D. Kanevskiy <kad@blackcatlinux.com>: oops, more aggressive caching
of names opened old bugs: ip started to print garbage in some places.
* Robert Olsson, rt_cache_stat. Renamed to rtstat.
* An old bug in "ip maddr ls": redundant empty lines in output.
Seeing this crap for ages but lucky match of desire/ability to repair
and a huff about this happened only today. :-)
* "Mr. James W. Laferriere" <babydr@baby-dragons.com>
doc: option to produce ps output for non-a4 and not only 2 pages/sheet.
* Jamal's patch for ingress qdisc.
* Bernd Eckenfels <ecki@lina.inka.de>: deleted orphaned bogus #include
in include/utils.h.
* Julian Anastasov <ja@ssi.bg>: uninitialized fields in nexthop
producing funny "dead" nexthops in multipath routes.
Stupid me, look at the first line in [010803]... Was it difficult to guess
this that time? People blame for several months. :-)
Special thanks to bert hubert <ahu@ds9a.nl> who raised the issue in netdev.
Thanks and apologies to Terry Schmidt <terry@nycwireless.net>,
Ruben Puettmann <ruben.puettmann@freenet-ag.de>,
Mark Ivens <mivens@clara.net>.
* willy tarreau <wtarreau@yahoo.fr>: "make install" target.
* Tunable limit for sch_sfq. Patch to kernel activating this
is about to be submitted. Reminded by Adi Nugroho <Adi@iNterNUX.co.id>.
[010824]
* ip address add sets scope of loopback addresses to "host".
Advised by David Miller.
* ZIP! <zip@killerlabs.com> and David Ford <david@blue-labs.org>
Some strcpy's changed to strncpy's.
* David Ford <david@blue-labs.org>, test for compilation with gcc3.
* David Ford <david@blue-labs.org>. Damn, I broke rtnl_talk in previous
snapshot.
[010803]
* If "dev" is not specified in multipath route, ifindex remained
uninitialized. Grr. Thanks to Kunihiro Ishiguro <kunihiro@zebra.org>.
* Rafal Maszkowski <rzm@icm.edu.pl>, batch mode tc. The most old patch.
* Updates list of data protocol ids.
Lots of reporters. I bring my apologies.
* Jan Rekorajski <baggins@sith.mimuw.edu.pl>. Updated list of datalink types.
* Christina Chen <chenchristina@cwc.nus.edu.sg>. Bug in parsing IPv6 address match in u32.
* Pekka Savola <pekkas@netcore.fi>. ip -6 route flush dev lo stuck
on deleting root of the table.
* Werner. dsmark fixes.
* Alexander Demenshin <aldem-reply@aldem.net>. Old miraculous bug
in ip monitor. It was a puzzle; people kept complaining that
it prints some crap.
* Rui Prior <rprior@inescporto.pt>. f_route failed to resolve fromif.
Werner also noticed this and sent patch. Bad place... [RETHINK]
* Kim Woelders <kim@woelders.dk>.
- changes in Makefile for cross-compile
- understand "all" as alias for "any"
- bug in iprule.c
! [ NB. Also he sent patch for kernel. Do not forget! ]
* Werner. Fix to tc core files: wrong exits etc.
* Bernd Jendrissek <berndj@prism.co.za>. Some sanitizations of tc.c
!* Marian Jancar <marian.jancar@infonet.cz>. He say q_tbf prints wrong latency!
! Seems, he is wrong.
* Werner (and Nikolai Vladychevski <niko@isl.net.mx>) check ->print_copts
to avoid segfault.
[001007]
* Compiles under rh-7.0
[000928]
* Sorry. I have lost all the CVS with changes made since 000305.
If someone sent me a patch after this date, please, resubmit.
Restored from the last backup and mailboxes:
* Edit ip-cref.tex by raf <raf2@zip.com.au>.
* RTAX_REORDERING support.
* IFLA_MASTER support.
* Bug in rtnl_talk(), libnetlink.c. Reported by David P. Olshfski
<olshef@us.ibm.com>
[000305]
* Bugs in RESOLVE_HOSTNAMES. Bratislav Ilich <bilik@@zepter.ru>
* ARPHRD_IEEE802_TR
[000225]
* ECN in q_red.c.
[000221]
* diffserv update from Jamal Hadi Salim
* Some bits of IPX from Steve Whitehouse.
* ATM qdisc from Werner Almesberger
* Support for new attributes on routes in linux-2.3.
[991023]
No news, only several bugs are fixed.
* Since ss990630 "ip rule list" printed wrong prefix length.
Vladimir V. Ivanov <vlad@alis.tusur.ru>
* "ip rule" parsed >INT_MAX values of metric incorrectly.
Matthew G. Marsh <mgm@paktronix.com>
* Some improvements in doc/Makefile advised by
Andi Kleen and Werner Almesberger.
[990824]
* new attributes in "ip route": rtt, rttvar, cwnd, ssthresh and advmss.
* some updates in documentation to reflect new status.
[990630]
* DiffServ support.
Werner Almesberger <almesber@lrc.di.epfl.ch>
Jamal Hadi Salim <hadi@nortelnetworks.com>
* DECnet support.
Steve Whitehouse <SteveW@ACM.org>
* Some minor tweaks in docs and code.
[990530]
* routel script. Stephen R. van den Berg <srb@cuci.nl>
* Bug in tc/q_prio.c resetting priomap. Reported by
Ole Husgaard <sparre@login.dknet.dk> and
Jan Kasprzak <kas@informatics.muni.cz>
* IP command reference manual is published (ip-cref.tex).
I am sorry, but tc-cref.tex is still not ready, to be more
exact the draft does not describe current tc 8-)
* ip, rtmon, rtacct utilities are updated according to manual 8-)
Lots of changes:
- (MAIN) "flush" command for addr, neigh and route.
- error messages are sanitized; now it does not print
usage() page on each error.
- output format is improved.
- "oneline" mode is added.
- etc.
* Name databases; resolution ascii <-> numeric is split out to lib/*
* scripts ifcfg, ifone and rtpr.
* examples/dhcp-client-script is copied from my patch to ISC dhcp.
* Makefile in doc/ directory.
[990417]
* "pmtudisc" flag to "ip tunnel". Phil Karn <karn@ka9q.ampr.org>
* bug in tc/q_tbf.c preventing setting peak_rate, Martin Mares <mj@ucw.cz>
* doc/flowlabels.tex
[990329]
* This snapshot fixes some compatibility problems, which I introduced
occasionally to previous snapshots.
* Namely, "allot" to "tc qdisc add ... cbq" is accepted but ignored.
* Another changes are supposed to be shown in the next snapshot, but
because of troubles with "allot" I am forced to release premature
version. Namely, "cell", "prio", "weight" etc. are optional now.
* doc/ip-tunnels.tex
[990327]
* History was not recorded.
[981002]
* Rani Assaf <rani@magic.metawire.com> contributed resolving
addresses to names.
BEWARE! DO NOT USE THIS OPTION WHEN REPORTING BUGS IN
IPROUTE OR IN THE KERNEL. ALL BUG REPORTS MUST CONTAIN
ONLY NUMERIC ADDRESSES.
[981101]
* now it should compile for any libc.

View File

@ -0,0 +1,57 @@
PSFILES=ip-cref.ps ip-tunnels.ps api-ip6-flowlabels.ps ss.ps nstat.ps arpd.ps rtstat.ps
# tc-cref.ps
# api-rtnl.tex api-pmtudisc.tex api-news.tex
# iki-netdev.ps iki-neighdst.ps
LATEX=latex
DVIPS=dvips
SGML2DVI=sgml2latex --output=dvi
SGML2HTML=sgml2html -s 0
LPR=lpr -Zsduplex
SHELL=bash
PAGESIZE=a4
PAGESPERPAGE=2
HTMLFILES=$(subst .sgml,.html,$(shell echo *.sgml))
DVIFILES=$(subst .ps,.dvi,$(PSFILES))
all: pstwocol
pstwocol: $(PSFILES)
html: $(HTMLFILES)
dvi: $(DVIFILES)
print: $(PSFILES)
$(LPR) $(PSFILES)
%.dvi: %.sgml
$(SGML2DVI) $<
%.dvi: %.tex
@set -e; pass=2; echo "Running LaTeX $<"; \
while [ `$(LATEX) $< </dev/null 2>&1 | \
grep -c '^\(LaTeX Warning: Label(s) may\|No file \|! Emergency stop\)'` -ge 1 ]; do \
if [ $$pass -gt 3 ]; then \
echo "Seems, something is wrong. Try by hands." ; exit 1 ; \
fi; \
echo "Re-running LaTeX $<, $${pass}d pass"; pass=$$[$$pass + 1]; \
done
%.ps: %.dvi
$(DVIPS) $< -o $@.tmp
./do-psnup $@.tmp $@ $(PAGESIZE) $(PAGESPERPAGE)
rm -f $@.tmp
%.html: %.sgml
$(SGML2HTML) $<
install:
install -m 0644 $(shell echo *.tex) $(DESTDIR)$(DOCDIR)
install -m 0644 $(shell echo *.sgml) $(DESTDIR)$(DOCDIR)
clean:
rm -f *.aux *.log *.toc $(PSFILES) $(DVIFILES) *.html

View File

@ -0,0 +1,16 @@
Partially finished work.
1. User Reference manuals.
1.1 IP Command reference (ip-cref.tex, published)
1.2 TC Command reference (tc-cref.tex)
1.3 IP tunnels (ip-tunnels.tex, published)
2. Linux-2.2 Networking API
2.1 RTNETLINK (api-rtnl.tex)
2.2 Path MTU Discovery (api-pmtudisc.tex)
2.3 IPv6 Flow Labels (api-ip6-flowlabels.tex, published)
2.4 Miscellaneous extensions (api-misc.tex)
3. Linux-2.2 Networking Intra-Kernel Interfaces
3.1 NetDev --- Networking Devices and netdev... (iki-netdev.tex)
3.2 Neighbour cache and destination cache. (iki-neighdst.tex)

View File

@ -0,0 +1 @@
\def\Draft{020116}

View File

@ -0,0 +1,429 @@
\documentstyle[12pt,twoside]{article}
\def\TITLE{IPv6 Flow Labels}
\input preamble
\begin{center}
\Large\bf IPv6 Flow Labels in Linux-2.2.
\end{center}
\begin{center}
{ \large Alexey~N.~Kuznetsov } \\
\em Institute for Nuclear Research, Moscow \\
\verb|kuznet@ms2.inr.ac.ru| \\
\rm April 11, 1999
\end{center}
\vspace{5mm}
\tableofcontents
\section{Introduction.}
Every IPv6 packet carries 28 bits of flow information. RFC2460 splits
these bits into two fields: 8 bits of traffic class (or DS field, if you
prefer this term) and 20 bits of flow label. Currently there exists
no well-defined API to manage IPv6 flow information. In this document
I describe an attempt to design the API for the Linux-2.2 IPv6 stack.
\vskip 1mm
The API must solve the following tasks:
\begin{enumerate}
\item To allow user to set traffic class bits.
\item To allow user to read traffic class bits of received packets.
This feature is not so useful as the first one, however it will be
necessary f.e.\ to implement ECN [RFC2481] for datagram oriented services
or to implement receiver side of SRP or another end-to-end protocol
using traffic class bits.
\item To assign flow labels to packets sent by user.
\item To get flow labels of received packets. I do not know
any applications of this feature, but it is possible that receiver will
want to use flow labels to distinguish sub-flows.
\item To allocate flow labels in the way, compliant to RFC2460. Namely:
\begin{itemize}
\item
Flow labels must be uniformly distributed (pseudo-)random numbers,
so that any subset of 20 bits can be used as hash key.
\item
Flows with coinciding source address and flow label must have identical
destination address and not-fragmentable extensions headers (i.e.\
hop by hop options and all the headers up to and including routing header,
if it is present.)
\begin{NB}
There is a hole in the specs: some hop-by-hop options can be
defined only on a per-packet basis (f.e.\ jumbo payload option).
Essentially, it means that such options cannot be present in packets
with flow labels.
\end{NB}
\begin{NB}
NB notes here and below reflect only my personal opinion,
they should be read with a smile or should not be read at all :-).
\end{NB}
\item
Flow labels have a finite lifetime, and the source is not allowed to reuse
a flow label for another flow until the maximal lifetime has expired,
so that intermediate nodes will be able to invalidate flow state before
the label is taken over by another flow.
Flow state, including lifetime, is propagated along datagram path
by some application specific methods
(f.e.\ in RSVP PATH messages or in some hop-by-hop option).
\end{itemize}
\end{enumerate}
\section{Sending/receiving flow information.}
\paragraph{Discussion.}
\addcontentsline{toc}{subsection}{Discussion}
It was proposed (Where? I do not remember any explicit statement)
to solve the first four tasks using
\verb|sin6_flowinfo| field added to \verb|struct| \verb|sockaddr_in6|
(see RFC2553).
\begin{NB}
This method is difficult to consider as reasonable, because it
puts additional overhead to all the services, despite of only
very small subset of them (none, to be more exact) really use it.
It contradicts both to IETF spirit and the letter. Before RFC2553
one justification existed, IPv6 address alignment left 4 byte
hole in \verb|sockaddr_in6| in any case. Now it has no justification.
\end{NB}
We have two problems with this method. The first one is common to all OSes:
if \verb|recvmsg()| initializes \verb|sin6_flowinfo| to the flow info
of the received packet, we lose one very important property of the BSD socket API,
namely, we can no longer use the received address for a reply directly
and have to mangle it, even if we are not interested in flowinfo subtleties.
\begin{NB}
RFC2553 adds a new requirement: to clear \verb|sin6_flowinfo|.
Certainly, it is not a solution but rather an attempt to force applications
to do unnecessary work. Well, as usual, one mistake in design
is followed by attempts to patch the hole and more mistakes...
\end{NB}
Another problem is Linux specific. Historically Linux IPv6 did not
initialize \verb|sin6_flowinfo| at all, so that, if kernel does not
support flow labels, this field is not zero, but a random number.
Some applications also did not take care about it.
\begin{NB}
Following RFC2553 such applications can be considered as broken,
but I still think that they are right: clearing all the address
before filling known fields is robust but stupid solution.
Useless wasting CPU cycles and
memory bandwidth is not a good idea. Such patches are acceptable
as temporary hacks, but not as standard of the future.
\end{NB}
\paragraph{Implementation.}
\addcontentsline{toc}{subsection}{Implementation}
By default Linux IPv6 does not read \verb|sin6_flowinfo| field
assuming that common applications are not obliged to initialize it
and are permitted to consider it as pure alignment padding.
In order to tell kernel that application
is aware of this field, it is necessary to set socket option
\verb|IPV6_FLOWINFO_SEND|.
\begin{verbatim}
int on = 1;
setsockopt(sock, SOL_IPV6, IPV6_FLOWINFO_SEND,
(void*)&on, sizeof(on));
\end{verbatim}
Linux kernel never fills \verb|sin6_flowinfo| field, when passing
message to user space, though the kernels which support flow labels
initialize it to zero. If user wants to get received flowinfo, he
will set option \verb|IPV6_FLOWINFO| and after this he will receive
flowinfo as ancillary data object of type \verb|IPV6_FLOWINFO|
(cf.\ RFC2292).
\begin{verbatim}
int on = 1;
setsockopt(sock, SOL_IPV6, IPV6_FLOWINFO, (void*)&on, sizeof(on));
\end{verbatim}
Flowinfo received and latched by a connected TCP socket also may be fetched
with \verb|getsockopt()| \verb|IPV6_PKTOPTIONS| together with
another optional information.
Besides that, in the spirit of RFC2292 the option \verb|IPV6_FLOWINFO|
may be used as alternative way to send flowinfo with \verb|sendmsg()| or
to latch it with \verb|IPV6_PKTOPTIONS|.
\paragraph{Note about IPv6 options and destination address.}
\addcontentsline{toc}{subsection}{IPv6 options and destination address}
If \verb|sin6_flowinfo| contains a non-zero flow label,
destination address in \verb|sin6_addr| and non-fragmentable
extension headers are ignored. Instead, kernel uses the values
cached at flow setup (see below). However, for connected sockets
kernel prefers the values set at connection time.
\paragraph{Example.}
\addcontentsline{toc}{subsection}{Example}
After setting socket option \verb|IPV6_FLOWINFO|
flowlabel and DS field are received as ancillary data object
of type \verb|IPV6_FLOWINFO| and level \verb|SOL_IPV6|.
In the cases when it is convenient to use \verb|recvfrom(2)|,
it is possible to replace library variant with your own one,
sort of:
\begin{verbatim}
#include <sys/socket.h>
#include <netinet/in6.h>
/*
 * Replacement for recvfrom(2) which reports the flowinfo of the
 * received packet in sin6_flowinfo of the returned address.
 *
 * Flowinfo is taken from the SOL_IPV6/IPV6_FLOWINFO ancillary object;
 * when no such object is delivered, sin6_flowinfo is left zero.
 * addr is written as struct sockaddr_in6, so it must point to
 * a buffer at least that large and *addrlen must hold its size.
 *
 * Returns the number of bytes received, or (size_t)-1 if recvmsg()
 * failed (errno is left as set by recvmsg()).
 */
size_t recvfrom(int fd, char *buf, size_t len, int flags,
                struct sockaddr *addr, int *addrlen)
{
        ssize_t cc;     /* signed: recvmsg() reports failure as -1 */
        char cbuf[128];
        struct cmsghdr *c;
        struct iovec iov = { buf, len };
        struct msghdr msg = { addr, *addrlen,
                              &iov, 1,
                              cbuf, sizeof(cbuf),
                              0 };

        cc = recvmsg(fd, &msg, flags);
        if (cc < 0)
                return (size_t)-1;  /* do not touch addr/cbuf on error */

        /* Default when the packet carries no flowinfo object. */
        ((struct sockaddr_in6*)addr)->sin6_flowinfo = 0;
        *addrlen = msg.msg_namelen;

        for (c=CMSG_FIRSTHDR(&msg); c; c = CMSG_NEXTHDR(&msg, c)) {
                if (c->cmsg_level != SOL_IPV6 ||
                    c->cmsg_type != IPV6_FLOWINFO)
                        continue;
                ((struct sockaddr_in6*)addr)->sin6_flowinfo = *(__u32*)CMSG_DATA(c);
        }
        return cc;
}
\end{verbatim}
\section{Flow label management.}
\paragraph{Discussion.}
\addcontentsline{toc}{subsection}{Discussion}
Requirements of RFC2460 are pretty tough. Particularly, lifetimes
longer than boot time require to store allocated labels at stable
storage, so that the full implementation necessarily includes user space flow
label manager. There are at least three different approaches:
\begin{enumerate}
\item {\bf ``Cooperative''. } We could leave flow label allocation wholly
to user space. When user needs label he requests manager directly. The approach
is valid, but as any ``cooperative'' approach it suffers from security problems.
\begin{NB}
One idea is to disallow not privileged user to allocate flow
labels, but instead to pass the socket to manager via \verb|SCM_RIGHTS|
control message, so that it will allocate label and assign it to socket
itself. Hmm... the idea is interesting.
\end{NB}
\item {\bf ``Indirect''.} Kernel redirects requests to user level daemon
and does not install label until the daemon acknowledged the request.
The approach is the most promising, it is especially pleasant to recognize
parallel with IPsec API [RFC2367,Craig]. Actually, it may share API with
IPsec.
\item {\bf ``Stupid''.} To allocate labels in kernel space. It is the simplest
method, but it suffers from two serious flaws: first,
we cannot lease labels with lifetimes longer than boot time; second,
it is sensitive to DoS attacks. The kernel has to remember all the obsolete
labels until their expiration, and a malicious user may quickly exhaust all the
flow label space.
\end{enumerate}
Certainly, I choose the most ``stupid'' method. It is the cheapest one
for implementor (i.e.\ me), and taking into account that flow labels
still have no serious applications it is not useful to work on more
advanced API, especially, taking into account that eventually we
will get it for no fee together with IPsec.
\paragraph{Implementation.}
\addcontentsline{toc}{subsection}{Implementation}
Socket option \verb|IPV6_FLOWLABEL_MGR| allows to
request flow label manager to allocate new flow label, to reuse
already allocated one or to delete old flow label.
Its argument is \verb|struct| \verb|in6_flowlabel_req|:
\begin{verbatim}
/* Argument of socket option IPV6_FLOWLABEL_MGR. */
struct in6_flowlabel_req
{
struct in6_addr flr_dst; /* destination address associated with label */
__u32 flr_label; /* label, network byte order; 0 - kernel allocates */
__u8 flr_action; /* requested operation, IPV6_FL_A_* */
__u8 flr_share; /* who may reuse the label, IPV6_FL_S_* */
__u16 flr_flags; /* optional modifiers, IPV6_FL_F_* */
__u16 flr_expires; /* time to keep the label, seconds */
__u16 flr_linger; /* no-reuse time after last release, seconds */
__u32 __flr_reserved;
/* Options in format of IPV6_PKTOPTIONS */
};
\end{verbatim}
\begin{itemize}
\item \verb|dst| is IPv6 destination address associated with the label.
\item \verb|label| is flow label value in network byte order. If it is zero,
kernel will allocate new pseudo-random number. Otherwise, kernel will try
to lease flow label ordered by user. In this case, it is user task to provide
necessary flow label randomness.
\item \verb|action| is requested operation. Currently, only three operations
are defined:
\begin{verbatim}
#define IPV6_FL_A_GET 0 /* Get flow label */
#define IPV6_FL_A_PUT 1 /* Release flow label */
#define IPV6_FL_A_RENEW 2 /* Update expire time */
\end{verbatim}
\item \verb|flags| are optional modifiers. Currently
only \verb|IPV6_FL_A_GET| has modifiers:
\begin{verbatim}
#define IPV6_FL_F_CREATE 1 /* Allowed to create new label */
#define IPV6_FL_F_EXCL 2 /* Do not create new label */
\end{verbatim}
\item \verb|share| defines who is allowed to reuse the same flow label.
\begin{verbatim}
#define IPV6_FL_S_NONE 0 /* Not defined */
#define IPV6_FL_S_EXCL 1 /* Label is private */
#define IPV6_FL_S_PROCESS 2 /* May be reused by this process */
#define IPV6_FL_S_USER 3 /* May be reused by this user */
#define IPV6_FL_S_ANY 255 /* Anyone may reuse it */
\end{verbatim}
\item \verb|linger| is time in seconds. After the last user releases flow
label, it will not be reused with different destination and options at least
during this time. If \verb|share| is not \verb|IPV6_FL_S_EXCL| the label
still can be shared by another sockets. Current implementation does not allow
unprivileged user to set linger longer than 60 sec.
\item \verb|expires| is time in seconds. Flow label will be kept at least
for this time, but it will not be destroyed before user released it explicitly
or closed all the sockets using it. Current implementation does not allow
unprivileged user to set timeout longer than 60 sec. Privileged applications
MAY set longer lifetimes, but in this case they MUST save allocated
labels at stable storage and restore them back after reboot before the first
application allocates new flow.
\end{itemize}
This structure is followed by optional extension headers associated
with this flow label in format of \verb|IPV6_PKTOPTIONS|. Only
\verb|IPV6_HOPOPTS|, \verb|IPV6_RTHDR| and, if \verb|IPV6_RTHDR| is present,
\verb|IPV6_DSTOPTS| are allowed.
\paragraph{Example.}
\addcontentsline{toc}{subsection}{Example}
The function \verb|get_flow_label| allocates
private flow label.
\begin{verbatim}
/*
 * Lease a private flow label for destination dst on socket fd and
 * enable sending of flowinfo on this socket.
 *
 * fl is the wanted label in host byte order; if it is zero, kernel
 * allocates a new pseudo-random label.
 *
 * Returns 0 on success, the leased label is then or'ed into
 * dst->sin6_flowinfo.  Returns -1 on failure after reporting the reason
 * with perror(); dst is not modified in this case.
 */
int get_flow_label(int fd, struct sockaddr_in6 *dst, __u32 fl)
{
	int on = 1;
	struct in6_flowlabel_req freq;

	memset(&freq, 0, sizeof(freq));
	freq.flr_label = htonl(fl);
	freq.flr_action = IPV6_FL_A_GET;
	freq.flr_flags = IPV6_FL_F_CREATE | IPV6_FL_F_EXCL;
	freq.flr_share = IPV6_FL_S_EXCL;
	memcpy(&freq.flr_dst, &dst->sin6_addr, sizeof(freq.flr_dst));

	if (setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR,
		       &freq, sizeof(freq)) == -1) {
		perror ("can't lease flowlabel");
		return -1;
	}

	if (setsockopt(fd, SOL_IPV6, IPV6_FLOWINFO_SEND,
		       &on, sizeof(on)) == -1) {
		perror ("can't send flowinfo");
		/* Label is useless without flowinfo sending, give it back. */
		freq.flr_action = IPV6_FL_A_PUT;
		setsockopt(fd, SOL_IPV6, IPV6_FLOWLABEL_MGR,
			   &freq, sizeof(freq));
		return -1;
	}

	/*
	 * Store the label only after both steps succeeded, so that a
	 * released label is never left in dst.  NOTE(review): this relies
	 * on freq.flr_label holding the leased label after the request,
	 * presumably written back by kernel when fl was zero - confirm.
	 */
	dst->sin6_flowinfo |= freq.flr_label;
	return 0;
}
\end{verbatim}
A bit more complicated example using routing header can be found
in \verb|ping6| utility (\verb|iputils| package). Linux rsvpd backend
contains an example of using operation \verb|IPV6_FL_A_RENEW|.
\paragraph{Listing flow labels.}
\addcontentsline{toc}{subsection}{Listing flow labels}
List of currently allocated
flow labels may be read from \verb|/proc/net/ip6_flowlabel|.
\begin{verbatim}
Label S Owner Users Linger Expires Dst Opt
A1BE5 1 0 0 6 3 3ffe2400000000010a0020fffe71fb30 0
\end{verbatim}
\begin{itemize}
\item \verb|Label| is hexadecimal flow label value.
\item \verb|S| is sharing style.
\item \verb|Owner| is ID of creator, it is zero, pid or uid, depending on
sharing style.
\item \verb|Users| is number of applications using the label now.
\item \verb|Linger| is \verb|linger| of this label in seconds.
\item \verb|Expires| is time until expiration of the label in seconds. It may
be negative, if the label is in use.
\item \verb|Dst| is IPv6 destination address.
\item \verb|Opt| is length of options, associated with the label. Option
data are not accessible.
\end{itemize}
\paragraph{Flow labels and RSVP.}
\addcontentsline{toc}{subsection}{Flow labels and RSVP}
RSVP daemon supports IPv6 flow labels
without any modifications to standard ISI RAPI. Sender must allocate
flow label, fill corresponding sender template and submit it to local rsvp
daemon. rsvpd will check the label and start to announce it in PATH
messages. Rsvpd on sender node will renew the flow label, so that it will not
be reused before path state expires and all the intermediate
routers and receiver purge flow state.
\verb|rtap| utility is modified to parse flow labels. F.e.\ if user allocated
flow label \verb|0xA1234|, he may write:
\begin{verbatim}
RTAP> sender 3ffe:2400::1/FL0xA1234 <Tspec>
\end{verbatim}
Receiver makes reservation with command:
\begin{verbatim}
RTAP> reserve ff 3ffe:2400::1/FL0xA1234 <Flowspec>
\end{verbatim}
\end{document}

View File

@ -0,0 +1,130 @@
<!doctype linuxdoc system>
<article>
<title>ARPD Daemon
<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
<date>some_negative_number, 20 Sep 2001
<abstract>
<tt/arpd/ is daemon collecting gratuitous ARP information, saving
it on local disk and feeding it to kernel on demand to avoid
redundant broadcasting due to limited size of kernel ARP cache.
</abstract>
<p><bf/Description/
<p>The format of the command is:
<tscreen><verb>
arpd OPTIONS [ INTERFACE [ INTERFACE ... ] ]
</verb></tscreen>
<p> <tt/OPTIONS/ are:
<itemize>
<item><tt/-l/ - dump <tt/arpd/ database to stdout and exit. Output consists
of three columns: interface index, IP address and MAC address.
Negative entries for dead hosts are also shown, in this case MAC address
is replaced by word <tt/FAILED/ followed by colon and time when the fact
that host is dead was proven the last time.
<item><tt/-f FILE/ - read and load <tt/arpd/ database from <tt/FILE/
in text format similar to that dumped by option <tt/-l/. Exit after load,
probably listing resulting database, if option <tt/-l/ is also given.
If <tt/FILE/ is <tt/-/, <tt/stdin/ is read to get ARP table.
<item><tt/-b DATABASE/ - location of database file. Default location is
<tt>/var/lib/arpd/arpd.db</tt>.
<item><tt/-a NUMBER/ - <tt/arpd/ not only passively listens to ARP on the wire, but
also sends broadcast queries itself. <tt/NUMBER/ is the number of such queries
to make before destination is considered as dead. When <tt/arpd/ is started
as kernel helper (i.e. with <tt/app_solicit/ enabled in <tt/sysctl/
or even with option <tt/-k/) without this option and still did not learn enough
information, you can observe 1 second gaps in service. Not fatal, but
not good.
<item><tt/-k/ - suppress sending broadcast queries by kernel. It makes
sense together with option <tt/-a/.
<item><tt/-n TIME/ - timeout of negative cache. When resolution fails <tt/arpd/
suppresses further attempts to resolve for this period. It makes sense
only together with option <tt/-k/. This timeout should not be too much
longer than boot time of a typical host not supporting gratuitous ARP.
Default value is 60 seconds.
<item><tt/-R RATE/ - maximal steady rate of broadcasts sent by <tt/arpd/
in packets per second. Default value is 1.
<item><tt/-B NUMBER/ - number of broadcasts sent by <tt/arpd/ back to back.
Default value is 3. Together with option <tt/-R/ this option allows
to police broadcasting not to exceed <tt/B+R*T/ over any interval
of time <tt/T/.
</itemize>
<p><tt/INTERFACE/ is name of networking interface to watch.
If no interfaces given, <tt/arpd/ monitors all the interfaces.
In this case <tt/arpd/ does not adjust <tt/sysctl/ parameters,
it is supposed user does this himself after <tt/arpd/ is started.
<p> Signals
<p> <tt/arpd/ exits gracefully syncing database and restoring adjusted
<tt/sysctl/ parameters, when receives <tt/SIGINT/ or <tt/SIGTERM/.
<tt/SIGHUP/ syncs database to disk. <tt/SIGUSR1/ sends some statistics
to <tt/syslog/. Effect of another signals is undefined, they may corrupt
database and leave <tt/sysctl/ parameters in an unpredictable state.
<p> Note
<p> In order for <tt/arpd/ to be able to serve as ARP resolver, kernel must be
compiled with the option <tt/CONFIG_ARPD/ and, in the case when interface list
is not given on command line, variable <tt/app_solicit/
on interfaces of interest should be set in <tt>/proc/sys/net/ipv4/neigh/*</tt>.
If this is not made <tt/arpd/ still collects gratuitous ARP information
in its database.
<p> Examples
<enum>
<item> Start <tt/arpd/ to collect gratuitous ARP, but not messing
with kernel functionality:
<tscreen><verb>
arpd -b /var/tmp/arpd.db
</verb></tscreen>
<item> Look at result after some time:
<tscreen><verb>
killall arpd
arpd -l -b /var/tmp/arpd.db
</verb></tscreen>
<item> To enable kernel helper, leaving leading role to kernel:
<tscreen><verb>
arpd -b /var/tmp/arpd.db -a 1 eth0 eth1
</verb></tscreen>
<item> Completely replace kernel resolution on interfaces <tt/eth0/
and <tt/eth1/. In this case kernel still does unicast probing to
validate entries, but all the broadcast activity is suppressed
and made under authority of <tt/arpd/:
<tscreen><verb>
arpd -b /var/tmp/arpd.db -a 3 -k eth0 eth1
</verb></tscreen>
This is the mode in which <tt/arpd/ is supposed to work normally.
It is not the default, just to prevent accidental enabling of a too
aggressive mode.
</enum>
</article>

View File

@ -0,0 +1,16 @@
#! /bin/bash
# Put several PostScript pages on a single sheet with whichever n-up tool
# is installed (psnup, then psmulti); if none is found, just copy the file.
# The command which is going to be run is echoed first.
#
# $1 = Temporary file (read as input) . "string"
# $2 = File to process (written as output) . "string"
# $3 = Page size . ie: a4 , letter ... "string"
# $4 = Number of pages to fit on a single sheet . "numeric"
#
# Arguments are quoted when used, so that file names containing spaces or
# glob characters are passed intact and the redirection is not ambiguous.
if type psnup >&/dev/null; then
	echo "psnup -$4 -p$3 $1 $2"
	psnup "-$4" "-p$3" "$1" "$2"
elif type psmulti >&/dev/null; then
	# psmulti takes no page size / page count, $3 and $4 are ignored here.
	echo "psmulti $1 > $2"
	psmulti "$1" > "$2"
else
	echo "cp $1 $2"
	cp "$1" "$2"
fi

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,469 @@
\documentstyle[12pt,twoside]{article}
\def\TITLE{Tunnels over IP}
\input preamble
\begin{center}
\Large\bf Tunnels over IP in Linux-2.2
\end{center}
\begin{center}
{ \large Alexey~N.~Kuznetsov } \\
\em Institute for Nuclear Research, Moscow \\
\verb|kuznet@ms2.inr.ac.ru| \\
\rm March 17, 1999
\end{center}
\vspace{5mm}
\tableofcontents
\section{Instead of introduction: micro-FAQ.}
\begin{itemize}
\item
Q: In linux-2.0.36 I used:
\begin{verbatim}
ifconfig tunl1 10.0.0.1 pointopoint 193.233.7.65
\end{verbatim}
to create tunnel. It does not work in 2.2.0!
A: You are right, it does not work. The command written above is split to two commands.
\begin{verbatim}
ip tunnel add MY-TUNNEL mode ipip remote 193.233.7.65
\end{verbatim}
will create tunnel device with name \verb|MY-TUNNEL|. Now you may configure
it with:
\begin{verbatim}
ifconfig MY-TUNNEL 10.0.0.1
\end{verbatim}
Certainly, if you prefer name \verb|tunl1| to \verb|MY-TUNNEL|,
you still may use it.
\item
Q: In linux-2.0.36 I used:
\begin{verbatim}
ifconfig tunl0 10.0.0.1
route add -net 10.0.0.0 gw 193.233.7.65 dev tunl0
\end{verbatim}
to tunnel net 10.0.0.0 via router 193.233.7.65. It does not
work in 2.2.0! Moreover, \verb|route| prints a funny error sort of
``network unreachable'' and after this I found a strange direct route
to 10.0.0.0 via \verb|tunl0| in routing table.
A: Yes, in 2.2 the rule that {\em normal} gateway must reside on directly
connected network has not any exceptions. You may tell kernel, that
this particular route is {\em abnormal}:
\begin{verbatim}
ifconfig tunl0 10.0.0.1 netmask 255.255.255.255
ip route add 10.0.0.0/8 via 193.233.7.65 dev tunl0 onlink
\end{verbatim}
Note keyword \verb|onlink|, it is the magic key that orders kernel
not to check for consistency of gateway address.
Probably, after this explanation you have already guessed another method
to cheat kernel:
\begin{verbatim}
ifconfig tunl0 10.0.0.1 netmask 255.255.255.255
route add -host 193.233.7.65 dev tunl0
route add -net 10.0.0.0 netmask 255.0.0.0 gw 193.233.7.65
route del -host 193.233.7.65 dev tunl0
\end{verbatim}
Well, if you like such tricks, nobody may prohibit you to use them.
Only do not forget
that between \verb|route add| and \verb|route del| host 193.233.7.65 is
unreachable.
\item
Q: In 2.0.36 I used to load \verb|tunnel| device module and \verb|ipip| module.
I cannot find any \verb|tunnel| in 2.2!
A: Linux-2.2 has single module \verb|ipip| for both directions of tunneling
and for all IPIP tunnel devices.
\item
Q: \verb|traceroute| does not work over tunnel! Well, stop... It works,
only skips some number of hops.
A: Yes. By default tunnel driver copies \verb|ttl| value from
inner packet to outer one. It means that path traversed by tunneled
packets to another endpoint is not hidden. If you dislike this, or if you
are going to use some routing protocol expecting that packets
with ttl 1 will reach peering host (f.e.\ RIP, OSPF or EBGP)
and you are not afraid of
tunnel loops, you may append option \verb|ttl 64|, when creating tunnel
with \verb|ip tunnel add|.
\item
Q: ... Well, list of things, which 2.0 was able to do finishes.
\end{itemize}
\paragraph{Summary of differences between 2.2 and 2.0.}
\begin{itemize}
\item {\bf In 2.0} you could compile tunnel device into kernel
and got set of 4 devices \verb|tunl0| ... \verb|tunl3| or,
alternatively, compile it as module and load new module
for each new tunnel. Also, module \verb|ipip| was necessary
to receive tunneled packets.
{\bf 2.2} has {\em one\/} module \verb|ipip|. Loading it you get base
tunnel device \verb|tunl0| and another tunnels may be created with command
\verb|ip tunnel add|. These new devices may have arbitrary names.
\item {\bf In 2.0} you set remote tunnel endpoint address with
the command \verb|ifconfig| ... \verb|pointopoint A|.
{\bf In 2.2} this command has the same semantics on all
the interfaces, namely it sets not tunnel endpoint,
but address of peering host, which is directly reachable
via this tunnel,
rather than via Internet. Actual tunnel endpoint address \verb|A|
should be set with \verb|ip tunnel add ... remote A|.
\item {\bf In 2.0} you create tunnel routes with the command:
\begin{verbatim}
route add -net 10.0.0.0 gw A dev tunl0
\end{verbatim}
{\bf 2.2} interprets this command equally for all device
kinds and gateway is required to be directly reachable via this tunnel,
rather than via Internet. You still may use \verb|ip route add ... onlink|
to override this behaviour.
\end{itemize}
\section{Tunnel setup: basics}
Standard Linux-2.2 kernel supports three flavors of tunnels,
listed in the following table:
\vspace{2mm}
\begin{tabular}{lll}
\vrule depth 0.8ex width 0pt\relax
Mode & Description & Base device \\
ipip & IP over IP & tunl0 \\
sit & IPv6 over IP & sit0 \\
gre & ANY over GRE over IP & gre0
\end{tabular}
\vspace{2mm}
\noindent All the kinds of tunnels are created with one command:
\begin{verbatim}
ip tunnel add <NAME> mode <MODE> [ local <S> ] [ remote <D> ]
\end{verbatim}
This command creates new tunnel device with name \verb|<NAME>|.
The \verb|<NAME>| is an arbitrary string. Particularly,
it may be even \verb|eth0|. The rest of parameters set
different tunnel characteristics.
\begin{itemize}
\item
\verb|mode <MODE>| sets tunnel mode. Three modes are available now
\verb|ipip|, \verb|sit| and \verb|gre|.
\item
\verb|remote <D>| sets remote endpoint of the tunnel to IP
address \verb|<D>|.
\item
\verb|local <S>| sets fixed local address for tunneled
packets. It must be an address on another interface of this host.
\end{itemize}
\let\thefootnote\oldthefootnote
Both \verb|remote| and \verb|local| may be omitted. In this case we
say that they are zero or wildcard. Two tunnels of one mode cannot
have the same \verb|remote| and \verb|local|. Particularly it means
that base device or fallback tunnel cannot be replicated.\footnote{
This restriction is relaxed for keyed GRE tunnels.}
Tunnels are divided to two classes: {\bf pointopoint} tunnels, which
have some not wildcard \verb|remote| address and deliver all the packets
to this destination, and {\bf NBMA} (i.e. Non-Broadcast Multi-Access) tunnels,
which have no \verb|remote|. Particularly, base devices (f.e.\ \verb|tunl0|)
are NBMA, because they have neither \verb|remote| nor
\verb|local| addresses.
After tunnel device is created you should configure it as you did
it with another devices. Certainly, the configuration of tunnels has
some features related to the fact that they work over existing Internet
routing infrastructure and simultaneously create new virtual links,
which changes this infrastructure. The danger that not enough careful
tunnel setup will result in formation of tunnel loops,
collapse of routing or flooding network with exponentially
growing number of tunneled fragments is very real.
Protocol setup on pointopoint tunnels does not differ from configuration
of other devices. You should set a protocol address with \verb|ifconfig|
and add routes with \verb|route| utility.
NBMA tunnels are different. To route something via NBMA tunnel
you have to explain to driver, where it should deliver packets to.
The only way to make it is to create special routes with gateway
address pointing to desired endpoint. F.e.\
\begin{verbatim}
ip route add 10.0.0.0/24 via <A> dev tunl0 onlink
\end{verbatim}
It is important to use option \verb|onlink|, otherwise
kernel will refuse request to create route via gateway not directly
reachable over device \verb|tunl0|. With IPv6 the situation is much simpler:
when you start device \verb|sit0|, it automatically configures itself
with all IPv4 addresses mapped to IPv6 space, so that all IPv4
Internet is {\em really reachable} via \verb|sit0|! Excellent, the command
\begin{verbatim}
ip route add 3FFE::/16 via ::193.233.7.65 dev sit0
\end{verbatim}
will route \verb|3FFE::/16| via \verb|sit0|, sending all the packets
destined to this prefix to 193.233.7.65.
\section{Tunnel setup: options}
Command \verb|ip tunnel add| has several additional options.
\begin{itemize}
\item \verb|ttl N| --- set fixed TTL \verb|N| on tunneled packets.
\verb|N| is number in the range 1--255. 0 is special value,
meaning that packets inherit TTL value.
Default value is: \verb|inherit|.
\item \verb|tos T| --- set fixed tos \verb|T| on tunneled packets.
Default value is: \verb|inherit|.
\item \verb|dev DEV| --- bind tunnel to device \verb|DEV|, so that
tunneled packets will be routed only via this device and will
not be able to escape to another device, when route to endpoint changes.
\item \verb|nopmtudisc| --- disable Path MTU Discovery on this tunnel.
It is enabled by default. Note that fixed ttl is incompatible
with this option: tunnels with fixed ttl always make pmtu discovery.
\end{itemize}
\verb|ipip| and \verb|sit| tunnels have no more options. \verb|gre|
tunnels are more complicated:
\begin{itemize}
\item \verb|key K| --- use keyed GRE with key \verb|K|. \verb|K| is
either number or IP address-like dotted quad.
\item \verb|csum| --- checksum tunneled packets.
\item \verb|seq| --- serialize packets.
\begin{NB}
I think this option does not
work. At least, I did not test it, did not debug it and
even do not understand, how it is supposed to work and for what
purpose Cisco planned to use it.
\end{NB}
\end{itemize}
Actually, these GRE options can be set separately for input and
output directions by prefixing corresponding keywords with letter
\verb|i| or \verb|o|. F.e.\ \verb|icsum| orders to accept only
packets with correct checksum and \verb|ocsum| means, that
our host will calculate and send checksum.
Command \verb|ip tunnel add| is not the only operation,
which can be made with tunnels. Certainly, you may get short help page
with:
\begin{verbatim}
ip tunnel help
\end{verbatim}
Besides that, you may view list of installed tunnels with the help of command:
\begin{verbatim}
ip tunnel ls
\end{verbatim}
Also you may look at statistics:
\begin{verbatim}
ip -s tunnel ls Cisco
\end{verbatim}
where \verb|Cisco| is name of tunnel device. Command
\begin{verbatim}
ip tunnel del Cisco
\end{verbatim}
destroys tunnel \verb|Cisco|. And, finally,
\begin{verbatim}
ip tunnel change Cisco mode sit local ME remote HE ttl 32
\end{verbatim}
changes its parameters.
\section{Differences 2.2 and 2.0 tunnels revisited.}
Now we can discuss more subtle differences between tunneling in 2.0
and 2.2.
\begin{itemize}
\item In 2.0 all tunneled packets were received promiscuously
as soon as you loaded module \verb|ipip|. 2.2 tries to select the best
tunnel device and packet looks as received on this. F.e.\ if host
received \verb|ipip| packet from host \verb|D| destined to our
local address \verb|S|, kernel searches for matching tunnels
in order:
\begin{tabular}{ll}
1 & \verb|remote| is \verb|D| and \verb|local| is \verb|S| \\
2 & \verb|remote| is \verb|D| and \verb|local| is wildcard \\
3 & \verb|remote| is wildcard and \verb|local| is \verb|S| \\
4 & \verb|tunl0|
\end{tabular}
If tunnel exists, but it is not in \verb|UP| state, the tunnel is ignored.
Note, that if \verb|tunl0| is \verb|UP| it receives all the IPIP packets,
not acknowledged by more specific tunnels.
Be careful, it means that without carefully installed firewall rules
anyone on the Internet may inject to your network any packets with
source addresses indistinguishable from local ones. It is not so bad idea
to design tunnels in the way enforcing maximal route symmetry
and to enable reversed path filter (\verb|rp_filter| sysctl option) on
tunnel devices.
\item In 2.2 you can monitor and debug tunnels with \verb|tcpdump|.
F.e.\ \verb|tcpdump| \verb|-i Cisco| \verb|-nvv| will dump the packets
that the kernel outputs via tunnel \verb|Cisco| and the packets received on it,
as seen from the kernel's viewpoint.
\end{itemize}
\section{Linux and Cisco IOS tunnels.}
Among another tunnels Cisco IOS supports IPIP and GRE.
Essentially, Cisco setup is subset of options, available for Linux.
Let us consider the simplest example:
\begin{verbatim}
interface Tunnel0
tunnel mode gre ip
tunnel source 10.10.14.1
tunnel destination 10.10.13.2
\end{verbatim}
This command set translates to:
\begin{verbatim}
ip tunnel add Tunnel0 \
mode gre \
local 10.10.14.1 \
remote 10.10.13.2
\end{verbatim}
Any questions? No questions.
\section{Interaction IPIP tunnels and DVMRP.}
DVMRP exploits IPIP tunnels to route multicasts via Internet.
\verb|mrouted| creates
IPIP tunnels listed in its configuration file automatically.
From kernel and user viewpoints there are no differences between
tunnels, created in this way, and tunnels created by \verb|ip tunnel|.
I.e.\ if \verb|mrouted| created some tunnel, it may be used to
route unicast packets, provided appropriate routes are added.
And vice versa, if administrator has already created a tunnel,
it will be reused by \verb|mrouted|, if it requests DVMRP
tunnel with the same local and remote addresses.
Do not wonder, if your manually configured tunnel is
destroyed, when mrouted exits.
\section{Broadcast GRE ``tunnels''.}
It is possible to set \verb|remote| for GRE tunnel to a multicast
address. Such tunnel becomes {\bf broadcast} tunnel (though word
tunnel is not quite appropriate in this case, it is rather virtual network).
\begin{verbatim}
ip tunnel add Universe local 193.233.7.65 \
remote 224.66.66.66 ttl 16
ip addr add 10.0.0.1/16 dev Universe
ip link set Universe up
\end{verbatim}
This tunnel is true broadcast network and broadcast packets are
sent to multicast group 224.66.66.66. By default such tunnel starts
to resolve both IP and IPv6 addresses via ARP/NDISC, so that
if multicast routing is supported in surrounding network, all GRE nodes
will find one another automatically and will form virtual Ethernet-like
broadcast network. If multicast routing does not work, it is unpleasant
but not fatal flaw. The tunnel becomes NBMA rather than broadcast network.
You may disable dynamic ARPing by:
\begin{verbatim}
echo 0 > /proc/sys/net/ipv4/neigh/Universe/mcast_solicit
\end{verbatim}
and to add required information to ARP tables manually:
\begin{verbatim}
ip neigh add 10.0.0.2 lladdr 128.6.190.2 dev Universe nud permanent
\end{verbatim}
In this case packets sent to 10.0.0.2 will be encapsulated in GRE
and sent to 128.6.190.2. It is possible to facilitate address resolution
using methods typical for another NBMA networks f.e.\ to start user
level \verb|arpd| daemon, which will maintain database of hosts attached
to GRE virtual network, or to ask a dedicated ARP or NHRP server
for information.
Actually, such setup is the most natural for tunneling,
it is really flexible, scalable and easily manageable, so that
it is strongly recommended to be used with GRE tunnels instead of ugly
hack with NBMA mode and \verb|onlink| modifier. Unfortunately,
for historical reasons broadcast mode is not supported by IPIP tunnels,
but this probably will change in future.
\section{Traffic control issues.}
Tunnels are devices, hence all the power of Linux traffic control
applies to them. The simplest (and the most useful in practice)
example is limiting tunnel bandwidth. The following command:
\begin{verbatim}
tc qdisc add dev tunl0 root tbf \
rate 128Kbit burst 4K limit 10K
\end{verbatim}
will limit tunneled traffic to 128Kbit with maximal burst size of 4K
and queuing not more than 10K.
However, you should remember, that tunnels are {\em virtual} devices
implemented in software and true queue management is impossible for them
just because they have no queues. Instead, it is better to create classes
on real physical interfaces and to map tunneled packets to them.
In general case of dynamic routing you should create such classes
on all outgoing interfaces, or, alternatively,
to use option \verb|dev DEV| to bind tunnel to a fixed physical device.
In the last case packets will be routed only via specified device
and you need to setup corresponding classes only on it.
Though you have to pay for this convenience,
if routing will change, your tunnel will fail.
Suppose that CBQ class \verb|1:ABC| has been created on device \verb|eth0|
specially for tunnel \verb|Cisco| with endpoints \verb|S| and \verb|D|.
Now you can select IPIP packets with addresses \verb|S| and \verb|D|
with some classifier and map them to class \verb|1:ABC|. F.e.\
it is easy to make with \verb|rsvp| classifier:
\begin{verbatim}
tc filter add dev eth0 pref 100 proto ip rsvp \
session D ipproto ipip filter S \
classid 1:ABC
\end{verbatim}
If you want to make more detailed classification of sub-flows
transmitted via tunnel, you can build CBQ subtree,
rooted at \verb|1:ABC| and attach to subroot set of rules parsing
IPIP packets more deeply.
\end{document}

View File

@ -0,0 +1,110 @@
<!doctype linuxdoc system>
<article>
<title>NSTAT, IFSTAT and RTACCT Utilities
<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
<date>some_negative_number, 20 Sep 2001
<abstract>
<tt/nstat/, <tt/ifstat/ and <tt/rtacct/ are simple tools helping
to monitor kernel snmp counters and network interface statistics.
</abstract>
<p> These utilities are very similar, so that I describe
them simultaneously, using name <tt/Xstat/ in the places which apply
to all of them.
<p>The format of the command is:
<tscreen><verb>
Xstat [ OPTIONS ] [ PATTERN [ PATTERN ... ] ]
</verb></tscreen>
<p>
<tt/PATTERN/ is shell style pattern, selecting identifier
of SNMP variables or interfaces to show. Variable is displayed
if one of patterns matches its name. If no patterns are given,
<tt/Xstat/ assumes that user wants to see all the variables.
<p> <tt/OPTIONS/ is list of single letter options, using common unix
conventions.
<itemize>
<item><tt/-h/ - show help page
<item><tt/-?/ - the same, of course
<item><tt/-v/, <tt/-V/ - print version of <tt/Xstat/ and exit
<item><tt/-z/ - dump zero counters too. By default they are not shown.
<item><tt/-a/ - dump absolute values of counters. By default <tt/Xstat/
calculates increments since the previous use.
<item><tt/-s/ - do not update history, so that the next time you will
see counters including values accumulated to the moment
of this measurement too.
<item><tt/-n/ - do not display anything, only update history.
<item><tt/-r/ - reset history.
<item><tt/-d INTERVAL/ - <tt/Xstat/ is run in daemon mode collecting
statistics. <tt/INTERVAL/ is interval between measurements
in seconds.
<item><tt/-t INTERVAL/ - time interval to average rates. Default value
is 60 seconds.
<item><tt/-e/ - display extended information about errors (<tt/ifstat/ only).
</itemize>
<p>
History is just dump saved in file <tt>/tmp/.Xstat.uUID</tt>
or in file given by environment variables <tt/NSTAT_HISTORY/,
<tt/IFSTAT_HISTORY/ and <tt/RTACCT_HISTORY/.
Each time when you use <tt/Xstat/ values there are updated.
If you use patterns, only the values which you _really_ see
are updated. If you want to skip an uninteresting period,
use option <tt/-n/, or just output to <tt>/dev/null</tt>.
<p>
<tt/Xstat/ understands when history is invalidated by system reboot
or source of information switched between different instances
of daemonic <tt/Xstat/ and kernel SNMP tables and does not
use invalid history.
<p> Beware, <tt/Xstat/ will not produce sane output,
when many processes use it simultaneously. If several processes
under single user need this utility they should use environment
variables to put their history in safe places
or to use it with options <tt/-a -s/.
<p>
Well, that's all. The utility is very simple, but nevertheless
very handy.
<p> <bf/Output of XSTAT/
<p> The first line of output is <tt/#/ followed by identifier
of source of information, it may be word <tt/kernel/, when <tt/Xstat/
gets information from kernel or some dotted decimal number followed
by parameters, when it obtains information from running <tt/Xstat/ daemon.
<p>In the case of <tt/nstat/ the rest of output consists of three columns:
SNMP MIB identifier,
its value (or increment since previous measurement) and average
rate of increase of the counter per second. <tt/ifstat/ outputs
interface name followed by pairs of counter and rate of its change.
<p> <bf/Daemonic Xstat/
<p> <tt/Xstat/ may be started as daemon by any user. This makes sense
to avoid wrapped counters and to obtain reasonable long counters
for large time. Also <tt/Xstat/ daemon calculates average rates.
For the first goal sampling interval (option <tt/-d/) may be large enough,
f.e. for gigabit rates byte counters overflow not more frequently than
each 40 seconds and you may select interval of 20 seconds.
From the other hand, when <tt/Xstat/ is used for estimating rates
interval should be less than averaging period (option <tt/-t/), otherwise
estimation loses in quality.
Client <tt/Xstat/, before trying to get information from the kernel,
contacts daemon started by this user, then it tries system wide
daemon, which is supposed to be started by superuser. And only if
none of them replied it gets information from kernel.
<p> <bf/Environment/
<p> <tt/NSTAT_HISTORY/ - name of history file for <tt/nstat/.
<p> <tt/IFSTAT_HISTORY/ - name of history file for <tt/ifstat/.
<p> <tt/RTACCT_HISTORY/ - name of history file for <tt/rtacct/.
</article>

View File

@ -0,0 +1,26 @@
\textwidth 6.0in
\textheight 8.5in
\input SNAPSHOT
\pagestyle{myheadings}
\markboth{\protect\TITLE}{}
\markright{{\protect\sc iproute2-ss\Draft}}
% To print it in compact form: both sides on one sheet (psnup -2)
\evensidemargin=\oddsidemargin
\newenvironment{NB}{\bgroup \vskip 1mm\leftskip 1cm \footnotesize \noindent NB.
}{\par\egroup \vskip 1mm}
\def\threeonly{[2.3.15+ only] }
\begin{document}
\makeatletter
\renewcommand{\@oddhead}{{\protect\sc iproute2-ss\Draft} \hfill \protect\arabic{page}}
\makeatother
\let\oldthefootnote\thefootnote
\def\thefootnote{}
\footnotetext{Copyright \copyright~1999 A.N.Kuznetsov}

View File

@ -0,0 +1,52 @@
<!doctype linuxdoc system>
<article>
<title>RTSTAT Utility
<author>Robert Olsson
<date>some_negative_number, 20 Dec 2001
<p>
Here is some code for monitoring the route cache. For systems handling high
network load, servers, routers, firewalls etc the route cache and its garbage
collection is crucial. Linux has a solid implementation.
<p>
The kernel patch (not required since linux-2.4.7) adds statistics counters
from route cache process into
/proc/net/rt_cache_stat. A companion user mode program presents the statistics
in a vmstat or iostat manner. The ratio between cache hits and misses gives
the flow length.
<p>
Hopefully it can help understanding performance and DoS and other related
issues.
<p> An URL where newer versions of this utility can be (probably) found
is ftp://robur.slu.se/pub/Linux/net-development/rt_cache_stat/
<p><bf/Description/
<p>The format of the command is:
<tscreen><verb>
rtstat [ OPTIONS ]
</verb></tscreen>
<p> <tt/OPTIONS/ are:
<itemize>
<item><tt/-h/, <tt/-help/ - show help page and version of the utility.
<item><tt/-i INTERVAL/ - interval between snapshots, default value is
2 seconds.
<item><tt/-s NUMBER/ - whether to print header line. 0 inhibits header line,
1 prescribes to print it once and 2 (this is default setting) forces header
line each 20 lines.
</itemize>
</article>

View File

@ -0,0 +1,525 @@
<!doctype linuxdoc system>
<article>
<title>SS Utility: Quick Intro
<author>Alexey Kuznetsov, <tt/kuznet@ms2.inr.ac.ru/
<date>some_negative_number, 20 Sep 2001
<abstract>
<tt/ss/ is one another utility to investigate sockets.
Functionally it is NOT better than <tt/netstat/ combined
with some perl/awk scripts and though it is surely faster
it is not enough to make it much better. :-)
So, stop reading this now and do not waste your time.
Well, certainly, it proposes some functionality, which current
netstat is still not able to do, but surely will soon.
</abstract>
<sect>Why?
<p> <tt>/proc</tt> interface is inadequate, unfortunately.
When amount of sockets is enough large, <tt/netstat/ or even
plain <tt>cat /proc/net/tcp/</tt> cause nothing but pains and curses.
In linux-2.4 the disease became worse: even if amount
of sockets is small reading <tt>/proc/net/tcp/</tt> is slow enough.
This utility presents a new approach, which is supposed to scale
well. I am not going to describe technical details here and
will concentrate on description of the command.
The only important thing to say is that it is not so bad idea
to load module <tt/tcp_diag/, which can be found in directory
<tt/Modules/ of <tt/iproute2/. If you do not make this <tt/ss/
will work, but it falls back to <tt>/proc</tt> and becomes slow
like <tt/netstat/, well, a bit faster yet (see section "Some numbers").
<sect>Old news
<p>
In the simplest form <tt/ss/ is equivalent to netstat
with some small deviations.
<itemize>
<item><tt/ss -t -a/ dumps all TCP sockets
<item><tt/ss -u -a/ dumps all UDP sockets
<item><tt/ss -w -a/ dumps all RAW sockets
<item><tt/ss -x -a/ dumps all UNIX sockets
</itemize>
<p>
Option <tt/-o/ shows TCP timers state.
Option <tt/-e/ shows some extended information.
Etc. etc. etc. Seems, all the options of netstat related to sockets
are supported. Though not AX.25 and other bizarres. :-)
If someone wants, he can make support for decnet and ipx.
Some rudimentary support for them is already present in iproute2 libutils,
and I will be glad to see these new members.
<p>
However, standard functionality is a bit different:
<p>
The first: without option <tt/-a/ sockets in states
<tt/TIME-WAIT/ and <tt/SYN-RECV/ are skipped too.
It is more reasonable default, I think.
<p>
The second: format of UNIX sockets is different. It coincides
with tcp/udp. Though standard kernel still does not allow to
see write/read queues and peer address of connected UNIX sockets,
the patch doing this exists.
<p>
The third: default is to dump only TCP sockets, rather than all of the types.
<p>
The next: by default it does not resolve numeric host addresses (like <tt/ip/)!
Resolving is enabled with option <tt/-r/. Service names, usually stored
in local files, are resolved by default. Also, if service database
does not contain references to a port, <tt/ss/ queries system
<tt/rpcbind/. RPC services are prefixed with <tt/rpc./
Resolution of services may be suppressed with option <tt/-n/.
<p>
It does not accept "long" options (I dislike them, sorry).
So, address family is given with family identifier following
option <tt/-f/ to be aligned to iproute2 conventions.
Mostly, it is to allow option parser to parse
addresses correctly, but as side effect it really limits dumping
to sockets supporting only given family. Option <tt/-A/ followed
by list of socket tables to dump is also supported.
Logically, id of socket table is different of _address_ family, which is
another point of incompatibility. So, id is one of
<tt/all/, <tt/tcp/, <tt/udp/,
<tt/raw/, <tt/inet/, <tt/unix/, <tt/packet/, <tt/netlink/. See?
Well, <tt/inet/ is just abbreviation for <tt/tcp|udp|raw/
and it is not difficult to guess that <tt/packet/ allows
to look at packet sockets. Actually, there are also some other abbreviations,
f.e. <tt/unix_dgram/ selects only datagram UNIX sockets.
<p>
The next: well, I still do not know. :-)
<sect>Time to talk about new functionality.
<p>It is builtin filtering of socket lists.
<sect1> Filtering by state.
<p>
<tt/ss/ allows to filter socket states, using keywords
<tt/state/ and <tt/exclude/, followed by some state
identifier.
<p>
State identifier are standard TCP state names (not listed,
they are useless for you if you already do not know them)
or abbreviations:
<itemize>
<item><tt/all/ - for all the states
<item><tt/bucket/ - for TCP minisockets (<tt/TIME-WAIT|SYN-RECV/)
<item><tt/big/ - all except for minisockets
<item><tt/connected/ - not closed and not listening
<item><tt/synchronized/ - connected and not <tt/SYN-SENT/
</itemize>
<p>
F.e. to dump all tcp sockets except <tt/SYN-RECV/:
<tscreen><verb>
ss exclude SYN-RECV
</verb></tscreen>
<p>
If neither <tt/state/ nor <tt/exclude/ directives
are present,
state filter defaults to <tt/all/ with option <tt/-a/
or to <tt/all/,
excluding listening, syn-recv, time-wait and closed sockets.
<sect1> Filtering by addresses and ports.
<p>
Option list may contain address/port filter.
It is boolean expression which consists of boolean operation
<tt/or/, <tt/and/, <tt/not/ and predicates.
Actually, all the flavors of names for boolean operations are eaten:
<tt/&amp/, <tt/&amp&amp/, <tt/|/, <tt/||/, <tt/!/, but do not forget
about special sense given to these symbols by unix shells and escape
them correctly, when used from command line.
<p>
Predicates may be of the following kinds:
<itemize>
<item>A. Address/port match, where address is checked against mask
and port is either wildcard or exact. It is one of:
<tscreen><verb>
dst prefix:port
src prefix:port
src unix:STRING
src link:protocol:ifindex
src nl:channel:pid
</verb></tscreen>
Both prefix and port may be absent or replaced with <tt/*/,
which means wildcard. UNIX socket use more powerful scheme
matching to socket names by shell wildcards. Also, prefixes
unix: and link: may be omitted, if address family is evident
from context (with option <tt/-x/ or with <tt/-f unix/
or with <tt/unix/ keyword)
<p>
F.e.
<tscreen><verb>
dst 10.0.0.1
dst 10.0.0.1:
dst 10.0.0.1/32:
dst 10.0.0.1:*
</verb></tscreen>
are equivalent and mean socket connected to
any port on host 10.0.0.1
<tscreen><verb>
dst 10.0.0.0/24:22
</verb></tscreen>
sockets connected to port 22 on network
10.0.0.0...255.
<p>
Note that port separated of address with colon, which creates
troubles with IPv6 addresses. Generally, we interpret the last
colon as splitting port. To allow to give IPv6 addresses,
trick like used in IPv6 HTTP URLs may be used:
<tscreen><verb>
dst [::1]
</verb></tscreen>
are sockets connected to ::1 on any port
<p>
Another way is <tt/dst ::1/128/. / helps to understand that
colon is part of IPv6 address.
<p>
Now we can add another alias for <tt/dst 10.0.0.1/:
<tt/dst [10.0.0.1]/. :-)
<p> Address may be a DNS name. In this case all the addresses are looked
up (in all the address families, if it is not limited by option <tt/-f/
or special address prefix <tt/inet:/, <tt/inet6/) and resulting
expression is <tt/or/ over all of them.
<item> B. Port expressions:
<tscreen><verb>
dport &gt= :1024
dport != :22
sport &lt :32000
</verb></tscreen>
etc.
All the relations: <tt/&lt/, <tt/&gt/, <tt/&lt=/, <tt/&gt=/, <tt/=/, <tt/==/,
<tt/!=/, <tt/eq/, <tt/ge/, <tt/lt/, <tt/ne/...
Use variant which you like more, but not forget to escape special
characters when typing them in command line. :-)
Note that port number syntactically coincides to the case A!
You may even add an IP address, but it will not participate
in comparison, except for <tt/==/ and <tt/!=/, which are equivalent
to corresponding predicates of type A. F.e.
<p>
<tt/dst 10.0.0.1:22/
is equivalent to <tt/dport eq 10.0.0.1:22/
and
<tt/not dst 10.0.0.1:22/ is equivalent to
<tt/dport neq 10.0.0.1:22/
<item>C. Keyword <tt/autobound/. It matches to sockets bound automatically
on local system.
</itemize>
<sect> Examples
<p>
<itemize>
<item>1. List all the tcp sockets in state <tt/FIN-WAIT-1/ for our apache
to network 193.233.7/24 and look at their timers:
<tscreen><verb>
ss -o state fin-wait-1 \( sport = :http or sport = :https \) \
dst 193.233.7/24
</verb></tscreen>
Oops, forgot to say that missing logical operation is
equivalent to <tt/and/.
<item> 2. Well, now look at the rest...
<tscreen><verb>
ss -o excl fin-wait-1
ss state fin-wait-1 \( sport neq :http and sport neq :https \) \
or not dst 193.233.7/24
</verb></tscreen>
Note that we have to do _two_ calls of ss to do this.
State match is always anded to address/port match.
The reason for this is purely technical: ss does fast skip of
not matching states before parsing addresses and I consider the
ability to skip fastly gobs of time-wait and syn-recv sockets
as more important than logical generality.
<item> 3. So, let's look at all our sockets using autobound ports:
<tscreen><verb>
ss -a -A all autobound
</verb></tscreen>
<item> 4. And eventually find all the local processes connected
to local X servers:
<tscreen><verb>
ss -xp dst "/tmp/.X11-unix/*"
</verb></tscreen>
Pardon, this does not work with current kernel, patching is required.
But we still can look at server side:
<tscreen><verb>
ss -x src "/tmp/.X11-unix/*"
</verb></tscreen>
</itemize>
<sect> Returning to ground: real manual
<p>
<sect1> Command arguments
<p> General format of arguments to <tt/ss/ is:
<tscreen><verb>
ss [ OPTIONS ] [ STATE-FILTER ] [ ADDRESS-FILTER ]
</verb></tscreen>
<sect2><tt/OPTIONS/
<p> <tt/OPTIONS/ is list of single letter options, using common unix
conventions.
<itemize>
<item><tt/-h/ - show help page
<item><tt/-?/ - the same, of course
<item><tt/-v/, <tt/-V/ - print version of <tt/ss/ and exit
<item><tt/-s/ - print summary statistics. This option does not parse
socket lists obtaining summary from various sources. It is useful
when amount of sockets is so huge that parsing <tt>/proc/net/tcp</tt>
is painful.
<item><tt/-D FILE/ - do not display anything, just dump raw information
about TCP sockets to <tt/FILE/ after applying filters. If <tt/FILE/ is <tt/-/
<tt/stdout/ is used.
<item><tt/-F FILE/ - read continuation of filter from <tt/FILE/.
Each line of <tt/FILE/ is interpreted like single command line option.
If <tt/FILE/ is <tt/-/ <tt/stdin/ is used.
<item><tt/-r/ - try to resolve numeric address/ports
<item><tt/-n/ - do not try to resolve ports
<item><tt/-o/ - show some optional information, f.e. TCP timers
<item><tt/-i/ - show some information specific to TCP (RTO, congestion
window, slow start threshold etc.)
<item><tt/-e/ - show even more optional information
<item><tt/-m/ - show extended information on memory used by the socket.
It is available only with <tt/tcp_diag/ enabled.
<item><tt/-p/ - show list of processes owning the socket
<item><tt/-f FAMILY/ - default address family used for parsing addresses.
Also this option limits listing to sockets supporting
given address family. Currently the following families
are supported: <tt/unix/, <tt/inet/, <tt/inet6/, <tt/link/,
<tt/netlink/.
<item><tt/-4/ - alias for <tt/-f inet/
<item><tt/-6/ - alias for <tt/-f inet6/
<item><tt/-0/ - alias for <tt/-f link/
<item><tt/-A LIST-OF-TABLES/ - list of socket tables to dump, separated
by commas. The following identifiers are understood:
<tt/all/, <tt/inet/, <tt/tcp/, <tt/udp/, <tt/raw/,
<tt/unix/, <tt/packet/, <tt/netlink/, <tt/unix_dgram/,
<tt/unix_stream/, <tt/packet_raw/, <tt/packet_dgram/.
<item><tt/-x/ - alias for <tt/-A unix/
<item><tt/-t/ - alias for <tt/-A tcp/
<item><tt/-u/ - alias for <tt/-A udp/
<item><tt/-w/ - alias for <tt/-A raw/
<item><tt/-a/ - show sockets of all the states. By default sockets
in states <tt/LISTEN/, <tt/TIME-WAIT/, <tt/SYN-RECV/
and <tt/CLOSE/ are skipped.
<item><tt/-l/ - show only sockets in state <tt/LISTEN/
</itemize>
<sect2><tt/STATE-FILTER/
<p><tt/STATE-FILTER/ allows to construct arbitrary set of
states to match. Its syntax is sequence of keywords <tt/state/
and <tt/exclude/ followed by identifier of state.
Available identifiers are:
<p>
<itemize>
<item> All standard TCP states: <tt/established/, <tt/syn-sent/,
<tt/syn-recv/, <tt/fin-wait-1/, <tt/fin-wait-2/, <tt/time-wait/,
<tt/closed/, <tt/close-wait/, <tt/last-ack/, <tt/listen/ and <tt/closing/.
<item><tt/all/ - for all the states
<item><tt/connected/ - all the states except for <tt/listen/ and <tt/closed/
<item><tt/synchronized/ - all the <tt/connected/ states except for
<tt/syn-sent/
<item><tt/bucket/ - states, which are maintained as minisockets, i.e.
<tt/time-wait/ and <tt/syn-recv/.
<item><tt/big/ - opposite to <tt/bucket/
</itemize>
<sect2><tt/ADDRESS_FILTER/
<p><tt/ADDRESS_FILTER/ is boolean expression with operations <tt/and/, <tt/or/
and <tt/not/, which can be abbreviated in C style f.e. as <tt/&amp/,
<tt/&amp&amp/.
<p>
Predicates check socket addresses, both local and remote.
There are the following kinds of predicates:
<itemize>
<item> <tt/dst ADDRESS_PATTERN/ - matches remote address and port
<item> <tt/src ADDRESS_PATTERN/ - matches local address and port
<item> <tt/dport RELOP PORT/ - compares remote port to a number
<item> <tt/sport RELOP PORT/ - compares local port to a number
<item> <tt/autobound/ - checks that socket is bound to an ephemeral
port
</itemize>
<p><tt/RELOP/ is some of <tt/&lt=/, <tt/&gt=/, <tt/==/ etc.
To make this more convenient for use in unix shell, alphabetic
FORTRAN-like notations <tt/le/, <tt/gt/ etc. are accepted as well.
<p>The format and semantics of <tt/ADDRESS_PATTERN/ depends on address
family.
<itemize>
<item><tt/inet/ - <tt/ADDRESS_PATTERN/ consists of IP prefix, optionally
followed by colon and port. If prefix or port part is absent or replaced
with <tt/*/, this means wildcard match.
<item><tt/inet6/ - The same as <tt/inet/, only prefix refers to an IPv6
address. Unlike <tt/inet/ colon becomes ambiguous, so that <tt/ss/ allows
to use scheme, like used in URLs, where address is surrounded with
<tt/[/ ... <tt/]/.
<item><tt/unix/ - <tt/ADDRESS_PATTERN/ is shell-style wildcard.
<item><tt/packet/ - format looks like <tt/inet/, only interface index
stays instead of port and link layer protocol id instead of address.
<item><tt/netlink/ - format looks like <tt/inet/, only socket pid
stays instead of port and netlink channel instead of address.
</itemize>
<p><tt/PORT/ is syntactically <tt/ADDRESS_PATTERN/ with wildcard
address part. Certainly, it is undefined for UNIX sockets.
<sect1> Environment variables
<p>
<tt/ss/ allows to change source of information using various
environment variables:
<p>
<itemize>
<item> <tt/PROC_SLABINFO/ to override <tt>/proc/slabinfo</tt>
<item> <tt/PROC_NET_TCP/ to override <tt>/proc/net/tcp</tt>
<item> <tt/PROC_NET_UDP/ to override <tt>/proc/net/udp</tt>
<item> etc.
</itemize>
<p>
Variable <tt/PROC_ROOT/ allows to change root of all the <tt>/proc/</tt>
hierarchy.
<p>
Variable <tt/TCPDIAG_FILE/ prescribes to open a file instead of
requesting kernel to dump information about TCP sockets.
<p> This option is used mainly to investigate bug reports,
when dumps of files usually found in <tt>/proc/</tt> are received
by e-mail.
<sect1> Output format
<p>Six columns. The first is <tt/Netid/, it denotes socket type and
transport protocol, when it is ambiguous: <tt/tcp/, <tt/udp/, <tt/raw/,
<tt/u_str/ is abbreviation for <tt/unix_stream/, <tt/u_dgr/ for UNIX
datagram sockets, <tt/nl/ for netlink, <tt/p_raw/ and <tt/p_dgr/ for
raw and datagram packet sockets. This column is optional, it will
be hidden, if filter selects an unique netid.
<p>
The second column is <tt/State/. Socket state is displayed here.
The names are standard TCP names, except for <tt/UNCONN/, which
cannot happen for TCP, but normal for not connected sockets
of another types. Again, this column can be hidden.
<p>
Then two columns (<tt/Recv-Q/ and <tt/Send-Q/) showing amount of data
queued for receive and transmit.
<p>
And the last two columns display local address and port of the socket
and its peer address, if the socket is connected.
<p>
If options <tt/-o/, <tt/-e/ or <tt/-p/ were given, options are
displayed not in fixed positions but separated by spaces pairs:
<tt/option:value/. If value is not a single number, it is presented
as list of values, enclosed to <tt/(/ ... <tt/)/ and separated with
commas. F.e.
<tscreen><verb>
timer:(keepalive,111min,0)
</verb></tscreen>
is typical format for TCP timer (option <tt/-o/).
<tscreen><verb>
users:((X,113,3))
</verb></tscreen>
is typical for list of users (option <tt/-p/).
<sect>Some numbers
<p>
Well, let us use <tt/pidentd/ and a tool <tt/ibench/ to measure
its performance. It is 30 requests per second here. Nothing to test,
it is too slow. OK, let us patch pidentd with patch from directory
Patches. After this it handles about 4300 requests per second
and becomes handy tool to pollute socket tables with lots of timewait
buckets.
<p>
So, each test starts from pollution tables with 30000 sockets
and then doing full dump of the table piped to wc and measuring
timings with time:
<p>Results:
<itemize>
<item> <tt/netstat -at/ - 15.6 seconds
<item> <tt/ss -atr/, but without <tt/tcp_diag/ - 5.4 seconds
<item> <tt/ss -atr/ with <tt/tcp_diag/ - 0.47 seconds
</itemize>
No comments. Though one comment is necessary, most of time
without <tt/tcp_diag/ is wasted inside kernel with completely
blocked networking. More than 10 seconds, yes. <tt/tcp_diag/
does the same work for 100 milliseconds of system time.
</article>

View File

@ -0,0 +1,13 @@
0x10 lowdelay
0x08 throughput
0x04 reliability
# This value overlaps with ECT, do not use it!
0x02 mincost
# These values do not seem to want to die; Cisco likes them for some strange reason.
0x20 priority
0x40 immediate
0x60 flash
0x80 flash-override
0xa0 critical
0xc0 internet
0xe0 network

View File

@ -0,0 +1,25 @@
#
# Reserved protocols.
#
0 unspec
1 redirect
2 kernel
3 boot
4 static
8 gated
9 ra
10 mrt
11 zebra
12 bird
#
# Used by me for gated
#
254 gated/aggr
253 gated/bgp
252 gated/ospf
251 gated/ospfase
250 gated/rip
249 gated/static
248 gated/conn
247 gated/inet
246 gated/default

View File

@ -0,0 +1,13 @@
#
# reserved values
#
0 cosmos
#
# local
#
#1 inr.ac
#2 inr.ruhep
#3 freenet
#4 radio-msu
#5 russia
#6 internet

View File

@ -0,0 +1,11 @@
#
# reserved values
#
0 global
255 nowhere
254 host
253 link
#
# pseudo-reserved
#
200 site

View File

@ -0,0 +1,11 @@
#
# reserved values
#
255 local
254 main
253 default
0 unspec
#
# local
#
#1 inr.ruhep

View File

@ -0,0 +1,49 @@
#! /bin/sh -x
#
# Sample script on using the ingress capabilities.
# This script shows how one can rate limit incoming SYNs,
# which is useful for TCP-SYN attack protection. You can use
# ipchains to make the SYN match more specific (e.g. by
# additionally matching on the subnet).
#
# Paths to various utilities;
# change them to reflect yours.
#
IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
TC=$IPROUTE/tc/tc
# NOTE: IP is defined for convenience but is not used below.
IP=$IPROUTE/ip/ip
IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
INDEV=eth2
#
# tag all incoming SYN packets through $INDEV as mark value 1
############################################################
$IPCHAINS -A input -i $INDEV -y -m 1
############################################################
#
# install the ingress qdisc on the ingress interface
############################################################
$TC qdisc add dev $INDEV handle ffff: ingress
############################################################
#
#
# SYN packets are 40 bytes (320 bits) so three SYNs equal
# 960 bits (approximately 1kbit); so below we rate limit
# the incoming SYNs to 3/sec (not very useful really, but it
# serves to show the point - JHS).
# The fw filter with handle 1 selects the packets marked 1 above.
############################################################
$TC filter add dev $INDEV parent ffff: protocol ip prio 50 handle 1 fw \
police rate 1kbit burst 40 mtu 9k drop flowid :1
############################################################
#
# Show what has been installed on the ingress side.
echo "---- qdisc parameters Ingress ----------"
$TC qdisc ls dev $INDEV
echo "---- Class parameters Ingress ----------"
$TC class ls dev $INDEV
echo "---- filter parameters Ingress ----------"
$TC filter ls dev $INDEV parent ffff:
# Deleting the ingress qdisc (uncomment to undo the setup):
#$TC qdisc del dev $INDEV ingress

View File

@ -0,0 +1,76 @@
#! /bin/sh
#
# Example CBQ configuration for $DEVICE: a root class, bulk,
# interactive and background classes (each with an SFQ leaf qdisc),
# plus two classes for RSVP real-time traffic.
#
TC=/home/root/tc
# NOTE: IP is defined but is not used below.
IP=/home/root/ip
DEVICE=eth1
BANDWIDTH="bandwidth 10Mbit"
# Attach CBQ on $DEVICE. It will have handle 1:.
# $BANDWIDTH is real $DEVICE bandwidth (10Mbit).
# avpkt is average packet size.
# mpu is minimal packet size.
$TC qdisc add dev $DEVICE root handle 1: cbq \
$BANDWIDTH avpkt 1000 mpu 64
# Create root class with classid 1:1. This step is not necessary.
# bandwidth is the same as on CBQ itself.
# rate == all the bandwidth
# allot is MTU + MAC header
# maxburst measures allowed class burstiness (please, read S.Floyd and VJ papers)
# est 1sec 8sec means that the kernel will evaluate the average rate
# on this class with period 1sec and time constant 8sec.
# This rate is viewed with "tc -s class ls dev $DEVICE"
$TC class add dev $DEVICE parent 1:0 classid :1 est 1sec 8sec cbq \
$BANDWIDTH rate 10Mbit allot 1514 maxburst 50 avpkt 1000
# Bulk.
# New parameters are:
# weight, which is set to be proportional to
# "rate". It is not necessary, weight=1 will work as well.
# defmap and split say that best effort traffic, not classified
# by other means, will fall to this class.
$TC class add dev $DEVICE parent 1:1 classid :2 est 1sec 8sec cbq \
$BANDWIDTH rate 4Mbit allot 1514 weight 500Kbit \
prio 6 maxburst 50 avpkt 1000 split 1:0 defmap ff3d
# OPTIONAL.
# Attach "sfq" qdisc to this class, quantum is MTU, perturb
# gives period of hash function perturbation in seconds.
#
$TC qdisc add dev $DEVICE parent 1:2 sfq quantum 1514b perturb 15
# Interactive-burst class
$TC class add dev $DEVICE parent 1:1 classid :3 est 2sec 16sec cbq \
$BANDWIDTH rate 1Mbit allot 1514 weight 100Kbit \
prio 2 maxburst 100 avpkt 1000 split 1:0 defmap c0
$TC qdisc add dev $DEVICE parent 1:3 sfq quantum 1514b perturb 15
# Background.
# NOTE(review): weight 10Mbit with rate 100Kbit does not follow the
# "weight proportional to rate" convention used by the classes above
# (10Kbit would) - confirm whether this is intended.
$TC class add dev $DEVICE parent 1:1 classid :4 est 1sec 8sec cbq \
$BANDWIDTH rate 100Kbit allot 1514 weight 10Mbit \
prio 7 maxburst 10 avpkt 1000 split 1:0 defmap 2
$TC qdisc add dev $DEVICE parent 1:4 sfq quantum 1514b perturb 15
# Realtime class for RSVP
$TC class add dev $DEVICE parent 1:1 classid 1:7FFE cbq \
rate 5Mbit $BANDWIDTH allot 1514b avpkt 1000 \
maxburst 20
# Reclassified realtime traffic
#
# New element: split is not 1:0, but 1:7FFE. It means
# that only real-time packets which violated policing filters
# or exceeded reshaping buffers will fall to it.
$TC class add dev $DEVICE parent 1:7FFE classid 1:7FFF est 4sec 32sec cbq \
rate 1Mbit $BANDWIDTH allot 1514b avpkt 1000 weight 10Kbit \
prio 6 maxburst 10 split 1:7FFE defmap ffff

View File

@ -0,0 +1,446 @@
#!/bin/bash
#
# dhclient-script for Linux.
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version
# 2 of the License, or (at your option) any later version.
#
# Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
#
# Probably, I did not understand, what this funny feature as "alias"
# means exactly. For now I suppose, that it is a static address, which
# we should install and preserve.
#
# Append everything the script prints from here on (stdout and stderr)
# to a log file.
# NOTE(review): the path under /tmp is fixed and predictable - confirm this
# is acceptable for the privileges this script runs with.
exec >> /tmp/DHS.log 2>&1
# Record the script arguments and the reason for the invocation.
echo dhc-script $* reason=$reason
# Record all shell variables whose names start with old_, new_ or check_.
set | grep "^\(old_\|new_\|check_\)"
# Print the arguments on one line, prefixed with the word LOG.
# args: any words to log
LOG () {
echo LOG $* ;
}
# Convert one octet of a netmask to the number of leading one bits.
# arg: $1 = mask octet (0..255)
# returns: the bit count as exit status; 0 when the octet is 0 or is not
#          a contiguous mask.
#
Mask8ToLen() {
	local zeros

	# A contiguous mask with N trailing zero bits equals 256 - 2^N.
	for zeros in 0 1 2 3 4 5 6 7; do
		if [ $(( (1 << zeros) + $1 )) -eq 256 ]; then
			return $(( 8 - zeros ))
		fi
	done

	return 0
}
# Convert a dotted-quad inet mask to a prefix length.
# arg: $1 = dotquad mask
# returns: the length as exit status; 255 when the mask matches none of
#          the recognised shapes.
#
MaskToLen() {
	local whole_bits=0
	local octet=$1

	# Pick the single octet that may be partial and count the full
	# 255 octets that precede it.
	case $1 in
	0.0.0.0)
		return 0
		;;
	255.*.0.0)
		whole_bits=8
		octet=${octet#255.}
		octet=${octet%.0.0}
		;;
	255.255.*.0)
		whole_bits=16
		octet=${octet#255.255.}
		octet=${octet%.0}
		;;
	255.255.255.*)
		whole_bits=24
		octet=${octet#255.255.255.}
		;;
	*)
		return 255
		;;
	esac

	Mask8ToLen $octet
	return $(( $? + whole_bits ))
}
# Print the classful ("natural" A/B/C) mask of an address.
# arg: $1 = dotquad address
# output: the mask in dotted-quad form on stdout; 0.0.0.0 and
#         255.255.255.255 are echoed back unchanged.
#
ABCMask () {
	local first_octet=${1%%.*}

	if [ "$1" = "255.255.255.255" ]; then
		echo $1
		return
	fi
	if [ "$1" = "0.0.0.0" ]; then
		echo $1
		return
	fi
	if [ $first_octet -ge 224 ]; then
		echo 240.0.0.0
		return
	fi
	if [ $first_octet -ge 192 ]; then
		echo 255.255.255.0
		return
	fi
	if [ $first_octet -ge 128 ]; then
		echo 255.255.0.0
		return
	fi
	echo 255.0.0.0
}
# Compute the length of the classful ("natural" A/B/C) mask of an address.
# arg: $1 = dotquad address
# returns: the mask length as exit status.
#
ABCMaskLen () {
	local first_octet=${1%%.*}
	local len=8

	if [ "$1" = "255.255.255.255" ]; then
		len=32
	elif [ "$1" = "0.0.0.0" ]; then
		len=0
	elif [ $first_octet -ge 224 ]; then
		len=4
	elif [ $first_octet -ge 192 ]; then
		len=24
	elif [ $first_octet -ge 128 ]; then
		len=16
	fi

	return $len
}
# Delete an IP address by taking its alias interface down.
# args: $1 = interface
#       $2 = address   (logged only)
#       $3 = mask      (logged only)
#       $4 = broadcast (logged only)
#       $5 = label; when non-empty the alias "$1:$5" is used
#
DelINETAddr () {
	local masklen=32	# not referenced below
	local addrid

	LOG DelINETAddr $*

	# Append ":label" only when a label was given.
	addrid=$1${5:+:$5}

	LOG ifconfig $addrid down
	ifconfig $addrid down
}
# Add an IP address on an interface (or on its alias, when labelled).
# args: $1 = interface
#       $2 = address
#       $3 = mask      (optional; passed as "netmask $3")
#       $4 = broadcast (optional; passed as "broadcast $4")
#       $5 = label     (optional; the alias "$1:$5" is configured)
#
AddINETAddr () {
	local addrid
	local mask_arg
	local brd_arg

	LOG AddINETAddr $*

	# Each optional piece expands to nothing when its argument is empty.
	addrid=$1${5:+:$5}
	mask_arg=${3:+netmask $3}
	brd_arg=${4:+broadcast $4}

	LOG ifconfig $addrid $2 $mask_arg $brd_arg up
	ifconfig $addrid $2 $mask_arg $brd_arg up
}
# Add a default route through every listed router.
# args: $1 = routers list (whitespace separated); nothing is done when empty
#
AddDefaultRoutes() {
	local gateway

	[ "$1" ] || return 0

	LOG AddDefaultRoutes $*
	for gateway in $1; do
		LOG route add default gw $gateway
		route add default gw $gateway
	done
}
# Delete the default route through every listed router.
# args: $1 = routers list (whitespace separated); nothing is done when empty
#
DelDefaultRoutes() {
	local gateway

	[ "$1" ] || return 0

	LOG DelDefaultRoutes $*
	for gateway in $1; do
		LOG route del default gw $gateway
		route del default gw $gateway
	done
}
# Ping a host once.
# args: $1 = dotquad address of the host
# returns: 0 if ping succeeded, 1 otherwise.
#
PingNode() {
	LOG PingNode $*
	ping -q -c 1 -w 2 $1 && return 0
	return 1
}
# Probe the default routers and add a default route via each live one.
# args: $1 = routers list
# returns: 0 if at least one router is alive, 1 otherwise.
#
CheckRouterList() {
	local gateway
	local status=1

	LOG CheckRouterList $*

	for gateway in $1; do
		# Routers that do not answer are skipped.
		PingNode $gateway || continue
		status=0
		route add default gw $gateway
	done

	return $status
}
# Delete/create static routes.
# args: $1 = operation (del/add)
#       $2 = routes list in format "dst1 nexthop1 dst2 ..."
#
# Each destination gets its classful mask from ABCMask. A trailing
# destination without a nexthop is ignored.
#
# BEWARE: this feature of DHCP is obsolete, because it does not
# support subnetting.
#
X-StaticRouteList() {
	local op=$1
	local lst="$2"

	LOG X-StaticRouteList $*

	if [ "$lst" ]; then
		# "--" makes set replace the positional parameters even when
		# $lst splits into nothing (a bare "set" would print every shell
		# variable and leave the caller's arguments in place) and keeps
		# a leading "-" in the list from being taken as a shell option.
		set -- $lst
		while [ $# -gt 1 ]; do
			route $op -net $1 netmask $(ABCMask "$1") gw $2
			shift 2
		done
	fi
}
# Create static routes.
# arg: $1 = routes list in format "dst1 nexthop1 dst2 ..."
#
# Logs the call and delegates to X-StaticRouteList with operation "add".
#
AddStaticRouteList() {
LOG AddStaticRouteList $*
X-StaticRouteList add "$1"
}
# Delete static routes.
# arg: $1 = routes list in format "dst1 nexthop1 dst2 ..."
#
# Logs the call and delegates to X-StaticRouteList with operation "del".
#
DelStaticRouteList() {
LOG DelStaticRouteList $*
X-StaticRouteList del "$1"
}
# Broadcast unsolicited ARP to update neighbours' caches.
# args: $1 = interface
#       $2 = address
#
# Does nothing when /sbin/arping is not installed. Both arping runs are
# started in the background; the second one is delayed by two seconds.
#
UnsolicitedARP() {
	[ -f /sbin/arping ] || return 0

	/sbin/arping -A -c 1 -I "$1" "$2" &
	(sleep 2 ; /sbin/arping -U -c 1 -I "$1" "$2" ) &
}
# Duplicate address detection.
# args: $1 = interface
#       $2 = test address
# returns: 0, if DAD succeeded (the exit status of arping -D), and also 0
#          when /sbin/arping is not installed and no check can be made.
DAD() {
	[ -f /sbin/arping ] || return 0

	/sbin/arping -c 2 -w 3 -D -I "$1" "$2"
}
# Setup resolver.
# args: NO
# domain and nameserver list are passed in the global variables
# new_domain_name, new_domain_name_servers, old_domain_name and
# old_domain_name_servers.
#
# A candidate configuration is written to /etc/resolv.conf.dhcp and then
# moved over /etc/resolv.conf.
#
# NOTE: we try to be careful and not to break user supplied resolv.conf.
# The script mangles it only if it has the dhcp magic signature on its
# first line, and leaves it alone when the DHCP-supplied values did not
# change.
#
UpdateDNS() {
	local nameserver
	local idstring="#### Generated by DHCPCD"

	LOG UpdateDNS $*

	# Nothing was offered by the server: keep the current configuration.
	if [ "$new_domain_name" = "" ] && [ "$new_domain_name_servers" = "" ]; then
		return 0
	fi

	echo "$idstring" > /etc/resolv.conf.dhcp
	if [ "$new_domain_name" ]; then
		echo search $new_domain_name >> /etc/resolv.conf.dhcp
	fi
	echo options ndots:1 >> /etc/resolv.conf.dhcp

	if [ "$new_domain_name_servers" ]; then
		for nameserver in $new_domain_name_servers; do
			echo nameserver $nameserver >> /etc/resolv.conf.dhcp
		done
	else
		echo nameserver 127.0.0.1 >> /etc/resolv.conf.dhcp
	fi

	if [ -f /etc/resolv.conf ]; then
		# A file without our signature belongs to the user.
		if [ "`head -n 1 /etc/resolv.conf`" != "$idstring" ]; then
			return 0
		fi
		# Both halves of this test must belong to one condition. It was
		# previously split over two lines without a continuation, so it
		# never succeeded and the second line was run as a command.
		if [ "$old_domain_name" = "$new_domain_name" ] &&
		   [ "$new_domain_name_servers" = "$old_domain_name_servers" ]; then
			return 0
		fi
	fi

	mv /etc/resolv.conf.dhcp /etc/resolv.conf
}
# Main dispatcher: act on the reason for which the script was invoked.
# The lease parameters are read from the old_*, new_*, alias_* and check_*
# variables and from $interface. The primary address lives on the alias
# $interface:dhcp, the "alias" address on $interface:dhcp1.
case $reason in
# NBI: always reported as a failure.
NBI)
exit 1
;;
# MEDIUM: nothing to do.
MEDIUM)
exit 0
;;
# PREINIT: take both alias interfaces down and bring $interface:dhcp up.
PREINIT)
ifconfig $interface:dhcp down
ifconfig $interface:dhcp1 down
if [ -d /proc/sys/net/ipv4/conf/$interface ]; then
# Briefly assign a dummy host address to the alias and remove it again.
ifconfig $interface:dhcp 10.10.10.10 netmask 255.255.255.255
ifconfig $interface:dhcp down
# NOTE(review): this test repeats the enclosing one - confirm the intent.
if [ -d /proc/sys/net/ipv4/conf/$interface ]; then
LOG The interface $interface already configured.
fi
fi
ifconfig $interface:dhcp up
exit 0
;;
# ARPSEND: nothing to do.
ARPSEND)
exit 0
;;
# ARPCHECK: succeed only if duplicate address detection passes for
# $check_ip_address.
ARPCHECK)
if DAD "$interface" "$check_ip_address" ; then
exit 0
fi
exit 1
;;
# A lease was obtained or refreshed.
BOUND|RENEW|REBIND|REBOOT)
# Remove the alias address when it differs from the old address.
if [ "$old_ip_address" -a "$alias_ip_address" -a \
"$alias_ip_address" != "$old_ip_address" ]; then
DelINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
fi
# The address changed: remove the old address and its routes.
if [ "$old_ip_address" -a "$old_ip_address" != "$new_ip_address" ]; then
DelINETAddr "$interface" "$old_ip_address" "$old_subnet_mask" "$old_broadcast_address" dhcp
DelDefaultRoutes "$old_routers"
DelStaticRouteList "$old_static_routes"
fi
# Install the new address and routes when there was no old address, when
# the address changed, or unconditionally for BOUND and REBOOT.
if [ "$old_ip_address" = "" -o "$old_ip_address" != "$new_ip_address" -o \
"$reason" = "BOUND" -o "$reason" = "REBOOT" ]; then
AddINETAddr "$interface" "$new_ip_address" "$new_subnet_mask" "$new_broadcast_address" dhcp
AddStaticRouteList "$new_static_routes"
AddDefaultRoutes "$new_routers"
UnsolicitedARP "$interface" "$new_ip_address"
fi
# (Re)install the alias address unless it equals the new address.
if [ "$new_ip_address" != "$alias_ip_address" -a "$alias_ip_address" ]; then
AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
fi
UpdateDNS
exit 0
;;
# The lease is gone: remove the old address and routes, keep the alias.
EXPIRE|FAIL)
if [ "$alias_ip_address" ]; then
DelINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
fi
if [ "$old_ip_address" ]; then
DelINETAddr "$interface" "$old_ip_address" "$old_subnet_mask" "$old_broadcast_address" dhcp
DelDefaultRoutes "$old_routers"
DelStaticRouteList "$old_static_routes"
fi
if [ "$alias_ip_address" ]; then
AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
fi
exit 0
;;
# TIMEOUT: try the offered address; keep it only if DAD passes and at
# least one of the new routers answers.
TIMEOUT)
if [ "$alias_ip_address" ]; then
DelINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
fi
# Seems, <null address> means, that no more old leases found.
# Or does it mean bug in dhcpcd? 8) Fail for now.
if [ "$new_ip_address" = "<null address>" ]; then
if [ "$old_ip_address" ]; then
DelINETAddr "$interface" "$old_ip_address" "$old_subnet_mask" "$old_broadcast_address" dhcp
fi
if [ "$alias_ip_address" ]; then
AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
fi
exit 1
fi
if DAD "$interface" "$new_ip_address" ; then
AddINETAddr "$interface" "$new_ip_address" "$new_subnet_mask" "$new_broadcast_address" dhcp
UnsolicitedARP "$interface" "$new_ip_address"
if [ "$alias_ip_address" -a "$alias_ip_address" != "$new_ip_address" ]; then
AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
UnsolicitedARP "$interface" "$alias_ip_address"
fi
if CheckRouterList "$new_routers" ; then
AddStaticRouteList "$new_static_routes"
UpdateDNS
exit 0
fi
fi
# The address could not be used: undo the configuration and fail.
# NOTE(review): the routes removed here come from the old_* variables
# although the address removed is the new one - confirm this is intended.
DelINETAddr "$interface" "$new_ip_address" "$new_subnet_mask" "$new_broadcast_address" dhcp
DelDefaultRoutes "$old_routers"
DelStaticRouteList "$old_static_routes"
if [ "$alias_ip_address" ]; then
AddINETAddr "$interface" "$alias_ip_address" "$alias_subnet_mask" "$alias_broadcast_address" dhcp1
fi
exit 1
;;
esac
# Any other reason is accepted silently.
exit 0

View File

@ -0,0 +1,68 @@
#! /bin/sh -x
#
# Sample script on using the ingress capabilities.
#
# This script only tags (fwmarks) packets on the ingress interface using
# ipchains; the marks are then used for fast classification and DSCP
# re-marking on the egress interface.
#
# Paths to the various utilities; change them to reflect your system.
#
IPROUTE=/root/DS-6-beta/iproute2-990530-dsing
TC=$IPROUTE/tc/tc
IP=$IPROUTE/ip/ip
IPCHAINS=/root/DS-6-beta/ipchains-1.3.9/ipchains
# Name of the ingress interface.
INDEV=eth2
# Egress interface, stored as the "dev <name>" argument pair; it is always
# expanded unquoted below so that it splits into two words.
EGDEV="dev eth1"
#
# Marking rules (the marks are consumed by the fw filters on the egress):
#   - packets from subnet 10.2.0.0/24 get mark 3 (rule appended first)
#   - packets from host 10.2.0.24 arriving on $INDEV get mark 1
#   - packets from host 10.2.0.3 arriving on $INDEV get mark 2
#
# The subnet rule is written with the network address 10.2.0.0/24 so that it
# reads the same as the description above and as the sibling example script.
############################################################
$IPCHAINS -A input -s 10.2.0.0/24 -m 3
$IPCHAINS -A input -i $INDEV -s 10.2.0.24 -m 1
$IPCHAINS -A input -i $INDEV -s 10.2.0.3 -m 2
######################## Egress side ########################
# Attach a dsmark qdisc as the root of the egress device.
${TC} qdisc add ${EGDEV} handle 1:0 root dsmark indices 64 set_tc_index
#
# DS-field value written for each dsmark class.
#
# class 1:1 becomes EF
${TC} class change ${EGDEV} classid 1:1 dsmark mask 0x3 value 0xb8
# class 1:2 becomes AF11
${TC} class change ${EGDEV} classid 1:2 dsmark mask 0x3 value 0x28
# class 1:3 becomes AF21
${TC} class change ${EGDEV} classid 1:3 dsmark mask 0x3 value 0x48
#
# Class mapping: the fw classifier sends firewall mark N to class 1:N.
#
${TC} filter add ${EGDEV} parent 1:0 protocol ip prio 4 handle 1 fw classid 1:1
${TC} filter add ${EGDEV} parent 1:0 protocol ip prio 4 handle 2 fw classid 1:2
${TC} filter add ${EGDEV} parent 1:0 protocol ip prio 4 handle 3 fw classid 1:3
#
# Report the resulting configuration: ingress device first, then egress.
#
echo "---- qdisc parameters Ingress ----------"
${TC} qdisc ls dev ${INDEV}
echo "---- Class parameters Ingress ----------"
${TC} class ls dev ${INDEV}
echo "---- filter parameters Ingress ----------"
${TC} filter ls dev ${INDEV} parent 1:0
echo "---- qdisc parameters Egress ----------"
${TC} qdisc ls ${EGDEV}
echo "---- Class parameters Egress ----------"
${TC} class ls ${EGDEV}
echo "---- filter parameters Egress ----------"
${TC} filter ls ${EGDEV} parent 1:0

View File

@ -0,0 +1,87 @@
#! /bin/sh -x
#
# Sample script on using the ingress capabilities.
#
# ipchains sets the fwmark on packets arriving on the ingress interface.
# The mark is used first for policing on the ingress interface and then
# for fast classification and re-marking on the egress interface.
#
# Paths to the various utilities; change them to reflect your system.
#
IPROUTE="/root/DS-6-beta/iproute2-990530-dsing"
TC="${IPROUTE}/tc/tc"
IP="${IPROUTE}/ip/ip"
IPCHAINS="/root/DS-6-beta/ipchains-1.3.9/ipchains"
INDEV="eth2"
# Kept as the two-word "dev <name>" pair; always expanded unquoted.
EGDEV="dev eth1"
#
# Marking rules (the marks are used again on the egress side):
#   - packets from subnet 10.2.0.0/24 get mark 3 (rule appended first)
#   - packets from host 10.2.0.24 arriving on the ingress device get mark 1
#   - packets from host 10.2.0.3 arriving on the ingress device get mark 2
############################################################
${IPCHAINS} -A input -s 10.2.0.0/24 -m 3
${IPCHAINS} -A input -i ${INDEV} -s 10.2.0.24 -m 1
${IPCHAINS} -A input -i ${INDEV} -s 10.2.0.3 -m 2
############################################################
#
# Install the ingress qdisc on the ingress interface.
############################################################
${TC} qdisc add dev ${INDEV} handle ffff: ingress
############################################################
#
# Attach a fw classifier to the ingress qdisc that polices everything
# ipchains marked with value 3 (the rest of the subnet, i.e. not mark 1
# or 2) so that it does not go beyond 1.5 Mbps.
# Assuming a maximum packet size of 1.5 KB, "burst 90k" allows a burst of
# about 60 packets in the long run and "mtu 9k" up to about 6 packets in
# the short run.
############################################################
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 50 handle 3 fw \
    police rate 1500kbit burst 90k mtu 9k drop flowid :1
############################################################
######################## Egress side ########################
# Attach a dsmark qdisc as the root of the egress device.
${TC} qdisc add ${EGDEV} handle 1:0 root dsmark indices 64
#
# DS-field value written for each dsmark class.
#
${TC} class change ${EGDEV} classid 1:1 dsmark mask 0x3 value 0xb8
${TC} class change ${EGDEV} classid 1:2 dsmark mask 0x3 value 0x28
${TC} class change ${EGDEV} classid 1:3 dsmark mask 0x3 value 0x48
#
# Class mapping: the fw classifier sends firewall mark N to class 1:N.
#
${TC} filter add ${EGDEV} parent 1:0 protocol ip prio 4 handle 1 fw classid 1:1
${TC} filter add ${EGDEV} parent 1:0 protocol ip prio 4 handle 2 fw classid 1:2
${TC} filter add ${EGDEV} parent 1:0 protocol ip prio 4 handle 3 fw classid 1:3
#
# Report the resulting configuration: ingress device first, then egress.
#
echo "---- qdisc parameters Ingress ----------"
${TC} qdisc ls dev ${INDEV}
echo "---- Class parameters Ingress ----------"
${TC} class ls dev ${INDEV}
echo "---- filter parameters Ingress ----------"
${TC} filter ls dev ${INDEV} parent ffff:
echo "---- qdisc parameters Egress ----------"
${TC} qdisc ls ${EGDEV}
echo "---- Class parameters Egress ----------"
${TC} class ls ${EGDEV}
echo "---- filter parameters Egress ----------"
${TC} filter ls ${EGDEV} parent 1:0
#
# To delete the ingress qdisc afterwards, uncomment:
#$TC qdisc del dev $INDEV ingress

View File

@ -0,0 +1,170 @@
#! /bin/sh -x
#
# Sample script on using the ingress capabilities with the u32 classifier.
#
# Packets are metered on the ingress interface and tagged with a tcindex
# value; that value is used for fast classification and re-marking on the
# egress interface.
# This is an example of a color aware mode marker with PIR configured,
# based on draft-wahjak-mcm-00.txt (section 3.1).
#
# The colors are defined using the Diffserv fields.
#
# Paths to the various utilities; change them to reflect your system.
#
IPROUTE="/usr/src/iproute2-current"
TC="${IPROUTE}/tc/tc"
IP="${IPROUTE}/ip/ip"
INDEV="eth0"
# Kept as the two-word "dev <name>" pair; always expanded unquoted.
EGDEV="dev eth1"
# Committed information rates.
CIR1="1500kbit"
CIR2="1000kbit"
# Committed burst sizes: about 60 MTU-sized packets each.
CBS1="90k"
CBS2="90k"
############################################################
#
# Install the ingress qdisc on the ingress interface.
${TC} qdisc add dev ${INDEV} handle ffff: ingress
############################################################
#
# Create the u32 filter table (handle 1:, divisor 1).
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 4 handle 1: u32 divisor 1
############################################################
# The meters. They are shared between filters: every filter that names
# the same "index" refers to the same policer.
# Each string keeps its leading and trailing blank; the filter commands
# splice these variables in unquoted.
meter1=" police index 1 rate ${CIR1} burst ${CBS1} "
meter2=" police index 2 rate ${CIR2} burst ${CBS1} "
meter3=" police index 3 rate ${CIR2} burst ${CBS2} "
meter4=" police index 4 rate ${CIR1} burst ${CBS2} "
meter5=" police index 5 rate ${CIR1} burst ${CBS2} "
# Every packet is tagged with a tcindex value that the egress side uses:
# tcindex 1 maps to AF41, 2 -> AF42, 3 -> AF43, 4 -> BE.
#
# *********************** AF41 ***************************
# AF41 (DSCP 0x22, i.e. tos 0x88) is passed on with tcindex 1 as long as
# it does not exceed its CIR/CBS (policer 1).
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 4 u32 \
    match ip tos 0x88 0xfc ${meter1} continue flowid :1
#
# If it exceeds the above but not the extra rate/burst of policer 2, it
# gets tcindex 2.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 5 u32 \
    match ip tos 0x88 0xfc ${meter2} continue flowid :2
#
# If it exceeds that as well but not policer 3, it gets tcindex 3;
# anything beyond policer 3 is dropped.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 6 u32 \
    match ip tos 0x88 0xfc ${meter3} drop flowid :3
#
# *********************** AF42 ***************************
# AF42 (DSCP 0x24, i.e. tos 0x90) is passed on with tcindex 2 as long as
# it does not exceed its CIR/CBS. Policer 2 is used; note that it is
# shared with AF41.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 5 u32 \
    match ip tos 0x90 0xfc ${meter2} continue flowid :2
#
# If it exceeds the above but not policer 3, it gets tcindex 3; anything
# beyond policer 3 is dropped.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 6 u32 \
    match ip tos 0x90 0xfc ${meter3} drop flowid :3
#
# *********************** AF43 ***************************
# AF43 (DSCP 0x26, i.e. tos 0x98) is passed on with tcindex 3 as long as
# it does not exceed its CIR/CBS. Policer 3 is used; note that it is
# shared with AF41 and AF42.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 6 u32 \
    match ip tos 0x98 0xfc ${meter3} drop flowid :3
#
# *********************** BE ***************************
# Anything else (not AF4*) is dropped if it exceeds the rate of meter4
# and otherwise goes to BE by default.
# Note that the BE class is also used by the AF4* traffic in the worst
# case.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 7 u32 \
    match ip src 0/0 ${meter4} drop flowid :4
######################## Egress side ########################
# Attach a dsmark qdisc as the root of the egress device.
#
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
#
# DS-field value written for each dsmark class. One class per tcindex
# value set on the ingress: 1 -> AF41, 2 -> AF42, 3 -> AF43, 4 -> BE.
# With mask 0x3 only the two low-order bits of the original DS byte are
# kept; the DSCP is replaced by "value".
#
# AF41 (0x88 is the DSCP 0x22 shifted to the left by two bits)
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 value 0x88
# AF42
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 value 0x90
# AF43
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 value 0x98
# BE -- must be class 1:4 (the class the "handle 4" filter below selects);
# configuring 1:3 here would overwrite the AF43 value set just above.
$TC class change $EGDEV classid 1:4 dsmark mask 0x3 value 0x0
#
# The class mapping: tcindex N selects class 1:N.
#
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 handle 1 tcindex classid 1:1
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 handle 2 tcindex classid 1:2
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 handle 3 tcindex classid 1:3
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 handle 4 tcindex classid 1:4
#
# Report the resulting configuration: ingress device first, then egress.
#
echo "---- qdisc parameters Ingress ----------"
$TC qdisc ls dev $INDEV
echo "---- Class parameters Ingress ----------"
$TC class ls dev $INDEV
echo "---- filter parameters Ingress ----------"
$TC filter ls dev $INDEV parent ffff:
echo "---- qdisc parameters Egress ----------"
$TC qdisc ls $EGDEV
echo "---- Class parameters Egress ----------"
$TC class ls $EGDEV
echo "---- filter parameters Egress ----------"
$TC filter ls $EGDEV parent 1:0
#
# To delete the ingress qdisc afterwards, uncomment:
#$TC qdisc del dev $INDEV ingress

View File

@ -0,0 +1,132 @@
#! /bin/sh -x
#
# Sample script on using the ingress capabilities.
#
# ipchains sets fwmark tags; packets are then metered on the ingress
# interface and the result is used for fast classification and re-marking
# on the egress interface.
# This is an example of a color blind mode marker with no PIR configured,
# based on draft-wahjak-mcm-00.txt (section 3.1).
#
# Paths to the various utilities; change them to reflect your system.
#
IPROUTE="/root/DS-6-beta/iproute2-990530-dsing"
TC="${IPROUTE}/tc/tc"
IP="${IPROUTE}/ip/ip"
IPCHAINS="/root/DS-6-beta/ipchains-1.3.9/ipchains"
INDEV="eth2"
# Kept as the two-word "dev <name>" pair; always expanded unquoted.
EGDEV="dev eth1"
# Committed information rates.
CIR1="1500kbit"
CIR2="1000kbit"
# Committed burst sizes: about 60 MTU-sized packets each.
CBS1="90k"
CBS2="90k"
# The meters (policer argument strings spliced into the filter commands).
meter1="police rate ${CIR1} burst ${CBS1} "
meter2="police rate ${CIR1} burst ${CBS2} "
meter3="police rate ${CIR2} burst ${CBS1} "
meter4="police rate ${CIR2} burst ${CBS2} "
meter5="police rate ${CIR2} burst ${CBS2} "
#
# Marking rules for packets arriving on the ingress device:
#   - packets from any source get fw mark 2 (rule appended first)
#   - packets from subnet 10.2.0.0/24 get fw mark 1
############################################################
${IPCHAINS} -A input -i ${INDEV} -s 0/0 -m 2
${IPCHAINS} -A input -i ${INDEV} -s 10.2.0.0/24 -m 1
#
############################################################
# Install the ingress qdisc on the ingress interface.
${TC} qdisc add dev ${INDEV} handle ffff: ingress
#
############################################################
# Every packet is tagged with a tcindex value that the egress side uses:
# tcindex 1 maps to AF41, 2 -> AF42, 3 -> AF43, 4 -> BE.
#
############################################################
#
# Anything with fw mark 1 is passed on with tcindex 1 as long as it does
# not exceed its allocated rate (CIR/CBS).
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 4 handle 1 fw \
    ${meter1} continue flowid 4:1
#
# If it exceeds the above but not the extra rate/burst below, it gets
# tcindex 2.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 5 handle 1 fw \
    ${meter2} continue flowid 4:2
#
# If it exceeds that as well but not the rule below, it gets tcindex 3;
# anything beyond is dropped.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 6 handle 1 fw \
    ${meter3} drop flowid 4:3
#
# Anything else (fw mark 2, i.e. not from subnet 10.2.0.0/24) is dropped
# if it exceeds the rate of meter5 and otherwise goes to BE by default.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 6 handle 2 fw \
    ${meter5} drop flowid 4:4
######################## Egress side ########################
# Attach a dsmark qdisc as the root of the egress device.
${TC} qdisc add ${EGDEV} handle 1:0 root dsmark indices 64
#
# DS-field value written for each dsmark class. One class per tcindex
# value set on the ingress: 1 -> AF41, 2 -> AF42, 3 -> AF43, 4 -> BE.
# With mask 0x3 only the two low-order bits of the original DS byte are
# kept; the DSCP is replaced by "value".
#
# AF41 (0x88 is the DSCP 0x22 shifted to the left by two bits)
${TC} class change ${EGDEV} classid 1:1 dsmark mask 0x3 value 0x88
# AF42
${TC} class change ${EGDEV} classid 1:2 dsmark mask 0x3 value 0x90
# AF43
${TC} class change ${EGDEV} classid 1:3 dsmark mask 0x3 value 0x98
# BE
${TC} class change ${EGDEV} classid 1:4 dsmark mask 0x3 value 0x0
#
# The class mapping: tcindex N selects class 1:N. (tcindex is used here;
# the fw classifier could easily have been used instead.)
#
${TC} filter add ${EGDEV} parent 1:0 protocol ip prio 1 handle 1 tcindex classid 1:1
${TC} filter add ${EGDEV} parent 1:0 protocol ip prio 1 handle 2 tcindex classid 1:2
${TC} filter add ${EGDEV} parent 1:0 protocol ip prio 1 handle 3 tcindex classid 1:3
${TC} filter add ${EGDEV} parent 1:0 protocol ip prio 1 handle 4 tcindex classid 1:4
#
# Report the resulting configuration: ingress device first, then egress.
#
echo "---- qdisc parameters Ingress ----------"
${TC} qdisc ls dev ${INDEV}
echo "---- Class parameters Ingress ----------"
${TC} class ls dev ${INDEV}
echo "---- filter parameters Ingress ----------"
${TC} filter ls dev ${INDEV} parent ffff:
echo "---- qdisc parameters Egress ----------"
${TC} qdisc ls ${EGDEV}
echo "---- Class parameters Egress ----------"
${TC} class ls ${EGDEV}
echo "---- filter parameters Egress ----------"
${TC} filter ls ${EGDEV} parent 1:0
#
# To delete the ingress qdisc afterwards, uncomment:
#$TC qdisc del dev $INDEV ingress

View File

@ -0,0 +1,198 @@
#! /bin/sh -x
#
# Sample script on using the ingress capabilities with the u32 classifier.
#
# Packets are metered on the ingress interface and tagged with a tcindex
# value; that value is used for fast classification and re-marking on the
# egress interface.
# This is an example of a color aware mode marker with PIR configured,
# based on draft-wahjak-mcm-00.txt (section 3.2).
#
# The colors are defined using the Diffserv fields.
#
# Paths to the various utilities; change them to reflect your system.
#
IPROUTE="/root/DS-6-beta/iproute2-990530-dsing"
TC="${IPROUTE}/tc/tc"
IP="${IPROUTE}/ip/ip"
IPCHAINS="/root/DS-6-beta/ipchains-1.3.9/ipchains"
INDEV="eth2"
# Kept as the two-word "dev <name>" pair; always expanded unquoted.
EGDEV="dev eth1"
# Committed information rates.
CIR1="1000kbit"
CIR2="500kbit"
# The PIR is what is allowed in excess of the CIR.
PIR1="1000kbit"
PIR2="500kbit"
# Committed burst sizes: about 60 MTU-sized packets each.
CBS1="90k"
CBS2="90k"
# Excess burst sizes: about 20 maximum-sized packets each.
EBS1="30k"
EBS2="30k"
# The meters. They are shared between filters: every filter that names
# the same "index" refers to the same policer. meterN polices CIR/CBS,
# meterNa polices the matching PIR/EBS.
# Each string keeps its leading and trailing blank; the filter commands
# splice these variables in unquoted.
meter1=" police index 1 rate ${CIR1} burst ${CBS1} "
meter1a=" police index 2 rate ${PIR1} burst ${EBS1} "
meter2=" police index 3 rate ${CIR2} burst ${CBS1} "
meter2a=" police index 4 rate ${PIR2} burst ${EBS1} "
meter3=" police index 5 rate ${CIR2} burst ${CBS2} "
meter3a=" police index 6 rate ${PIR2} burst ${EBS2} "
meter4=" police index 7 rate ${CIR1} burst ${CBS2} "
############################################################
#
# Install the ingress qdisc on the ingress interface.
${TC} qdisc add dev ${INDEV} handle ffff: ingress
############################################################
#
# Every packet is tagged with a tcindex value that the egress side uses:
# tcindex 1 maps to AF41, 2 -> AF42, 3 -> AF43, 4 -> BE.
#
# *********************** AF41 ***************************
# AF41 (DSCP 0x22, i.e. tos 0x88) is passed on with tcindex 1 as long as
# it does not exceed its CIR/CBS + PIR/EBS (meter1, then meter1a).
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 1 u32 \
    match ip tos 0x88 0xfc ${meter1} continue flowid :1
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 2 u32 \
    match ip tos 0x88 0xfc ${meter1a} continue flowid :1
#
# If it exceeds the above but not the extra rate/burst below (meter2,
# then meter2a), it gets tcindex 2.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 3 u32 \
    match ip tos 0x88 0xfc ${meter2} continue flowid :2
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 4 u32 \
    match ip tos 0x88 0xfc ${meter2a} continue flowid :2
#
# If it exceeds that as well but not the rules below (meter3, then
# meter3a), it gets tcindex 3; anything beyond is dropped.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 5 u32 \
    match ip tos 0x88 0xfc ${meter3} continue flowid :3
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 6 u32 \
    match ip tos 0x88 0xfc ${meter3a} drop flowid :3
#
# *********************** AF42 ***************************
# AF42 (DSCP 0x24, i.e. tos 0x90) is passed on with tcindex 2 as long as
# it does not exceed its CIR/CBS + PIR/EBS. meter2/meter2a are used; note
# that they are shared with AF41.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 8 u32 \
    match ip tos 0x90 0xfc ${meter2} continue flowid :2
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 9 u32 \
    match ip tos 0x90 0xfc ${meter2a} continue flowid :2
#
# If it exceeds the above but not the rules below (meter3, then meter3a),
# it gets tcindex 3; anything beyond is dropped.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 10 u32 \
    match ip tos 0x90 0xfc ${meter3} continue flowid :3
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 11 u32 \
    match ip tos 0x90 0xfc ${meter3a} drop flowid :3
#
# *********************** AF43 ***************************
# AF43 (DSCP 0x26, i.e. tos 0x98) is passed on with tcindex 3 as long as
# it does not exceed its CIR/CBS + PIR/EBS. meter3/meter3a are used; note
# that they are shared with AF41 and AF42.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 13 u32 \
    match ip tos 0x98 0xfc ${meter3} continue flowid :3
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 14 u32 \
    match ip tos 0x98 0xfc ${meter3a} drop flowid :3
#
# *********************** BE ***************************
# Anything else (not AF4*) is dropped if it exceeds the rate of meter4
# and otherwise goes to BE by default.
# Note that the BE class is also used by the AF4* traffic in the worst
# case.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 16 u32 \
    match ip src 0/0 ${meter4} drop flowid :4
######################## Egress side ########################
# Attach a dsmark qdisc as the root of the egress device.
#
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
#
# DS-field value written for each dsmark class. One class per tcindex
# value set on the ingress: 1 -> AF41, 2 -> AF42, 3 -> AF43, 4 -> BE.
# With mask 0x3 only the two low-order bits of the original DS byte are
# kept; the DSCP is replaced by "value".
#
# AF41 (0x88 is the DSCP 0x22 shifted to the left by two bits)
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 value 0x88
# AF42
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 value 0x90
# AF43
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 value 0x98
# BE -- must be class 1:4 (the class the "handle 4" filter below selects);
# configuring 1:3 here would overwrite the AF43 value set just above.
$TC class change $EGDEV classid 1:4 dsmark mask 0x3 value 0x0
#
# The class mapping: tcindex N selects class 1:N.
#
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 handle 1 tcindex classid 1:1
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 handle 2 tcindex classid 1:2
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 handle 3 tcindex classid 1:3
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 handle 4 tcindex classid 1:4
#
# Report the resulting configuration: ingress device first, then egress.
#
echo "---- qdisc parameters Ingress ----------"
$TC qdisc ls dev $INDEV
echo "---- Class parameters Ingress ----------"
$TC class ls dev $INDEV
echo "---- filter parameters Ingress ----------"
$TC filter ls dev $INDEV parent ffff:
echo "---- qdisc parameters Egress ----------"
$TC qdisc ls $EGDEV
echo "---- Class parameters Egress ----------"
$TC class ls $EGDEV
echo "---- filter parameters Egress ----------"
$TC filter ls $EGDEV parent 1:0
#
# To delete the ingress qdisc afterwards, uncomment:
#$TC qdisc del dev $INDEV ingress

View File

@ -0,0 +1,144 @@
#! /bin/sh -x
#
# Sample script on using the ingress capabilities.
#
# ipchains sets fwmark tags; packets are then metered on the ingress
# interface and the result is used for fast classification and re-marking
# on the egress interface.
# This is an example of a color blind mode marker with no PIR configured,
# based on draft-wahjak-mcm-00.txt (section 3.1).
# NOTE(review): this script chains two meters per level (meterN and
# meterNa); confirm that the mode/section named above is the intended one.
#
# Paths to the various utilities; change them to reflect your system.
#
IPROUTE="/root/DS-6-beta/iproute2-990530-dsing"
TC="${IPROUTE}/tc/tc"
IP="${IPROUTE}/ip/ip"
IPCHAINS="/root/DS-6-beta/ipchains-1.3.9/ipchains"
INDEV="eth2"
# Kept as the two-word "dev <name>" pair; always expanded unquoted.
EGDEV="dev eth1"
# Rates used by the meters.
CIR1="1500kbit"
CIR2="500kbit"
# Committed burst sizes: about 60 MTU-sized packets each.
CBS1="90k"
CBS2="90k"
# The meters (policer argument strings spliced into the filter commands).
meter1="police rate ${CIR1} burst ${CBS1} "
meter1a="police rate ${CIR2} burst ${CBS1} "
meter2="police rate ${CIR1} burst ${CBS2} "
meter2a="police rate ${CIR2} burst ${CBS2} "
meter3="police rate ${CIR2} burst ${CBS1} "
meter3a="police rate ${CIR2} burst ${CBS1} "
meter4="police rate ${CIR2} burst ${CBS2} "
meter5="police rate ${CIR1} burst ${CBS2} "
#
# Marking rules for packets arriving on the ingress device:
#   - packets from any source get fw mark 2 (rule appended first)
#   - packets from subnet 10.2.0.0/24 get fw mark 1
############################################################
${IPCHAINS} -A input -i ${INDEV} -s 0/0 -m 2
${IPCHAINS} -A input -i ${INDEV} -s 10.2.0.0/24 -m 1
#
############################################################
# Install the ingress qdisc on the ingress interface.
${TC} qdisc add dev ${INDEV} handle ffff: ingress
#
############################################################
# Every packet is tagged with a tcindex value that the egress side uses:
# tcindex 1 maps to AF41, 2 -> AF42, 3 -> AF43, 4 -> BE.
#
############################################################
#
# Anything with fw mark 1 is passed on with tcindex 1 as long as it does
# not exceed its allocated rate (meter1, then meter1a).
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 1 handle 1 fw \
    ${meter1} continue flowid 4:1
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 2 handle 1 fw \
    ${meter1a} continue flowid 4:1
#
# If it exceeds the above but not the extra rate/burst below (meter2,
# then meter2a), it gets tcindex 2.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 3 handle 1 fw \
    ${meter2} continue flowid 4:2
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 4 handle 1 fw \
    ${meter2a} continue flowid 4:2
#
# If it exceeds that as well but not the rules below (meter3, then
# meter3a), it gets tcindex 3; anything beyond is dropped.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 5 handle 1 fw \
    ${meter3} continue flowid 4:3
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 6 handle 1 fw \
    ${meter3a} drop flowid 4:3
#
# Anything else (fw mark 2, i.e. not from subnet 10.2.0.0/24) is dropped
# if it exceeds the rate of meter5 and otherwise goes to BE by default.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 7 handle 2 fw \
    ${meter5} drop flowid 4:4
######################## Egress side ########################
# Attach a dsmark qdisc as the root of the egress device.
${TC} qdisc add ${EGDEV} handle 1:0 root dsmark indices 64
#
# DS-field value written for each dsmark class. One class per tcindex
# value set on the ingress: 1 -> AF41, 2 -> AF42, 3 -> AF43, 4 -> BE.
# With mask 0x3 only the two low-order bits of the original DS byte are
# kept; the DSCP is replaced by "value".
#
# AF41 (0x88 is the DSCP 0x22 shifted to the left by two bits)
${TC} class change ${EGDEV} classid 1:1 dsmark mask 0x3 value 0x88
# AF42
${TC} class change ${EGDEV} classid 1:2 dsmark mask 0x3 value 0x90
# AF43
${TC} class change ${EGDEV} classid 1:3 dsmark mask 0x3 value 0x98
# BE
${TC} class change ${EGDEV} classid 1:4 dsmark mask 0x3 value 0x0
#
# The class mapping: tcindex N selects class 1:N. (tcindex is used here;
# the fw classifier could easily have been used instead.)
#
${TC} filter add ${EGDEV} parent 1:0 protocol ip prio 1 handle 1 tcindex classid 1:1
${TC} filter add ${EGDEV} parent 1:0 protocol ip prio 1 handle 2 tcindex classid 1:2
${TC} filter add ${EGDEV} parent 1:0 protocol ip prio 1 handle 3 tcindex classid 1:3
${TC} filter add ${EGDEV} parent 1:0 protocol ip prio 1 handle 4 tcindex classid 1:4
#
# Report the resulting configuration: ingress device first, then egress.
#
echo "---- qdisc parameters Ingress ----------"
${TC} qdisc ls dev ${INDEV}
echo "---- Class parameters Ingress ----------"
${TC} class ls dev ${INDEV}
echo "---- filter parameters Ingress ----------"
${TC} filter ls dev ${INDEV} parent ffff:
echo "---- qdisc parameters Egress ----------"
${TC} qdisc ls ${EGDEV}
echo "---- Class parameters Egress ----------"
${TC} class ls ${EGDEV}
echo "---- filter parameters Egress ----------"
${TC} filter ls ${EGDEV} parent 1:0
#
# To delete the ingress qdisc afterwards, uncomment:
#$TC qdisc del dev $INDEV ingress

View File

@ -0,0 +1,145 @@
#! /bin/sh
#
# Sample script on using the ingress capabilities with the u32 classifier.
#
# Packets are metered on the ingress interface and tagged with a tcindex
# value; that value is used for fast classification and re-marking on the
# egress interface.
# This is an example of a color blind mode marker with PIR configured,
# based on draft-wahjak-mcm-00.txt (section 3.2).
#
# Paths to the various utilities; change them to reflect your system.
#
IPROUTE="/root/DS-6-beta/iproute2-990530-dsing"
TC="${IPROUTE}/tc/tc"
IP="${IPROUTE}/ip/ip"
INDEV="eth2"
# Kept as the two-word "dev <name>" pair; always expanded unquoted.
EGDEV="dev eth1"
# Committed information rates.
CIR1="1000kbit"
CIR2="1000kbit"
# The PIR is the excess in addition to the CIR, i.e. if traffic always
# goes up to the PIR the average rate is CIR+PIR.
PIR1="1000kbit"
PIR2="500kbit"
# Committed burst sizes: about 60 MTU-sized packets each.
CBS1="90k"
CBS2="90k"
# Excess burst sizes: about 10 maximum-sized packets each.
EBS1="15k"
EBS2="15k"
# The meters (policer argument strings spliced, unquoted, into the filter
# commands).
# NOTE(review): meter2a uses CBS1 as its burst while meter1a and meter3a
# use an EBS value -- confirm whether EBS1 was intended.
meter1=" police rate ${CIR1} burst ${CBS1} "
meter1a=" police rate ${PIR1} burst ${EBS1} "
meter2=" police rate ${CIR2} burst ${CBS1} "
meter2a="police rate ${PIR2} burst ${CBS1} "
meter3=" police rate ${CIR2} burst ${CBS2} "
meter3a=" police rate ${PIR2} burst ${EBS2} "
meter4=" police rate ${CIR1} burst ${CBS2} "
meter5=" police rate ${CIR1} burst ${CBS2} "
# Install the ingress qdisc on the ingress interface.
############################################################
${TC} qdisc add dev ${INDEV} handle ffff: ingress
############################################################
#
############################################################
# Every packet is tagged with a tcindex value that the egress side uses.
# NOTE: tcindex 1 maps to AF41, 2 -> AF42, 3 -> AF43, 4 -> BE.
#
# Anything from subnet 10.2.0.0/24 is passed on with tcindex 1 as long as
# it does not exceed its CIR/CBS + PIR/EBS (meter1, then meter1a).
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 1 u32 \
    match ip src 10.2.0.0/24 ${meter1} continue flowid :1
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 2 u32 \
    match ip src 10.2.0.0/24 ${meter1a} continue flowid :1
#
# If it exceeds the above but not the extra rate/burst below (meter2,
# then meter2a), it gets tcindex 2.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 3 u32 \
    match ip src 10.2.0.0/24 ${meter2} continue flowid :2
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 4 u32 \
    match ip src 10.2.0.0/24 ${meter2a} continue flowid :2
#
# If it exceeds that as well but not the rules below (meter3, then
# meter3a), it gets tcindex 3; anything beyond is dropped.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 5 u32 \
    match ip src 10.2.0.0/24 ${meter3} continue flowid :3
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 6 u32 \
    match ip src 10.2.0.0/24 ${meter3a} drop flowid :3
#
#
# Anything else (not from subnet 10.2.0.0/24) is dropped if it exceeds
# the rate of meter5 and otherwise goes to BE by default.
#
${TC} filter add dev ${INDEV} parent ffff: protocol ip prio 7 u32 \
    match ip src 0/0 ${meter5} drop flowid :4
######################## Egress side ########################
# Attach a dsmark qdisc as the root of the egress device.
#
$TC qdisc add $EGDEV handle 1:0 root dsmark indices 64
#
# DS-field value written for each dsmark class. One class per tcindex
# value set on the ingress: 1 -> AF41, 2 -> AF42, 3 -> AF43, 4 -> BE.
# With mask 0x3 only the two low-order bits of the original DS byte are
# kept; the DSCP is replaced by "value".
#
# AF41 (0x88 is the DSCP 0x22 shifted to the left by two bits)
$TC class change $EGDEV classid 1:1 dsmark mask 0x3 value 0x88
# AF42
$TC class change $EGDEV classid 1:2 dsmark mask 0x3 value 0x90
# AF43
$TC class change $EGDEV classid 1:3 dsmark mask 0x3 value 0x98
# BE -- must be class 1:4 (the class the "handle 4" filter below selects);
# configuring 1:3 here would overwrite the AF43 value set just above.
$TC class change $EGDEV classid 1:4 dsmark mask 0x3 value 0x0
#
# The class mapping: tcindex N selects class 1:N.
#
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 handle 1 tcindex classid 1:1
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 handle 2 tcindex classid 1:2
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 handle 3 tcindex classid 1:3
$TC filter add $EGDEV parent 1:0 protocol ip prio 1 handle 4 tcindex classid 1:4
#
# Report the resulting configuration: ingress device first, then egress.
#
echo "---- qdisc parameters Ingress ----------"
$TC qdisc ls dev $INDEV
echo "---- Class parameters Ingress ----------"
$TC class ls dev $INDEV
echo "---- filter parameters Ingress ----------"
$TC filter ls dev $INDEV parent ffff:
echo "---- qdisc parameters Egress ----------"
$TC qdisc ls $EGDEV
echo "---- Class parameters Egress ----------"
$TC class ls $EGDEV
echo "---- filter parameters Egress ----------"
$TC filter ls $EGDEV parent 1:0
#
# To delete the ingress qdisc afterwards, uncomment:
#$TC qdisc del dev $INDEV ingress

View File

@ -0,0 +1,98 @@
Note all these are mere examples which can be customized to your needs
AFCBQ
-----
AF PHB built using CBQ, DSMARK,GRED (default in GRIO mode) ,RED for BE
and the tcindex classifier with some algorithmic mapping
EFCBQ
-----
EF PHB built using CBQ (for rate control and prioritization),
DSMARK( to remark DSCPs), tcindex classifier and RED for the BE
traffic.
EFPRIO
------
EF PHB using the PRIO scheduler, Token Bucket to rate control EF,
tcindex classifier, DSMARK to remark, and RED for the BE traffic
EDGE scripts
==============
CB-3(1|2)-(u32/chains)
======================
The major differences are that the classifier is u32 on -u32 extension
and IPchains on the chains extension. CB stands for color Blind
and 31 is for the mode where only a CIR and CBS are defined whereas
32 stands for a mode where a CIR/CBS + PIR/EBS are defined.
Color Blind (CB)
================
For simplicity, we look at one special subnet that we are interested in
to demonstrate the capability. Depending on the metering results, packets
from that subnet are sent to AF4* or dropped; all other traffic goes to BE
or is dropped.
The algorithm overview is as follows:
*classify:
**case: subnet X
----------------
if !exceed meter1 tag as AF41
else
if !exceed meter2 tag as AF42
else
if !exceed meter 3 tag as AF43
else
drop
default case: Any other subnet
-------------------------------
if !exceed meter 5 tag as BE
else
drop
On the Egress side, change the DSCPs of the packets to reflect AF4* and BE
based on the tags from the ingress.
-------------------------------------------------------------
Color Aware
===========
Define some meters with policing and give them IDs, e.g.
meter1=police index 1 rate $CIR1 burst $CBS1
meter2=police index 2 rate $CIR2 burst $CBS2 etc
General overview:
classify based on the DSCPs and use the policer ids to decide tagging
*classify on ingress:
switch (dscp) {
case AF41: /* tos&0xfc == 0x88 */
if (!exceed meter1) {
tag as AF41;
break;
}
case AF42: /* tos&0xfc == 0x90 */
if (!exceed meter2) {
tag as AF42;
break;
}
case AF43: /* tos&0xfc == 0x98 */
if (!exceed meter3) {
tag as AF43;
break;
} else
drop;
default:
if (!exceed meter4) tag as BE;
else drop;
}
On the Egress side mark the proper AF tags

View File

@ -0,0 +1,105 @@
#!/usr/bin/perl
#
#
# AF using CBQ for a single interface eth0
# 4 AF classes using GRED and one BE using RED
# Things you might want to change:
# - the device bandwidth (set at 10Mbits)
# - the bandwidth allocated for each AF class and the BE class
# - the drop probability associated with each AF virtual queue
#
# AF DSCP values used (based on AF draft 04)
# -----------------------------------------
# AF DSCP values
# AF1 1. 0x0a 2. 0x0c 3. 0x0e
# AF2 1. 0x12 2. 0x14 3. 0x16
# AF3 1. 0x1a 2. 0x1c 3. 0x1e
# AF4 1. 0x22 2. 0x24 3. 0x26
#
#
# A simple DSCP-class relationship formula used to generate
# values in the for loop of this script; $drop stands for the
# DP
# $dscp = ($class*8+$drop*2)
#
# if you use GRIO buffer sharing, then GRED priority is set as follows:
# $gprio=$drop+1;
#
$TC = "/usr/src/iproute2-current/tc/tc";
$DEV = "dev lo";
$DEV = "dev eth1";
$DEV = "dev eth0";
# the BE-class number
$beclass = "5";
#GRIO buffer sharing on or off?
$GRIO = "";
$GRIO = "grio";
# The bandwidth of your device
$linerate="10Mbit";
# The BE and AF rates
%rate_table=();
$berate="1500Kbit";
$rate_table{"AF1rate"}="1500Kbit";
$rate_table{"AF2rate"}="1500Kbit";
$rate_table{"AF3rate"}="1500Kbit";
$rate_table{"AF4rate"}="1500Kbit";
#
#
#
print "\n# --- General setup ---\n";
print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex mask 0xfc " .
"shift 2 pass_on\n";
#"shift 2\n";
print "$TC qdisc add $DEV parent 1:0 handle 2:0 cbq bandwidth $linerate ".
"cell 8 avpkt 1000 mpu 64\n";
print "$TC filter add $DEV parent 2:0 protocol ip prio 1 tcindex ".
"mask 0xf0 shift 4 pass_on\n";
for $class (1..4) {
print "\n# --- AF Class $class specific setup---\n";
$AFrate=sprintf("AF%drate",$class);
print "$TC class add $DEV parent 2:0 classid 2:$class cbq ".
"bandwidth $linerate rate $rate_table{$AFrate} avpkt 1000 prio ".
(6-$class)." bounded allot 1514 weight 1 maxburst 21\n";
print "$TC filter add $DEV parent 2:0 protocol ip prio 1 handle $class ".
"tcindex classid 2:$class\n";
print "$TC qdisc add $DEV parent 2:$class gred setup DPs 3 default 2 ".
"$GRIO\n";
#
# per DP setup
#
for $drop (1..3) {
print "\n# --- AF Class $class DP $drop---\n";
$dscp = $class*8+$drop*2;
$tcindex = sprintf("1%x%x",$class,$drop);
print "$TC filter add $DEV parent 1:0 protocol ip prio 1 ".
"handle $dscp tcindex classid 1:$tcindex\n";
$prob = $drop*0.02;
if ($GRIO) {
$gprio = $drop+1;
print "$TC qdisc change $DEV parent 2:$class gred limit 60KB min 15KB ".
"max 45KB burst 20 avpkt 1000 bandwidth $linerate DP $drop ".
"probability $prob ".
"prio $gprio\n";
} else {
print "$TC qdisc change $DEV parent 2:$class gred limit 60KB min 15KB ".
"max 45KB burst 20 avpkt 1000 bandwidth $linerate DP $drop ".
"probability $prob \n";
}
}
}
#
#
# --- BE (best effort) queue setup ---
# Everything below uses $beclass for the BE class id instead of a literal 5,
# so changing $beclass at the top of the script keeps the class, its filter
# and its RED qdisc consistent.  With the default $beclass = "5" the emitted
# commands are unchanged.
print "\n#------BE Queue setup------\n";
# Lower-priority (prio 2) filter with mask 0 / handle 0 on the dsmark qdisc.
print "$TC filter add $DEV parent 1:0 protocol ip prio 2 ".
"handle 0 tcindex mask 0 classid 1:1\n";
# CBQ class for BE traffic, bounded to $berate.
print "$TC class add $DEV parent 2:0 classid 2:$beclass cbq ".
"bandwidth $linerate rate $berate avpkt 1000 prio 6 " .
"bounded allot 1514 weight 1 maxburst 21 \n";
# Map tcindex handle 0 on the CBQ qdisc to the BE class.
print "$TC filter add $DEV parent 2:0 protocol ip prio 1 handle 0 tcindex ".
"classid 2:$beclass\n";
# RED queue attached to the BE class.
print "$TC qdisc add $DEV parent 2:$beclass red limit 60KB min 15KB max 45KB ".
"burst 20 avpkt 1000 bandwidth $linerate probability 0.4\n";

View File

@ -0,0 +1,25 @@
#!/usr/bin/perl
#
# Prints (does not execute) the tc commands for an EF/BE setup built on a
# prio qdisc: a dsmark root qdisc, a prio qdisc below it, a TBF for the EF
# class (2:1) and a RED queue for the BE class (2:2).
# Pipe the output to a shell to apply it.
#
# Things you might want to change: $TC (path to the tc binary), $DEV,
# $efrate and $MTU.
$TC = "/root/DS-6-beta/iproute2-990530-dsing/tc/tc";
$DEV = "dev eth1";
$efrate="1.5Mbit";
$MTU="1.5kB";
# Root dsmark qdisc plus a tcindex filter (mask 0xfc, shift 2) on it.
print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex ".
"mask 0xfc shift 2\n";
# prio qdisc 2:0 hangs below the dsmark qdisc.
print "$TC qdisc add $DEV parent 1:0 handle 2:0 prio\n";
#
# EF class: Maximum about one MTU sized packet allowed on the queue
#
print "$TC qdisc add $DEV parent 2:1 tbf rate $efrate burst $MTU limit 1.6kB\n";
# tcindex handle 0x2e is sent to the EF class 2:1.
print "$TC filter add $DEV parent 2:0 protocol ip prio 1 ".
"handle 0x2e tcindex classid 2:1 pass_on\n";
#
# BE class
#
print "#BE class(2:2) \n";
print "$TC qdisc add $DEV parent 2:2 red limit 60KB ".
"min 15KB max 45KB burst 20 avpkt 1000 bandwidth 10Mbit ".
"probability 0.4\n";
#
# Lower-priority (prio 2) filter with mask 0 / handle 0 -> BE class 2:2.
print "$TC filter add $DEV parent 2:0 protocol ip prio 2 ".
"handle 0 tcindex mask 0 classid 2:2 pass_on\n";

View File

@ -0,0 +1,31 @@
#!/usr/bin/perl
#
# Prints (does not execute) the tc commands for an EF/BE setup built on CBQ:
# a dsmark root qdisc, a CBQ qdisc below it, an EF class (2:1) with a small
# packet fifo and a BE class (2:2) with a RED queue.
# Pipe the output to a shell to apply it.
# Note: the 10Mbit device bandwidth is written literally into each command.
#
$TC = "/root/DS-6-beta/iproute2-990530-dsing/tc/tc";
$DEV = "dev eth1";
# Root dsmark qdisc plus a tcindex filter (mask 0xfc, shift 2) on it.
print "$TC qdisc add $DEV handle 1:0 root dsmark indices 64 set_tc_index\n";
print "$TC filter add $DEV parent 1:0 protocol ip prio 1 tcindex ".
"mask 0xfc shift 2\n";
# CBQ qdisc 2:0 hangs below the dsmark qdisc.
print "$TC qdisc add $DEV parent 1:0 handle 2:0 cbq bandwidth ".
"10Mbit cell 8 avpkt 1000 mpu 64\n";
#
# EF class
#
# Bounded and isolated CBQ class at 1500Kbit with the highest priority (prio 1).
print "$TC class add $DEV parent 2:0 classid 2:1 cbq bandwidth ".
"10Mbit rate 1500Kbit avpkt 1000 prio 1 bounded isolated ".
"allot 1514 weight 1 maxburst 10 \n";
# packet fifo for EF?
print "$TC qdisc add $DEV parent 2:1 pfifo limit 5\n";
# tcindex handle 0x2e is sent to the EF class 2:1.
print "$TC filter add $DEV parent 2:0 protocol ip prio 1 ".
"handle 0x2e tcindex classid 2:1 pass_on\n";
#
# BE class
#
print "#BE class(2:2) \n";
# Unbounded class (may borrow) that is also the default via defmap 0xffff.
print "$TC class add $DEV parent 2:0 classid 2:2 cbq bandwidth ".
"10Mbit rate 5Mbit avpkt 1000 prio 7 allot 1514 weight 1 ".
"maxburst 21 borrow split 2:0 defmap 0xffff \n";
print "$TC qdisc add $DEV parent 2:2 red limit 60KB ".
"min 15KB max 45KB burst 20 avpkt 1000 bandwidth 10Mbit ".
"probability 0.4\n";
# Lower-priority (prio 2) filter with mask 0 / handle 0 -> BE class 2:2.
print "$TC filter add $DEV parent 2:0 protocol ip prio 2 ".
"handle 0 tcindex mask 0 classid 2:2 pass_on\n";

View File

@ -0,0 +1,125 @@
These were the tests done to validate the Diffserv scripts.
This document will be updated continuously. If you do more
thorough validation testing please post the details to the
diffserv mailing list.
Nevertheless, these tests should serve for basic validation.
AFCBQ, EFCBQ, EFPRIO
----------------------
generate all possible DSCPs and observe that they
get sent to the proper classes. In the case of AF also
to the correct Virtual Queues.
Edge1
-----
generate TOS values 0x0,0x10,0xbb each with IP addresses
10.2.0.24 (mark 1), 10.2.0.3 (mark2) and 10.2.0.30 (mark 3)
and observe that they get marked as expected.
Edge2
-----
-Repeat the tests in Edge1
-ftp with data direction from 10.2.0.2
*observe that the metering/policing works correctly (and the marking
as well). In this case the mark used will be 3
Edge31-cb-chains
----------------
-ftp with data direction from 10.2.0.2
*observe that the metering/policing works correctly (and the marking
as well). In this case the mark used will be 1.
Metering: The data throughput should not exceed 2*CIR1 + 2*CIR2
which is roughly: 5mbps
Marking: there should be a variation of marked packets:
AF41(TOS=0x88) AF42(0x90) AF43(0x98) and BE (0x0)
More tests required to see the interaction of several sources (other
than subnet 10.2.0.0/24).
Edge31-ca-u32
--------------
Generate data using modified tcpblast from 10.2.0.2 (behind eth2) to the
discard port of 10.1.0.2 (behind eth1)
1) generate with src tos = 0x88
Metering: Allocated throughput should not exceed 2*CIR1 + 2*CIR2
approximately 5mbps
Marking: Should vary between 0x88,0x90,0x98 and 0x0
2) generate with src tos = 0x90
Metering: Allocated throughput should not exceed CIR1 + 2*CIR2
approximately 3.5mbps
Marking: Should vary between 0x90,0x98 and 0x0
3) generate with src tos = 0x98
Metering: Allocated throughput should not exceed CIR1 + CIR2
approximately 2.5mbps
Marking: Should vary between 0x98 and 0x0
4) generate with src tos any other than the above
Metering: Allocated throughput should not exceed CIR1
approximately 1.5mbps
Marking: Should be consistent at 0x0
TODO: Testing on how each color shares when all 4 types of packets
are going through the edge device
Edge32-cb-u32, Edge32-cb-chains
-------------------------------
-ftp with data direction from 10.2.0.2
*observe that the metering/policing works correctly (and the marking
as well).
Metering:
The data throughput should not exceed 2*CIR1 + 2*CIR2
+ 2*PIR2 + PIR1 for u32 which is roughly: 6mbps
The data throughput should not exceed 2*CIR1 + 5*CIR2
for chains which is roughly: 6mbps
Marking: there should be a variation of marked packets:
AF41(TOS=0x88) AF42(0x90) AF43(0x98) and BE (0x0)
TODO:
-More tests required to see the interaction of several sources (other
than subnet 10.2.0.0/24).
-More tests needed to capture stats on how many times the CIR was exceeded
but the data was not remarked etc.
Edge32-ca-u32
--------------
Generate data using modified tcpblast from 10.2.0.2 (behind eth2) to the
discard port of 10.1.0.2 (behind eth1)
1) generate with src tos = 0x88
Metering: Allocated throughput should not exceed 2*CIR1 + 2*CIR2
+PIR1 -- approximately 4mbps
Marking: Should vary between 0x88,0x90,0x98 and 0x0
2) generate with src tos = 0x90
Metering: Allocated throughput should not exceed CIR1 + 2*CIR2
+ 2* PIR2 approximately 3mbps
Marking: Should vary between 0x90,0x98 and 0x0
3) generate with src tos = 0x98
Metering: Allocated throughput should not exceed PIR1+ CIR1 + CIR2
approximately 2.5mbps
Marking: Should vary between 0x98 and 0x0
4) generate with src tos any other than the above
Metering: Allocated throughput should not exceed CIR1
approximately 1mbps
Marking: Should be consistent at 0x0
TODO: Testing on how each color shares when all 4 types of packets
are going through the edge device

View File

@ -0,0 +1,25 @@
/* I cannot describe, how I laughed, when saw, that now sys/socket.h
includes ALL OF networking include files. 8)8)8)
Bravo! Aah, they forgot sockaddr_ll, sockaddr_pkt and sockaddr_nl...
Not a big problem, we only start the way to single UNIVERSAL include file:
#include <GNU-Gnu_is_Not_Unix.h>.
Jokes apart, it is full crap. Removed.
--ANK
*/
/* Union of all sockaddr types (required by IPv6 Basic API). This is
somewhat evil. */
/* 8)8) Well, ipngwg really does strange things sometimes, but
not in such extent! It is removed long ago --ANK
*/
/* Reduced replacement for the "union of all sockaddr types": it only
   overlays the generic struct sockaddr with a 128-byte pad, so the union
   is at least 128 bytes large. */
union sockaddr_union {
	struct sockaddr sa;	/* generic socket address view */
	char __maxsize[128];	/* padding that fixes the minimum size */
};

View File

@ -0,0 +1,10 @@
/* Mess with various libdb in various glibcs is something...
* Crooked hands of hackers can result in amazing results making
* incompatibility at all the levels without any reasons.
*
* The simplest trick which I was able to invent is to write fake
* db.h including db_185.h and adding -I/usr/include/db3 to CFLAGS.
* Looks ugly but compiles everywhere.
*/
#include <db_185.h>

View File

@ -0,0 +1,20 @@
#ifndef __GLIBC_BUGS_H__
#define __GLIBC_BUGS_H__ 1
/* Compatibility shim that the top-level Makefile force-includes
   (-include ../include-glibc/glibc-bugs.h) when building against glibc. */
#include <features.h>
#include <sys/types.h>
#if defined(__GLIBC__) && __GLIBC__ >= 2
/* Make sure __KERNEL_STRICT_NAMES is defined before <linux/types.h> is
   pulled in.  NOTE(review): presumably this stops the kernel header from
   defining names that clash with glibc's own types -- confirm. */
#ifndef __KERNEL_STRICT_NAMES
#define __KERNEL_STRICT_NAMES 1
#endif
#include <linux/types.h>
/* Port and IPv4 address types expressed with the kernel fixed-width types. */
typedef __u16 in_port_t;
typedef __u32 in_addr_t;
#endif
#endif

View File

@ -0,0 +1,11 @@
#ifndef _NETINET_IN_H
#define _NETINET_IN_H 1
#include "glibc-bugs.h"
#include <sys/socket.h>
#include <sys/types.h>
#include <linux/in.h>
#define SOL_IP 0
#endif /* netinet/in.h */

View File

@ -0,0 +1,9 @@
#ifndef __NETINET_IP_H
#define __NETINET_IP_H 1
#include <glibc-bugs.h>
#include <netinet/in.h>
#include <linux/ip.h>
#endif /* netinet/ip.h */

View File

@ -0,0 +1,270 @@
/* System-specific socket constants and types. Linux version.
Copyright (C) 1991, 92, 94, 95, 96, 97, 98 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Library General Public License for more details.
You should have received a copy of the GNU Library General Public
License along with the GNU C Library; see the file COPYING.LIB. If not,
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
#ifndef _SOCKETBITS_H
#define _SOCKETBITS_H 1
#include <features.h>
#define __need_size_t
#define __need_NULL
#include <stddef.h>
__BEGIN_DECLS
/* Type for length arguments in socket calls. */
#ifndef __socklen_t_defined
typedef unsigned int socklen_t;
# define __socklen_t_defined
#endif
/* Types of sockets. */
enum __socket_type
{
SOCK_STREAM = 1, /* Sequenced, reliable, connection-based
byte streams. */
#define SOCK_STREAM SOCK_STREAM
SOCK_DGRAM = 2, /* Connectionless, unreliable datagrams
of fixed maximum length. */
#define SOCK_DGRAM SOCK_DGRAM
SOCK_RAW = 3, /* Raw protocol interface. */
#define SOCK_RAW SOCK_RAW
SOCK_RDM = 4, /* Reliably-delivered messages. */
#define SOCK_RDM SOCK_RDM
SOCK_SEQPACKET = 5, /* Sequenced, reliable, connection-based,
datagrams of fixed maximum length. */
#define SOCK_SEQPACKET SOCK_SEQPACKET
SOCK_PACKET = 10 /* Linux specific way of getting packets
at the dev level. For writing rarp and
other similar things on the user level. */
#define SOCK_PACKET SOCK_PACKET
};
/* Protocol families. */
#define PF_UNSPEC 0 /* Unspecified. */
#define PF_LOCAL 1 /* Local to host (pipes and file-domain). */
#define PF_UNIX PF_LOCAL /* Old BSD name for PF_LOCAL. */
#define PF_FILE PF_LOCAL /* POSIX name for PF_LOCAL. */
#define PF_INET 2 /* IP protocol family. */
#define PF_AX25 3 /* Amateur Radio AX.25. */
#define PF_IPX 4 /* Novell Internet Protocol. */
#define PF_APPLETALK 5 /* Don't use this. */
#define PF_NETROM 6 /* Amateur radio NetROM. */
#define PF_BRIDGE 7 /* Multiprotocol bridge. */
#define PF_AAL5 8 /* Reserved for Werner's ATM. */
#define PF_X25 9 /* Reserved for X.25 project. */
#define PF_INET6 10 /* IP version 6. */
#define PF_ROSE 11 /* Amateur Radio X.25 PLP */
#define PF_DECnet 12 /* Reserved for DECnet project */
#define PF_NETBEUI 13 /* Reserved for 802.2LLC project*/
#define PF_SECURITY 14 /* Security callback pseudo AF */
#define PF_KEY 15 /* PF_KEY key management API */
#define PF_NETLINK 16
#define PF_ROUTE PF_NETLINK /* Alias to emulate 4.4BSD */
#define PF_PACKET 17 /* Packet family */
#define PF_MAX 32 /* For now.. */
/* Address families. */
/* Each AF_* constant is an alias of the PF_* constant of the same name. */
#define AF_UNSPEC PF_UNSPEC
#define AF_LOCAL PF_LOCAL
#define AF_UNIX PF_UNIX
#define AF_FILE PF_FILE
#define AF_INET PF_INET
#define AF_AX25 PF_AX25
#define AF_IPX PF_IPX
#define AF_APPLETALK PF_APPLETALK
#define AF_NETROM PF_NETROM
#define AF_BRIDGE PF_BRIDGE
#define AF_AAL5 PF_AAL5
#define AF_X25 PF_X25
#define AF_INET6 PF_INET6
#define AF_ROSE PF_ROSE
#define AF_DECnet PF_DECnet
#define AF_NETBEUI PF_NETBEUI
#define AF_SECURITY PF_SECURITY
/* The protocol-family list defines PF_KEY (there is no pseudo_PF_KEY), so
   provide the matching AF_KEY alias. */
#define AF_KEY PF_KEY
/* Legacy spelling kept for old sources.  It used to expand to the
   undefined identifier pseudo_PF_KEY; it now resolves to PF_KEY. */
#define pseudo_AF_KEY PF_KEY
#define AF_NETLINK PF_NETLINK
#define AF_ROUTE PF_ROUTE
#define AF_PACKET PF_PACKET
#define AF_MAX PF_MAX
/* Socket level values. Others are defined in the appropriate headers.
XXX These definitions also should go into the appropriate headers as
far as they are available. */
#define SOL_IPV6 41
#define SOL_ICMPV6 58
#define SOL_RAW 255
#define SOL_AX25 257
#define SOL_ATALK 258
#define SOL_NETROM 259
#define SOL_ROSE 260
#define SOL_DECNET 261
#define SOL_X25 262
/* Maximum queue length specifiable by listen. */
#define SOMAXCONN 128
/* Get the definition of the macro to define the common sockaddr members. */
#if __GLIBC_MINOR__ >= 1
#include <bits/sockaddr.h>
#else
#include <sockaddrcom.h>
#endif
/* Structure describing a generic socket address. */
struct sockaddr
{
__SOCKADDR_COMMON (sa_); /* Common data: address family and length. */
char sa_data[14]; /* Address data. */
};
/* Bits in the FLAGS argument to `send', `recv', et al. */
enum
{
MSG_OOB = 0x01, /* Process out-of-band data. */
#define MSG_OOB MSG_OOB
MSG_PEEK = 0x02, /* Peek at incoming messages. */
#define MSG_PEEK MSG_PEEK
MSG_DONTROUTE = 0x04, /* Don't use local routing. */
#define MSG_DONTROUTE MSG_DONTROUTE
MSG_CTRUNC = 0x08, /* Control data lost before delivery. */
#define MSG_CTRUNC MSG_CTRUNC
MSG_PROXY = 0x10, /* Supply or ask second address. */
#define MSG_PROXY MSG_PROXY
MSG_TRUNC = 0x20,
#define MSG_TRUNC MSG_TRUNC
MSG_DONTWAIT = 0x40,
#define MSG_DONTWAIT MSG_DONTWAIT
MSG_WAITALL = 0x100,
#define MSG_WAITALL MSG_WAITALL
MSG_ERRQUEUE = 0x2000,
#define MSG_ERRQUEUE MSG_ERRQUEUE
MSG_NOSIGNAL = 0x4000,
#define MSG_NOSIGNAL MSG_NOSIGNAL
};
/* Structure describing messages sent by
`sendmsg' and received by `recvmsg'. */
struct msghdr
{
__ptr_t msg_name; /* Address to send to/receive from. */
socklen_t msg_namelen; /* Length of address data. */
struct iovec *msg_iov; /* Vector of data to send/receive into. */
size_t msg_iovlen; /* Number of elements in the vector. */
__ptr_t msg_control; /* Ancillary data (eg BSD filedesc passing). */
size_t msg_controllen; /* Ancillary data buffer length. */
int msg_flags; /* Flags on received message. */
};
/* Structure used for storage of ancillary data object information. */
struct cmsghdr
{
size_t cmsg_len; /* Length of data in cmsg_data plus length
of cmsghdr structure. */
int cmsg_level; /* Originating protocol. */
int cmsg_type; /* Protocol specific type. */
#if !defined __STRICT_ANSI__ && defined __GNUC__ && __GNUC__ >= 2
unsigned char __cmsg_data[0]; /* Ancillary data. */
#endif
};
/* Ancillary data object manipulation macros. */
#if !defined __STRICT_ANSI__ && defined __GNUC__ && __GNUC__ >= 2
# define CMSG_DATA(cmsg) ((cmsg)->__cmsg_data)
#else
# define CMSG_DATA(cmsg) ((unsigned char *) ((struct cmsghdr *) (cmsg) + 1))
#endif
#define CMSG_NXTHDR(mhdr, cmsg) __cmsg_nxthdr (mhdr, cmsg)
#define CMSG_FIRSTHDR(mhdr) \
((size_t) (mhdr)->msg_controllen >= sizeof (struct cmsghdr) \
? (struct cmsghdr *) (mhdr)->msg_control : (struct cmsghdr *) NULL)
#define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) )
#define CMSG_SPACE(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + CMSG_ALIGN(len))
#define CMSG_LEN(len) (CMSG_ALIGN(sizeof(struct cmsghdr)) + (len))
#ifndef _EXTERN_INLINE
# define _EXTERN_INLINE extern __inline
#endif
extern struct cmsghdr *__cmsg_nxthdr __P ((struct msghdr *__mhdr,
struct cmsghdr *__cmsg));
/* Return the ancillary-data header that follows __CMSG inside the control
   buffer described by __MHDR, or NULL when there is none.

   NULL is returned when __CMSG's own length field is smaller than a header,
   or when a complete header would not fit between the next aligned position
   and the end of the control buffer.

   The bound check uses '>' rather than '>=': a trailing header that ends
   exactly at msg_control + msg_controllen (an object with no payload) is
   still entirely inside the buffer and must be returned. */
_EXTERN_INLINE struct cmsghdr *
__cmsg_nxthdr (struct msghdr *__mhdr, struct cmsghdr *__cmsg)
{
unsigned char *__ctl_end;

if ((size_t) __cmsg->cmsg_len < sizeof (struct cmsghdr))
/* The kernel header does this so there may be a reason. */
return NULL;
/* One past the last byte of the control buffer. */
__ctl_end = (unsigned char *) __mhdr->msg_control + __mhdr->msg_controllen;
/* Step over the current object, rounded up to the alignment unit. */
__cmsg = (struct cmsghdr *)
((unsigned char *) __cmsg + CMSG_ALIGN(__cmsg->cmsg_len));
if ((unsigned char *) (__cmsg + 1) > __ctl_end)
/* No more entries. */
return NULL;
return __cmsg;
}
/* Socket level message types. This must match the definitions in
<linux/socket.h>. */
enum
{
SCM_RIGHTS = 0x01, /* Data array contains access rights. */
#define SCM_RIGHTS SCM_RIGHTS
SCM_CREDENTIALS = 0x02, /* Data array is `struct ucred'. */
#define SCM_CREDENTIALS SCM_CREDENTIALS
};
/* Get socket manipulation related informations from kernel headers. */
#ifdef THIS_IS_CRAP
#ifndef _LINUX_TYPES_H
# define _LINUX_TYPES_H
#endif
#endif
#include <asm/socket.h>
#include <asm/types.h>
struct ucred
{
__u32 pid;
__u32 uid;
__u32 gid;
};
/* Structure used to manipulate the SO_LINGER option. */
struct linger
{
int l_onoff; /* Nonzero to linger on close. */
int l_linger; /* Time to linger. */
};
__END_DECLS
#endif /* socketbits.h */

View File

@ -0,0 +1 @@
/* Snapshot identifier of this iproute2 release; "ip -Version" prints it
   as "iproute2-ss<SNAPSHOT>". */
static char SNAPSHOT[] = "020116";

View File

@ -0,0 +1,46 @@
#ifndef __LIBNETLINK_H__
#define __LIBNETLINK_H__ 1
/* Declarations for the rtnetlink helper library (libnetlink).
 *
 * The header includes everything its own declarations need (FILE, pid_t),
 * so it can be included first in any translation unit.
 */
#include <stdio.h>	/* FILE, used by rtnl_from_file() */
#include <sys/types.h>	/* pid_t, used by rtnl_talk() */
#include <asm/types.h>
#include <linux/netlink.h>
#include <linux/rtnetlink.h>
/* State of one rtnetlink connection, passed to the rtnl_* functions below. */
struct rtnl_handle
{
int fd;				/* socket descriptor */
struct sockaddr_nl local;	/* local netlink address */
struct sockaddr_nl peer;	/* peer netlink address */
__u32 seq;	/* NOTE(review): presumably the request sequence counter -- confirm */
__u32 dump;	/* NOTE(review): presumably the sequence number of the current dump -- confirm */
};
/* Opening / closing a handle. */
extern int rtnl_open(struct rtnl_handle *rth, unsigned subscriptions);
extern void rtnl_close(struct rtnl_handle *rth);
/* Dump requests and processing of their replies through callbacks. */
extern int rtnl_wilddump_request(struct rtnl_handle *rth, int fam, int type);
extern int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req, int len);
extern int rtnl_dump_filter(struct rtnl_handle *rth,
int (*filter)(struct sockaddr_nl *, struct nlmsghdr *n, void *),
void *arg1,
int (*junk)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
void *arg2);
/* Single request, optionally with an answer buffer. */
extern int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
unsigned groups, struct nlmsghdr *answer,
int (*junk)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
void *jarg);
extern int rtnl_send(struct rtnl_handle *rth, char *buf, int);
/* Attribute helpers; maxlen bounds the message / attribute buffer. */
extern int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data);
extern int addattr_l(struct nlmsghdr *n, int maxlen, int type, void *data, int alen);
extern int rta_addattr32(struct rtattr *rta, int maxlen, int type, __u32 data);
extern int rta_addattr_l(struct rtattr *rta, int maxlen, int type, void *data, int alen);
extern int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len);
/* Message loops that hand every message to a handler callback. */
extern int rtnl_listen(struct rtnl_handle *, int (*handler)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
void *jarg);
extern int rtnl_from_file(FILE *, int (*handler)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
void *jarg);
#endif /* __LIBNETLINK_H__ */

View File

@ -0,0 +1,12 @@
#ifndef __LL_MAP_H__
#define __LL_MAP_H__ 1
/* Interface name <-> index map.
 *
 * Forward-declare the struct tags used in the prototypes.  Without them, a
 * tag that first appears inside a parameter list is scoped to that single
 * prototype and does not match the real type declared later.
 */
struct sockaddr_nl;
struct nlmsghdr;
struct rtnl_handle;
/* Callback with the same signature as the rtnl_* message handlers. */
extern int ll_remember_index(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
extern int ll_init_map(struct rtnl_handle *rth);
/* Lookups between interface name and interface index. */
extern int ll_name_to_index(char *name);
extern const char *ll_index_to_name(int idx);
extern const char *ll_idx_n2a(int idx, char *buf);
/* Per-interface attributes looked up by index. */
extern int ll_index_to_type(int idx);
extern unsigned ll_index_to_flags(int idx);
#endif /* __LL_MAP_H__ */

View File

@ -0,0 +1,28 @@
#ifndef RT_NAMES_H_
#define RT_NAMES_H_ 1
/* Conversions between numeric identifiers and their textual names.
 * NOTE(review): "n2a" / "a2n" presumably mean number-to-ASCII and
 * ASCII-to-number -- confirm against the implementation.
 */
#include <asm/types.h>	/* __u32, used by the realm and dsfield parsers */
/* Routing identifiers -> text; buf/len describe the caller's buffer. */
const char* rtnl_rtprot_n2a(int id, char *buf, int len);
const char* rtnl_rtscope_n2a(int id, char *buf, int len);
const char* rtnl_rttable_n2a(int id, char *buf, int len);
const char* rtnl_rtrealm_n2a(int id, char *buf, int len);
const char* rtnl_dsfield_n2a(int id, char *buf, int len);
/* Text -> routing identifiers; the result is stored through *id. */
int rtnl_rtprot_a2n(int *id, char *arg);
int rtnl_rtscope_a2n(int *id, char *arg);
int rtnl_rttable_a2n(int *id, char *arg);
int rtnl_rtrealm_a2n(__u32 *id, char *arg);
int rtnl_dsfield_a2n(__u32 *id, char *arg);
/* IP protocol numbers. */
const char *inet_proto_n2a(int proto, char *buf, int len);
int inet_proto_a2n(char *buf);
/* Link-layer types, addresses and protocol ids. */
const char * ll_type_n2a(int type, char *buf, int len);
const char *ll_addr_n2a(unsigned char *addr, int alen, int type, char *buf, int blen);
int ll_addr_a2n(unsigned char *lladdr, int len, char *arg);
const char * ll_proto_n2a(unsigned short id, char *buf, int len);
int ll_proto_a2n(unsigned short *id, char *buf);
#endif

View File

@ -0,0 +1,10 @@
#ifndef __RTM_MAP_H__
#define __RTM_MAP_H__ 1
/* Route-type name conversions and realm parsing. */
#include <asm/types.h>	/* __u32, used by get_rt_realms() */
/* Route type id -> text; buf/len describe the caller's buffer. */
char *rtnl_rtntype_n2a(int id, char *buf, int len);
/* Text -> route type id, stored through *id. */
int rtnl_rtntype_a2n(int *id, char *arg);
/* Parse arg into *realms. */
int get_rt_realms(__u32 *realms, char *arg);
#endif /* __RTM_MAP_H__ */

View File

@ -0,0 +1,119 @@
#ifndef _TCP_DIAG_H_
#define _TCP_DIAG_H_ 1
/* Replace with dynamically allocated value */
#define NETLINK_TCPDIAG 4
/* Just some random number */
#define TCPDIAG_GETSOCK 18
/* Socket identity */
struct tcpdiag_sockid
{
__u16 tcpdiag_sport;
__u16 tcpdiag_dport;
__u32 tcpdiag_src[4];
__u32 tcpdiag_dst[4];
__u32 tcpdiag_if;
__u32 tcpdiag_cookie[2];
#define TCPDIAG_NOCOOKIE (~0U)
};
/* Request structure */
struct tcpdiagreq
{
__u8 tcpdiag_family; /* Family of addresses. */
__u8 tcpdiag_src_len;
__u8 tcpdiag_dst_len;
__u8 tcpdiag_ext; /* Query extended information */
struct tcpdiag_sockid id;
__u32 tcpdiag_states; /* States to dump */
__u32 tcpdiag_dbs; /* Tables to dump (NI) */
};
enum
{
TCPDIAG_REQ_NONE,
TCPDIAG_REQ_BYTECODE,
};
#define TCPDIAG_REQ_MAX TCPDIAG_REQ_BYTECODE
/* Bytecode is sequence of 4 byte commands followed by variable arguments.
* All the commands identified by "code" are conditional jumps forward:
* to offset cc+"yes" or to offset cc+"no". "yes" is supposed to be
* length of the command and its arguments.
*/
struct tcpdiag_bc_op
{
unsigned char code;
unsigned char yes;
unsigned short no;
};
enum
{
TCPDIAG_BC_NOP,
TCPDIAG_BC_JMP,
TCPDIAG_BC_S_GE,
TCPDIAG_BC_S_LE,
TCPDIAG_BC_D_GE,
TCPDIAG_BC_D_LE,
TCPDIAG_BC_AUTO,
TCPDIAG_BC_S_COND,
TCPDIAG_BC_D_COND,
};
struct tcpdiag_hostcond
{
__u8 family;
__u8 prefix_len;
int port;
__u32 addr[0];
};
/* Base info structure. It contains socket identity (addrs/ports/cookie)
* and, alas, the information shown by netstat. */
struct tcpdiagmsg
{
__u8 tcpdiag_family;
__u8 tcpdiag_state;
__u8 tcpdiag_timer;
__u8 tcpdiag_retrans;
struct tcpdiag_sockid id;
__u32 tcpdiag_expires;
__u32 tcpdiag_rqueue;
__u32 tcpdiag_wqueue;
__u32 tcpdiag_uid;
__u32 tcpdiag_inode;
};
/* Extensions */
enum
{
TCPDIAG_NONE,
TCPDIAG_MEMINFO,
TCPDIAG_INFO,
};
#define TCPDIAG_MAX TCPDIAG_INFO
/* TCPDIAG_MEMINFO */
struct tcpdiag_meminfo
{
__u32 tcpdiag_rmem;
__u32 tcpdiag_wmem;
__u32 tcpdiag_fmem;
__u32 tcpdiag_tmem;
};
#endif /* _TCP_DIAG_H_ */

View File

@ -0,0 +1,104 @@
#ifndef __UTILS_H__
#define __UTILS_H__ 1
#include <asm/types.h>
#include <resolv.h>
#include "libnetlink.h"
#include "ll_map.h"
#include "rtm_map.h"
extern int preferred_family;
extern int show_stats;
extern int show_details;
extern int show_raw;
extern int resolve_hosts;
extern int oneline;
extern char * _SL_;
#ifndef IPPROTO_ESP
#define IPPROTO_ESP 50
#endif
#ifndef IPPROTO_AH
#define IPPROTO_AH 51
#endif
#define SPRINT_BSIZE 64
#define SPRINT_BUF(x) char x[SPRINT_BSIZE]
extern void incomplete_command(void) __attribute__((noreturn));
#define NEXT_ARG() do { argv++; if (--argc <= 0) incomplete_command(); } while(0)
typedef struct
{
__u8 family;
__u8 bytelen;
__s16 bitlen;
__u32 data[4];
} inet_prefix;
#define DN_MAXADDL 20
#ifndef AF_DECnet
#define AF_DECnet 12
#endif
struct dn_naddr
{
unsigned short a_len;
unsigned char a_addr[DN_MAXADDL];
};
#define IPX_NODE_LEN 6
struct ipx_addr {
u_int32_t ipx_net;
u_int8_t ipx_node[IPX_NODE_LEN];
};
extern __u32 get_addr32(char *name);
extern int get_addr_1(inet_prefix *dst, char *arg, int family);
extern int get_prefix_1(inet_prefix *dst, char *arg, int family);
extern int get_addr(inet_prefix *dst, char *arg, int family);
extern int get_prefix(inet_prefix *dst, char *arg, int family);
extern int get_integer(int *val, char *arg, int base);
extern int get_unsigned(unsigned *val, char *arg, int base);
#define get_byte get_u8
#define get_ushort get_u16
#define get_short get_s16
extern int get_u32(__u32 *val, char *arg, int base);
extern int get_u16(__u16 *val, char *arg, int base);
extern int get_s16(__s16 *val, char *arg, int base);
extern int get_u8(__u8 *val, char *arg, int base);
extern int get_s8(__s8 *val, char *arg, int base);
extern __u8* hexstring_n2a(const __u8 *str, int len, __u8 *buf, int blen);
extern __u8* hexstring_a2n(const __u8 *str, __u8 *buf, int blen);
extern const char *format_host(int af, int len, void *addr, char *buf, int buflen);
extern const char *rt_addr_n2a(int af, int len, void *addr, char *buf, int buflen);
void invarg(char *, char *) __attribute__((noreturn));
void duparg(char *, char *) __attribute__((noreturn));
void duparg2(char *, char *) __attribute__((noreturn));
int matches(char *arg, char *pattern);
extern int inet_addr_match(inet_prefix *a, inet_prefix *b, int bits);
const char *dnet_ntop(int af, const void *addr, char *str, size_t len);
int dnet_pton(int af, const char *src, void *addr);
const char *ipx_ntop(int af, const void *addr, char *str, size_t len);
int ipx_pton(int af, const char *src, void *addr);
extern int __iproute2_hz_internal;
extern int __get_hz(void);
/* Return the cached HZ value, asking __get_hz() for it the first time
   (that is, while the cache __iproute2_hz_internal is still 0). */
static __inline__ int get_hz(void)
{
	int hz = __iproute2_hz_internal;

	if (hz == 0) {
		hz = __get_hz();
		__iproute2_hz_internal = hz;
	}
	return hz;
}
#endif /* __UTILS_H__ */

View File

@ -0,0 +1,22 @@
IPOBJ=ip.o ipaddress.o iproute.o iprule.o \
rtm_map.o iptunnel.o ipneigh.o iplink.o ipmaddr.o \
ipmonitor.o ipmroute.o
RTMONOBJ=rtmon.o
ALLOBJ=$(IPOBJ) $(RTMONOBJ)
TARGETS=ip rtmon
all: $(TARGETS)
ip: $(IPOBJ) $(LIBNETLINK) $(LIBUTIL)
rtmon: $(RTMONOBJ) $(LIBNETLINK)
install: all
install -m 0755 -s $(TARGETS) $(DESTDIR)$(SBINDIR)
install -m 0755 routel routef $(DESTDIR)$(SBINDIR)
clean:
rm -f $(ALLOBJ) $(TARGETS)

145
ip/ifcfg
View File

@ -0,0 +1,145 @@
#! /bin/bash
# CheckForwarding
#   Sums the values of /proc/sys/net/ipv4/conf/*/forwarding and returns the
#   sum as the exit status (0 = no interface forwards).  Returns 2 when the
#   sysctl directory does not exist, i.e. the state is unknown.
CheckForwarding () {
  local sbase fwd dir val
  sbase=/proc/sys/net/ipv4/conf
  fwd=0
  if [ -d $sbase ]; then
    for dir in $sbase/*/forwarding; do
      # Skip an unmatched glob or an unreadable entry instead of feeding
      # an empty operand to the arithmetic below.
      [ -r "$dir" ] || continue
      val=$(cat "$dir")
      fwd=$((fwd + ${val:-0}))
    done
    # Exit statuses wrap modulo 256; clamp so a large count can never be
    # reported as 0 ("forwarding off").
    if [ $fwd -gt 255 ]; then fwd=255; fi
  else
    fwd=2
  fi
  return $fwd
}
# RestartRDISC
#   Signal a running rdisc with SIGHUP; if that fails, start "rdisc -fs".
RestartRDISC () {
  if ! killall -HUP rdisc; then
    rdisc -fs
  fi
}
# ABCMaskLen ADDR
#   Returns (as exit status) a prefix length chosen from the first octet of
#   ADDR: 0 for an empty argument, a first octet of 0, or one >= 224;
#   24 for 192..223; 16 for 128..191; 8 otherwise.
ABCMaskLen () {
  local class
  # Handle the empty argument up front: the former unquoted "$class -eq 0"
  # test became malformed when the argument was empty.
  if [ -z "$1" ]; then return 0; fi
  class=${1%%.*}
  if [ "$class" -eq 0 ] || [ "$class" -ge 224 ]; then return 0
  elif [ "$class" -ge 192 ]; then return 24
  elif [ "$class" -ge 128 ]; then return 16
  else return 8; fi
}
label="label $1"
ldev="$1"
dev=${1%:*}
# Print the synopsis and exit when no device was given or help was requested.
# The add/del keyword is optional (a bare address means "add"), hence the
# balanced "[add|del]" in the usage line.
if [ "$dev" = "" -o "$1" = "help" ]; then
echo "Usage: ifcfg DEV [[add|del] [ADDR[/LEN]] [PEER] | stop]" 1>&2
echo " add - add new address" 1>&2
echo " del - delete address" 1>&2
echo " stop - completely disable IP" 1>&2
exit 1
fi
shift
CheckForwarding
fwd=$?
if [ $fwd -ne 0 ]; then
echo "Forwarding is ON or its state is unknown ($fwd). OK, No RDISC." 1>&2
fi
deleting=0
case "$1" in
add) shift ;;
stop)
if [ "$ldev" != "$dev" ]; then
echo "Cannot stop alias $ldev" 1>&2
exit 1;
fi
ip -4 addr flush dev $dev $label || exit 1
if [ $fwd -eq 0 ]; then RestartRDISC; fi
exit 0 ;;
del*)
deleting=1; shift ;;
*)
esac
ipaddr=
pfxlen=
if [ "$1" != "" ]; then
ipaddr=${1%/*}
if [ "$1" != "$ipaddr" ]; then
pfxlen=${1#*/}
fi
if [ "$ipaddr" = "" ]; then
echo "$1 is bad IP address." 1>&2
exit 1
fi
fi
shift
peer=$1
if [ "$peer" != "" ]; then
if [ "$pfxlen" != "" -a "$pfxlen" != "32" ]; then
echo "Peer address with non-trivial netmask." 1>&2
exit 1
fi
pfx="$ipaddr peer $peer"
else
if [ "$pfxlen" = "" ]; then
ABCMaskLen $ipaddr
pfxlen=$?
fi
pfx="$ipaddr/$pfxlen"
fi
if [ "$ldev" = "$dev" -a "$ipaddr" != "" ]; then
label=
fi
if [ $deleting -ne 0 ]; then
ip addr del $pfx dev $dev $label || exit 1
if [ $fwd -eq 0 ]; then RestartRDISC; fi
exit 0
fi
if ! ip link set up dev $dev ; then
echo "Error: cannot enable interface $dev." 1>&2
exit 1
fi
if [ "$ipaddr" = "" ]; then exit 0; fi
if ! arping -q -c 2 -w 3 -D -I $dev $ipaddr ; then
echo "Error: some host already uses address $ipaddr on $dev." 1>&2
exit 1
fi
if ! ip address add $pfx brd + dev $dev $label; then
echo "Error: failed to add $pfx on $dev." 1>&2
exit 1
fi
arping -q -A -c 1 -I $dev $ipaddr
noarp=$?
( sleep 2 ;
arping -q -U -c 1 -I $dev $ipaddr ) >& /dev/null </dev/null &
ip route add unreachable 224.0.0.0/24 >& /dev/null
ip route add unreachable 255.255.255.255 >& /dev/null
if [ `ip link ls $dev | grep -c MULTICAST` -ge 1 ]; then
ip route add 224.0.0.0/4 dev $dev scope global >& /dev/null
fi
if [ $fwd -eq 0 ]; then
if [ $noarp -eq 0 ]; then
ip ro append default dev $dev metric 30000 scope global
elif [ "$peer" != "" ]; then
if ping -q -c 2 -w 4 $peer ; then
ip ro append default via $peer dev $dev metric 30001
fi
fi
RestartRDISC
fi
exit 0

167
ip/ip.c
View File

@ -0,0 +1,167 @@
/*
* ip.c "ip" utility frontend.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*
* Changes:
*
* Rani Assaf <rani@magic.metawire.com> 980929: resolve addresses
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>
#include "SNAPSHOT.h"
#include "utils.h"
#include "ip_common.h"
int preferred_family = AF_UNSPEC;
int show_stats = 0;
int resolve_hosts = 0;
int oneline = 0;
char * _SL_ = NULL;
static void usage(void) __attribute__((noreturn));
/* Write the command synopsis to stderr and terminate with exit(-1). */
static void usage(void)
{
	static const char synopsis[] =
"Usage: ip [ OPTIONS ] OBJECT { COMMAND | help }\n"
"where OBJECT := { link | addr | route | rule | neigh | tunnel |\n"
" maddr | mroute | monitor }\n"
" OPTIONS := { -V[ersion] | -s[tatistics] | -r[esolve] |\n"
" -f[amily] { inet | inet6 | ipx | dnet | link } | -o[neline] }\n";

	/* The text holds no conversion specifications, so fputs emits
	   exactly what fprintf would. */
	fputs(synopsis, stderr);
	exit(-1);
}
/*
 * Entry point of the "ip" front end.
 *
 * 1. Consumes the leading options from argv and records them in the global
 *    settings (preferred_family, show_stats, resolve_hosts, oneline).
 * 2. Dispatches to a do_*() handler, chosen either by the name the program
 *    was invoked under (ipaddr, iproute, ...) or by the OBJECT word that
 *    follows the options.
 *
 * Returns the handler's return value; unknown options or objects print a
 * message and exit(-1); a missing object ends in usage().
 */
int main(int argc, char **argv)
{
char *basename;
/* Program name without its directory part. */
basename = strrchr(argv[0], '/');
if (basename == NULL)
basename = argv[0];
else
basename++;
/* Option loop: argv[1] is always the next unprocessed word. */
while (argc > 1) {
char *opt = argv[1];
/* "--" ends option processing and is consumed. */
if (strcmp(opt,"--") == 0) {
argc--; argv++;
break;
}
if (opt[0] != '-')
break;
/* Treat "--option" like "-option" by skipping one dash. */
if (opt[1] == '-')
opt++;
/* NOTE(review): matches() is defined elsewhere; presumably it accepts
   abbreviations of the given keyword -- confirm. */
if (matches(opt, "-family") == 0) {
/* Step onto the family name; the shared shift at the bottom of the
   loop then consumes it. */
argc--;
argv++;
if (argc <= 1)
usage();
if (strcmp(argv[1], "inet") == 0)
preferred_family = AF_INET;
else if (strcmp(argv[1], "inet6") == 0)
preferred_family = AF_INET6;
else if (strcmp(argv[1], "dnet") == 0)
preferred_family = AF_DECnet;
else if (strcmp(argv[1], "link") == 0)
preferred_family = AF_PACKET;
else if (strcmp(argv[1], "ipx") == 0)
preferred_family = AF_IPX;
else if (strcmp(argv[1], "help") == 0)
usage();
else
invarg(argv[1], "invalid protocol family");
/* Single-character shortcuts for the address families. */
} else if (strcmp(opt, "-4") == 0) {
preferred_family = AF_INET;
} else if (strcmp(opt, "-6") == 0) {
preferred_family = AF_INET6;
} else if (strcmp(opt, "-0") == 0) {
preferred_family = AF_PACKET;
} else if (strcmp(opt, "-I") == 0) {
preferred_family = AF_IPX;
} else if (strcmp(opt, "-D") == 0) {
preferred_family = AF_DECnet;
/* Counting flags: each occurrence increments the setting. */
} else if (matches(opt, "-stats") == 0 ||
matches(opt, "-statistics") == 0) {
++show_stats;
} else if (matches(opt, "-resolve") == 0) {
++resolve_hosts;
} else if (matches(opt, "-oneline") == 0) {
++oneline;
#if 0
} else if (matches(opt, "-numeric") == 0) {
rtnl_names_numeric++;
#endif
} else if (matches(opt, "-Version") == 0) {
printf("ip utility, iproute2-ss%s\n", SNAPSHOT);
exit(0);
} else if (matches(opt, "-help") == 0) {
usage();
} else {
fprintf(stderr, "Option \"%s\" is unknown, try \"ip -help\".\n", opt);
exit(-1);
}
/* Consume the word that was just handled. */
argc--; argv++;
}
/* Line separator for output: a backslash in one-line mode, otherwise a
   newline. */
_SL_ = oneline ? "\\" : "\n" ;
/* Dispatch on the invocation name: the object is implied by the program
   name, so only argv[0] is skipped. */
if (strcmp(basename, "ipaddr") == 0)
return do_ipaddr(argc-1, argv+1);
if (strcmp(basename, "ipmaddr") == 0)
return do_multiaddr(argc-1, argv+1);
if (strcmp(basename, "iproute") == 0)
return do_iproute(argc-1, argv+1);
if (strcmp(basename, "iprule") == 0)
return do_iprule(argc-1, argv+1);
if (strcmp(basename, "ipneigh") == 0)
return do_ipneigh(argc-1, argv+1);
if (strcmp(basename, "iplink") == 0)
return do_iplink(argc-1, argv+1);
if (strcmp(basename, "iptunnel") == 0)
return do_iptunnel(argc-1, argv+1);
if (strcmp(basename, "ipmonitor") == 0)
return do_ipmonitor(argc-1, argv+1);
/* Dispatch on the OBJECT word; the handler receives what follows it. */
if (argc > 1) {
if (matches(argv[1], "address") == 0)
return do_ipaddr(argc-2, argv+2);
if (matches(argv[1], "maddress") == 0)
return do_multiaddr(argc-2, argv+2);
if (matches(argv[1], "route") == 0)
return do_iproute(argc-2, argv+2);
if (matches(argv[1], "rule") == 0)
return do_iprule(argc-2, argv+2);
if (matches(argv[1], "mroute") == 0)
return do_multiroute(argc-2, argv+2);
if (matches(argv[1], "neighbor") == 0 ||
matches(argv[1], "neighbour") == 0)
return do_ipneigh(argc-2, argv+2);
if (matches(argv[1], "link") == 0)
return do_iplink(argc-2, argv+2);
if (matches(argv[1], "tunnel") == 0 ||
strcmp(argv[1], "tunl") == 0)
return do_iptunnel(argc-2, argv+2);
if (matches(argv[1], "monitor") == 0)
return do_ipmonitor(argc-2, argv+2);
if (matches(argv[1], "help") == 0)
usage();
fprintf(stderr, "Object \"%s\" is unknown, try \"ip help\".\n", argv[1]);
exit(-1);
}
/* No object given: usage() does not return. */
usage();
}

View File

@ -0,0 +1,20 @@
extern int print_linkinfo(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
extern int print_addrinfo(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
extern int print_neigh(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
extern int ipaddr_list(int argc, char **argv);
extern int ipaddr_list_link(int argc, char **argv);
extern int iproute_monitor(int argc, char **argv);
extern void iplink_usage(void) __attribute__((noreturn));
extern void iproute_reset_filter(void);
extern void ipaddr_reset_filter(int);
extern void ipneigh_reset_filter(void);
extern int print_route(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg);
extern int do_ipaddr(int argc, char **argv);
extern int do_iproute(int argc, char **argv);
extern int do_iprule(int argc, char **argv);
extern int do_ipneigh(int argc, char **argv);
extern int do_iptunnel(int argc, char **argv);
extern int do_iplink(int argc, char **argv);
extern int do_ipmonitor(int argc, char **argv);
extern int do_multiaddr(int argc, char **argv);
extern int do_multiroute(int argc, char **argv);

View File

@ -0,0 +1,898 @@
/*
* ipaddress.c "ip address".
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* Changes:
* Laszlo Valko <valko@linux.karinthy.hu> 990223: address label must be zero terminated
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/sockios.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include <fnmatch.h>
#include "rt_names.h"
#include "utils.h"
#include "ll_map.h"
#include "ip_common.h"
/*
 * Selection criteria and flush state shared by the link/address printers
 * in this file. Cleared by ipaddr_reset_filter().
 */
static struct
{
/* When non-zero, only records of this interface index are shown. */
int ifindex;
/* Address family to restrict output to; 0 means no restriction. */
int family;
/* Copy of the oneline setting handed to ipaddr_reset_filter(). */
int oneline;
/* When non-zero, print_linkinfo() also prints the transmit queue length. */
int showqueue;
/* Prefix given with "to"; pfx.family == 0 means no prefix filter. */
inet_prefix pfx;
/* An address passes when (scope ^ ifa_scope) & scopemask is zero. */
int scope, scopemask;
/* An address passes when (flags ^ ifa_flags) & flagmask is zero. */
int flags, flagmask;
/* When non-zero, links without IFF_UP are skipped. */
int up;
/* fnmatch() pattern applied to the interface name or address label; NULL when unused. */
char *label;
/* Number of addresses queued for deletion during the current flush round. */
int flushed;
/* Flush request buffer; non-NULL switches print_addrinfo() into flush mode. */
char *flushb;
/* Number of bytes of flushb currently in use. */
int flushp;
/* Total size of flushb in bytes. */
int flushe;
/* Netlink handle used by flush_update() to send the queued requests. */
struct rtnl_handle *rth;
} filter;
/* Set by ipaddr_list_link(); makes usage() show the "ip link" help and enables the statistics block in print_linkinfo(). */
static int do_link;
static void usage(void) __attribute__((noreturn));

/*
 * Print the help text and exit with status -1. When this file is serving
 * "ip link" (do_link set) the link help is printed instead, through
 * iplink_usage(), which does not return.
 */
static void usage(void)
{
	static const char help[] =
		"Usage: ip addr {add|del} IFADDR dev STRING\n"
		" ip addr {show|flush} [ dev STRING ] [ scope SCOPE-ID ]\n"
		" [ to PREFIX ] [ FLAG-LIST ] [ label PATTERN ]\n"
		"IFADDR := PREFIX | ADDR peer PREFIX\n"
		" [ broadcast ADDR ] [ anycast ADDR ]\n"
		" [ label STRING ] [ scope SCOPE-ID ]\n"
		"SCOPE-ID := [ host | link | global | NUMBER ]\n"
		"FLAG-LIST := [ FLAG-LIST ] FLAG\n"
		"FLAG := [ permanent | dynamic | secondary | primary |\n"
		" tentative | deprecated ]\n";

	if (do_link)
		iplink_usage();

	/* No conversion specifiers in the text, so emit it verbatim. */
	fputs(help, stderr);
	exit(-1);
}
/*
 * Write the interface flag word as "<NAME,NAME,...> " to fp.
 *
 * IFF_RUNNING is never shown. Known flags are printed by name in a fixed
 * order; any bits left over afterwards are printed as one hexadecimal
 * number. When mdown is non-zero ",M-DOWN" is appended inside the
 * brackets.
 */
void print_link_flags(FILE *fp, unsigned flags, unsigned mdown)
{
	static const struct {
		unsigned bit;
		const char *label;
	} known[] = {
		{ IFF_LOOPBACK,    "LOOPBACK" },
		{ IFF_BROADCAST,   "BROADCAST" },
		{ IFF_POINTOPOINT, "POINTOPOINT" },
		{ IFF_MULTICAST,   "MULTICAST" },
		{ IFF_NOARP,       "NOARP" },
		{ IFF_ALLMULTI,    "ALLMULTI" },
		{ IFF_PROMISC,     "PROMISC" },
		{ IFF_MASTER,      "MASTER" },
		{ IFF_SLAVE,       "SLAVE" },
		{ IFF_DEBUG,       "DEBUG" },
		{ IFF_DYNAMIC,     "DYNAMIC" },
		{ IFF_AUTOMEDIA,   "AUTOMEDIA" },
		{ IFF_PORTSEL,     "PORTSEL" },
		{ IFF_NOTRAILERS,  "NOTRAILERS" },
		{ IFF_UP,          "UP" },
	};
	unsigned remaining = flags & ~IFF_RUNNING;
	unsigned k;

	fprintf(fp, "<");
	for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
		if (!(remaining & known[k].bit))
			continue;
		remaining &= ~known[k].bit;
		/* A comma follows only while further bits are still pending. */
		fprintf(fp, "%s%s", known[k].label, remaining ? "," : "");
	}
	if (remaining)
		fprintf(fp, "%x", remaining);
	if (mdown)
		fprintf(fp, ",M-DOWN");
	fprintf(fp, "> ");
}
/*
 * Print the transmit queue length of interface `name` to stdout as
 * "qlen N", using the SIOCGIFTXQLEN ioctl on a temporary socket.
 *
 * Nothing is printed when the socket cannot be created, when the name
 * does not fit into an ifreq, or when the reported length is zero. An
 * ioctl failure is reported with perror().
 */
void print_queuelen(char *name)
{
	struct ifreq ifr;
	int s;

	/* Copying a longer name would overrun ifr_name; such a device cannot be queried anyway. */
	if (strlen(name) >= sizeof(ifr.ifr_name))
		return;

	s = socket(AF_INET, SOCK_STREAM, 0);
	if (s < 0)
		return;

	memset(&ifr, 0, sizeof(ifr));
	strcpy(ifr.ifr_name, name);
	if (ioctl(s, SIOCGIFTXQLEN, &ifr) < 0) {
		perror("SIOCGIFTXQLEN");
		close(s);
		return;
	}
	close(s);

	if (ifr.ifr_qlen)
		printf("qlen %d", ifr.ifr_qlen);
}
/*
 * Print one RTM_NEWLINK/RTM_DELLINK message in "ip link" format.
 *
 * who is not used here; n is the netlink message; arg is the output
 * FILE *. Returns 0 when the message was printed or skipped (other
 * message type, or rejected by the ifindex/up/label filters) and -1 when
 * the message is shorter than its header or carries no IFLA_IFNAME.
 */
int print_linkinfo(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
{
FILE *fp = (FILE*)arg;
struct ifinfomsg *ifi = NLMSG_DATA(n);
struct rtattr * tb[IFLA_MAX+1];
int len = n->nlmsg_len;
unsigned m_flag = 0;
if (n->nlmsg_type != RTM_NEWLINK && n->nlmsg_type != RTM_DELLINK)
return 0;
/* len becomes the size of the attribute area following the ifinfomsg header. */
len -= NLMSG_LENGTH(sizeof(*ifi));
if (len < 0)
return -1;
if (filter.ifindex && ifi->ifi_index != filter.ifindex)
return 0;
if (filter.up && !(ifi->ifi_flags&IFF_UP))
return 0;
memset(tb, 0, sizeof(tb));
parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len);
if (tb[IFLA_IFNAME] == NULL) {
fprintf(stderr, "BUG: nil ifname\n");
return -1;
}
/* The label pattern is matched against the interface name only when no address family (or the link family) is selected. */
if (filter.label &&
(!filter.family || filter.family == AF_PACKET) &&
fnmatch(filter.label, RTA_DATA(tb[IFLA_IFNAME]), 0))
return 0;
if (n->nlmsg_type == RTM_DELLINK)
fprintf(fp, "Deleted ");
/* tb[IFLA_IFNAME] was checked above, so the "<nil>" alternative is not reachable here. */
fprintf(fp, "%d: %s", ifi->ifi_index,
tb[IFLA_IFNAME] ? (char*)RTA_DATA(tb[IFLA_IFNAME]) : "<nil>");
if (tb[IFLA_LINK]) {
SPRINT_BUF(b1);
int iflink = *(int*)RTA_DATA(tb[IFLA_LINK]);
if (iflink == 0)
fprintf(fp, "@NONE: ");
else {
fprintf(fp, "@%s: ", ll_idx_n2a(iflink, b1));
/* m_flag ends up non-zero when the linked device lacks IFF_UP; print_link_flags() then adds ",M-DOWN". */
m_flag = ll_index_to_flags(iflink);
m_flag = !(m_flag & IFF_UP);
}
} else {
fprintf(fp, ": ");
}
print_link_flags(fp, ifi->ifi_flags, m_flag);
if (tb[IFLA_MTU])
fprintf(fp, "mtu %u ", *(int*)RTA_DATA(tb[IFLA_MTU]));
if (tb[IFLA_QDISC])
fprintf(fp, "qdisc %s ", (char*)RTA_DATA(tb[IFLA_QDISC]));
#ifdef IFLA_MASTER
if (tb[IFLA_MASTER]) {
SPRINT_BUF(b1);
fprintf(fp, "master %s ", ll_idx_n2a(*(int*)RTA_DATA(tb[IFLA_MASTER]), b1));
}
#endif
if (filter.showqueue)
print_queuelen((char*)RTA_DATA(tb[IFLA_IFNAME]));
/* Link-layer type and addresses are shown only when no address family (or the link family) is selected. */
if (!filter.family || filter.family == AF_PACKET) {
SPRINT_BUF(b1);
fprintf(fp, "%s", _SL_);
fprintf(fp, " link/%s ", ll_type_n2a(ifi->ifi_type, b1, sizeof(b1)));
if (tb[IFLA_ADDRESS]) {
fprintf(fp, "%s", ll_addr_n2a(RTA_DATA(tb[IFLA_ADDRESS]),
RTA_PAYLOAD(tb[IFLA_ADDRESS]),
ifi->ifi_type,
b1, sizeof(b1)));
}
if (tb[IFLA_BROADCAST]) {
if (ifi->ifi_flags&IFF_POINTOPOINT)
fprintf(fp, " peer ");
else
fprintf(fp, " brd ");
fprintf(fp, "%s", ll_addr_n2a(RTA_DATA(tb[IFLA_BROADCAST]),
RTA_PAYLOAD(tb[IFLA_BROADCAST]),
ifi->ifi_type,
b1, sizeof(b1)));
}
}
/* Statistics are printed only for "ip link" invocations with at least one -s option. */
if (do_link && tb[IFLA_STATS] && show_stats) {
struct net_device_stats slocal;
struct net_device_stats *s = RTA_DATA(tb[IFLA_STATS]);
/* If the attribute payload is not aligned for unsigned long, read it through an aligned local copy. */
if (((unsigned long)s) & (sizeof(unsigned long)-1)) {
memcpy(&slocal, s, sizeof(slocal));
s = &slocal;
}
fprintf(fp, "%s", _SL_);
fprintf(fp, " RX: bytes packets errors dropped overrun mcast %s%s",
s->rx_compressed ? "compressed" : "", _SL_);
fprintf(fp, " %-10lu %-8lu %-7lu %-7lu %-7lu %-7lu",
s->rx_bytes, s->rx_packets, s->rx_errors,
s->rx_dropped, s->rx_over_errors,
s->multicast
);
if (s->rx_compressed)
fprintf(fp, " %-7lu", s->rx_compressed);
/* A second -s adds the detailed receive error counters. */
if (show_stats > 1) {
fprintf(fp, "%s", _SL_);
fprintf(fp, " RX errors: length crc frame fifo missed%s", _SL_);
fprintf(fp, " %-7lu %-7lu %-7lu %-7lu %-7lu",
s->rx_length_errors,
s->rx_crc_errors,
s->rx_frame_errors,
s->rx_fifo_errors,
s->rx_missed_errors
);
}
fprintf(fp, "%s", _SL_);
fprintf(fp, " TX: bytes packets errors dropped carrier collsns %s%s",
s->tx_compressed ? "compressed" : "", _SL_);
fprintf(fp, " %-10lu %-8lu %-7lu %-7lu %-7lu %-7lu",
s->tx_bytes, s->tx_packets, s->tx_errors,
s->tx_dropped, s->tx_carrier_errors, s->collisions);
if (s->tx_compressed)
fprintf(fp, " %-7lu", s->tx_compressed);
/* A second -s adds the detailed transmit error counters. */
if (show_stats > 1) {
fprintf(fp, "%s", _SL_);
fprintf(fp, " TX errors: aborted fifo window heartbeat%s", _SL_);
fprintf(fp, " %-7lu %-7lu %-7lu %-7lu",
s->tx_aborted_errors,
s->tx_fifo_errors,
s->tx_window_errors,
s->tx_heartbeat_errors
);
}
}
fprintf(fp, "\n");
fflush(fp);
return 0;
}
/*
 * Send the deletion requests accumulated in filter.flushb through
 * filter.rth and mark the buffer as empty again.
 *
 * Returns 0 on success, -1 (after reporting via perror) when rtnl_send()
 * fails; in that case the buffer fill level is left untouched.
 */
static int flush_update(void)
{
	if (rtnl_send(filter.rth, filter.flushb, filter.flushp) < 0) {
		/* perror() appends ": <reason>\n" itself, so the message must not end in a newline. */
		perror("Failed to send flush request");
		return -1;
	}
	filter.flushp = 0;
	return 0;
}
/*
 * Handle one RTM_NEWADDR/RTM_DELADDR message: either print it in
 * "ip addr" format or, in flush mode (filter.flushb set), queue a
 * matching RTM_DELADDR request.
 *
 * who is not used here; n is the netlink message; arg is the output
 * FILE *. Messages of other types and messages rejected by the filter
 * are skipped. Returns 0 on success or skip, -1 when the message is
 * shorter than its header or a pending flush buffer could not be sent.
 *
 * Note that the flag bits of the message are cleared in place while
 * they are printed.
 */
int print_addrinfo(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
{
	FILE *fp = (FILE*)arg;
	struct ifaddrmsg *ifa = NLMSG_DATA(n);
	int len = n->nlmsg_len;
	struct rtattr * rta_tb[IFA_MAX+1];
	char abuf[256];
	SPRINT_BUF(b1);

	if (n->nlmsg_type != RTM_NEWADDR && n->nlmsg_type != RTM_DELADDR)
		return 0;
	len -= NLMSG_LENGTH(sizeof(*ifa));
	if (len < 0) {
		fprintf(stderr, "BUG: wrong nlmsg len %d\n", len);
		return -1;
	}

	/* While flushing, only existing addresses are of interest. */
	if (filter.flushb && n->nlmsg_type != RTM_NEWADDR)
		return 0;

	memset(rta_tb, 0, sizeof(rta_tb));
	parse_rtattr(rta_tb, IFA_MAX, IFA_RTA(ifa), n->nlmsg_len - NLMSG_LENGTH(sizeof(*ifa)));

	/* Let a missing local or peer attribute stand in for the other one. */
	if (!rta_tb[IFA_LOCAL])
		rta_tb[IFA_LOCAL] = rta_tb[IFA_ADDRESS];
	if (!rta_tb[IFA_ADDRESS])
		rta_tb[IFA_ADDRESS] = rta_tb[IFA_LOCAL];

	if (filter.ifindex && filter.ifindex != ifa->ifa_index)
		return 0;
	if ((filter.scope^ifa->ifa_scope)&filter.scopemask)
		return 0;
	if ((filter.flags^ifa->ifa_flags)&filter.flagmask)
		return 0;
	if (filter.label) {
		const char *label;

		/* Addresses without a label are matched by the interface index text. */
		if (rta_tb[IFA_LABEL])
			label = RTA_DATA(rta_tb[IFA_LABEL]);
		else
			label = ll_idx_n2a(ifa->ifa_index, b1);
		if (fnmatch(filter.label, label, 0) != 0)
			return 0;
	}
	if (filter.pfx.family) {
		if (rta_tb[IFA_LOCAL]) {
			inet_prefix dst;
			size_t alen = RTA_PAYLOAD(rta_tb[IFA_LOCAL]);

			/* Never copy more than the prefix can hold, whatever the attribute claims. */
			if (alen > sizeof(dst.data))
				alen = sizeof(dst.data);
			memset(&dst, 0, sizeof(dst));
			dst.family = ifa->ifa_family;
			memcpy(&dst.data, RTA_DATA(rta_tb[IFA_LOCAL]), alen);
			if (inet_addr_match(&dst, &filter.pfx, filter.pfx.bitlen))
				return 0;
		}
	}

	if (filter.flushb) {
		struct nlmsghdr *fn;

		/* Send what is queued so far when this message would not fit any more. */
		if (NLMSG_ALIGN(filter.flushp) + n->nlmsg_len > filter.flushe) {
			if (flush_update())
				return -1;
		}
		/* Append a copy of the message, rewritten into a delete request. */
		fn = (struct nlmsghdr*)(filter.flushb + NLMSG_ALIGN(filter.flushp));
		memcpy(fn, n, n->nlmsg_len);
		fn->nlmsg_type = RTM_DELADDR;
		fn->nlmsg_flags = NLM_F_REQUEST;
		fn->nlmsg_seq = ++filter.rth->seq;
		filter.flushp = (((char*)fn) + n->nlmsg_len) - filter.flushb;
		filter.flushed++;
		/* The queued address is also printed only with two or more -s options. */
		if (show_stats < 2)
			return 0;
	}

	if (n->nlmsg_type == RTM_DELADDR)
		fprintf(fp, "Deleted ");

	if (filter.oneline || filter.flushb)
		fprintf(fp, "%u: %s", ifa->ifa_index, ll_index_to_name(ifa->ifa_index));
	if (ifa->ifa_family == AF_INET)
		fprintf(fp, " inet ");
	else if (ifa->ifa_family == AF_INET6)
		fprintf(fp, " inet6 ");
	else if (ifa->ifa_family == AF_DECnet)
		fprintf(fp, " dnet ");
	else if (ifa->ifa_family == AF_IPX)
		fprintf(fp, " ipx ");
	else
		fprintf(fp, " family %d ", ifa->ifa_family);

	if (rta_tb[IFA_LOCAL]) {
		fprintf(fp, "%s", rt_addr_n2a(ifa->ifa_family,
					      RTA_PAYLOAD(rta_tb[IFA_LOCAL]),
					      RTA_DATA(rta_tb[IFA_LOCAL]),
					      abuf, sizeof(abuf)));

		/*
		 * Local and peer are the same address only if the complete
		 * payloads agree; comparing a fixed 4 bytes would treat IPv6
		 * addresses sharing their first 32 bits as identical.
		 */
		if (rta_tb[IFA_ADDRESS] == NULL ||
		    (RTA_PAYLOAD(rta_tb[IFA_ADDRESS]) == RTA_PAYLOAD(rta_tb[IFA_LOCAL]) &&
		     memcmp(RTA_DATA(rta_tb[IFA_ADDRESS]), RTA_DATA(rta_tb[IFA_LOCAL]),
			    RTA_PAYLOAD(rta_tb[IFA_LOCAL])) == 0)) {
			fprintf(fp, "/%d ", ifa->ifa_prefixlen);
		} else {
			fprintf(fp, " peer %s/%d ",
				rt_addr_n2a(ifa->ifa_family,
					    RTA_PAYLOAD(rta_tb[IFA_ADDRESS]),
					    RTA_DATA(rta_tb[IFA_ADDRESS]),
					    abuf, sizeof(abuf)),
				ifa->ifa_prefixlen);
		}
	}

	if (rta_tb[IFA_BROADCAST]) {
		fprintf(fp, "brd %s ",
			rt_addr_n2a(ifa->ifa_family,
				    RTA_PAYLOAD(rta_tb[IFA_BROADCAST]),
				    RTA_DATA(rta_tb[IFA_BROADCAST]),
				    abuf, sizeof(abuf)));
	}
	if (rta_tb[IFA_ANYCAST]) {
		fprintf(fp, "any %s ",
			rt_addr_n2a(ifa->ifa_family,
				    RTA_PAYLOAD(rta_tb[IFA_ANYCAST]),
				    RTA_DATA(rta_tb[IFA_ANYCAST]),
				    abuf, sizeof(abuf)));
	}
	fprintf(fp, "scope %s ", rtnl_rtscope_n2a(ifa->ifa_scope, b1, sizeof(b1)));

	/* Each known flag is cleared once printed so that only unknown bits remain for the hex dump. */
	if (ifa->ifa_flags&IFA_F_SECONDARY) {
		ifa->ifa_flags &= ~IFA_F_SECONDARY;
		fprintf(fp, "secondary ");
	}
	if (ifa->ifa_flags&IFA_F_TENTATIVE) {
		ifa->ifa_flags &= ~IFA_F_TENTATIVE;
		fprintf(fp, "tentative ");
	}
	if (ifa->ifa_flags&IFA_F_DEPRECATED) {
		ifa->ifa_flags &= ~IFA_F_DEPRECATED;
		fprintf(fp, "deprecated ");
	}
	if (!(ifa->ifa_flags&IFA_F_PERMANENT)) {
		fprintf(fp, "dynamic ");
	} else
		ifa->ifa_flags &= ~IFA_F_PERMANENT;
	if (ifa->ifa_flags)
		fprintf(fp, "flags %02x ", ifa->ifa_flags);

	if (rta_tb[IFA_LABEL])
		fprintf(fp, "%s", (char*)RTA_DATA(rta_tb[IFA_LABEL]));

	if (rta_tb[IFA_CACHEINFO]) {
		struct ifa_cacheinfo *ci = RTA_DATA(rta_tb[IFA_CACHEINFO]);
		char buf[128];

		fprintf(fp, "%s", _SL_);
		/* The lifetimes are unsigned; 0xFFFFFFFF stands for "forever". */
		if (ci->ifa_valid == 0xFFFFFFFFU)
			sprintf(buf, "valid_lft forever");
		else
			sprintf(buf, "valid_lft %usec", ci->ifa_valid);
		if (ci->ifa_prefered == 0xFFFFFFFFU)
			sprintf(buf+strlen(buf), " preferred_lft forever");
		else
			sprintf(buf+strlen(buf), " preferred_lft %usec", ci->ifa_prefered);
		fprintf(fp, " %s", buf);
	}
	fprintf(fp, "\n");
	fflush(fp);
	return 0;
}
/*
 * Node of a singly linked list of stored netlink messages.
 * store_nlmsg() allocates each node with room for the whole message, so
 * the message body extends past the header member h.
 */
struct nlmsg_list
{
/* Next stored message, or NULL at the end of the list. */
struct nlmsg_list *next;
/* Header of the stored message; its payload follows directly in the same allocation. */
struct nlmsghdr h;
};
/*
 * Print every stored RTM_NEWADDR message in ainfo that belongs to
 * interface ifindex and, when filter.family is set, to that family.
 *
 * Returns 0 after walking the whole list, -1 as soon as a message is
 * found that is too short to contain an ifaddrmsg header.
 */
int print_selected_addrinfo(int ifindex, struct nlmsg_list *ainfo, FILE *fp)
{
	for ( ;ainfo ; ainfo = ainfo->next) {
		struct nlmsghdr *n = &ainfo->h;
		struct ifaddrmsg *ifa = NLMSG_DATA(n);

		if (n->nlmsg_type != RTM_NEWADDR)
			continue;

		/* The bound is the size of the header structure, not of the pointer to it. */
		if (n->nlmsg_len < NLMSG_LENGTH(sizeof(*ifa)))
			return -1;

		if (ifa->ifa_index != ifindex ||
		    (filter.family && filter.family != ifa->ifa_family))
			continue;

		print_addrinfo(NULL, n, fp);
	}
	return 0;
}
/*
 * Dump callback that keeps a private copy of message n.
 *
 * arg points at the head pointer of a nlmsg_list; the copy is appended
 * at the tail so that the list preserves arrival order. The message is
 * also passed on to ll_remember_index(). Returns 0 on success and -1
 * when no memory is available (the list is then left unchanged).
 */
int store_nlmsg(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
{
	struct nlmsg_list **tail = (struct nlmsg_list**)arg;
	/* One link pointer in front of the complete message. */
	struct nlmsg_list *node = malloc(n->nlmsg_len+sizeof(void*));

	if (!node)
		return -1;

	node->next = NULL;
	memcpy(&node->h, n, n->nlmsg_len);

	/* Advance to the terminating NULL link and hook the new node in there. */
	while (*tail != NULL)
		tail = &(*tail)->next;
	*tail = node;

	ll_remember_index(who, n, NULL);
	return 0;
}
/*
 * Implementation of "ip addr show", "ip addr flush" and (through
 * ipaddr_list_link()) "ip link show".
 *
 * argc/argv are the words following the command; flush selects flush
 * mode. The function resets and fills the file-scope filter, dumps all
 * links, and then either repeatedly dumps and deletes matching addresses
 * (flush) or prints links together with their addresses (list).
 *
 * Returns -1 for invalid flush invocations or an unknown device and 0
 * when a flush finds nothing more to delete. The listing path and all
 * netlink failures leave through exit() instead of returning.
 */
int ipaddr_list_or_flush(int argc, char **argv, int flush)
{
struct nlmsg_list *linfo = NULL;
struct nlmsg_list *ainfo = NULL;
struct nlmsg_list *l;
struct rtnl_handle rth;
char *filter_dev = NULL;
int no_link = 0;
ipaddr_reset_filter(oneline);
filter.showqueue = 1;
if (filter.family == AF_UNSPEC)
filter.family = preferred_family;
if (flush) {
if (argc <= 0) {
fprintf(stderr, "Flush requires arguments.\n");
return -1;
}
if (filter.family == AF_PACKET) {
fprintf(stderr, "Cannot flush link addresses.\n");
return -1;
}
}
/* Translate the selector words into filter settings. */
while (argc > 0) {
if (strcmp(*argv, "to") == 0) {
NEXT_ARG();
get_prefix(&filter.pfx, *argv, filter.family);
if (filter.family == AF_UNSPEC)
filter.family = filter.pfx.family;
} else if (strcmp(*argv, "scope") == 0) {
int scope = 0;
NEXT_ARG();
filter.scopemask = -1;
if (rtnl_rtscope_a2n(&scope, *argv)) {
/* "all" is not a scope name: it switches the scope test off via a zero mask. */
if (strcmp(*argv, "all") != 0)
invarg("invalid \"scope\"\n", *argv);
scope = RT_SCOPE_NOWHERE;
filter.scopemask = 0;
}
filter.scope = scope;
} else if (strcmp(*argv, "up") == 0) {
filter.up = 1;
} else if (strcmp(*argv, "dynamic") == 0) {
/* "dynamic" means the PERMANENT bit must be clear. */
filter.flags &= ~IFA_F_PERMANENT;
filter.flagmask |= IFA_F_PERMANENT;
} else if (strcmp(*argv, "permanent") == 0) {
filter.flags |= IFA_F_PERMANENT;
filter.flagmask |= IFA_F_PERMANENT;
} else if (strcmp(*argv, "secondary") == 0) {
filter.flags |= IFA_F_SECONDARY;
filter.flagmask |= IFA_F_SECONDARY;
} else if (strcmp(*argv, "primary") == 0) {
/* "primary" means the SECONDARY bit must be clear. */
filter.flags &= ~IFA_F_SECONDARY;
filter.flagmask |= IFA_F_SECONDARY;
} else if (strcmp(*argv, "tentative") == 0) {
filter.flags |= IFA_F_TENTATIVE;
filter.flagmask |= IFA_F_TENTATIVE;
} else if (strcmp(*argv, "deprecated") == 0) {
filter.flags |= IFA_F_DEPRECATED;
filter.flagmask |= IFA_F_DEPRECATED;
} else if (strcmp(*argv, "label") == 0) {
NEXT_ARG();
filter.label = *argv;
} else {
/* Any other word is a device name, optionally introduced by "dev". */
if (strcmp(*argv, "dev") == 0) {
NEXT_ARG();
}
if (matches(*argv, "help") == 0)
usage();
if (filter_dev)
duparg2("dev", *argv);
filter_dev = *argv;
}
argv++; argc--;
}
if (rtnl_open(&rth, 0) < 0)
exit(1);
/* Fetch all links first; store_nlmsg() also feeds each message to ll_remember_index(). */
if (rtnl_wilddump_request(&rth, preferred_family, RTM_GETLINK) < 0) {
perror("Cannot send dump request");
exit(1);
}
if (rtnl_dump_filter(&rth, store_nlmsg, &linfo, NULL, NULL) < 0) {
fprintf(stderr, "Dump terminated\n");
exit(1);
}
/* The device name is resolved only now, after the link dump above has been processed. */
if (filter_dev) {
filter.ifindex = ll_name_to_index(filter_dev);
if (filter.ifindex <= 0) {
fprintf(stderr, "Device \"%s\" does not exist.\n", filter_dev);
return -1;
}
}
if (flush) {
int round = 0;
char flushb[4096-512];
/* Setting filter.flushb switches print_addrinfo() into its queue-for-deletion mode. */
filter.flushb = flushb;
filter.flushp = 0;
filter.flushe = sizeof(flushb);
filter.rth = &rth;
/* Dump and delete repeatedly until a round queues nothing. */
for (;;) {
if (rtnl_wilddump_request(&rth, filter.family, RTM_GETADDR) < 0) {
perror("Cannot send dump request");
exit(1);
}
filter.flushed = 0;
if (rtnl_dump_filter(&rth, print_addrinfo, stdout, NULL, NULL) < 0) {
fprintf(stderr, "Flush terminated\n");
exit(1);
}
if (filter.flushed == 0) {
if (round == 0) {
fprintf(stderr, "Nothing to flush.\n");
} else if (show_stats)
printf("*** Flush is complete after %d round%s ***\n", round, round>1?"s":"");
fflush(stdout);
return 0;
}
round++;
/* Send whatever is still sitting in the buffer after the dump. */
if (flush_update() < 0)
exit(1);
if (show_stats) {
printf("\n*** Round %d, deleting %d addresses ***\n", round, filter.flushed);
fflush(stdout);
}
}
}
/* Listing: addresses are needed unless only link-level information was requested. */
if (filter.family != AF_PACKET) {
if (rtnl_wilddump_request(&rth, filter.family, RTM_GETADDR) < 0) {
perror("Cannot send dump request");
exit(1);
}
if (rtnl_dump_filter(&rth, store_nlmsg, &ainfo, NULL, NULL) < 0) {
fprintf(stderr, "Dump terminated\n");
exit(1);
}
}
/* With a specific address family selected, unlink every link that has no address passing the filter. */
if (filter.family && filter.family != AF_PACKET) {
struct nlmsg_list **lp;
lp=&linfo;
/* In oneline mode the link line itself is suppressed; only address lines are printed. */
if (filter.oneline)
no_link = 1;
while ((l=*lp)!=NULL) {
int ok = 0;
struct ifinfomsg *ifi = NLMSG_DATA(&l->h);
struct nlmsg_list *a;
for (a=ainfo; a; a=a->next) {
struct nlmsghdr *n = &a->h;
struct ifaddrmsg *ifa = NLMSG_DATA(n);
if (ifa->ifa_index != ifi->ifi_index ||
(filter.family && filter.family != ifa->ifa_family))
continue;
if ((filter.scope^ifa->ifa_scope)&filter.scopemask)
continue;
if ((filter.flags^ifa->ifa_flags)&filter.flagmask)
continue;
if (filter.pfx.family || filter.label) {
struct rtattr *tb[IFA_MAX+1];
memset(tb, 0, sizeof(tb));
parse_rtattr(tb, IFA_MAX, IFA_RTA(ifa), IFA_PAYLOAD(n));
if (!tb[IFA_LOCAL])
tb[IFA_LOCAL] = tb[IFA_ADDRESS];
if (filter.pfx.family && tb[IFA_LOCAL]) {
inet_prefix dst;
memset(&dst, 0, sizeof(dst));
dst.family = ifa->ifa_family;
memcpy(&dst.data, RTA_DATA(tb[IFA_LOCAL]), RTA_PAYLOAD(tb[IFA_LOCAL]));
if (inet_addr_match(&dst, &filter.pfx, filter.pfx.bitlen))
continue;
}
if (filter.label) {
SPRINT_BUF(b1);
const char *label;
if (tb[IFA_LABEL])
label = RTA_DATA(tb[IFA_LABEL]);
else
label = ll_idx_n2a(ifa->ifa_index, b1);
if (fnmatch(filter.label, label, 0) != 0)
continue;
}
}
/* One matching address is enough to keep the link. */
ok = 1;
break;
}
/* Unlink the node without freeing it, or step to the next link. */
if (!ok)
*lp = l->next;
else
lp = &l->next;
}
}
/* Print each remaining link followed by its addresses. */
for (l=linfo; l; l = l->next) {
if (no_link || print_linkinfo(NULL, &l->h, stdout) == 0) {
struct ifinfomsg *ifi = NLMSG_DATA(&l->h);
if (filter.family != AF_PACKET)
print_selected_addrinfo(ifi->ifi_index, ainfo, stdout);
}
fflush(stdout);
}
/* The listing path terminates the process rather than returning. */
exit(0);
}
/*
 * "ip link show": list interfaces at link level only.
 *
 * Marks this file as serving "ip link" (do_link), forces the preferred
 * family to AF_PACKET and then runs the common listing code in
 * non-flush mode, returning whatever that code returns.
 */
int ipaddr_list_link(int argc, char **argv)
{
	const int flush = 0;

	do_link = 1;
	preferred_family = AF_PACKET;
	return ipaddr_list_or_flush(argc, argv, flush);
}
/*
 * Return the file-scope filter to its pristine state: every member is
 * zeroed, after which the oneline member takes the supplied value.
 */
void ipaddr_reset_filter(int oneline)
{
	memset(&filter, 0, sizeof filter);

	/* The parameter, not the global of the same name, decides the mode. */
	filter.oneline = oneline;
}
/*
 * Choose the scope used for an address when none was given explicitly.
 *
 * Returns RT_SCOPE_HOST for an AF_INET address whose first byte is 127
 * and 0 for everything else.
 */
int default_scope(inet_prefix *lcl)
{
	const __u8 *first_octet;

	if (lcl->family != AF_INET)
		return 0;
	if (lcl->bytelen < 1)
		return 0;

	first_octet = (const __u8 *)&lcl->data;
	return (*first_octet == 127) ? RT_SCOPE_HOST : 0;
}
/*
 * Implementation of "ip addr add" and "ip addr del".
 *
 * cmd is the netlink message type to send (RTM_NEWADDR or RTM_DELADDR);
 * argc/argv are the words following the command. The words are turned
 * into an ifaddrmsg request with attributes, defaults are filled in
 * (peer = local, prefix length, broadcast for "+"/"-", scope), and the
 * request is sent over a fresh rtnetlink socket.
 *
 * Returns -1 when "dev" is missing, the device is unknown, or an
 * automatic broadcast is requested for a non-IPv4 address. Otherwise the
 * function does not return: it exits with 0 on success, 1 when the
 * socket cannot be opened or dev/label disagree, and 2 when the request
 * fails.
 */
int ipaddr_modify(int cmd, int argc, char **argv)
{
	struct rtnl_handle rth;
	struct {
		struct nlmsghdr n;
		struct ifaddrmsg ifa;
		char buf[256];
	} req;
	char *d = NULL;
	char *l = NULL;
	inet_prefix lcl;
	inet_prefix peer;
	int local_len = 0;
	int peer_len = 0;
	int brd_len = 0;
	int any_len = 0;
	int scoped = 0;

	memset(&req, 0, sizeof(req));
	/*
	 * lcl and peer are consulted below even when no address was given
	 * on the command line, so they must start out in a defined state.
	 */
	memset(&lcl, 0, sizeof(lcl));
	memset(&peer, 0, sizeof(peer));

	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifaddrmsg));
	req.n.nlmsg_flags = NLM_F_REQUEST;
	req.n.nlmsg_type = cmd;
	req.ifa.ifa_family = preferred_family;

	while (argc > 0) {
		if (strcmp(*argv, "peer") == 0 ||
		    strcmp(*argv, "remote") == 0) {
			NEXT_ARG();
			if (peer_len)
				duparg("peer", *argv);
			get_prefix(&peer, *argv, req.ifa.ifa_family);
			peer_len = peer.bytelen;
			if (req.ifa.ifa_family == AF_UNSPEC)
				req.ifa.ifa_family = peer.family;
			addattr_l(&req.n, sizeof(req), IFA_ADDRESS, &peer.data, peer.bytelen);
			req.ifa.ifa_prefixlen = peer.bitlen;
		} else if (matches(*argv, "broadcast") == 0 ||
			   strcmp(*argv, "brd") == 0) {
			inet_prefix addr;
			NEXT_ARG();
			if (brd_len)
				duparg("broadcast", *argv);
			/* "+" and "-" request a computed broadcast (host bits all ones / all zeros). */
			if (strcmp(*argv, "+") == 0)
				brd_len = -1;
			else if (strcmp(*argv, "-") == 0)
				brd_len = -2;
			else {
				get_addr(&addr, *argv, req.ifa.ifa_family);
				if (req.ifa.ifa_family == AF_UNSPEC)
					req.ifa.ifa_family = addr.family;
				addattr_l(&req.n, sizeof(req), IFA_BROADCAST, &addr.data, addr.bytelen);
				brd_len = addr.bytelen;
			}
		} else if (strcmp(*argv, "anycast") == 0) {
			inet_prefix addr;
			NEXT_ARG();
			if (any_len)
				duparg("anycast", *argv);
			get_addr(&addr, *argv, req.ifa.ifa_family);
			if (req.ifa.ifa_family == AF_UNSPEC)
				req.ifa.ifa_family = addr.family;
			addattr_l(&req.n, sizeof(req), IFA_ANYCAST, &addr.data, addr.bytelen);
			any_len = addr.bytelen;
		} else if (strcmp(*argv, "scope") == 0) {
			int scope = 0;
			NEXT_ARG();
			if (rtnl_rtscope_a2n(&scope, *argv))
				/* NOTE(review): the scope filter in ipaddr_list_or_flush() calls invarg(message, argument); the order here is reversed — confirm against utils.h. */
				invarg(*argv, "invalid scope value.");
			req.ifa.ifa_scope = scope;
			scoped = 1;
		} else if (strcmp(*argv, "dev") == 0) {
			NEXT_ARG();
			d = *argv;
		} else if (strcmp(*argv, "label") == 0) {
			NEXT_ARG();
			l = *argv;
			/* The label is sent including its terminating NUL. */
			addattr_l(&req.n, sizeof(req), IFA_LABEL, l, strlen(l)+1);
		} else {
			/* Any other word is the local address, optionally introduced by "local". */
			if (strcmp(*argv, "local") == 0) {
				NEXT_ARG();
			}
			if (matches(*argv, "help") == 0)
				usage();
			if (local_len)
				duparg2("local", *argv);
			get_prefix(&lcl, *argv, req.ifa.ifa_family);
			if (req.ifa.ifa_family == AF_UNSPEC)
				req.ifa.ifa_family = lcl.family;
			addattr_l(&req.n, sizeof(req), IFA_LOCAL, &lcl.data, lcl.bytelen);
			local_len = lcl.bytelen;
		}
		argc--; argv++;
	}

	if (d == NULL) {
		fprintf(stderr, "Not enough information: \"dev\" argument is required.\n");
		return -1;
	}
	if (l && matches(d, l) != 0) {
		fprintf(stderr, "\"dev\" (%s) must match \"label\" (%s).\n", d, l);
		exit(1);
	}

	/* Without an explicit peer the local address doubles as the peer when adding. */
	if (peer_len == 0 && local_len && cmd != RTM_DELADDR) {
		peer = lcl;
		addattr_l(&req.n, sizeof(req), IFA_ADDRESS, &lcl.data, lcl.bytelen);
	}
	if (req.ifa.ifa_prefixlen == 0)
		req.ifa.ifa_prefixlen = lcl.bitlen;

	if (brd_len < 0 && cmd != RTM_DELADDR) {
		inet_prefix brd;
		int i;

		if (req.ifa.ifa_family != AF_INET) {
			fprintf(stderr, "Broadcast can be set only for IPv4 addresses\n");
			return -1;
		}
		brd = peer;
		if (brd.bitlen <= 30) {
			/* Set ("+") or clear ("-") every host bit below the prefix length. */
			for (i=31; i>=brd.bitlen; i--) {
				/* Unsigned shift: 1<<31 would overflow a signed int. */
				if (brd_len == -1)
					brd.data[0] |= htonl(1U<<(31-i));
				else
					brd.data[0] &= ~htonl(1U<<(31-i));
			}
			addattr_l(&req.n, sizeof(req), IFA_BROADCAST, &brd.data, brd.bytelen);
			brd_len = brd.bytelen;
		}
	}
	if (!scoped && cmd != RTM_DELADDR)
		req.ifa.ifa_scope = default_scope(&lcl);

	if (rtnl_open(&rth, 0) < 0)
		exit(1);

	ll_init_map(&rth);

	if ((req.ifa.ifa_index = ll_name_to_index(d)) == 0) {
		fprintf(stderr, "Cannot find device \"%s\"\n", d);
		return -1;
	}

	if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
		exit(2);

	exit(0);
}
/*
 * Dispatcher for "ip address ...".
 *
 * With no further words the addresses are listed. Otherwise the first
 * word selects add, delete, list/show/lst, flush or help, and the words
 * after it are handed to the selected routine. An unrecognised command
 * prints a message to stderr and exits with status -1.
 */
int do_ipaddr(int argc, char **argv)
{
	char *cmd;
	char **rest;
	int nrest;

	if (argc < 1)
		return ipaddr_list_or_flush(0, NULL, 0);

	cmd = *argv;
	rest = argv + 1;
	nrest = argc - 1;

	if (!matches(cmd, "add"))
		return ipaddr_modify(RTM_NEWADDR, nrest, rest);
	if (!matches(cmd, "delete"))
		return ipaddr_modify(RTM_DELADDR, nrest, rest);
	if (!matches(cmd, "list") || !matches(cmd, "show") || !matches(cmd, "lst"))
		return ipaddr_list_or_flush(nrest, rest, 0);
	if (!matches(cmd, "flush"))
		return ipaddr_list_or_flush(nrest, rest, 1);
	if (!matches(cmd, "help"))
		usage();

	fprintf(stderr, "Command \"%s\" is unknown, try \"ip address help\".\n", cmd);
	exit(-1);
}

View File

@ -0,0 +1,397 @@
/*
* iplink.c "ip link".
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/socket.h>
#include <linux/if.h>
#include <linux/if_packet.h>
#include <linux/if_ether.h>
#include <linux/sockios.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>
#include "rt_names.h"
#include "utils.h"
#include "ip_common.h"
static void usage(void) __attribute__((noreturn));

/*
 * Print the "ip link" synopsis to stderr and exit with status -1.
 * Exported so that other files can show the link help as well.
 */
void iplink_usage(void)
{
	static const char help[] =
		"Usage: ip link set DEVICE { up | down | arp { on | off } |\n"
		" dynamic { on | off } |\n"
		" multicast { on | off } | txqueuelen PACKETS |\n"
		" name NEWNAME |\n"
		" address LLADDR | broadcast LLADDR |\n"
		" mtu MTU }\n"
		" ip link show [ DEVICE ]\n";

	/* No conversion specifiers in the text, so emit it verbatim. */
	fputs(help, stderr);
	exit(-1);
}

/* File-local shorthand for iplink_usage(); never returns. */
static void usage(void)
{
	iplink_usage();
}
/*
 * Complain on stderr that the keyword named by msg was followed by
 * something other than "on" or "off". Always returns -1 so that callers
 * can write "return on_off(...)".
 */
static int on_off(char *msg)
{
	const int failure = -1;

	fprintf(stderr,
		"Error: argument of \"%s\" must be \"on\" or \"off\"\n",
		msg);
	return failure;
}
/*
 * Open a datagram socket to issue interface ioctls on.
 *
 * PF_INET is tried first, then PF_PACKET, then PF_INET6; the first
 * socket that can be created is returned. If all three fail, the error
 * of the first (PF_INET) attempt is reported with perror() and -1 is
 * returned.
 */
static int get_ctl_fd(void)
{
	static const int families[] = { PF_INET, PF_PACKET, PF_INET6 };
	int first_errno = 0;
	unsigned int k;

	for (k = 0; k < sizeof(families) / sizeof(families[0]); k++) {
		int fd = socket(families[k], SOCK_DGRAM, 0);

		if (fd >= 0)
			return fd;
		/* Only the failure of the very first attempt is remembered for the report. */
		if (k == 0)
			first_errno = errno;
	}

	errno = first_errno;
	perror("Cannot create control socket");
	return -1;
}
/*
 * Change the interface flags of device dev.
 *
 * Only the bits selected by mask are affected; they take the values
 * found in flags. The current flags are read first and the set ioctl is
 * issued only if one of the masked bits actually differs.
 *
 * Returns 0 on success (including "nothing to change") and -1 when the
 * name is too long, no control socket is available, or an ioctl fails.
 */
static int do_chflags(char *dev, __u32 flags, __u32 mask)
{
	struct ifreq ifr;
	int fd;
	int err;

	/* A longer name would overrun ifr_name and cannot belong to any device. */
	if (strlen(dev) >= sizeof(ifr.ifr_name)) {
		fprintf(stderr, "Error: device name \"%s\" is too long.\n", dev);
		return -1;
	}
	memset(&ifr, 0, sizeof(ifr));
	strcpy(ifr.ifr_name, dev);

	fd = get_ctl_fd();
	if (fd < 0)
		return -1;

	err = ioctl(fd, SIOCGIFFLAGS, &ifr);
	if (err) {
		perror("SIOCGIFFLAGS");
		close(fd);
		return -1;
	}
	if ((ifr.ifr_flags^flags)&mask) {
		ifr.ifr_flags &= ~mask;
		ifr.ifr_flags |= mask&flags;
		err = ioctl(fd, SIOCSIFFLAGS, &ifr);
		if (err)
			perror("SIOCSIFFLAGS");
	}
	close(fd);
	return err;
}
/*
 * Rename network device dev to newdev with the SIOCSIFNAME ioctl.
 *
 * Returns 0 on success and -1 when either name does not fit into an
 * ifreq, no control socket is available, or the ioctl fails (reported
 * with perror()).
 */
static int do_changename(char *dev, char *newdev)
{
	struct ifreq ifr;
	int fd;
	int err;

	/* Both names are copied into fixed-size ifreq members; refuse anything that would overrun them. */
	if (strlen(dev) >= sizeof(ifr.ifr_name)) {
		fprintf(stderr, "Error: device name \"%s\" is too long.\n", dev);
		return -1;
	}
	if (strlen(newdev) >= sizeof(ifr.ifr_newname)) {
		fprintf(stderr, "Error: device name \"%s\" is too long.\n", newdev);
		return -1;
	}
	memset(&ifr, 0, sizeof(ifr));
	strcpy(ifr.ifr_name, dev);
	strcpy(ifr.ifr_newname, newdev);

	fd = get_ctl_fd();
	if (fd < 0)
		return -1;

	err = ioctl(fd, SIOCSIFNAME, &ifr);
	if (err) {
		perror("SIOCSIFNAME");
		close(fd);
		return -1;
	}
	close(fd);
	return err;
}
/*
 * Set the transmit queue length of device dev to qlen with the
 * SIOCSIFTXQLEN ioctl.
 *
 * Returns 0 on success and -1 when the name is too long, no control
 * socket is available, or the ioctl fails (reported with perror()).
 */
static int set_qlen(char *dev, int qlen)
{
	struct ifreq ifr;
	int s;

	/* A longer name would overrun ifr_name and cannot belong to any device. */
	if (strlen(dev) >= sizeof(ifr.ifr_name)) {
		fprintf(stderr, "Error: device name \"%s\" is too long.\n", dev);
		return -1;
	}

	s = get_ctl_fd();
	if (s < 0)
		return -1;

	memset(&ifr, 0, sizeof(ifr));
	strcpy(ifr.ifr_name, dev);
	ifr.ifr_qlen = qlen;
	if (ioctl(s, SIOCSIFTXQLEN, &ifr) < 0) {
		perror("SIOCSIFTXQLEN");
		close(s);
		return -1;
	}
	close(s);

	return 0;
}
/*
 * Set the MTU of device dev to mtu with the SIOCSIFMTU ioctl.
 *
 * Returns 0 on success and -1 when the name is too long, no control
 * socket is available, or the ioctl fails (reported with perror()).
 */
static int set_mtu(char *dev, int mtu)
{
	struct ifreq ifr;
	int s;

	/* A longer name would overrun ifr_name and cannot belong to any device. */
	if (strlen(dev) >= sizeof(ifr.ifr_name)) {
		fprintf(stderr, "Error: device name \"%s\" is too long.\n", dev);
		return -1;
	}

	s = get_ctl_fd();
	if (s < 0)
		return -1;

	memset(&ifr, 0, sizeof(ifr));
	strcpy(ifr.ifr_name, dev);
	ifr.ifr_mtu = mtu;
	if (ioctl(s, SIOCSIFMTU, &ifr) < 0) {
		perror("SIOCSIFMTU");
		close(s);
		return -1;
	}
	close(s);

	return 0;
}
/*
 * Determine the hardware type and hardware address length of device dev.
 *
 * A packet socket is bound to the device and its bound address is read
 * back with getsockname(); the sll_hatype field is stored in *htype and
 * sll_halen is returned.
 *
 * Returns the address length on success and -1 on any failure (name too
 * long, or a failing system call, which is reported with perror()).
 */
static int get_address(char *dev, int *htype)
{
	struct ifreq ifr;
	struct sockaddr_ll me;
	/* getsockname() takes a socklen_t *, which is not the same type as int *. */
	socklen_t alen;
	int s;

	/* A longer name would overrun ifr_name and cannot belong to any device. */
	if (strlen(dev) >= sizeof(ifr.ifr_name)) {
		fprintf(stderr, "Error: device name \"%s\" is too long.\n", dev);
		return -1;
	}

	s = socket(PF_PACKET, SOCK_DGRAM, 0);
	if (s < 0) {
		perror("socket(PF_PACKET)");
		return -1;
	}

	memset(&ifr, 0, sizeof(ifr));
	strcpy(ifr.ifr_name, dev);
	if (ioctl(s, SIOCGIFINDEX, &ifr) < 0) {
		perror("SIOCGIFINDEX");
		close(s);
		return -1;
	}

	memset(&me, 0, sizeof(me));
	me.sll_family = AF_PACKET;
	me.sll_ifindex = ifr.ifr_ifindex;
	me.sll_protocol = htons(ETH_P_LOOP);
	if (bind(s, (struct sockaddr*)&me, sizeof(me)) == -1) {
		perror("bind");
		close(s);
		return -1;
	}

	alen = sizeof(me);
	if (getsockname(s, (struct sockaddr*)&me, &alen) == -1) {
		perror("getsockname");
		close(s);
		return -1;
	}
	close(s);
	*htype = me.sll_hatype;
	return me.sll_halen;
}
/*
 * Prepare *ifr for a hardware-address ioctl on device dev.
 *
 * The request is cleared, the device name and hardware type hatype are
 * filled in, and the textual address lla is converted into
 * ifr_hwaddr.sa_data by ll_addr_a2n(). The converted length must equal
 * halen.
 *
 * Returns 0 on success and -1 when the name is too long, the address
 * cannot be converted, or its length is not halen.
 */
static int parse_address(char *dev, int hatype, int halen, char *lla, struct ifreq *ifr)
{
	int alen;

	/* A longer name would overrun ifr_name and cannot belong to any device. */
	if (strlen(dev) >= sizeof(ifr->ifr_name)) {
		fprintf(stderr, "Error: device name \"%s\" is too long.\n", dev);
		return -1;
	}
	memset(ifr, 0, sizeof(*ifr));
	strcpy(ifr->ifr_name, dev);
	ifr->ifr_hwaddr.sa_family = hatype;
	/* Pass the real capacity of sa_data instead of a hard-coded 14. */
	alen = ll_addr_a2n(ifr->ifr_hwaddr.sa_data, sizeof(ifr->ifr_hwaddr.sa_data), lla);
	if (alen < 0)
		return -1;
	if (alen != halen) {
		fprintf(stderr, "Wrong address (%s) length: expected %d bytes\n", lla, halen);
		return -1;
	}
	return 0;
}
/*
 * Apply a prepared hardware-address request.
 *
 * ifr must already carry the device name and address. A non-zero brd
 * selects SIOCSIFHWBROADCAST, zero selects SIOCSIFHWADDR. Returns 0 on
 * success and -1 when no control socket is available or the ioctl fails
 * (reported with perror() under the name of the ioctl used).
 */
static int set_address(struct ifreq *ifr, int brd)
{
	const char *what = brd ? "SIOCSIFHWBROADCAST" : "SIOCSIFHWADDR";
	int request = brd ? SIOCSIFHWBROADCAST : SIOCSIFHWADDR;
	int status = 0;
	int fd;

	fd = get_ctl_fd();
	if (fd < 0)
		return -1;

	if (ioctl(fd, request, ifr) < 0) {
		/* Report before close() so that errno still belongs to the ioctl. */
		perror(what);
		status = -1;
	}
	close(fd);
	return status;
}
/*
 * Implementation of "ip link set".
 *
 * argc/argv are the words following "set". All words are parsed first;
 * then the requested changes are applied in a fixed order: address
 * validation, rename, queue length, MTU, broadcast/hardware address, and
 * finally the interface flags.
 *
 * Returns 0 on success and -1 as soon as one step fails (later steps are
 * then not attempted). A missing device name terminates the process with
 * exit status -1.
 */
static int do_set(int argc, char **argv)
{
	char *dev = NULL;
	__u32 mask = 0;
	__u32 flags = 0;
	int qlen = -1;
	int mtu = -1;
	char *newaddr = NULL;
	char *newbrd = NULL;
	struct ifreq ifr0, ifr1;
	char *newname = NULL;
	int htype, halen;

	while (argc > 0) {
		if (strcmp(*argv, "up") == 0) {
			mask |= IFF_UP;
			flags |= IFF_UP;
		} else if (strcmp(*argv, "down") == 0) {
			mask |= IFF_UP;
			flags &= ~IFF_UP;
		} else if (strcmp(*argv, "name") == 0) {
			NEXT_ARG();
			newname = *argv;
		} else if (matches(*argv, "address") == 0) {
			NEXT_ARG();
			newaddr = *argv;
		} else if (matches(*argv, "broadcast") == 0 ||
			   strcmp(*argv, "brd") == 0) {
			NEXT_ARG();
			newbrd = *argv;
		} else if (matches(*argv, "txqueuelen") == 0 ||
			   strcmp(*argv, "qlen") == 0 ||
			   matches(*argv, "txqlen") == 0) {
			NEXT_ARG();
			if (qlen != -1)
				duparg("txqueuelen", *argv);
			if (get_integer(&qlen, *argv, 0))
				invarg("Invalid \"txqueuelen\" value\n", *argv);
		} else if (strcmp(*argv, "mtu") == 0) {
			NEXT_ARG();
			if (mtu != -1)
				duparg("mtu", *argv);
			if (get_integer(&mtu, *argv, 0))
				invarg("Invalid \"mtu\" value\n", *argv);
		} else if (strcmp(*argv, "multicast") == 0) {
			NEXT_ARG();
			mask |= IFF_MULTICAST;
			if (strcmp(*argv, "on") == 0) {
				flags |= IFF_MULTICAST;
			} else if (strcmp(*argv, "off") == 0) {
				flags &= ~IFF_MULTICAST;
			} else
				return on_off("multicast");
		} else if (strcmp(*argv, "arp") == 0) {
			NEXT_ARG();
			/* The kernel flag is the inverse of the keyword: "arp on" clears IFF_NOARP. */
			mask |= IFF_NOARP;
			if (strcmp(*argv, "on") == 0) {
				flags &= ~IFF_NOARP;
			} else if (strcmp(*argv, "off") == 0) {
				flags |= IFF_NOARP;
			} else
				/* Name the keyword the user actually typed. */
				return on_off("arp");
#ifdef IFF_DYNAMIC
		} else if (matches(*argv, "dynamic") == 0) {
			NEXT_ARG();
			mask |= IFF_DYNAMIC;
			if (strcmp(*argv, "on") == 0) {
				flags |= IFF_DYNAMIC;
			} else if (strcmp(*argv, "off") == 0) {
				flags &= ~IFF_DYNAMIC;
			} else
				return on_off("dynamic");
#endif
		} else {
			/* Any other word is the device name, optionally introduced by "dev". */
			if (strcmp(*argv, "dev") == 0) {
				NEXT_ARG();
			}
			if (matches(*argv, "help") == 0)
				usage();
			if (dev)
				duparg2("dev", *argv);
			dev = *argv;
		}
		argc--; argv++;
	}

	if (!dev) {
		fprintf(stderr, "Not enough of information: \"dev\" argument is required.\n");
		exit(-1);
	}

	/* Validate the new addresses before anything is changed on the device. */
	if (newaddr || newbrd) {
		halen = get_address(dev, &htype);
		if (halen < 0)
			return -1;
		if (newaddr) {
			if (parse_address(dev, htype, halen, newaddr, &ifr0) < 0)
				return -1;
		}
		if (newbrd) {
			if (parse_address(dev, htype, halen, newbrd, &ifr1) < 0)
				return -1;
		}
	}

	if (newname && strcmp(dev, newname)) {
		if (do_changename(dev, newname) < 0)
			return -1;
		dev = newname;
		/*
		 * The address requests prepared above still carry the old
		 * name; point them at the renamed device so that the ioctls
		 * below find it.
		 */
		if (newaddr)
			snprintf(ifr0.ifr_name, sizeof(ifr0.ifr_name), "%s", dev);
		if (newbrd)
			snprintf(ifr1.ifr_name, sizeof(ifr1.ifr_name), "%s", dev);
	}
	if (qlen != -1) {
		if (set_qlen(dev, qlen) < 0)
			return -1;
	}
	if (mtu != -1) {
		if (set_mtu(dev, mtu) < 0)
			return -1;
	}
	if (newaddr || newbrd) {
		if (newbrd) {
			if (set_address(&ifr1, 1) < 0)
				return -1;
		}
		if (newaddr) {
			if (set_address(&ifr0, 0) < 0)
				return -1;
		}
	}
	/* Flags go last so that e.g. "up" takes effect after all other settings. */
	if (mask)
		return do_chflags(dev, flags, mask);
	return 0;
}
/*
 * Dispatcher for "ip link ...".
 *
 * Without further words all links are listed. Otherwise the first word
 * selects set, show/lst/list or help, and the words after it are handed
 * to the selected routine. An unrecognised command prints a message to
 * stderr and exits with status -1.
 */
int do_iplink(int argc, char **argv)
{
	char *cmd;

	if (argc <= 0)
		return ipaddr_list_link(0, NULL);

	cmd = *argv;
	if (!matches(cmd, "set"))
		return do_set(argc-1, argv+1);
	if (!matches(cmd, "show") || !matches(cmd, "lst") || !matches(cmd, "list"))
		return ipaddr_list_link(argc-1, argv+1);
	if (!matches(cmd, "help"))
		usage();

	fprintf(stderr, "Command \"%s\" is unknown, try \"ip link help\".\n", cmd);
	exit(-1);
}

View File

@ -0,0 +1,342 @@
/*
* ipmaddr.c "ip maddress".
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/netdevice.h>
#include <linux/if.h>
#include <linux/if_arp.h>
#include <linux/sockios.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include "rt_names.h"
#include "utils.h"
static struct {
char *dev;
int family;
} filter;
static void usage(void) __attribute__((noreturn));

/* Write the "ip maddr" synopsis to stderr and exit with status -1. */
static void usage(void)
{
	fputs("Usage: ip maddr [ add | del ] MULTIADDR dev STRING\n"
	      " ip maddr show [ dev STRING ]\n",
	      stderr);
	exit(-1);
}
/* Value of one hexadecimal digit, or -1 when c is not a hex digit. */
static int hex_nibble(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/*
 * Decode a string of hexadecimal digit pairs into bytes.
 *
 * str  - NUL-terminated text such as "01005e000001"
 * addr - output buffer; the caller must provide at least strlen(str)/2 bytes,
 *        no bound is checked here
 *
 * Returns the number of bytes stored (0 for an empty string), or -1 when the
 * string has an odd number of characters or contains anything that is not a
 * hex digit.  Bytes decoded before the offending pair have already been
 * written to addr when -1 is returned.
 */
static int parse_hex(char *str, unsigned char *addr)
{
	int len = 0;

	while (*str) {
		int hi, lo;

		/* a dangling single digit cannot form a byte */
		if (str[1] == 0)
			return -1;
		hi = hex_nibble(str[0]);
		lo = hex_nibble(str[1]);
		/* both characters must be digits: no signs, blanks or partial pairs */
		if (hi < 0 || lo < 0)
			return -1;
		addr[len] = (unsigned char)((hi << 4) | lo);
		len++;
		str += 2;
	}
	return len;
}
/* One multicast address entry, kept in a singly linked list ordered by index. */
struct ma_info
{
/* next entry in the list, NULL at the tail */
struct ma_info *next;
/* interface index parsed from the proc file; maddr_ins() orders by it */
int index;
/* user count parsed from the proc file */
int users;
/* optional label printed after the address (e.g. "static"), or NULL */
char *features;
/* interface name parsed from the proc file */
char name[IFNAMSIZ];
/* the multicast address itself together with its family and length */
inet_prefix addr;
};
/*
 * Insert m into the list headed by *lst, keeping it ordered by ascending
 * index.  The new node goes in front of the first node whose index is
 * greater than m->index, so nodes with the same index keep insertion order.
 */
void maddr_ins(struct ma_info **lst, struct ma_info *m)
{
	struct ma_info **link = lst;

	while (*link != NULL && (*link)->index <= m->index)
		link = &(*link)->next;

	m->next = *link;
	*link = m;
}
/*
 * Read link-layer multicast addresses from /proc/net/dev_mcast and insert
 * one ma_info per accepted line into the list at *result_p.
 *
 * Lines that do not provide all five expected fields, lines for an interface
 * other than filter.dev (when set) and lines whose address is not valid hex
 * are skipped.  A missing proc file is not an error: the list is simply left
 * unchanged.  Reading stops early if memory runs out.
 */
void read_dev_mcast(struct ma_info **result_p)
{
	char buf[256];
	FILE *fp = fopen("/proc/net/dev_mcast", "r");

	if (!fp)
		return;

	while (fgets(buf, sizeof(buf), fp)) {
		char hexa[256];
		struct ma_info m;
		struct ma_info *ma;
		int len;
		int st;

		memset(&m, 0, sizeof(m));
		/*
		 * Require every field: with a short line hexa would stay
		 * uninitialised and must not be handed to parse_hex().
		 * NOTE(review): "%s" into m.name is not width-limited; this
		 * relies on the name token being shorter than IFNAMSIZ.
		 */
		if (sscanf(buf, "%d%s%d%d%s", &m.index, m.name, &m.users, &st,
			   hexa) != 5)
			continue;
		if (filter.dev && strcmp(filter.dev, m.name))
			continue;

		m.addr.family = AF_PACKET;
		len = parse_hex(hexa, (unsigned char*)&m.addr.data);
		if (len < 0)
			continue;

		ma = malloc(sizeof(m));
		if (ma == NULL) {
			perror("malloc");
			break;
		}
		memcpy(ma, &m, sizeof(m));
		ma->addr.bytelen = len;
		ma->addr.bitlen = len<<3;
		/* a non-zero fourth column marks the entry as "static" */
		if (st)
			ma->features = "static";
		maddr_ins(result_p, ma);
	}
	fclose(fp);
}
/*
 * Read IPv4 multicast group memberships from /proc/net/igmp and insert one
 * ma_info per group line into the list at *result_p.
 *
 * The first line of the file is discarded.  After that, a line that does not
 * start with a tab supplies the interface index and name for the group lines
 * that follow it; a line that starts with a tab supplies a group address and
 * user count.  Group lines for interfaces other than filter.dev (when set)
 * are skipped.  A missing proc file leaves the list unchanged.  Reading stops
 * early if memory runs out.
 */
void read_igmp(struct ma_info **result_p)
{
	struct ma_info m;
	char buf[256];
	FILE *fp = fopen("/proc/net/igmp", "r");

	if (!fp)
		return;

	memset(&m, 0, sizeof(m));
	/* discard the first line */
	fgets(buf, sizeof(buf), fp);

	m.addr.family = AF_INET;
	m.addr.bitlen = 32;
	m.addr.bytelen = 4;

	while (fgets(buf, sizeof(buf), fp)) {
		struct ma_info *ma;

		if (buf[0] != '\t') {
			/* interface line: remember index and name for later group lines */
			sscanf(buf, "%d%s", &m.index, m.name);
			continue;
		}

		if (filter.dev && strcmp(filter.dev, m.name))
			continue;

		sscanf(buf, "%08x%d", (__u32*)&m.addr.data, &m.users);

		/* allocate only once an entry is really going to be inserted */
		ma = malloc(sizeof(m));
		if (ma == NULL) {
			perror("malloc");
			break;
		}
		memcpy(ma, &m, sizeof(m));
		maddr_ins(result_p, ma);
	}
	fclose(fp);
}
/*
 * Read IPv6 multicast group memberships from /proc/net/igmp6 and insert one
 * ma_info per accepted line into the list at *result_p.
 *
 * Lines that do not provide all four expected fields, lines for an interface
 * other than filter.dev (when set) and lines whose address is not valid hex
 * are skipped.  A missing proc file leaves the list unchanged.  Reading stops
 * early if memory runs out.
 */
void read_igmp6(struct ma_info **result_p)
{
	char buf[256];
	FILE *fp = fopen("/proc/net/igmp6", "r");

	if (!fp)
		return;

	while (fgets(buf, sizeof(buf), fp)) {
		char hexa[256];
		struct ma_info m;
		struct ma_info *ma;
		int len;

		memset(&m, 0, sizeof(m));
		/*
		 * Require every field: with a short line hexa would stay
		 * uninitialised and must not be handed to parse_hex().
		 * NOTE(review): "%s" into m.name is not width-limited; this
		 * relies on the name token being shorter than IFNAMSIZ.
		 */
		if (sscanf(buf, "%d%s%s%d", &m.index, m.name, hexa, &m.users) != 4)
			continue;
		if (filter.dev && strcmp(filter.dev, m.name))
			continue;

		m.addr.family = AF_INET6;
		len = parse_hex(hexa, (unsigned char*)&m.addr.data);
		if (len < 0)
			continue;

		ma = malloc(sizeof(m));
		if (ma == NULL) {
			perror("malloc");
			break;
		}
		memcpy(ma, &m, sizeof(m));
		ma->addr.bytelen = len;
		ma->addr.bitlen = len<<3;
		maddr_ins(result_p, ma);
	}
	fclose(fp);
}
/*
 * Print one multicast address entry as a tab-indented line on fp.
 *
 * AF_PACKET entries are shown as "link <lladdr>"; other families get an
 * "inet", "inet6" or "family N" prefix followed by the formatted address.
 * The user count is appended unless it is exactly 1, and the features label
 * is appended when one is set.
 */
static void print_maddr(FILE *fp, struct ma_info *list)
{
	int family = list->addr.family;

	fprintf(fp, "\t");

	if (family == AF_PACKET) {
		SPRINT_BUF(b1);

		fprintf(fp, "link %s", ll_addr_n2a((unsigned char*)list->addr.data,
						   list->addr.bytelen, 0,
						   b1, sizeof(b1)));
	} else {
		char abuf[256];

		if (family == AF_INET)
			fprintf(fp, "inet ");
		else if (family == AF_INET6)
			fprintf(fp, "inet6 ");
		else
			fprintf(fp, "family %d ", family);

		fprintf(fp, "%s",
			format_host(list->addr.family,
				    -1,
				    list->addr.data,
				    abuf, sizeof(abuf)));
	}

	if (list->users != 1)
		fprintf(fp, " users %d", list->users);
	if (list->features)
		fprintf(fp, " %s", list->features);
	fprintf(fp, "\n");
}
/*
 * Print every entry of the list on fp.
 *
 * In oneline mode each entry is preceded by "<index>:\t<name>" and the _SL_
 * separator.  Otherwise that header is printed on its own line, and only
 * when the interface index differs from the previous entry's.
 */
static void print_mlist(FILE *fp, struct ma_info *list)
{
	struct ma_info *m = list;
	int last_index = 0;

	while (m != NULL) {
		if (oneline || last_index != m->index) {
			last_index = m->index;
			fprintf(fp, "%d:\t%s%s", last_index, m->name,
				oneline ? _SL_ : "\n");
		}
		print_maddr(fp, m);
		m = m->next;
	}
}
/*
 * Implement "ip maddr show [ dev STRING ]".
 *
 * Every argument is taken as the device name, optionally introduced by the
 * keyword "dev"; "help" prints the usage text and a second device name is
 * reported through duparg2().  The proc readers selected by filter.family
 * are then run and the collected list is printed on stdout.  Always
 * returns 0.
 */
static int multiaddr_list(int argc, char **argv)
{
	struct ma_info *list = NULL;
	int family;

	if (!filter.family)
		filter.family = preferred_family;

	for (; argc > 0; argc--, argv++) {
		if (strcmp(*argv, "dev") == 0) {
			NEXT_ARG();
		}
		if (matches(*argv, "help") == 0)
			usage();
		if (filter.dev)
			duparg2("dev", *argv);
		filter.dev = *argv;
	}

	family = filter.family;

	if (family == 0 || family == AF_PACKET)
		read_dev_mcast(&list);
	if (family == 0 || family == AF_INET)
		read_igmp(&list);
	if (family == 0 || family == AF_INET6)
		read_igmp6(&list);

	print_mlist(stdout, list);
	return 0;
}
/*
 * Implement "ip maddr add|del MULTIADDR dev STRING".
 *
 * cmd  - RTM_NEWADDR selects SIOCADDMULTI; any other value selects
 *        SIOCDELMULTI
 * argc/argv - remaining command line: "dev NAME" plus a link-layer address,
 *        optionally introduced by "address"
 *
 * The request is issued with ioctl() on a datagram socket.  This function
 * does not return to its caller: it exits with status 0 on success, 1 on a
 * bad address or a socket/ioctl failure, and -1 when no device was given.
 */
int multiaddr_modify(int cmd, int argc, char **argv)
{
	struct ifreq ifr;
	int fd;

	memset(&ifr, 0, sizeof(ifr));

	if (cmd == RTM_NEWADDR)
		cmd = SIOCADDMULTI;
	else
		cmd = SIOCDELMULTI;

	while (argc > 0) {
		if (strcmp(*argv, "dev") == 0) {
			NEXT_ARG();
			if (ifr.ifr_name[0])
				duparg("dev", *argv);
			/*
			 * ifr was zeroed above, so copying at most
			 * IFNAMSIZ-1 bytes always leaves the name
			 * NUL-terminated.
			 */
			strncpy(ifr.ifr_name, *argv, IFNAMSIZ-1);
		} else {
			if (matches(*argv, "address") == 0) {
				NEXT_ARG();
			}
			if (matches(*argv, "help") == 0)
				usage();
			/*
			 * NOTE(review): the duplicate check looks only at the
			 * first address byte, so an address starting with 00
			 * is not recognised as already given.
			 */
			if (ifr.ifr_hwaddr.sa_data[0])
				duparg("address", *argv);
			if (ll_addr_a2n(ifr.ifr_hwaddr.sa_data, 14, *argv) < 0) {
				fprintf(stderr, "Error: \"%s\" is not a legal ll address.\n", *argv);
				exit(1);
			}
		}
		argc--; argv++;
	}

	if (ifr.ifr_name[0] == 0) {
		fprintf(stderr, "Not enough information: \"dev\" is required.\n");
		exit(-1);
	}

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("Cannot create socket");
		exit(1);
	}
	if (ioctl(fd, cmd, (char*)&ifr) != 0) {
		perror("ioctl");
		exit(1);
	}
	close(fd);

	exit(0);
}
/*
 * Entry point for "ip maddr": dispatch on the first argument.
 *
 * No argument lists all addresses.  "add" and "delete" go to
 * multiaddr_modify(); list/show/lst go to multiaddr_list(); "help" prints
 * the usage text.  An unknown command is reported on stderr and the process
 * exits with status -1.
 */
int do_multiaddr(int argc, char **argv)
{
	char *cmd;

	if (argc < 1)
		return multiaddr_list(0, NULL);

	cmd = *argv;

	if (matches(cmd, "add") == 0)
		return multiaddr_modify(RTM_NEWADDR, argc-1, argv+1);

	if (matches(cmd, "delete") == 0)
		return multiaddr_modify(RTM_DELADDR, argc-1, argv+1);

	if (matches(cmd, "list") == 0 ||
	    matches(cmd, "show") == 0 ||
	    matches(cmd, "lst") == 0)
		return multiaddr_list(argc-1, argv+1);

	if (matches(cmd, "help") == 0)
		usage();

	fprintf(stderr, "Command \"%s\" is unknown, try \"ip maddr help\".\n", cmd);
	exit(-1);
}

View File

@ -0,0 +1,152 @@
/*
* ipmonitor.c "ip monitor".
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include <time.h>
#include "utils.h"
#include "ip_common.h"
static void usage(void) __attribute__((noreturn));

/* Write the "ip monitor" synopsis to stderr and exit with status -1. */
static void usage(void)
{
	fputs("Usage: ip monitor [ all | LISTofOBJECTS ]\n", stderr);
	exit(-1);
}
int accept_msg(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
{
FILE *fp = (FILE*)arg;
if (n->nlmsg_type == RTM_NEWROUTE || n->nlmsg_type == RTM_DELROUTE) {
print_route(who, n, arg);
return 0;
}
if (n->nlmsg_type == RTM_NEWLINK || n->nlmsg_type == RTM_DELLINK) {
ll_remember_index(who, n, NULL);
print_linkinfo(who, n, arg);
return 0;
}
if (n->nlmsg_type == RTM_NEWADDR || n->nlmsg_type == RTM_DELADDR) {
print_addrinfo(who, n, arg);
return 0;
}
if (n->nlmsg_type == RTM_NEWNEIGH || n->nlmsg_type == RTM_DELNEIGH) {
print_neigh(who, n, arg);
return 0;
}
if (n->nlmsg_type == 15) {
char *tstr;
time_t secs = ((__u32*)NLMSG_DATA(n))[0];
long usecs = ((__u32*)NLMSG_DATA(n))[1];
tstr = asctime(localtime(&secs));
tstr[strlen(tstr)-1] = 0;
fprintf(fp, "Timestamp: %s %lu us\n", tstr, usecs);
return 0;
}
if (n->nlmsg_type == RTM_NEWQDISC ||
n->nlmsg_type == RTM_DELQDISC ||
n->nlmsg_type == RTM_NEWTCLASS ||
n->nlmsg_type == RTM_DELTCLASS ||
n->nlmsg_type == RTM_NEWTFILTER ||
n->nlmsg_type == RTM_DELTFILTER)
return 0;
if (n->nlmsg_type != NLMSG_ERROR && n->nlmsg_type != NLMSG_NOOP &&
n->nlmsg_type != NLMSG_DONE) {
fprintf(fp, "Unknown message: %08x %08x %08x\n",
n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
}
return 0;
}
/*
 * Entry point for "ip monitor".
 *
 * Arguments: "file NAME" replays messages from a file; "link", "address"
 * and "route" restrict monitoring to those objects; "all" selects every
 * group except traffic control; "help" prints the usage text.  Any other
 * argument is an error (exit status -1).
 *
 * With a file, the result of rtnl_from_file() is returned.  Otherwise the
 * function listens on rtnetlink and does not return: it exits with 1 if the
 * socket cannot be opened, 2 if listening fails and 0 when listening ends.
 */
int do_ipmonitor(int argc, char **argv)
{
	struct rtnl_handle rth;
	char *file = NULL;
	unsigned groups = ~RTMGRP_TC;
	int llink = 0;
	int laddr = 0;
	int lroute = 0;
	int want_inet;
	int want_inet6;

	ipaddr_reset_filter(1);
	iproute_reset_filter();
	ipneigh_reset_filter();

	for (; argc > 0; argc--, argv++) {
		if (matches(*argv, "file") == 0) {
			NEXT_ARG();
			file = *argv;
		} else if (matches(*argv, "link") == 0) {
			llink = 1;
			groups = 0;
		} else if (matches(*argv, "address") == 0) {
			laddr = 1;
			groups = 0;
		} else if (matches(*argv, "route") == 0) {
			lroute = 1;
			groups = 0;
		} else if (strcmp(*argv, "all") == 0) {
			groups = ~RTMGRP_TC;
		} else if (matches(*argv, "help") == 0) {
			usage();
		} else {
			fprintf(stderr, "Argument \"%s\" is unknown, try \"ip monitor help\".\n", *argv);
			exit(-1);
		}
	}

	/* an unspecified preferred family selects both IPv4 and IPv6 groups */
	want_inet = !preferred_family || preferred_family == AF_INET;
	want_inet6 = !preferred_family || preferred_family == AF_INET6;

	if (llink)
		groups |= RTMGRP_LINK;
	if (laddr) {
		if (want_inet)
			groups |= RTMGRP_IPV4_IFADDR;
		if (want_inet6)
			groups |= RTMGRP_IPV6_IFADDR;
	}
	if (lroute) {
		if (want_inet)
			groups |= RTMGRP_IPV4_ROUTE;
		if (want_inet6)
			groups |= RTMGRP_IPV6_ROUTE;
	}

	if (file) {
		FILE *fp = fopen(file, "r");

		if (fp == NULL) {
			perror("Cannot fopen");
			exit(-1);
		}
		return rtnl_from_file(fp, accept_msg, (void*)stdout);
	}

	if (rtnl_open(&rth, groups) < 0)
		exit(1);

	ll_init_map(&rth);

	if (rtnl_listen(&rth, accept_msg, (void*)stdout) < 0)
		exit(2);

	exit(0);
}

View File

@ -0,0 +1,204 @@
/*
* ipmroute.c "ip mroute".
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <linux/netdevice.h>
#include <linux/if.h>
#include <linux/if_arp.h>
#include <linux/sockios.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include "utils.h"
char filter_dev[16];
int filter_family;
static void usage(void) __attribute__((noreturn));

/* Write the "ip mroute" synopsis to stderr and exit with status -1. */
static void usage(void)
{
	fputs("Usage: ip mroute show [ PREFIX ] [ from PREFIX ] [ iif DEVICE ]\n",
	      stderr);
#if 0
	fprintf(stderr, "Usage: ip mroute [ add | del ] DESTINATION from SOURCE [ iif DEVICE ] [ oif DEVICE ]\n");
#endif
	exit(-1);
}
/* Interface name per VIF index, filled by read_viftable(); unused slots stay NULL. */
char *viftable[32];
/* Prefix filters for "ip mroute show"; a filter is inactive while its family is 0. */
struct rtfilter
{
/* multicast destination (group) prefix */
inet_prefix mdst;
/* source prefix given with "from" */
inet_prefix msrc;
} filter;
void read_viftable(void)
{
char buf[256];
FILE *fp = fopen("/proc/net/ip_mr_vif", "r");
if (!fp)
return;
fgets(buf, sizeof(buf), fp);
while (fgets(buf, sizeof(buf), fp)) {
int vifi;
char dev[256];
if (sscanf(buf, "%d%s", &vifi, dev) < 2)
continue;
if (vifi<0 || vifi>31)
continue;
viftable[vifi] = strdup(dev);
}
fclose(fp);
}
/*
 * Print the multicast routing cache from /proc/net/ip_mr_cache on ofp.
 *
 * The first line of the file is discarded.  Each later line yields group,
 * source, incoming VIF index, three counters and an optional outgoing
 * interface list.  Entries are filtered by filter_dev, filter.mdst and
 * filter.msrc and printed as "(source, group) Iif: NAME [Oifs: ...]",
 * followed by the counters when show_stats is set and the byte counter is
 * non-zero.  A missing proc file prints nothing.
 */
void read_mroute_list(FILE *ofp)
{
	char buf[256];
	FILE *fp = fopen("/proc/net/ip_mr_cache", "r");

	if (!fp)
		return;

	/* discard the first line */
	fgets(buf, sizeof(buf), fp);

	while (fgets(buf, sizeof(buf), fp)) {
		inet_prefix maddr, msrc;
		unsigned pkts, b, w;
		int vifi;
		char oiflist[256];
		char sbuf[256];
		char mbuf[256];
		char obuf[256];

		oiflist[0] = 0;
		/*
		 * NOTE(review): "%s" stores only the first whitespace-delimited
		 * token in oiflist, so the loop below sees a single "vif:ttl"
		 * item - confirm against the proc file layout.
		 */
		if (sscanf(buf, "%x%x%d%u%u%u%s", maddr.data, msrc.data, &vifi,
			   &pkts, &b, &w, oiflist) < 6)
			continue;

		/* -1 marks an unresolved entry; anything else must index viftable */
		if (vifi!=-1 && (vifi < 0 || vifi>31))
			continue;

		/* an interface without a recorded name can never match the filter */
		if (filter_dev[0] &&
		    (vifi<0 || viftable[vifi] == NULL ||
		     strcmp(filter_dev, viftable[vifi])))
			continue;
		/* presumably inet_addr_match() is non-zero when the prefixes differ */
		if (filter.mdst.family && inet_addr_match(&maddr, &filter.mdst, filter.mdst.bitlen))
			continue;
		if (filter.msrc.family && inet_addr_match(&msrc, &filter.msrc, filter.msrc.bitlen))
			continue;

		snprintf(obuf, sizeof(obuf), "(%s, %s)",
			 format_host(AF_INET, 4, &msrc.data[0], sbuf, sizeof(sbuf)),
			 format_host(AF_INET, 4, &maddr.data[0], mbuf, sizeof(mbuf)));

		fprintf(ofp, "%-32s Iif: ", obuf);

		if (vifi == -1)
			fprintf(ofp, "unresolved ");
		else
			/* never hand a NULL pointer to %s */
			fprintf(ofp, "%-10s ",
				viftable[vifi] ? viftable[vifi] : "(null)");

		if (oiflist[0]) {
			char *next = NULL;
			char *p = oiflist;
			int ovifi, ottl;

			fprintf(ofp, "Oifs: ");

			while (p) {
				next = strchr(p, ' ');
				if (next) {
					*next = 0;
					next++;
				}
				if (sscanf(p, "%d:%d", &ovifi, &ottl)<2) {
					p = next;
					continue;
				}
				p = next;

				/* an index outside viftable cannot be named; skip it */
				if (ovifi < 0 || ovifi > 31)
					continue;

				fprintf(ofp, "%s",
					viftable[ovifi] ? viftable[ovifi] : "(null)");
				if (ottl>1)
					/* report the TTL threshold, not the VIF index */
					fprintf(ofp, "(ttl %d) ", ottl);
				else
					fprintf(ofp, " ");
			}
		}

		if (show_stats && b) {
			fprintf(ofp, "%s %u packets, %u bytes", _SL_, pkts, b);
			if (w)
				fprintf(ofp, ", %u arrived on wrong iif.", w);
		}
		fprintf(ofp, "\n");
	}
	fclose(fp);
}
/*
 * Implement "ip mroute show [ PREFIX ] [ from PREFIX ] [ iif DEVICE ]".
 *
 * "iif NAME" sets filter_dev, "from PREFIX" sets the source filter and any
 * other argument (optionally introduced by "to") sets the destination
 * filter; "help" prints the usage text.  The VIF table is then loaded and
 * the route cache printed on stdout.  Always returns 0.
 */
static int mroute_list(int argc, char **argv)
{
	for (; argc > 0; argc--, argv++) {
		if (strcmp(*argv, "iif") == 0) {
			NEXT_ARG();
			strncpy(filter_dev, *argv, sizeof(filter_dev)-1);
			continue;
		}

		if (matches(*argv, "from") == 0) {
			NEXT_ARG();
			get_prefix(&filter.msrc, *argv, AF_INET);
			continue;
		}

		if (strcmp(*argv, "to") == 0) {
			NEXT_ARG();
		}
		if (matches(*argv, "help") == 0)
			usage();
		get_prefix(&filter.mdst, *argv, AF_INET);
	}

	read_viftable();
	read_mroute_list(stdout);
	return 0;
}
/*
 * Entry point for "ip mroute": dispatch on the first argument.
 *
 * No argument, or list/show/lst, prints the multicast route cache; "help"
 * prints the usage text.  An unknown command is reported on stderr and the
 * process exits with status -1.
 */
int do_multiroute(int argc, char **argv)
{
	char *cmd;

	if (argc < 1)
		return mroute_list(0, NULL);

	cmd = *argv;
#if 0
	if (matches(*argv, "add") == 0)
		return mroute_modify(RTM_NEWADDR, argc-1, argv+1);
	if (matches(*argv, "delete") == 0)
		return mroute_modify(RTM_DELADDR, argc-1, argv+1);
	if (matches(*argv, "get") == 0)
		return mroute_get(argc-1, argv+1);
#endif
	if (matches(cmd, "list") == 0 ||
	    matches(cmd, "show") == 0 ||
	    matches(cmd, "lst") == 0)
		return mroute_list(argc-1, argv+1);

	if (matches(cmd, "help") == 0)
		usage();

	fprintf(stderr, "Command \"%s\" is unknown, try \"ip mroute help\".\n", cmd);
	exit(-1);
}

View File

@ -0,0 +1,484 @@
/*
* ipneigh.c "ip neigh".
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*
* Changes:
*
* Rani Assaf <rani@magic.metawire.com> 980929: resolve addresses
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <string.h>
#include <sys/time.h>
#include <net/if.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include "rt_names.h"
#include "utils.h"
#include "ip_common.h"
/* Bitwise OR of the permanent, noarp, reachable, probe, stale and delay states. */
#define NUD_VALID (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE|NUD_PROBE|NUD_STALE|NUD_DELAY)
/* Selection criteria and flush bookkeeping shared by print_neigh() and its callers. */
static struct
{
/* address family to select; 0 disables the check */
int family;
/* interface index to select; 0 disables the check */
int index;
/* mask of NUD_* bits to select; bit 0x100 stands for entries whose state is 0 */
int state;
/* non-zero: skip entries whose cacheinfo reference count is non-zero */
int unused_only;
/* destination prefix to select; inactive while pfx.family is 0 */
inet_prefix pfx;
/* number of entries queued for deletion in the current flush round */
int flushed;
/* buffer collecting delete requests; non-NULL puts print_neigh() in flush mode */
char *flushb;
/* number of bytes of flushb currently filled */
int flushp;
/* capacity of flushb in bytes */
int flushe;
/* rtnetlink handle used to send the queued requests */
struct rtnl_handle *rth;
} filter;
static void usage(void) __attribute__((noreturn));

/* Write the "ip neigh" synopsis to stderr and exit with status -1. */
static void usage(void)
{
	fputs("Usage: ip neigh { add | del | change | replace } { ADDR [ lladdr LLADDR ]\n"
	      " [ nud { permanent | noarp | stale | reachable } ]\n"
	      " | proxy ADDR } [ dev DEV ]\n"
	      " ip neigh {show|flush} [ to PREFIX ] [ dev DEV ] [ nud STATE ]\n",
	      stderr);
	exit(-1);
}
/*
 * Translate a neighbour state given as text into its NUD_* value.
 *
 * "permanent", "reachable" and "failed" are compared with matches(); the
 * other names must be spelled out in full.  Anything else is parsed as a
 * number, which is accepted only when it is below 0x100 and has at most one
 * bit set.
 *
 * Returns 0 with *state filled in, or -1 when arg is not acceptable (*state
 * may have been written by the numeric parser in that case).
 */
int nud_state_a2n(unsigned *state, char *arg)
{
	unsigned value;

	if (matches(arg, "permanent") == 0) {
		*state = NUD_PERMANENT;
		return 0;
	}
	if (matches(arg, "reachable") == 0) {
		*state = NUD_REACHABLE;
		return 0;
	}
	if (strcmp(arg, "noarp") == 0) {
		*state = NUD_NOARP;
		return 0;
	}
	if (strcmp(arg, "none") == 0) {
		*state = NUD_NONE;
		return 0;
	}
	if (strcmp(arg, "stale") == 0) {
		*state = NUD_STALE;
		return 0;
	}
	if (strcmp(arg, "incomplete") == 0) {
		*state = NUD_INCOMPLETE;
		return 0;
	}
	if (strcmp(arg, "delay") == 0) {
		*state = NUD_DELAY;
		return 0;
	}
	if (strcmp(arg, "probe") == 0) {
		*state = NUD_PROBE;
		return 0;
	}
	if (matches(arg, "failed") == 0) {
		*state = NUD_FAILED;
		return 0;
	}

	/* not a known name: accept a numeric state */
	if (get_unsigned(state, arg, 0))
		return -1;

	value = *state;
	/* v & (v - 1) is non-zero exactly when more than one bit is set */
	if (value >= 0x100 || (value & (value - 1)))
		return -1;

	return 0;
}
/*
 * Translate a NUD_* state value into text.
 *
 * Returns a constant string for the nine known states.  Any other value is
 * formatted as hexadecimal into buf (at most len bytes) and buf is returned.
 */
char * nud_state_n2a(__u8 state, char *buf, int len)
{
	static const struct {
		__u8 value;
		char *name;
	} known[] = {
		{ NUD_NONE,       "none" },
		{ NUD_INCOMPLETE, "incomplete" },
		{ NUD_REACHABLE,  "reachable" },
		{ NUD_STALE,      "stale" },
		{ NUD_DELAY,      "delay" },
		{ NUD_PROBE,      "probe" },
		{ NUD_FAILED,     "failed" },
		{ NUD_NOARP,      "noarp" },
		{ NUD_PERMANENT,  "permanent" },
	};
	unsigned i;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		if (known[i].value == state)
			return known[i].name;
	}

	snprintf(buf, len, "%x", state);
	return buf;
}
/*
 * Send the delete requests accumulated in filter.flushb over filter.rth and
 * mark the buffer empty.
 *
 * Returns 0 on success.  On a send failure an error is printed, the buffer
 * fill level is left unchanged and -1 is returned.
 */
static int flush_update(void)
{
	if (rtnl_send(filter.rth, filter.flushb, filter.flushp) < 0) {
		/* no trailing newline: perror() appends ": <error text>\n" itself */
		perror("Failed to send flush request");
		return -1;
	}
	filter.flushp = 0;
	return 0;
}
/*
 * Implement "ip neigh add|change|replace|del".
 *
 * cmd   - netlink message type (RTM_NEWNEIGH or RTM_DELNEIGH)
 * flags - NLM_F_* flags added to NLM_F_REQUEST
 * argc/argv - remaining command line: destination address (optionally after
 *         "to", or after "proxy" for a proxy entry), "lladdr LLADDR",
 *         "nud STATE" and "dev DEV"
 *
 * Both a device and a destination are required.  The state defaults to
 * NUD_PERMANENT.  An lladdr of "null" adds no link-layer attribute.
 *
 * Returns -1 when the link-layer address cannot be parsed or the device is
 * unknown.  Otherwise the function does not return: it exits with 0 on
 * success, 1 if rtnetlink cannot be opened, 2 if the request fails and -1
 * when a required argument is missing.
 */
static int ipneigh_modify(int cmd, int flags, int argc, char **argv)
{
	struct rtnl_handle rth;
	struct {
		struct nlmsghdr n;
		struct ndmsg ndm;
		char buf[256];
	} req;
	char *d = NULL;
	int dst_ok = 0;
	int lladdr_ok = 0;
	char * lla = NULL;
	inet_prefix dst;

	memset(&req, 0, sizeof(req));

	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));
	req.n.nlmsg_flags = NLM_F_REQUEST|flags;
	req.n.nlmsg_type = cmd;
	req.ndm.ndm_family = preferred_family;
	req.ndm.ndm_state = NUD_PERMANENT;

	while (argc > 0) {
		if (matches(*argv, "lladdr") == 0) {
			NEXT_ARG();
			if (lladdr_ok)
				duparg("lladdr", *argv);
			lla = *argv;
			lladdr_ok = 1;
		} else if (strcmp(*argv, "nud") == 0) {
			unsigned state;
			NEXT_ARG();
			if (nud_state_a2n(&state, *argv))
				invarg("nud state is bad", *argv);
			req.ndm.ndm_state = state;
		} else if (matches(*argv, "proxy") == 0) {
			NEXT_ARG();
			if (matches(*argv, "help") == 0)
				usage();
			if (dst_ok)
				duparg("address", *argv);
			get_addr(&dst, *argv, preferred_family);
			dst_ok = 1;
			req.ndm.ndm_flags |= NTF_PROXY;
		} else if (strcmp(*argv, "dev") == 0) {
			NEXT_ARG();
			d = *argv;
		} else {
			if (strcmp(*argv, "to") == 0) {
				NEXT_ARG();
			}
			/* "help" prints the synopsis, as in the "proxy" branch above */
			if (matches(*argv, "help") == 0)
				usage();
			if (dst_ok)
				duparg2("to", *argv);
			get_addr(&dst, *argv, preferred_family);
			dst_ok = 1;
		}
		argc--; argv++;
	}

	if (d == NULL || !dst_ok || dst.family == AF_UNSPEC) {
		fprintf(stderr, "Device and destination are required arguments.\n");
		exit(-1);
	}

	req.ndm.ndm_family = dst.family;
	addattr_l(&req.n, sizeof(req), NDA_DST, &dst.data, dst.bytelen);

	if (lla && strcmp(lla, "null")) {
		__u8 llabuf[16];
		int l;

		l = ll_addr_a2n(llabuf, sizeof(llabuf), lla);
		/* a negative result is an error code, not an attribute length */
		if (l < 0) {
			fprintf(stderr, "Error: \"%s\" is not a legal ll address.\n", lla);
			return -1;
		}
		addattr_l(&req.n, sizeof(req), NDA_LLADDR, llabuf, l);
	}

	if (rtnl_open(&rth, 0) < 0)
		exit(1);

	ll_init_map(&rth);

	if ((req.ndm.ndm_ifindex = ll_name_to_index(d)) == 0) {
		fprintf(stderr, "Cannot find device \"%s\"\n", d);
		return -1;
	}

	if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
		exit(2);

	exit(0);
}
/*
 * Netlink dump callback: filter one neighbour entry and print it or queue
 * its deletion.
 *
 * who - sender address (unused here)
 * n   - the netlink message, expected to be RTM_NEWNEIGH or RTM_DELNEIGH
 * arg - the FILE* to print on
 *
 * The entry is dropped unless it passes the family, interface, state,
 * prefix and unused-only checks held in the file-scope filter.  In flush
 * mode (filter.flushb set) a copy of the message, rewritten as an
 * RTM_DELNEIGH request, is appended to the flush buffer, and the entry is
 * printed only when show_stats is at least 2.
 *
 * Returns 0 normally, -1 on a malformed message length or when the flush
 * buffer cannot be sent.
 */
int print_neigh(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
{
	FILE *fp = (FILE*)arg;
	struct ndmsg *r = NLMSG_DATA(n);
	int len = n->nlmsg_len;
	struct rtattr * tb[NDA_MAX+1];
	char abuf[256];

	if (n->nlmsg_type != RTM_NEWNEIGH && n->nlmsg_type != RTM_DELNEIGH) {
		fprintf(stderr, "Not RTM_NEWNEIGH: %08x %08x %08x\n",
			n->nlmsg_len, n->nlmsg_type, n->nlmsg_flags);
		return 0;
	}
	len -= NLMSG_LENGTH(sizeof(*r));
	if (len < 0) {
		fprintf(stderr, "BUG: wrong nlmsg len %d\n", len);
		return -1;
	}

	/* while flushing, only existing entries are of interest */
	if (filter.flushb && n->nlmsg_type != RTM_NEWNEIGH)
		return 0;

	if (filter.family && filter.family != r->ndm_family)
		return 0;
	if (filter.index && filter.index != r->ndm_ifindex)
		return 0;
	/*
	 * Drop the entry when none of its state bits is selected, unless it
	 * has no state at all and the 0x100 pseudo-bit is selected.  DECnet
	 * entries are never dropped by this test.
	 */
	if (!(filter.state&r->ndm_state) &&
	    (r->ndm_state || !(filter.state&0x100)) &&
	    (r->ndm_family != AF_DECnet))
		return 0;

	memset(tb, 0, sizeof(tb));
	parse_rtattr(tb, NDA_MAX, NDA_RTA(r), n->nlmsg_len - NLMSG_LENGTH(sizeof(*r)));

	if (tb[NDA_DST]) {
		if (filter.pfx.family) {
			inet_prefix dst;
			memset(&dst, 0, sizeof(dst));
			dst.family = r->ndm_family;
			/* NOTE(review): the payload length is not checked against sizeof(dst.data) */
			memcpy(&dst.data, RTA_DATA(tb[NDA_DST]), RTA_PAYLOAD(tb[NDA_DST]));
			if (inet_addr_match(&dst, &filter.pfx, filter.pfx.bitlen))
				return 0;
		}
	}
	if (filter.unused_only && tb[NDA_CACHEINFO]) {
		struct nda_cacheinfo *ci = RTA_DATA(tb[NDA_CACHEINFO]);
		if (ci->ndm_refcnt)
			return 0;
	}

	if (filter.flushb) {
		struct nlmsghdr *fn;

		/* send what is queued so far if this message would not fit */
		if (NLMSG_ALIGN(filter.flushp) + n->nlmsg_len > filter.flushe) {
			if (flush_update())
				return -1;
		}
		fn = (struct nlmsghdr*)(filter.flushb + NLMSG_ALIGN(filter.flushp));
		memcpy(fn, n, n->nlmsg_len);
		fn->nlmsg_type = RTM_DELNEIGH;
		fn->nlmsg_flags = NLM_F_REQUEST;
		fn->nlmsg_seq = ++filter.rth->seq;
		filter.flushp = (((char*)fn) + n->nlmsg_len) - filter.flushb;
		filter.flushed++;
		if (show_stats < 2)
			return 0;
	}

	if (tb[NDA_DST]) {
		fprintf(fp, "%s ",
			format_host(r->ndm_family,
				    RTA_PAYLOAD(tb[NDA_DST]),
				    RTA_DATA(tb[NDA_DST]),
				    abuf, sizeof(abuf)));
	}
	/* the device is shown only when the listing is not limited to one */
	if (!filter.index && r->ndm_ifindex)
		fprintf(fp, "dev %s ", ll_index_to_name(r->ndm_ifindex));
	if (tb[NDA_LLADDR]) {
		SPRINT_BUF(b1);
		fprintf(fp, "lladdr %s", ll_addr_n2a(RTA_DATA(tb[NDA_LLADDR]),
						     RTA_PAYLOAD(tb[NDA_LLADDR]),
						     ll_index_to_type(r->ndm_ifindex),
						     b1, sizeof(b1)));
	}
	if (r->ndm_flags & NTF_ROUTER) {
		fprintf(fp, " router");
	}
	if (tb[NDA_CACHEINFO] && show_stats) {
		static int hz;
		struct nda_cacheinfo *ci = RTA_DATA(tb[NDA_CACHEINFO]);
		if (!hz)
			hz = get_hz();
		/* written to fp like every other field, not unconditionally to stdout */
		if (ci->ndm_refcnt)
			fprintf(fp, " ref %d", ci->ndm_refcnt);
		fprintf(fp, " used %d/%d/%d", ci->ndm_used/hz,
			ci->ndm_confirmed/hz, ci->ndm_updated/hz);
	}

	if (r->ndm_state) {
		SPRINT_BUF(b1);
		fprintf(fp, " nud %s", nud_state_n2a(r->ndm_state, b1, sizeof(b1)));
	}
	fprintf(fp, "\n");

	fflush(fp);
	return 0;
}
/* Reset the neighbour filter: clear every field, then select all states. */
void ipneigh_reset_filter()
{
memset(&filter, 0, sizeof(filter));
/* all bits set, so the state test in print_neigh() accepts every entry */
filter.state = ~0;
}
/*
 * Shared implementation of "ip neigh show" and "ip neigh flush".
 *
 * Parses the selector arguments ("dev DEV", "unused", "nud STATE" and a
 * destination prefix optionally introduced by "to") into the file-scope
 * filter, opens rtnetlink and requests a neighbour dump that print_neigh()
 * processes.  When flush is non-zero the dump is repeated, with
 * print_neigh() queueing delete requests into a local buffer, until a round
 * queues nothing.
 *
 * Returns 0 on success and -1 when flush is given no arguments or the
 * device is unknown; netlink failures terminate the process via exit().
 */
int do_show_or_flush(int argc, char **argv, int flush)
{
char *filter_dev = NULL;
struct rtnl_handle rth;
int state_given = 0;
ipneigh_reset_filter();
if (!filter.family)
filter.family = preferred_family;
if (flush) {
if (argc <= 0) {
fprintf(stderr, "Flush requires arguments.\n");
return -1;
}
/* default flush selection: everything except permanent and noarp entries */
filter.state = ~(NUD_PERMANENT|NUD_NOARP);
} else
/* default listing selection: every state in the low byte except noarp */
filter.state = 0xFF & ~NUD_NOARP;
while (argc > 0) {
if (strcmp(*argv, "dev") == 0) {
NEXT_ARG();
if (filter_dev)
duparg("dev", *argv);
filter_dev = *argv;
} else if (strcmp(*argv, "unused") == 0) {
filter.unused_only = 1;
} else if (strcmp(*argv, "nud") == 0) {
unsigned state;
NEXT_ARG();
/* the first explicit "nud" replaces the default mask; later ones add to it */
if (!state_given) {
state_given = 1;
filter.state = 0;
}
if (nud_state_a2n(&state, *argv)) {
/* "all" is not known to nud_state_a2n(); it selects every bit */
if (strcmp(*argv, "all") != 0)
invarg("nud state is bad", *argv);
state = ~0;
if (flush)
state &= ~NUD_NOARP;
}
/* state 0 ("none") has no bit of its own; 0x100 stands in for it in the mask */
if (state == 0)
state = 0x100;
filter.state |= state;
} else {
if (strcmp(*argv, "to") == 0) {
NEXT_ARG();
}
if (matches(*argv, "help") == 0)
usage();
get_prefix(&filter.pfx, *argv, filter.family);
/* with no family chosen yet, adopt the family of the prefix */
if (filter.family == AF_UNSPEC)
filter.family = filter.pfx.family;
}
argc--; argv++;
}
if (rtnl_open(&rth, 0) < 0)
exit(1);
ll_init_map(&rth);
if (filter_dev) {
if ((filter.index = ll_name_to_index(filter_dev)) == 0) {
fprintf(stderr, "Cannot find device \"%s\"\n", filter_dev);
return -1;
}
}
if (flush) {
int round = 0;
/* delete requests are collected here by print_neigh() */
char flushb[4096-512];
filter.flushb = flushb;
filter.flushp = 0;
filter.flushe = sizeof(flushb);
filter.rth = &rth;
/* entries in the failed state are never selected for flushing */
filter.state &= ~NUD_FAILED;
/* repeat dump + delete until a dump queues nothing */
for (;;) {
if (rtnl_wilddump_request(&rth, filter.family, RTM_GETNEIGH) < 0) {
perror("Cannot send dump request");
exit(1);
}
filter.flushed = 0;
if (rtnl_dump_filter(&rth, print_neigh, stdout, NULL, NULL) < 0) {
fprintf(stderr, "Flush terminated\n");
exit(1);
}
if (filter.flushed == 0) {
if (round == 0) {
fprintf(stderr, "Nothing to flush.\n");
} else if (show_stats)
printf("*** Flush is complete after %d round%s ***\n", round, round>1?"s":"");
fflush(stdout);
return 0;
}
round++;
/* send whatever is still queued in the buffer for this round */
if (flush_update() < 0)
exit(1);
if (show_stats) {
printf("\n*** Round %d, deleting %d entries ***\n", round, filter.flushed);
fflush(stdout);
}
}
}
/* plain listing: a single dump printed through print_neigh() */
if (rtnl_wilddump_request(&rth, filter.family, RTM_GETNEIGH) < 0) {
perror("Cannot send dump request");
exit(1);
}
if (rtnl_dump_filter(&rth, print_neigh, stdout, NULL, NULL) < 0) {
fprintf(stderr, "Dump terminated\n");
exit(1);
}
return 0;
}
/*
 * Entry point for "ip neigh": dispatch on the first argument.
 *
 * No argument lists the neighbour table.  add/change(chg)/replace/delete go
 * to ipneigh_modify() with the matching netlink type and flags; "get" is
 * reported as unimplemented (-1); show/lst/list and "flush" go to
 * do_show_or_flush(); "help" prints the usage text.  An unknown command is
 * reported on stderr and the process exits with status -1.
 */
int do_ipneigh(int argc, char **argv)
{
	char *cmd;

	if (argc <= 0)
		return do_show_or_flush(0, NULL, 0);

	cmd = *argv;

	if (matches(cmd, "add") == 0)
		return ipneigh_modify(RTM_NEWNEIGH, NLM_F_CREATE|NLM_F_EXCL, argc-1, argv+1);

	if (matches(cmd, "change") == 0 || strcmp(cmd, "chg") == 0)
		return ipneigh_modify(RTM_NEWNEIGH, NLM_F_REPLACE, argc-1, argv+1);

	if (matches(cmd, "replace") == 0)
		return ipneigh_modify(RTM_NEWNEIGH, NLM_F_CREATE|NLM_F_REPLACE, argc-1, argv+1);

	if (matches(cmd, "delete") == 0)
		return ipneigh_modify(RTM_DELNEIGH, 0, argc-1, argv+1);

	if (matches(cmd, "get") == 0) {
		fprintf(stderr, "Sorry, \"neigh get\" is not implemented :-(\n");
		return -1;
	}

	if (matches(cmd, "show") == 0 ||
	    matches(cmd, "lst") == 0 ||
	    matches(cmd, "list") == 0)
		return do_show_or_flush(argc-1, argv+1, 0);

	if (matches(cmd, "flush") == 0)
		return do_show_or_flush(argc-1, argv+1, 1);

	if (matches(cmd, "help") == 0)
		usage();

	fprintf(stderr, "Command \"%s\" is unknown, try \"ip neigh help\".\n", cmd);
	exit(-1);
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,323 @@
/*
* iprule.c "ip rule".
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*
* Changes:
*
* Rani Assaf <rani@magic.metawire.com> 980929: resolve addresses
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <string.h>
#include "rt_names.h"
#include "utils.h"
static void usage(void) __attribute__((noreturn));

/* Write the "ip rule" synopsis to stderr and exit with status -1. */
static void usage(void)
{
	fputs("Usage: ip rule [ list | add | del ] SELECTOR ACTION\n"
	      "SELECTOR := [ from PREFIX ] [ to PREFIX ] [ tos TOS ] [ fwmark FWMARK ]\n"
	      " [ dev STRING ] [ pref NUMBER ]\n"
	      "ACTION := [ table TABLE_ID ] [ nat ADDRESS ]\n"
	      " [ prohibit | reject | unreachable ]\n"
	      " [ realms [SRCREALM/]DSTREALM ]\n"
	      "TABLE_ID := [ local | main | default | NUMBER ]\n",
	      stderr);
	exit(-1);
}
/*
 * Netlink dump callback: print one routing policy rule on the FILE* in arg.
 *
 * Messages other than RTM_NEWRULE are ignored.  The line has the form
 * "PRIORITY:\tfrom SRC [to DST] [tos T] [fwmark M] [iif DEV] [lookup TABLE]
 * [realms R] [ACTION]".
 *
 * Returns 0 normally, -1 when the message is shorter than its header.
 */
int print_rule(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
{
FILE *fp = (FILE*)arg;
struct rtmsg *r = NLMSG_DATA(n);
int len = n->nlmsg_len;
int host_len = -1;
struct rtattr * tb[RTA_MAX+1];
char abuf[256];
SPRINT_BUF(b1);
if (n->nlmsg_type != RTM_NEWRULE)
return 0;
len -= NLMSG_LENGTH(sizeof(*r));
if (len < 0)
return -1;
memset(tb, 0, sizeof(tb));
parse_rtattr(tb, RTA_MAX, RTM_RTA(r), len);
/* full address width in bits for the family; stays -1 for other families */
if (r->rtm_family == AF_INET)
host_len = 32;
else if (r->rtm_family == AF_INET6)
host_len = 128;
else if (r->rtm_family == AF_DECnet)
host_len = 16;
else if (r->rtm_family == AF_IPX)
host_len = 80;
/* a rule without a priority attribute is shown with priority 0 */
if (tb[RTA_PRIORITY])
fprintf(fp, "%u:\t", *(unsigned*)RTA_DATA(tb[RTA_PRIORITY]));
else
fprintf(fp, "0:\t");
/* source selector: prefix form unless the length is the full host length */
if (tb[RTA_SRC]) {
if (r->rtm_src_len != host_len) {
fprintf(fp, "from %s/%u ", rt_addr_n2a(r->rtm_family,
RTA_PAYLOAD(tb[RTA_SRC]),
RTA_DATA(tb[RTA_SRC]),
abuf, sizeof(abuf)),
r->rtm_src_len
);
} else {
fprintf(fp, "from %s ", format_host(r->rtm_family,
RTA_PAYLOAD(tb[RTA_SRC]),
RTA_DATA(tb[RTA_SRC]),
abuf, sizeof(abuf))
);
}
} else if (r->rtm_src_len) {
fprintf(fp, "from 0/%d ", r->rtm_src_len);
} else {
fprintf(fp, "from all ");
}
/* destination selector: same rules as the source, but omitted entirely when absent */
if (tb[RTA_DST]) {
if (r->rtm_dst_len != host_len) {
fprintf(fp, "to %s/%u ", rt_addr_n2a(r->rtm_family,
RTA_PAYLOAD(tb[RTA_DST]),
RTA_DATA(tb[RTA_DST]),
abuf, sizeof(abuf)),
r->rtm_dst_len
);
} else {
fprintf(fp, "to %s ", format_host(r->rtm_family,
RTA_PAYLOAD(tb[RTA_DST]),
RTA_DATA(tb[RTA_DST]),
abuf, sizeof(abuf)));
}
} else if (r->rtm_dst_len) {
fprintf(fp, "to 0/%d ", r->rtm_dst_len);
}
if (r->rtm_tos) {
/* this b1 shadows the function-level buffer of the same name */
SPRINT_BUF(b1);
fprintf(fp, "tos %s ", rtnl_dsfield_n2a(r->rtm_tos, b1, sizeof(b1)));
}
/* the firewall mark is carried in the RTA_PROTOINFO attribute */
if (tb[RTA_PROTOINFO]) {
fprintf(fp, "fwmark %8x ", *(__u32*)RTA_DATA(tb[RTA_PROTOINFO]));
}
if (tb[RTA_IIF]) {
fprintf(fp, "iif %s ", (char*)RTA_DATA(tb[RTA_IIF]));
}
if (r->rtm_table)
fprintf(fp, "lookup %s ", rtnl_rttable_n2a(r->rtm_table, b1, sizeof(b1)));
if (tb[RTA_FLOW]) {
/* the upper 16 bits hold the source realm, the lower 16 the destination realm */
__u32 to = *(__u32*)RTA_DATA(tb[RTA_FLOW]);
__u32 from = to>>16;
to &= 0xFFFF;
if (from) {
fprintf(fp, "realms %s/",
rtnl_rtrealm_n2a(from, b1, sizeof(b1)));
}
fprintf(fp, "%s ",
rtnl_rtrealm_n2a(to, b1, sizeof(b1)));
}
/* action: NAT rules show their target, other non-unicast types show the type name */
if (r->rtm_type == RTN_NAT) {
if (tb[RTA_GATEWAY]) {
fprintf(fp, "map-to %s ",
format_host(r->rtm_family,
RTA_PAYLOAD(tb[RTA_GATEWAY]),
RTA_DATA(tb[RTA_GATEWAY]),
abuf, sizeof(abuf)));
} else
fprintf(fp, "masquerade");
} else if (r->rtm_type != RTN_UNICAST)
fprintf(fp, "%s", rtnl_rtntype_n2a(r->rtm_type, b1, sizeof(b1)));
fprintf(fp, "\n");
fflush(fp);
return 0;
}
/*
 * Implement "ip rule list": dump the policy rules of one address family.
 *
 * The family is preferred_family, or AF_INET when none was chosen.  The
 * command takes no arguments.
 *
 * Returns 0 on success, -1 when arguments are given and 1 when the netlink
 * socket cannot be opened or the dump fails.
 */
int iprule_list(int argc, char **argv)
{
	struct rtnl_handle rth;
	int af = preferred_family;

	if (af == AF_UNSPEC)
		af = AF_INET;

	if (argc > 0) {
		fprintf(stderr, "\"ip rule show\" does not take any arguments.\n");
		return -1;
	}

	if (rtnl_open(&rth, 0) < 0)
		return 1;

	if (rtnl_wilddump_request(&rth, af, RTM_GETRULE) < 0) {
		perror("Cannot send dump request");
		return 1;
	}

	if (rtnl_dump_filter(&rth, print_rule, stdout, NULL, NULL) < 0) {
		fprintf(stderr, "Dump terminated\n");
		return 1;
	}

	return 0;
}
/*
 * Implement "ip rule add" and "ip rule del".
 *
 * cmd - RTM_NEWRULE or RTM_DELRULE
 * argc/argv - selector and action keywords, see usage()
 *
 * Builds one rtnetlink request from the arguments and sends it.  For
 * RTM_NEWRULE the request is create-exclusive, the type defaults to unicast
 * and the table defaults to RT_TABLE_MAIN when none is given.  The family
 * defaults to AF_INET.
 *
 * Returns 0 on success, 1 if rtnetlink cannot be opened, 2 if the request
 * fails.
 */
int iprule_modify(int cmd, int argc, char **argv)
{
int table_ok = 0;
struct rtnl_handle rth;
struct {
struct nlmsghdr n;
struct rtmsg r;
char buf[1024];
} req;
memset(&req, 0, sizeof(req));
req.n.nlmsg_type = cmd;
req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct rtmsg));
req.n.nlmsg_flags = NLM_F_REQUEST;
req.r.rtm_family = preferred_family;
req.r.rtm_protocol = RTPROT_BOOT;
req.r.rtm_scope = RT_SCOPE_UNIVERSE;
req.r.rtm_table = 0;
req.r.rtm_type = RTN_UNSPEC;
if (cmd == RTM_NEWRULE) {
req.n.nlmsg_flags |= NLM_F_CREATE|NLM_F_EXCL;
req.r.rtm_type = RTN_UNICAST;
}
while (argc > 0) {
if (strcmp(*argv, "from") == 0) {
inet_prefix dst;
NEXT_ARG();
get_prefix(&dst, *argv, req.r.rtm_family);
req.r.rtm_src_len = dst.bitlen;
addattr_l(&req.n, sizeof(req), RTA_SRC, &dst.data, dst.bytelen);
} else if (strcmp(*argv, "to") == 0) {
inet_prefix dst;
NEXT_ARG();
get_prefix(&dst, *argv, req.r.rtm_family);
req.r.rtm_dst_len = dst.bitlen;
addattr_l(&req.n, sizeof(req), RTA_DST, &dst.data, dst.bytelen);
} else if (matches(*argv, "preference") == 0 ||
matches(*argv, "order") == 0 ||
matches(*argv, "priority") == 0) {
__u32 pref;
NEXT_ARG();
/* NOTE(review): these invarg() messages end in "\n"; check whether invarg() adds its own newline */
if (get_u32(&pref, *argv, 0))
invarg("preference value is invalid\n", *argv);
addattr32(&req.n, sizeof(req), RTA_PRIORITY, pref);
} else if (strcmp(*argv, "tos") == 0) {
__u32 tos;
NEXT_ARG();
if (rtnl_dsfield_a2n(&tos, *argv))
invarg("TOS value is invalid\n", *argv);
req.r.rtm_tos = tos;
} else if (strcmp(*argv, "fwmark") == 0) {
__u32 fwmark;
NEXT_ARG();
/* the mark is parsed with base 16 and sent as RTA_PROTOINFO */
if (get_u32(&fwmark, *argv, 16))
invarg("fwmark value is invalid\n", *argv);
addattr32(&req.n, sizeof(req), RTA_PROTOINFO, fwmark);
} else if (matches(*argv, "realms") == 0) {
__u32 realm;
NEXT_ARG();
if (get_rt_realms(&realm, *argv))
invarg("invalid realms\n", *argv);
addattr32(&req.n, sizeof(req), RTA_FLOW, realm);
} else if (matches(*argv, "table") == 0 ||
strcmp(*argv, "lookup") == 0) {
int tid;
NEXT_ARG();
if (rtnl_rttable_a2n(&tid, *argv))
invarg("invalid table ID\n", *argv);
req.r.rtm_table = tid;
table_ok = 1;
} else if (strcmp(*argv, "dev") == 0 ||
strcmp(*argv, "iif") == 0) {
NEXT_ARG();
/* the interface name is sent including its terminating NUL */
addattr_l(&req.n, sizeof(req), RTA_IIF, *argv, strlen(*argv)+1);
} else if (strcmp(*argv, "nat") == 0 ||
matches(*argv, "map-to") == 0) {
NEXT_ARG();
addattr32(&req.n, sizeof(req), RTA_GATEWAY, get_addr32(*argv));
req.r.rtm_type = RTN_NAT;
} else {
/* anything else is a rule type name, optionally introduced by "type" */
int type;
if (strcmp(*argv, "type") == 0) {
NEXT_ARG();
}
if (matches(*argv, "help") == 0)
usage();
if (rtnl_rtntype_a2n(&type, *argv))
invarg("Failed to parse rule type", *argv);
req.r.rtm_type = type;
}
argc--;
argv++;
}
if (req.r.rtm_family == AF_UNSPEC)
req.r.rtm_family = AF_INET;
/* only a new rule gets the main table by default */
if (!table_ok && cmd == RTM_NEWRULE)
req.r.rtm_table = RT_TABLE_MAIN;
if (rtnl_open(&rth, 0) < 0)
return 1;
if (rtnl_talk(&rth, &req.n, 0, 0, NULL, NULL, NULL) < 0)
return 2;
return 0;
}
/*
 * Entry point for "ip rule": dispatch on the first word of argv.
 * With no arguments the rule list is shown.  An unrecognised
 * sub-command prints a diagnostic and terminates the process.
 */
int do_iprule(int argc, char **argv)
{
	char *verb;

	if (argc < 1)
		return iprule_list(0, NULL);

	verb = argv[0];

	if (matches(verb, "list") == 0 ||
	    matches(verb, "lst") == 0 ||
	    matches(verb, "show") == 0)
		return iprule_list(argc-1, argv+1);

	if (matches(verb, "add") == 0)
		return iprule_modify(RTM_NEWRULE, argc-1, argv+1);

	if (matches(verb, "delete") == 0)
		return iprule_modify(RTM_DELRULE, argc-1, argv+1);

	if (matches(verb, "help") == 0)
		usage();

	fprintf(stderr, "Command \"%s\" is unknown, try \"ip rule help\".\n", *argv);
	exit(-1);
}

View File

@ -0,0 +1,581 @@
/*
* iptunnel.c "ip tunnel"
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*
* Changes:
*
* Rani Assaf <rani@magic.metawire.com> 980929: resolve addresses
* Rani Assaf <rani@magic.metawire.com> 980930: do not allow key for ipip/sit
* Phil Karn <karn@ka9q.ampr.org> 990408: "pmtudisc" flag
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_arp.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <arpa/inet.h>
#include <linux/if_tunnel.h>
#include "rt_names.h"
#include "utils.h"
static void usage(void) __attribute__((noreturn));

/* Print the "ip tunnel" synopsis on stderr and terminate with status -1. */
static void usage(void)
{
	static const char text[] =
		"Usage: ip tunnel { add | change | del | show } [ NAME ]\n"
		" [ mode { ipip | gre | sit } ] [ remote ADDR ] [ local ADDR ]\n"
		" [ [i|o]seq ] [ [i|o]key KEY ] [ [i|o]csum ]\n"
		" [ ttl TTL ] [ tos TOS ] [ [no]pmtudisc ] [ dev PHYS_DEV ]\n"
		"\n"
		"Where: NAME := STRING\n"
		" ADDR := { IP_ADDRESS | any }\n"
		" TOS := { NUMBER | inherit }\n"
		" TTL := { 1..255 | inherit }\n"
		" KEY := { DOTTED_QUAD | NUMBER }\n";

	fputs(text, stderr);
	exit(-1);
}
/*
 * Look up the interface index of device `dev` with SIOCGIFINDEX.
 *
 * Returns the index, or 0 when the socket cannot be created or the
 * ioctl fails (a diagnostic is printed with perror in both cases).
 *
 * The name is copied with a bound so an over-long argument cannot
 * overrun ifr_name, and the socket is closed on every path.
 */
static int do_ioctl_get_ifindex(char *dev)
{
	struct ifreq ifr;
	int fd;
	int ifindex = 0;

	/* Zero first so the bounded copy below is always NUL-terminated. */
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("socket");
		return 0;
	}
	if (ioctl(fd, SIOCGIFINDEX, &ifr))
		perror("ioctl");
	else
		ifindex = ifr.ifr_ifindex;
	close(fd);
	return ifindex;
}
/*
 * Fetch the hardware (ARPHRD_*) type of device `dev` with SIOCGIFHWADDR.
 *
 * Returns the sa_family field of the hardware address, or -1 when the
 * socket cannot be created or the ioctl fails (perror is called).
 *
 * The name copy is bounded and the socket is closed on every path.
 */
static int do_ioctl_get_iftype(char *dev)
{
	struct ifreq ifr;
	int fd;
	int type = -1;

	/* Zero first so the bounded copy below is always NUL-terminated. */
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, dev, IFNAMSIZ - 1);

	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("socket");
		return -1;
	}
	if (ioctl(fd, SIOCGIFHWADDR, &ifr))
		perror("ioctl");
	else
		type = ifr.ifr_addr.sa_family;
	close(fd);
	return type;
}
/*
 * Translate interface index `idx` to its name with SIOCGIFNAME.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call, or NULL when the socket cannot be created or the ioctl fails
 * (perror is called).  The socket is closed on every path.
 */
static char * do_ioctl_get_ifname(int idx)
{
	static struct ifreq ifr;
	int fd;
	int err;

	ifr.ifr_ifindex = idx;
	fd = socket(AF_INET, SOCK_DGRAM, 0);
	if (fd < 0) {
		perror("socket");
		return NULL;
	}
	err = ioctl(fd, SIOCGIFNAME, &ifr);
	if (err)
		perror("ioctl");
	close(fd);
	return err ? NULL : ifr.ifr_name;
}
static int do_get_ioctl(char *basedev, struct ip_tunnel_parm *p)
{
struct ifreq ifr;
int fd;
int err;
strcpy(ifr.ifr_name, basedev);
ifr.ifr_ifru.ifru_data = (void*)p;
fd = socket(AF_INET, SOCK_DGRAM, 0);
err = ioctl(fd, SIOCGETTUNNEL, &ifr);
if (err)
perror("ioctl");
close(fd);
return err;
}
static int do_add_ioctl(int cmd, char *basedev, struct ip_tunnel_parm *p)
{
struct ifreq ifr;
int fd;
int err;
if (cmd == SIOCCHGTUNNEL && p->name[0])
strcpy(ifr.ifr_name, p->name);
else
strcpy(ifr.ifr_name, basedev);
ifr.ifr_ifru.ifru_data = (void*)p;
fd = socket(AF_INET, SOCK_DGRAM, 0);
err = ioctl(fd, cmd, &ifr);
if (err)
perror("ioctl");
close(fd);
return err;
}
static int do_del_ioctl(char *basedev, struct ip_tunnel_parm *p)
{
struct ifreq ifr;
int fd;
int err;
if (p->name[0])
strcpy(ifr.ifr_name, p->name);
else
strcpy(ifr.ifr_name, basedev);
ifr.ifr_ifru.ifru_data = (void*)p;
fd = socket(AF_INET, SOCK_DGRAM, 0);
err = ioctl(fd, SIOCDELTUNNEL, &ifr);
if (err)
perror("ioctl");
close(fd);
return err;
}
/*
 * Record the encapsulation protocol selected by "mode".  Asking for a
 * second, different mode is a fatal usage error.
 */
static void tunnel_set_mode(struct ip_tunnel_parm *p, int proto)
{
	if (p->iph.protocol && p->iph.protocol != proto) {
		fprintf(stderr,"You managed to ask for more than one tunnel mode.\n");
		exit(-1);
	}
	p->iph.protocol = proto;
}

/*
 * Parse the value of a key/ikey/okey option.  A value containing '.'
 * is converted with get_addr32(); anything else is parsed as an
 * unsigned number and converted with htonl().  `keyword` only
 * appears in the error message.  Exits on an invalid number.
 */
static __u32 tunnel_parse_key(char *arg, const char *keyword)
{
	unsigned uval;

	if (strchr(arg, '.'))
		return get_addr32(arg);
	if (get_unsigned(&uval, arg, 0)<0) {
		fprintf(stderr, "invalid value of \"%s\"\n", keyword);
		exit(-1);
	}
	return htonl(uval);
}

/*
 * Parse the "ip tunnel" option list into *p.
 *
 * *p is cleared and given defaults (IPv4 header, DF set) before the
 * arguments are read.  For SIOCCHGTUNNEL, when the tunnel name is the
 * first argument, the current parameters of that tunnel are fetched
 * and used as the starting point.
 *
 * Returns 0 on success and -1 on a semantic error; several syntax
 * errors terminate the process directly.
 */
static int parse_args(int argc, char **argv, int cmd, struct ip_tunnel_parm *p)
{
	int count = 0;
	char medium[IFNAMSIZ];

	memset(p, 0, sizeof(*p));
	memset(&medium, 0, sizeof(medium));

	p->iph.version = 4;
	p->iph.ihl = 5;
#ifndef IP_DF
#define IP_DF 0x4000 /* Flag: "Don't Fragment" */
#endif
	p->iph.frag_off = htons(IP_DF);

	while (argc > 0) {
		if (strcmp(*argv, "mode") == 0) {
			NEXT_ARG();
			if (strcmp(*argv, "ipip") == 0 ||
			    strcmp(*argv, "ip/ip") == 0) {
				tunnel_set_mode(p, IPPROTO_IPIP);
			} else if (strcmp(*argv, "gre") == 0 ||
				   strcmp(*argv, "gre/ip") == 0) {
				tunnel_set_mode(p, IPPROTO_GRE);
			} else if (strcmp(*argv, "sit") == 0 ||
				   strcmp(*argv, "ipv6/ip") == 0) {
				tunnel_set_mode(p, IPPROTO_IPV6);
			} else {
				fprintf(stderr,"Cannot guess tunnel mode.\n");
				exit(-1);
			}
		} else if (strcmp(*argv, "key") == 0) {
			NEXT_ARG();
			p->i_flags |= GRE_KEY;
			p->o_flags |= GRE_KEY;
			p->i_key = p->o_key = tunnel_parse_key(*argv, "key");
		} else if (strcmp(*argv, "ikey") == 0) {
			NEXT_ARG();
			p->i_flags |= GRE_KEY;
			/* The input key goes to i_key for both value forms. */
			p->i_key = tunnel_parse_key(*argv, "ikey");
		} else if (strcmp(*argv, "okey") == 0) {
			NEXT_ARG();
			p->o_flags |= GRE_KEY;
			p->o_key = tunnel_parse_key(*argv, "okey");
		} else if (strcmp(*argv, "seq") == 0) {
			p->i_flags |= GRE_SEQ;
			p->o_flags |= GRE_SEQ;
		} else if (strcmp(*argv, "iseq") == 0) {
			p->i_flags |= GRE_SEQ;
		} else if (strcmp(*argv, "oseq") == 0) {
			p->o_flags |= GRE_SEQ;
		} else if (strcmp(*argv, "csum") == 0) {
			p->i_flags |= GRE_CSUM;
			p->o_flags |= GRE_CSUM;
		} else if (strcmp(*argv, "icsum") == 0) {
			p->i_flags |= GRE_CSUM;
		} else if (strcmp(*argv, "ocsum") == 0) {
			p->o_flags |= GRE_CSUM;
		} else if (strcmp(*argv, "nopmtudisc") == 0) {
			p->iph.frag_off = 0;
		} else if (strcmp(*argv, "pmtudisc") == 0) {
			p->iph.frag_off = htons(IP_DF);
		} else if (strcmp(*argv, "remote") == 0) {
			NEXT_ARG();
			if (strcmp(*argv, "any"))
				p->iph.daddr = get_addr32(*argv);
		} else if (strcmp(*argv, "local") == 0) {
			NEXT_ARG();
			if (strcmp(*argv, "any"))
				p->iph.saddr = get_addr32(*argv);
		} else if (strcmp(*argv, "dev") == 0) {
			NEXT_ARG();
			strncpy(medium, *argv, IFNAMSIZ-1);
		} else if (strcmp(*argv, "ttl") == 0) {
			unsigned uval;
			NEXT_ARG();
			if (strcmp(*argv, "inherit") != 0) {
				if (get_unsigned(&uval, *argv, 0))
					invarg("invalid TTL\n", *argv);
				if (uval > 255)
					invarg("TTL must be <=255\n", *argv);
				p->iph.ttl = uval;
			}
		} else if (strcmp(*argv, "tos") == 0 ||
			   matches(*argv, "dsfield") == 0) {
			__u32 uval;
			NEXT_ARG();
			if (strcmp(*argv, "inherit") != 0) {
				if (rtnl_dsfield_a2n(&uval, *argv))
					invarg("bad TOS value", *argv);
				p->iph.tos = uval;
			} else
				p->iph.tos = 1;
		} else {
			/* Anything else is the tunnel name, optionally
			 * introduced by the keyword "name". */
			if (strcmp(*argv, "name") == 0) {
				NEXT_ARG();
			}
			if (matches(*argv, "help") == 0)
				usage();
			if (p->name[0])
				duparg2("name", *argv);
			/* p->name was zeroed above; copying at most
			 * IFNAMSIZ-1 bytes keeps it NUL-terminated. */
			strncpy(p->name, *argv, IFNAMSIZ - 1);
			if (cmd == SIOCCHGTUNNEL && count == 0) {
				struct ip_tunnel_parm old_p;
				memset(&old_p, 0, sizeof(old_p));
				if (do_get_ioctl(*argv, &old_p))
					return -1;
				*p = old_p;
			}
		}
		count++;
		argc--; argv++;
	}

	/* No explicit mode: guess it from the device name prefix. */
	if (p->iph.protocol == 0) {
		if (memcmp(p->name, "gre", 3) == 0)
			p->iph.protocol = IPPROTO_GRE;
		else if (memcmp(p->name, "ipip", 4) == 0)
			p->iph.protocol = IPPROTO_IPIP;
		else if (memcmp(p->name, "sit", 3) == 0)
			p->iph.protocol = IPPROTO_IPV6;
	}

	if (p->iph.protocol == IPPROTO_IPIP || p->iph.protocol == IPPROTO_IPV6) {
		if ((p->i_flags & GRE_KEY) || (p->o_flags & GRE_KEY)) {
			fprintf(stderr, "Keys are not allowed with ipip and sit.\n");
			return -1;
		}
	}

	if (medium[0]) {
		p->link = do_ioctl_get_ifindex(medium);
		if (p->link == 0)
			return -1;
	}

	/* A multicast remote with no key uses the remote address as key. */
	if (p->i_key == 0 && IN_MULTICAST(ntohl(p->iph.daddr))) {
		p->i_key = p->iph.daddr;
		p->i_flags |= GRE_KEY;
	}
	if (p->o_key == 0 && IN_MULTICAST(ntohl(p->iph.daddr))) {
		p->o_key = p->iph.daddr;
		p->o_flags |= GRE_KEY;
	}
	if (IN_MULTICAST(ntohl(p->iph.daddr)) && !p->iph.saddr) {
		fprintf(stderr, "Broadcast tunnel requires a source address.\n");
		return -1;
	}
	return 0;
}
/*
 * Implement "ip tunnel add" and "ip tunnel change".
 *
 * `cmd` is the tunnel ioctl to issue.  The request is sent through
 * the base device matching the tunnel mode (tunl0, gre0 or sit0).
 * Returns the ioctl result, or -1 when the arguments are rejected or
 * the mode cannot be determined.
 */
static int do_add(int cmd, int argc, char **argv)
{
	struct ip_tunnel_parm p;

	if (parse_args(argc, argv, cmd, &p) < 0)
		return -1;

	/* A fixed TTL is refused when DF is cleared (nopmtudisc). */
	if (p.iph.ttl && p.iph.frag_off == 0) {
		fprintf(stderr, "ttl != 0 and nopmtudisc are incompatible\n");
		return -1;
	}

	switch (p.iph.protocol) {
	case IPPROTO_IPIP:
		return do_add_ioctl(cmd, "tunl0", &p);
	case IPPROTO_GRE:
		return do_add_ioctl(cmd, "gre0", &p);
	case IPPROTO_IPV6:
		return do_add_ioctl(cmd, "sit0", &p);
	default:
		fprintf(stderr, "cannot determine tunnel mode (ipip, gre or sit)\n");
		return -1;
	}
}
/*
 * Implement "ip tunnel del".  The delete request goes through the
 * base device of the tunnel mode; when the mode is unknown the
 * tunnel's own name is used as the base device.
 */
int do_del(int argc, char **argv)
{
	struct ip_tunnel_parm p;
	char *basedev;

	if (parse_args(argc, argv, SIOCDELTUNNEL, &p) < 0)
		return -1;

	if (p.iph.protocol == IPPROTO_IPIP)
		basedev = "tunl0";
	else if (p.iph.protocol == IPPROTO_GRE)
		basedev = "gre0";
	else if (p.iph.protocol == IPPROTO_IPV6)
		basedev = "sit0";
	else
		basedev = p.name;

	return do_del_ioctl(basedev, &p);
}
void print_tunnel(struct ip_tunnel_parm *p)
{
char s1[1024];
char s2[1024];
char s3[64];
char s4[64];
inet_ntop(AF_INET, &p->i_key, s3, sizeof(s3));
inet_ntop(AF_INET, &p->o_key, s4, sizeof(s4));
/* Do not use format_host() for local addr,
* symbolic name will not be useful.
*/
printf("%s: %s/ip remote %s local %s ",
p->name,
p->iph.protocol == IPPROTO_IPIP ? "ip" :
(p->iph.protocol == IPPROTO_GRE ? "gre" :
(p->iph.protocol == IPPROTO_IPV6 ? "ipv6" : "unknown")),
p->iph.daddr ? format_host(AF_INET, 4, &p->iph.daddr, s1, sizeof(s1)) : "any",
p->iph.saddr ? rt_addr_n2a(AF_INET, 4, &p->iph.saddr, s2, sizeof(s2)) : "any");
if (p->link) {
char *n = do_ioctl_get_ifname(p->link);
if (n)
printf(" dev %s ", n);
}
if (p->iph.ttl)
printf(" ttl %d ", p->iph.ttl);
else
printf(" ttl inherit ");
if (p->iph.tos) {
SPRINT_BUF(b1);
printf(" tos");
if (p->iph.tos&1)
printf(" inherit");
if (p->iph.tos&~1)
printf("%c%s ", p->iph.tos&1 ? '/' : ' ',
rtnl_dsfield_n2a(p->iph.tos&~1, b1, sizeof(b1)));
}
if (!(p->iph.frag_off&htons(IP_DF)))
printf(" nopmtudisc");
if ((p->i_flags&GRE_KEY) && (p->o_flags&GRE_KEY) && p->o_key == p->i_key)
printf(" key %s", s3);
else if ((p->i_flags|p->o_flags)&GRE_KEY) {
if (p->i_flags&GRE_KEY)
printf(" ikey %s ", s3);
if (p->o_flags&GRE_KEY)
printf(" okey %s ", s4);
}
if (p->i_flags&GRE_SEQ)
printf("%s Drop packets out of sequence.\n", _SL_);
if (p->i_flags&GRE_CSUM)
printf("%s Checksum in received packet is required.", _SL_);
if (p->o_flags&GRE_SEQ)
printf("%s Sequence packets on output.", _SL_);
if (p->o_flags&GRE_CSUM)
printf("%s Checksum output packets.", _SL_);
}
/*
 * List every tunnel device found in /proc/net/dev that matches the
 * filter in *p (name, link, remote, local and input key; zero fields
 * are wildcards).  Statistics are printed when show_stats is set.
 *
 * Returns 0 on success, -1 when /proc/net/dev cannot be opened or a
 * line has no ':' separator.  The file is closed on every path.
 */
static int do_tunnels_list(struct ip_tunnel_parm *p)
{
	char name[IFNAMSIZ];
	unsigned long rx_bytes, rx_packets, rx_errs, rx_drops,
		rx_fifo, rx_frame,
		tx_bytes, tx_packets, tx_errs, tx_drops,
		tx_fifo, tx_colls, tx_carrier, rx_multi;
	int type;
	int ret = 0;
	struct ip_tunnel_parm p1;
	char buf[512];
	FILE *fp = fopen("/proc/net/dev", "r");

	if (fp == NULL) {
		perror("fopen");
		return -1;
	}

	/* Skip the two header lines. */
	if (fgets(buf, sizeof(buf), fp) == NULL ||
	    fgets(buf, sizeof(buf), fp) == NULL)
		goto out;

	while (fgets(buf, sizeof(buf), fp) != NULL) {
		char *ptr;

		buf[sizeof(buf) - 1] = 0;
		/* Split "name: counters" at the colon.  The width 15 is
		 * IFNAMSIZ - 1 and keeps the scan inside name[]. */
		if ((ptr = strchr(buf, ':')) == NULL ||
		    (*ptr++ = 0, sscanf(buf, "%15s", name) != 1)) {
			fprintf(stderr, "Wrong format of /proc/net/dev. Sorry.\n");
			ret = -1;
			goto out;
		}
		/* The counters are unsigned long, hence %lu; one input
		 * column is skipped with %*d. */
		if (sscanf(ptr, "%lu%lu%lu%lu%lu%lu%lu%*d%lu%lu%lu%lu%lu%lu%lu",
			   &rx_bytes, &rx_packets, &rx_errs, &rx_drops,
			   &rx_fifo, &rx_frame, &rx_multi,
			   &tx_bytes, &tx_packets, &tx_errs, &tx_drops,
			   &tx_fifo, &tx_colls, &tx_carrier) != 14)
			continue;
		if (p->name[0] && strcmp(p->name, name))
			continue;
		type = do_ioctl_get_iftype(name);
		if (type == -1) {
			fprintf(stderr, "Failed to get type of [%s]\n", name);
			continue;
		}
		if (type != ARPHRD_TUNNEL && type != ARPHRD_IPGRE && type != ARPHRD_SIT)
			continue;
		memset(&p1, 0, sizeof(p1));
		if (do_get_ioctl(name, &p1))
			continue;
		if ((p->link && p1.link != p->link) ||
		    (p->name[0] && strcmp(p1.name, p->name)) ||
		    (p->iph.daddr && p1.iph.daddr != p->iph.daddr) ||
		    (p->iph.saddr && p1.iph.saddr != p->iph.saddr) ||
		    (p->i_key && p1.i_key != p->i_key))
			continue;
		print_tunnel(&p1);
		if (show_stats) {
			printf("%s", _SL_);
			printf("RX: Packets Bytes Errors CsumErrs OutOfSeq Mcasts%s", _SL_);
			printf(" %-10lu %-12lu %-6lu %-8lu %-8lu %-8lu%s",
			       rx_packets, rx_bytes, rx_errs, rx_frame, rx_fifo, rx_multi, _SL_);
			printf("TX: Packets Bytes Errors DeadLoop NoRoute NoBufs%s", _SL_);
			printf(" %-10lu %-12lu %-6lu %-8lu %-8lu %-6lu",
			       tx_packets, tx_bytes, tx_errs, tx_colls, tx_carrier, tx_drops);
		}
		printf("\n");
	}

out:
	fclose(fp);
	return ret;
}
/*
 * Implement "ip tunnel show".  With a known mode a single tunnel is
 * queried (the named one, or the mode's base device) and printed.
 * Without a mode every matching tunnel is listed, and the function
 * returns 0 whatever the listing reported.
 */
static int do_show(int argc, char **argv)
{
	struct ip_tunnel_parm p;
	char *fallback;

	if (parse_args(argc, argv, SIOCGETTUNNEL, &p) < 0)
		return -1;

	if (p.iph.protocol == IPPROTO_IPIP) {
		fallback = "tunl0";
	} else if (p.iph.protocol == IPPROTO_GRE) {
		fallback = "gre0";
	} else if (p.iph.protocol == IPPROTO_IPV6) {
		fallback = "sit0";
	} else {
		do_tunnels_list(&p);
		return 0;
	}

	if (do_get_ioctl(p.name[0] ? p.name : fallback, &p))
		return -1;

	print_tunnel(&p);
	printf("\n");
	return 0;
}
/*
 * Entry point for "ip tunnel": dispatch on the first word of argv.
 * No arguments means "show".  An unrecognised sub-command prints a
 * diagnostic and terminates the process.
 */
int do_iptunnel(int argc, char **argv)
{
	if (argc <= 0)
		return do_show(0, NULL);

	if (matches(*argv, "add") == 0)
		return do_add(SIOCADDTUNNEL, argc-1, argv+1);
	if (matches(*argv, "change") == 0)
		return do_add(SIOCCHGTUNNEL, argc-1, argv+1);
	if (matches(*argv, "del") == 0)
		return do_del(argc-1, argv+1);
	if (matches(*argv, "show") == 0 ||
	    matches(*argv, "lst") == 0 ||
	    matches(*argv, "list") == 0)
		return do_show(argc-1, argv+1);
	if (matches(*argv, "help") == 0)
		usage();

	fprintf(stderr, "Command \"%s\" is unknown, try \"ip tunnel help\".\n", *argv);
	exit(-1);
}

View File

@ -0,0 +1,3 @@
#! /bin/sh
# Replace this shell with a single "ip" invocation restricted to IPv4 (-4)
# that flushes routes of scope "global" and type "unicast".
# "ro" is an abbreviated object name accepted by ip.
exec ip -4 ro flush scope global type unicast

View File

@ -0,0 +1,60 @@
#!/bin/sh
#$Id$
#
# Script created by: Stephen R. van den Berg <srb@cuci.nl>, 1999/04/18
# Donated to the public domain.
#
# This script transforms the output of "ip" into more readable text.
# "ip" is the Linux-advanced-routing configuration tool part of the
# iproute package.
#
# Print a usage line and exit with status 64 when the first argument is -h.
test "X-h" = "X$1" && echo "Usage: $0 [tablenr [raw ip args...]]" && exit 64
# With no arguments at all, use "0" as the argument list.
test -z "$*" && set 0
# Every line of "ip route list table ..." is split into its first word
# (network) and the remainder (rest).
ip route list table "$@" |
while read network rest
do set xx $rest
# "xx" is a placeholder word that is shifted away again immediately;
# presumably it keeps "set" well-behaved when $rest is empty.
shift
# Reset the per-line fields so values from the previous line do not leak.
proto=""
via=""
dev=""
scope=""
src=""
table=""
case $network in
broadcast|local|unreachable) via=$network
network=$1
shift
;;
esac
# The remaining words are taken two at a time as "key value" and each
# pair is assigned to the shell variable named by the key.
# NOTE(review): a line with an odd number of remaining words or a key
# that is not a valid variable name is not handled here.
while test $# != 0
do
key=$1
val=$2
eval "$key=$val"
shift 2
done
# Emit the fields in the fixed order the awk program below expects.
echo "$network $via $src $proto $scope $dev $table"
done | awk -F ' ' '
BEGIN {
format="%15s%-3s %15s %15s %8s %8s%7s %s\n";
printf(format,"target","","gateway","source","proto","scope","dev","tbl");
}
{ network=$1;
mask="";
if(match(network,"/"))
{ mask=" "substr(network,RSTART+1);
network=substr(network,0,RSTART);
}
via=$2;
src=$3;
proto=$4;
scope=$5;
dev=$6;
table=$7;
printf(format,network,mask,via,src,proto,scope,dev,table);
}
'

View File

@ -0,0 +1,116 @@
/*
* rtm_map.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <string.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include "rt_names.h"
#include "utils.h"
/*
 * Map a route type (RTN_*) to its name.
 *
 * Known types return a constant string and leave `buf` untouched.
 * Any other value is formatted in decimal into `buf` (at most `len`
 * bytes) and `buf` is returned.
 */
char *rtnl_rtntype_n2a(int id, char *buf, int len)
{
	static char *const names[] = {
		[RTN_UNSPEC]      = "none",
		[RTN_UNICAST]     = "unicast",
		[RTN_LOCAL]       = "local",
		[RTN_BROADCAST]   = "broadcast",
		[RTN_ANYCAST]     = "anycast",
		[RTN_MULTICAST]   = "multicast",
		[RTN_BLACKHOLE]   = "blackhole",
		[RTN_UNREACHABLE] = "unreachable",
		[RTN_PROHIBIT]    = "prohibit",
		[RTN_THROW]       = "throw",
		[RTN_NAT]         = "nat",
		[RTN_XRESOLVE]    = "xresolve",
	};
	const int known = (int)(sizeof(names) / sizeof(names[0]));

	if (id >= 0 && id < known && names[id] != NULL)
		return names[id];

	snprintf(buf, len, "%d", id);
	return buf;
}
/*
 * Parse a route type given either by name or as a number.
 *
 * Names are tried in table order; entries marked `exact` need a full
 * strcmp() match, the others are compared with matches().  Anything
 * else must be a number no larger than 255 in any base strtoul()
 * accepts.
 *
 * Returns 0 and stores the type in *id, or -1 when `arg` is neither.
 */
int rtnl_rtntype_a2n(int *id, char *arg)
{
	static const struct {
		char *name;
		int exact;
		unsigned long type;
	} known[] = {
		{ "local",       1, RTN_LOCAL },
		{ "nat",         1, RTN_NAT },
		{ "broadcast",   0, RTN_BROADCAST },
		{ "brd",         1, RTN_BROADCAST },
		{ "anycast",     0, RTN_ANYCAST },
		{ "multicast",   0, RTN_MULTICAST },
		{ "prohibit",    0, RTN_PROHIBIT },
		{ "unreachable", 0, RTN_UNREACHABLE },
		{ "blackhole",   0, RTN_BLACKHOLE },
		{ "xresolve",    0, RTN_XRESOLVE },
		{ "unicast",     0, RTN_UNICAST },
		{ "throw",       1, RTN_THROW },
	};
	unsigned long value;
	char *end;
	size_t i;

	for (i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
		int differs;

		if (known[i].exact)
			differs = strcmp(arg, known[i].name);
		else
			differs = matches(arg, known[i].name);
		if (differs == 0) {
			*id = known[i].type;
			return 0;
		}
	}

	value = strtoul(arg, &end, 0);
	if (!end || end == arg || *end || value > 255)
		return -1;
	*id = value;
	return 0;
}
/*
 * Parse a realms argument of the form "B" or "A/B".
 *
 * Each part is converted with rtnl_rtrealm_a2n().  A goes into the
 * upper 16 bits of *realms and B into the lower bits; an empty B
 * contributes 0.  The '/' in `arg` is overwritten temporarily and
 * restored before returning.
 *
 * Returns 0 on success, -1 when either part fails to convert.
 */
int get_rt_realms(__u32 *realms, char *arg)
{
	__u32 low = 0;
	char *slash = strchr(arg, '/');

	*realms = 0;

	if (slash != NULL) {
		int failed;

		*slash = 0;
		failed = rtnl_rtrealm_a2n(realms, arg);
		*slash = '/';
		if (failed)
			return -1;
		*realms <<= 16;
		arg = slash + 1;
	}

	if (*arg != 0 && rtnl_rtrealm_a2n(&low, arg))
		return -1;

	*realms |= low;
	return 0;
}

View File

@ -0,0 +1,177 @@
/*
* rtmon.c RTnetlink listener.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <net/if.h>
#include <netinet/in.h>
#include <string.h>
#include "SNAPSHOT.h"
#include "utils.h"
#include "libnetlink.h"
/* Not referenced elsewhere in this file; presumably read by the shared
 * utility code linked into rtmon (TODO confirm). */
int resolve_hosts = 0;
/* Non-zero until main() has finished the initial dump; while it is set
 * dump_msg() does not write a timestamp before each message. */
static int init_phase = 1;
/*
 * Write a timestamp record to `fp`: a netlink header of type 15 with
 * all other header fields zero, followed by the current time as two
 * 32-bit words (seconds, then microseconds).
 */
static void write_stamp(FILE *fp)
{
	struct {
		struct nlmsghdr hdr;
		__u32 when[2];
	} stamp;
	struct timeval now;

	stamp.hdr.nlmsg_type = 15;
	stamp.hdr.nlmsg_flags = 0;
	stamp.hdr.nlmsg_seq = 0;
	stamp.hdr.nlmsg_pid = 0;
	stamp.hdr.nlmsg_len = NLMSG_LENGTH(sizeof(stamp.when));

	gettimeofday(&now, NULL);
	stamp.when[0] = now.tv_sec;
	stamp.when[1] = now.tv_usec;

	fwrite((void*)&stamp, 1, NLMSG_ALIGN(stamp.hdr.nlmsg_len), fp);
}
/*
 * Callback that appends one netlink message to the FILE passed in
 * `arg`.  Once the initial dump is over a timestamp record is written
 * first.  The stream is flushed after every message.  `who` is not
 * used.  Always returns 0.
 */
static int dump_msg(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
{
	FILE *out = (FILE*)arg;

	if (init_phase == 0)
		write_stamp(out);

	fwrite((void*)n, 1, NLMSG_ALIGN(n->nlmsg_len), out);
	fflush(out);
	return 0;
}
/* Print the rtmon synopsis on stderr and terminate with status -1. */
void usage(void)
{
	fputs("Usage: rtmon file FILE [ all | LISTofOBJECTS]\n"
	      "LISTofOBJECTS := [ link ] [ address ] [ route ]\n", stderr);
	exit(-1);
}
/*
 * rtmon: record rtnetlink traffic into a file.
 *
 * After option parsing the output file is created, an initial link
 * dump is written (preceded by one timestamp record), and the program
 * then listens on the selected multicast groups, writing a timestamp
 * record before every message it receives.
 *
 * Exit status: 0 when listening ends normally, 1 when the netlink
 * socket, the dump request or the dump itself fails, 2 when listening
 * fails, and exit(-1) on usage errors or when the file cannot be
 * opened.
 */
int
main(int argc, char **argv)
{
	FILE *fp;
	struct rtnl_handle rth;
	int family = AF_UNSPEC;
	unsigned groups = ~0U;
	int llink = 0;
	int laddr = 0;
	int lroute = 0;
	char *file = NULL;

	while (argc > 1) {
		if (matches(argv[1], "-family") == 0) {
			argc--;
			argv++;
			if (argc <= 1)
				usage();
			if (strcmp(argv[1], "inet") == 0)
				family = AF_INET;
			else if (strcmp(argv[1], "inet6") == 0)
				family = AF_INET6;
			else if (strcmp(argv[1], "link") == 0)
				/* Same family as the "-0" shortcut below. */
				family = AF_PACKET;
			else if (strcmp(argv[1], "help") == 0)
				usage();
			else {
				fprintf(stderr, "Protocol ID \"%s\" is unknown, try \"rtmon help\".\n", argv[1]);
				exit(-1);
			}
		} else if (strcmp(argv[1], "-4") == 0) {
			family = AF_INET;
		} else if (strcmp(argv[1], "-6") == 0) {
			family = AF_INET6;
		} else if (strcmp(argv[1], "-0") == 0) {
			family = AF_PACKET;
		} else if (matches(argv[1], "-Version") == 0) {
			printf("rtmon utility, iproute2-ss%s\n", SNAPSHOT);
			exit(0);
		} else if (matches(argv[1], "file") == 0) {
			argc--;
			argv++;
			if (argc <= 1)
				usage();
			file = argv[1];
		} else if (matches(argv[1], "link") == 0) {
			/* Naming an object drops the "everything" default. */
			llink=1;
			groups = 0;
		} else if (matches(argv[1], "address") == 0) {
			laddr=1;
			groups = 0;
		} else if (matches(argv[1], "route") == 0) {
			lroute=1;
			groups = 0;
		} else if (strcmp(argv[1], "all") == 0) {
			groups = ~0U;
		} else if (matches(argv[1], "help") == 0) {
			usage();
		} else {
			fprintf(stderr, "Argument \"%s\" is unknown, try \"rtmon help\".\n", argv[1]);
			exit(-1);
		}
		argc--; argv++;
	}

	if (file == NULL) {
		fprintf(stderr, "Not enough information: argument \"file\" is required\n");
		exit(-1);
	}

	/* Turn the selected objects into multicast groups; address and
	 * route groups are limited to the chosen family, if any. */
	if (llink)
		groups |= RTMGRP_LINK;
	if (laddr) {
		if (!family || family == AF_INET)
			groups |= RTMGRP_IPV4_IFADDR;
		if (!family || family == AF_INET6)
			groups |= RTMGRP_IPV6_IFADDR;
	}
	if (lroute) {
		if (!family || family == AF_INET)
			groups |= RTMGRP_IPV4_ROUTE;
		if (!family || family == AF_INET6)
			groups |= RTMGRP_IPV6_ROUTE;
	}

	fp = fopen(file, "w");
	if (fp == NULL) {
		perror("Cannot fopen");
		exit(-1);
	}

	if (rtnl_open(&rth, groups) < 0)
		exit(1);

	if (rtnl_wilddump_request(&rth, AF_UNSPEC, RTM_GETLINK) < 0) {
		perror("Cannot send dump request");
		exit(1);
	}

	write_stamp(fp);

	if (rtnl_dump_filter(&rth, dump_msg, fp, NULL, NULL) < 0) {
		fprintf(stderr, "Dump terminated\n");
		return 1;
	}

	/* From here on dump_msg() timestamps every message. */
	init_phase = 0;

	if (rtnl_listen(&rth, dump_msg, (void*)fp) < 0)
		exit(2);

	exit(0);
}

View File

@ -0,0 +1,4 @@
#! /bin/bash
# Filter stdin to stdout with tr.  After shell quoting the first set is
# '[', backslash, ']' and the second is '[', newline, ']', so brackets map
# to themselves and every backslash becomes a newline.
exec tr "[\\\\]" "[
]"

View File

@ -0,0 +1,18 @@
# Object files archived into libutil.a.
UTILOBJ=utils.o rt_names.o ll_types.o ll_proto.o ll_addr.o inet_proto.o
# Object files archived into libnetlink.a.
NLOBJ=ll_map.o libnetlink.o
# Default target: build both static libraries.
all: libnetlink.a libutil.a
libnetlink.a: $(NLOBJ)
$(AR) rcs $@ $(NLOBJ)
# $(ADDLIB) is not set in this makefile; it is expected from the caller.
libutil.a: $(UTILOBJ) $(ADDLIB)
$(AR) rcs $@ $(UTILOBJ) $(ADDLIB)
# Nothing from this directory is installed.
install:
# Remove every object and archive built here.
clean:
rm -f $(NLOBJ) $(UTILOBJ) $(ADDLIB) libnetlink.a libutil.a

View File

@ -0,0 +1,98 @@
#include <errno.h>
#include <sys/types.h>
#include <netinet/in.h>
#include "utils.h"
/*
 * Read a 16-bit value whose first byte in memory is the low-order
 * byte and return it in host order.
 */
static __inline__ u_int16_t dn_ntohs(u_int16_t addr)
{
	const u_int8_t *raw = (const u_int8_t *)&addr;
	u_int16_t low = raw[0];
	u_int16_t high = raw[1];

	return low | (high << 8);
}
/*
 * Emit one decimal digit of *addr at position `scale` (1000, 100, ...).
 *
 * Returns 1 without writing when the buffer is already full
 * (*pos == len), 0 otherwise.  A zero digit is skipped until a
 * non-zero digit has been written, except at scale 1, so that the
 * value 0 still prints "0".  When a digit is written it is stored at
 * *str, *pos is advanced, *started is set and the digit's value is
 * subtracted from *addr.
 */
static __inline__ int do_digit(char *str, u_int16_t *addr, u_int16_t scale, size_t *pos, size_t len, int *started)
{
	u_int16_t digit = *addr / scale;

	if (*pos == len)
		return 1;

	/* Leading zero: nothing to print yet. */
	if (digit == 0 && !*started && scale != 1)
		return 0;

	*str = digit + '0';
	*started = 1;
	(*pos)++;
	*addr -= (digit * scale);
	return 0;
}
/*
 * Format a two-byte DECnet node address as "area.node" into `str`.
 *
 * The area is the top 6 bits of the 16-bit address and the node the
 * low 10 bits.  Returns NULL when dna->a_len is not 2.  Otherwise
 * `str` is returned; when `len` is too small the output stops where
 * the buffer ends and is not NUL-terminated.
 */
static const char *dnet_ntop1(const struct dn_naddr *dna, char *str, size_t len)
{
	static const u_int16_t area_scales[] = { 10, 1 };
	static const u_int16_t node_scales[] = { 1000, 100, 10, 1 };
	u_int16_t addr = dn_ntohs(*(u_int16_t *)dna->a_addr);
	u_int16_t area = addr >> 10;
	size_t pos = 0;
	size_t k;
	int started = 0;

	if (dna->a_len != 2)
		return NULL;

	addr &= 0x03ff;

	if (len == 0)
		return str;

	for (k = 0; k < sizeof(area_scales) / sizeof(area_scales[0]); k++) {
		if (do_digit(str + pos, &area, area_scales[k], &pos, len, &started))
			return str;
	}

	if (pos == len)
		return str;
	str[pos] = '.';
	pos++;
	started = 0;

	for (k = 0; k < sizeof(node_scales) / sizeof(node_scales[0]); k++) {
		if (do_digit(str + pos, &addr, node_scales[k], &pos, len, &started))
			return str;
	}

	if (pos == len)
		return str;
	str[pos] = 0;
	return str;
}
/*
 * inet_ntop()-style front end for DECnet addresses.
 *
 * For AF_DECnet errno is cleared and the address is formatted by
 * dnet_ntop1().  Any other family sets errno to EAFNOSUPPORT and
 * returns NULL.
 */
const char *dnet_ntop(int af, const void *addr, char *str, size_t len)
{
	if (af != AF_DECnet) {
		errno = EAFNOSUPPORT;
		return NULL;
	}

	errno = 0;
	return dnet_ntop1((struct dn_naddr *)addr, str, len);
}

View File

@ -0,0 +1,71 @@
#include <errno.h>
#include <sys/types.h>
#include <netinet/in.h>
#include "utils.h"
/*
 * Reinterpret the two bytes of `addr` with the first byte in memory
 * as the low-order byte and return the resulting 16-bit value.
 */
static __inline__ u_int16_t dn_htons(u_int16_t addr)
{
	const u_int8_t *octet = (const u_int8_t *)&addr;
	u_int16_t result;

	result = octet[0];
	result |= ((u_int16_t)octet[1]) << 8;
	return result;
}
/*
 * Parse the run of decimal digits at the start of `src`.
 *
 * Stores the value in *dst and returns the number of digits consumed
 * (0 when `src` does not start with a digit, in which case *dst is 0).
 *
 * The value saturates at 0xffff instead of wrapping, so an over-long
 * number can never alias a small valid one and is rejected by the
 * caller's range check.
 */
static int dnet_num(const char *src, u_int16_t * dst)
{
	unsigned int value = 0;
	int ndigits = 0;

	*dst = 0;

	for (; *src >= '0' && *src <= '9'; src++) {
		value = value * 10 + (unsigned int)(*src - '0');
		if (value > 0xffff)
			value = 0xffff;	/* saturate, do not wrap */
		ndigits++;
	}

	*dst = (u_int16_t)value;
	return ndigits;
}
/*
 * Parse "area.node" into *dna.
 *
 * The area must be 0..63 and be followed by '.'; the node must be
 * 0..1023.  On success a_len is set to 2, the address is stored as
 * (area << 10) | node via dn_htons(), and 1 is returned.  Returns 0
 * on malformed input, leaving *dna untouched.
 */
static int dnet_pton1(const char *src, struct dn_naddr *dna)
{
	u_int16_t area = 0;
	u_int16_t node = 0;
	int ndigits;

	ndigits = dnet_num(src, &area);
	if (ndigits == 0 || area > 63 || src[ndigits] != '.')
		return 0;

	ndigits = dnet_num(src + ndigits + 1, &node);
	if (ndigits == 0 || node > 1023)
		return 0;

	dna->a_len = 2;
	*(u_int16_t *)dna->a_addr = dn_htons((area << 10) | node);
	return 1;
}
/*
 * inet_pton()-style front end for DECnet addresses.
 *
 * For AF_DECnet errno is cleared and the result of dnet_pton1() is
 * returned (1 on success, 0 on malformed input).  Any other family
 * sets errno to EAFNOSUPPORT and returns -1.
 */
int dnet_pton(int af, const char *src, void *addr)
{
	if (af != AF_DECnet) {
		errno = EAFNOSUPPORT;
		return -1;
	}

	errno = 0;
	return dnet_pton1(src, (struct dn_naddr *)addr);
}

View File

@ -0,0 +1,199 @@
/* Copyright (c) 1996 by Internet Software Consortium.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
* ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
* CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char rcsid[] = "$Id: inet_ntop.c,v 1.4 1996/09/27 03:24:13 drepper Exp $";
#endif /* LIBC_SCCS and not lint */
#include <sys/param.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <arpa/nameser.h>
#include <string.h>
#include <errno.h>
#include <stdio.h>
#include <linux/in6.h>
/* Size in bytes of a binary IPv6 address, unless already defined. */
#ifndef IN6ADDRSZ
#define IN6ADDRSZ sizeof(struct in6_addr)
#endif
/*
 * SPRINTF(x) yields the number of characters sprintf produced as a
 * size_t; x is the complete, parenthesised sprintf argument list.
 * With SPRINTF_CHAR defined the length is taken with strlen() of
 * sprintf's result, i.e. sprintf is expected to return the buffer
 * (presumably for pre-ANSI C libraries).
 */
#ifdef SPRINTF_CHAR
# define SPRINTF(x) strlen(sprintf/**/x)
#else
# define SPRINTF(x) ((size_t)sprintf x)
#endif
/*
* WARNING: Don't even consider trying to compile this on a system where
* sizeof(int) < 4. sizeof(int) > 4 is fine; all the world's not a VAX.
*/
static const char *inet_ntop4 __P((const u_char *src, char *dst, size_t size));
static const char *inet_ntop6 __P((const u_char *src, char *dst, size_t size));
/* char *
* inet_ntop(af, src, dst, size)
* convert a network format address to presentation format.
* return:
* pointer to presentation format address (`dst'), or NULL (see errno).
* author:
* Paul Vixie, 1996.
*/
const char *
inet_ntop(af, src, dst, size)
	int af;
	const void *src;
	char *dst;
	size_t size;
{
	/* Hand over to the per-family formatter. */
	if (af == AF_INET)
		return (inet_ntop4(src, dst, size));
	if (af == AF_INET6)
		return (inet_ntop6(src, dst, size));

	/* Unknown address family. */
	errno = (EAFNOSUPPORT);
	return (NULL);
}
/* const char *
* inet_ntop4(src, dst, size)
* format an IPv4 address, more or less like inet_ntoa()
* return:
* `dst' (as a const)
* notes:
* (1) uses no statics
* (2) takes a u_char* not an in_addr as input
* author:
* Paul Vixie, 1996.
*/
/*
 * Format the four bytes at `src` as a dotted quad into `dst`.
 *
 * `size` is the capacity of `dst` including the terminating NUL.
 * Returns `dst`, or NULL with errno set to ENOSPC when the text plus
 * its NUL does not fit; `dst` is not written in that case.
 */
static const char *
inet_ntop4(const unsigned char *src, char *dst, size_t size)
{
	char tmp[sizeof "255.255.255.255"];
	int n;

	n = snprintf(tmp, sizeof tmp, "%u.%u.%u.%u",
		     src[0], src[1], src[2], src[3]);

	/* n excludes the NUL, so n == size already means "too small". */
	if (n < 0 || (size_t)n >= size) {
		errno = (ENOSPC);
		return (NULL);
	}
	memcpy(dst, tmp, (size_t)n + 1);
	return (dst);
}
/* const char *
* inet_ntop6(src, dst, size)
* convert IPv6 binary address into presentation (printable) format
* author:
* Paul Vixie, 1996.
*/
static const char *
inet_ntop6(src, dst, size)
	const u_char *src;
	char *dst;
	size_t size;
{
	/*
	 * The address is handled as eight 16-bit groups held in plain
	 * u_int variables rather than through pointer overlays, so no
	 * exact-width 16-bit type is required.
	 */
	char text[sizeof "ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255"];
	char *out = text;
	struct { int base, len; } longest, run;
	u_int groups[sizeof(struct in6_addr) / INT16SZ];
	int g;

	/* Pack the bytes pairwise, first byte of each pair on top. */
	memset(groups, '\0', sizeof groups);
	for (g = 0; g < IN6ADDRSZ; g++) {
		int shift = (g % 2 == 0) ? 8 : 0;

		groups[g / 2] |= (src[g] << shift);
	}

	/* Locate the longest run of zero groups for the "::" form. */
	longest.base = -1;
	run.base = -1;
	for (g = 0; g < (IN6ADDRSZ / INT16SZ); g++) {
		if (groups[g] != 0) {
			if (run.base == -1)
				continue;
			if (longest.base == -1 || run.len > longest.len)
				longest = run;
			run.base = -1;
			continue;
		}
		if (run.base == -1) {
			run.base = g;
			run.len = 1;
		} else {
			run.len++;
		}
	}
	if (run.base != -1 &&
	    (longest.base == -1 || run.len > longest.len))
		longest = run;
	/* A single zero group is not worth abbreviating. */
	if (longest.base != -1 && longest.len < 2)
		longest.base = -1;

	/* Produce the text. */
	for (g = 0; g < (IN6ADDRSZ / INT16SZ); g++) {
		int in_run = longest.base != -1 && g >= longest.base &&
			     g < (longest.base + longest.len);

		if (in_run) {
			/* Only the first group of the run emits a colon. */
			if (g == longest.base)
				*out++ = ':';
			continue;
		}
		/* Separator before every group but the very first. */
		if (g != 0)
			*out++ = ':';
		/* Trailing 32 bits printed as an embedded IPv4 address. */
		if (g == 6 && longest.base == 0 &&
		    (longest.len == 6 ||
		     (longest.len == 5 && groups[5] == 0xffff))) {
			if (!inet_ntop4(src+12, out, sizeof text - (out - text)))
				return (NULL);
			out += strlen(out);
			break;
		}
		out += SPRINTF((out, "%x", groups[g]));
	}
	/* A run reaching the end needs its closing colon. */
	if (longest.base != -1 &&
	    (longest.base + longest.len) == (IN6ADDRSZ / INT16SZ))
		*out++ = ':';
	*out++ = '\0';

	/* Copy out only when the caller's buffer is large enough. */
	if ((size_t)(out - text) > size) {
		errno = (ENOSPC);
		return (NULL);
	}
	strcpy(dst, text);
	return (dst);
}

View File

@ -0,0 +1,70 @@
/*
* inet_proto.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <string.h>
#include "utils.h"
/*
 * Translate an IP protocol number to its name.
 *
 * The most recent successful lookup is cached; a repeated request for
 * the same number returns a pointer to the internal cache instead of
 * `buf`.  Otherwise the name from getprotobynumber() is copied into
 * `buf`, or "ipproto-N" when the number is unknown, and `buf` is
 * returned.  The text in `buf` is truncated to `len` bytes and is
 * always NUL-terminated.
 *
 * A name too long for the cache is not cached, so a cache hit never
 * returns a truncated name.
 */
char *inet_proto_n2a(int proto, char *buf, int len)
{
	static char ncache[16];
	static int icache = -1;
	struct protoent *pe;

	if (proto == icache)
		return ncache;

	pe = getprotobynumber(proto);
	if (pe) {
		if (strlen(pe->p_name) < sizeof(ncache)) {
			icache = proto;
			strcpy(ncache, pe->p_name);
		}
		snprintf(buf, len, "%s", pe->p_name);
		return buf;
	}
	snprintf(buf, len, "ipproto-%d", proto);
	return buf;
}
/*
 * Translate an IP protocol given by name or by number.
 *
 * A string starting with a digit is parsed as a decimal 8-bit number.
 * Otherwise the name is looked up with getprotobyname(); the most
 * recent successful lookup is cached under the official name.
 *
 * Returns the protocol number, or -1 when `buf` is neither a valid
 * number nor a known name.
 *
 * A name too long for the cache is not cached, so the strcmp()
 * against the cache always sees a terminated, untruncated string.
 */
int inet_proto_a2n(char *buf)
{
	static char ncache[16];
	static int icache = -1;
	struct protoent *pe;

	if (icache>=0 && strcmp(ncache, buf) == 0)
		return icache;

	if (buf[0] >= '0' && buf[0] <= '9') {
		__u8 ret;
		if (get_u8(&ret, buf, 10))
			return -1;
		return ret;
	}

	pe = getprotobyname(buf);
	if (pe) {
		if (strlen(pe->p_name) < sizeof(ncache)) {
			icache = pe->p_proto;
			strcpy(ncache, pe->p_name);
		}
		return pe->p_proto;
	}
	return -1;
}

View File

@ -0,0 +1,217 @@
/* Copyright (c) 1996 by Internet Software Consortium.
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
* ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
* CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*/
#if defined(LIBC_SCCS) && !defined(lint)
static char rcsid[] = "$Id: inet_pton.c,v 1.5 1996/09/27 03:24:16 drepper Exp $";
#endif /* LIBC_SCCS and not lint */
#include <sys/param.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <arpa/nameser.h>
#include <string.h>
#include <errno.h>
#include <linux/in6.h>
#define IN6ADDRSZ sizeof(struct in6_addr)
/*
* WARNING: Don't even consider trying to compile this on a system where
* sizeof(int) < 4. sizeof(int) > 4 is fine; all the world's not a VAX.
*/
static int inet_pton4 __P((const char *src, u_char *dst));
static int inet_pton6 __P((const char *src, u_char *dst));
/* int
 * inet_pton(af, src, dst)
 *	Convert an address from presentation (text) form to network
 *	(binary) form by dispatching on the address family.
 * return:
 *	1 if the address was valid for the specified address family
 *	0 if the address wasn't valid (`dst' is untouched in this case)
 *	-1 with errno set to EAFNOSUPPORT if `af' is neither AF_INET nor
 *	   AF_INET6 (`dst' is untouched in this case, too)
 * author:
 *	Paul Vixie, 1996.
 */
int
inet_pton(af, src, dst)
	int af;
	const char *src;
	void *dst;
{
	if (af == AF_INET)
		return (inet_pton4(src, dst));
	if (af == AF_INET6)
		return (inet_pton6(src, dst));

	errno = EAFNOSUPPORT;
	return (-1);
}
/* int
 * inet_pton4(src, dst)
 *	Parse a strict dotted quad: exactly four decimal fields, each in
 *	the range 0..255, separated by single dots.  Unlike inet_aton()
 *	no hexadecimal and no shorthand forms are accepted.
 * return:
 *	1 if `src' is a valid dotted quad, else 0.
 * notice:
 *	does not touch `dst' unless it's returning 1.
 * author:
 *	Paul Vixie, 1996.
 */
static int
inet_pton4(src, dst)
	const char *src;
	u_char *dst;
{
	u_char quad[INADDRSZ];
	u_char *cur = quad;
	int in_field = 0;	/* at least one digit seen in this field */
	int fields = 0;		/* number of fields started so far */
	int c;

	*cur = 0;
	for (c = *src++; c != '\0'; c = *src++) {
		if (c >= '0' && c <= '9') {
			u_int next = *cur * 10 + (c - '0');

			if (next > 255)
				return (0);
			*cur = next;
			if (!in_field) {
				if (++fields > 4)
					return (0);
				in_field = 1;
			}
			continue;
		}
		/* The only other legal character is a dot ending a field. */
		if (c != '.' || !in_field)
			return (0);
		if (fields == 4)
			return (0);
		*++cur = 0;
		in_field = 0;
	}
	if (fields < 4)
		return (0);

	memcpy(dst, quad, INADDRSZ);
	return (1);
}
/* int
* inet_pton6(src, dst)
* convert presentation level address to network order binary form.
* The text is parsed into a zeroed temporary; `dst' is only written
* by the final memcpy() once the whole address has been accepted.
* return:
* 1 if `src' is a valid [RFC1884 2.2] address, else 0.
* notice:
* (1) does not touch `dst' unless it's returning 1.
* (2) :: in a full address is silently ignored.
* NOTE(review): tracing "1::2:" through the loop shows a trailing single
* ':' is accepted when a "::" appeared earlier -- confirm whether that
* is intended before relying on this for validation.
* credit:
* inspired by Mark Andrews.
* author:
* Paul Vixie, 1996.
*/
static int
inet_pton6(src, dst)
const char *src;
u_char *dst;
{
static const char xdigits_l[] = "0123456789abcdef",
xdigits_u[] = "0123456789ABCDEF";
/* tp: next byte to write; endp: one past tmp; colonp: where "::" was seen */
u_char tmp[IN6ADDRSZ], *tp, *endp, *colonp;
/* curtok: start of the current group, handed to inet_pton4() on a '.' */
const char *xdigits, *curtok;
int ch, saw_xdigit;
u_int val;
memset((tp = tmp), '\0', IN6ADDRSZ);
endp = tp + IN6ADDRSZ;
colonp = NULL;
/* Leading :: requires some special handling. */
if (*src == ':')
if (*++src != ':')
return (0);
curtok = src;
saw_xdigit = 0;
val = 0;
while ((ch = *src++) != '\0') {
const char *pch;
/* Try the lower-case digit set first, then the upper-case one. */
if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL)
pch = strchr((xdigits = xdigits_u), ch);
if (pch != NULL) {
/* Accumulate one more hex digit into the current 16-bit group. */
val <<= 4;
val |= (pch - xdigits);
if (val > 0xffff)
return (0);
saw_xdigit = 1;
continue;
}
if (ch == ':') {
curtok = src;
if (!saw_xdigit) {
/* A ':' with no digits before it is "::"; only one is allowed. */
if (colonp)
return (0);
colonp = tp;
continue;
}
/* Store the finished group, high byte first. */
if (tp + INT16SZ > endp)
return (0);
*tp++ = (u_char) (val >> 8) & 0xff;
*tp++ = (u_char) val & 0xff;
saw_xdigit = 0;
val = 0;
continue;
}
/* A '.' means the current group is really an embedded dotted quad. */
if (ch == '.' && ((tp + INADDRSZ) <= endp) &&
inet_pton4(curtok, tp) > 0) {
tp += INADDRSZ;
saw_xdigit = 0;
break; /* '\0' was seen by inet_pton4(). */
}
return (0);
}
/* Flush a final group that was not followed by ':'. */
if (saw_xdigit) {
if (tp + INT16SZ > endp)
return (0);
*tp++ = (u_char) (val >> 8) & 0xff;
*tp++ = (u_char) val & 0xff;
}
if (colonp != NULL) {
/*
* Since some memmove()'s erroneously fail to handle
* overlapping regions, we'll do the shift by hand.
* The n bytes written after "::" are moved to the end of tmp and
* their old positions are zeroed, which expands the "::".
*/
const int n = tp - colonp;
int i;
for (i = 1; i <= n; i++) {
endp[- i] = colonp[n - i];
colonp[n - i] = 0;
}
tp = endp;
}
/* Without "::" the text must have supplied all 16 bytes. */
if (tp != endp)
return (0);
memcpy(dst, tmp, IN6ADDRSZ);
return (1);
}

View File

@ -0,0 +1,71 @@
#include <errno.h>
#include <sys/types.h>
#include <netinet/in.h>
#include "utils.h"
/*
 * Emit one upper-case hexadecimal digit.
 *
 * str:   where the digit is written.
 * addr:  value the digit is taken from.
 * scale: index of the nibble to emit (0 = least significant).
 * pos:   running output position; incremented on success.
 * len:   total size of the output buffer.
 *
 * Returns 0 after writing a digit, or 1 (writing nothing) when *pos has
 * already reached len.
 */
static __inline__ int do_digit(char *str, u_int32_t addr, u_int32_t scale, size_t *pos, size_t len)
{
	static const char hex_upper[] = "0123456789ABCDEF";

	if (*pos == len)
		return 1;

	*str = hex_upper[(addr >> (scale * 4)) & 0x0f];
	++*pos;
	return 0;
}
/*
 * Format an IPX address as "NNNNNNNN.XXXXXXXXXXXX": eight hex digits of
 * the network number (converted from network byte order), a dot, and
 * two hex digits for each of the six node bytes.
 *
 * addr: address to format.
 * str:  output buffer.
 * len:  size of str in bytes.
 *
 * Returns str.  When len is non-zero the result is always
 * NUL-terminated; if the buffer is too small the text is truncated.
 * With len == 0 nothing is written.
 */
static const char *ipx_ntop1(const struct ipx_addr *addr, char *str, size_t len)
{
	int i;
	size_t pos = 0;

	if (len == 0)
		return str;

	for (i = 7; i >= 0; i--)
		if (do_digit(str + pos, ntohl(addr->ipx_net), i, &pos, len))
			goto truncated;

	if (pos == len)
		goto truncated;
	str[pos++] = '.';

	for (i = 0; i < 6; i++) {
		if (do_digit(str + pos, addr->ipx_node[i], 1, &pos, len))
			goto truncated;
		if (do_digit(str + pos, addr->ipx_node[i], 0, &pos, len))
			goto truncated;
	}

	if (pos == len)
		goto truncated;
	str[pos] = 0;
	return str;

truncated:
	/*
	 * Every jump here happens with pos == len, i.e. the buffer is
	 * full.  Give up the last character so the caller still gets a
	 * terminated string.
	 */
	str[len - 1] = 0;
	return str;
}
/*
 * Format an address of family af into str (at most len bytes).
 *
 * Only AF_IPX is supported: errno is cleared and the result of
 * ipx_ntop1() is returned.  For any other family errno is set to
 * EAFNOSUPPORT and NULL is returned.
 */
const char *ipx_ntop(int af, const void *addr, char *str, size_t len)
{
	if (af != AF_IPX) {
		errno = EAFNOSUPPORT;
		return NULL;
	}

	errno = 0;
	return ipx_ntop1((struct ipx_addr *)addr, str, len);
}

View File

@ -0,0 +1,107 @@
#include <errno.h>
#include <string.h>
#include <sys/types.h>
#include <netinet/in.h>
#include "utils.h"
/*
 * Decode one hexadecimal digit (either case).
 *
 * Returns its value 0..15, or 0xf0 for any other character; callers
 * detect failure by testing the result against the mask 0xf0.
 */
static u_int32_t hexget(char c)
{
	if ('0' <= c && c <= '9')
		return c - '0';
	if ('a' <= c && c <= 'f')
		return 10 + (c - 'a');
	if ('A' <= c && c <= 'F')
		return 10 + (c - 'A');
	return 0xf0;
}
/*
 * Parse the network part of an IPX address: up to eight hex digits,
 * ended either by the end of the string or by the '.' that introduces
 * the node part.
 *
 * net: accumulator; digits are shifted into the value already stored
 *      there, so the caller must zero it first.
 * str: text to parse.
 *
 * Returns 0 on success, -1 if a character other than a hex digit,
 * '.' or the terminator is met, or if more than eight digits are given.
 */
static int ipx_getnet(u_int32_t *net, const char *str)
{
	int i;
	u_int32_t tmp;

	for (i = 0; *str && (i < 8); i++) {
		tmp = hexget(*str);
		if (tmp & 0xf0)
			break;
		str++;
		*net = (*net << 4) | tmp;
	}

	/*
	 * The '.' must be accepted here as well as inside the loop:
	 * otherwise a full eight-digit network number followed by a node
	 * (exactly what ipx_ntop() prints) would be rejected.
	 */
	if (*str == 0 || *str == '.')
		return 0;

	return -1;
}
/*
 * Parse the six-byte node part of an IPX address.
 *
 * Each byte is written as exactly two hex digits; a single ':' after a
 * byte is skipped if present.  Characters following the sixth byte are
 * not examined.
 *
 * Returns 0 on success, -1 as soon as a non-hex character is found
 * where a digit is required (bytes parsed so far remain in node).
 */
static int ipx_getnode(u_int8_t *node, const char *str)
{
	u_int8_t *out = node;
	u_int8_t *const end = node + 6;

	while (out < end) {
		u_int32_t hi, lo;

		hi = hexget(*str++);
		if (hi & 0xf0)
			return -1;
		*out = (u_int8_t)(hi << 4);

		lo = hexget(*str++);
		if (lo & 0xf0)
			return -1;
		*out |= (u_int8_t)lo;
		out++;

		if (*str == ':')
			str++;
	}
	return 0;
}
/*
 * Parse "net" or "net.node" into *addr.
 *
 * addr is zeroed first.  The network number is parsed by ipx_getnet()
 * and stored in network byte order; if the text contains a '.', the
 * part after the first one is parsed by ipx_getnode().
 *
 * Returns 1 on success, 0 if either part is malformed.
 */
static int ipx_pton1(const char *src, struct ipx_addr *addr)
{
	const char *dot;

	memset(addr, 0, sizeof(struct ipx_addr));
	dot = strchr(src, '.');

	if (ipx_getnet(&addr->ipx_net, src))
		return 0;
	addr->ipx_net = htonl(addr->ipx_net);

	/* No dot: a bare network number, the node stays all zero. */
	if (dot == NULL)
		return 1;

	return ipx_getnode(addr->ipx_node, dot + 1) ? 0 : 1;
}
/*
 * Parse the text src as an address of family af into addr.
 *
 * For AF_IPX errno is cleared and the result of ipx_pton1() is
 * returned (1 = parsed, 0 = malformed).  For any other family errno is
 * set to EAFNOSUPPORT and -1 is returned.
 */
int ipx_pton(int af, const char *src, void *addr)
{
	if (af != AF_IPX) {
		errno = EAFNOSUPPORT;
		return -1;
	}

	errno = 0;
	return ipx_pton1(src, (struct ipx_addr *)addr);
}

View File

@ -0,0 +1,521 @@
/*
* libnetlink.c RTnetlink service routines.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <net/if_arp.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>
#include <errno.h>
#include <time.h>
#include <sys/uio.h>
#include "libnetlink.h"
/*
 * Close the netlink socket held by rth.
 *
 * The descriptor is marked invalid afterwards so that calling
 * rtnl_close() twice does not close an unrelated descriptor that
 * happened to reuse the same number.
 */
void rtnl_close(struct rtnl_handle *rth)
{
	if (rth->fd >= 0) {
		close(rth->fd);
		rth->fd = -1;
	}
}
int rtnl_open(struct rtnl_handle *rth, unsigned subscriptions)
{
int addr_len;
memset(rth, 0, sizeof(rth));
rth->fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
if (rth->fd < 0) {
perror("Cannot open netlink socket");
return -1;
}
memset(&rth->local, 0, sizeof(rth->local));
rth->local.nl_family = AF_NETLINK;
rth->local.nl_groups = subscriptions;
if (bind(rth->fd, (struct sockaddr*)&rth->local, sizeof(rth->local)) < 0) {
perror("Cannot bind netlink socket");
return -1;
}
addr_len = sizeof(rth->local);
if (getsockname(rth->fd, (struct sockaddr*)&rth->local, &addr_len) < 0) {
perror("Cannot getsockname");
return -1;
}
if (addr_len != sizeof(rth->local)) {
fprintf(stderr, "Wrong address length %d\n", addr_len);
return -1;
}
if (rth->local.nl_family != AF_NETLINK) {
fprintf(stderr, "Wrong address family %d\n", rth->local.nl_family);
return -1;
}
rth->seq = time(NULL);
return 0;
}
/*
 * Send a dump request of the given message type for one address family.
 *
 * The request carries NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST and a fresh
 * sequence number, which is also stored in rth->dump.
 *
 * Returns the result of sendto(): bytes sent, or -1 on error.
 */
int rtnl_wilddump_request(struct rtnl_handle *rth, int family, int type)
{
	struct {
		struct nlmsghdr nlh;
		struct rtgenmsg g;
	} req;
	struct sockaddr_nl nladdr;

	memset(&nladdr, 0, sizeof(nladdr));
	nladdr.nl_family = AF_NETLINK;

	/*
	 * sizeof(req) bytes are sent below, including any padding the
	 * compiler adds to the struct; clear it so no uninitialised
	 * stack contents are transmitted.
	 */
	memset(&req, 0, sizeof(req));
	req.nlh.nlmsg_len = sizeof(req);
	req.nlh.nlmsg_type = type;
	req.nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
	req.nlh.nlmsg_pid = 0;
	req.nlh.nlmsg_seq = rth->dump = ++rth->seq;
	req.g.rtgen_family = family;

	return sendto(rth->fd, (void*)&req, sizeof(req), 0, (struct sockaddr*)&nladdr, sizeof(nladdr));
}
/*
 * Send len bytes from buf on the handle's socket, using a zeroed
 * AF_NETLINK destination address.
 *
 * Returns the result of sendto().
 */
int rtnl_send(struct rtnl_handle *rth, char *buf, int len)
{
	struct sockaddr_nl dest;

	memset(&dest, 0, sizeof(dest));
	dest.nl_family = AF_NETLINK;

	return sendto(rth->fd, buf, len, 0,
		      (struct sockaddr*)&dest, sizeof(dest));
}
/*
 * Send a dump request whose payload is supplied by the caller.
 *
 * type: netlink message type.
 * req:  payload placed directly after the netlink header.
 * len:  payload length in bytes.
 *
 * The header and the payload are sent as two iovec elements.  A fresh
 * sequence number is used and remembered in rth->dump.
 * Returns the result of sendmsg().
 */
int rtnl_dump_request(struct rtnl_handle *rth, int type, void *req, int len)
{
	struct nlmsghdr nlh;
	struct sockaddr_nl nladdr;
	struct iovec iov[2];
	struct msghdr msg;

	memset(&nladdr, 0, sizeof(nladdr));
	nladdr.nl_family = AF_NETLINK;

	nlh.nlmsg_len = NLMSG_LENGTH(len);
	nlh.nlmsg_type = type;
	nlh.nlmsg_flags = NLM_F_ROOT|NLM_F_MATCH|NLM_F_REQUEST;
	nlh.nlmsg_pid = 0;
	nlh.nlmsg_seq = rth->dump = ++rth->seq;

	iov[0].iov_base = &nlh;
	iov[0].iov_len = sizeof(nlh);
	iov[1].iov_base = req;
	iov[1].iov_len = len;

	/*
	 * Fill struct msghdr by member name: its field order and padding
	 * are not the same on every C library, so a positional
	 * initialiser is not portable.
	 */
	memset(&msg, 0, sizeof(msg));
	msg.msg_name = &nladdr;
	msg.msg_namelen = sizeof(nladdr);
	msg.msg_iov = iov;
	msg.msg_iovlen = 2;

	return sendmsg(rth->fd, &msg, 0);
}
/*
 * Receive the replies to the most recent dump request and feed them to
 * a callback.
 *
 * filter/arg1: called for every message whose pid matches the local
 *              socket and whose sequence number equals rth->dump.
 * junk/arg2:   optional; called for every other message.
 *
 * Returns 0 when NLMSG_DONE is received, the (negative) value returned
 * by a callback, or -1 on EOF or when the kernel answers with
 * NLMSG_ERROR.  Receive errors are reported and the read is retried.
 * Exits the process on a malformed datagram.
 */
int rtnl_dump_filter(struct rtnl_handle *rth,
		     int (*filter)(struct sockaddr_nl *, struct nlmsghdr *n, void *),
		     void *arg1,
		     int (*junk)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
		     void *arg2)
{
	char buf[8192];
	struct sockaddr_nl nladdr;
	struct iovec iov;

	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);

	while (1) {
		int status;
		struct nlmsghdr *h;
		struct msghdr msg;

		/*
		 * Set members by name; the layout of struct msghdr differs
		 * between C libraries.  Re-done for every datagram so that
		 * msg_namelen and msg_flags start fresh.
		 */
		memset(&msg, 0, sizeof(msg));
		msg.msg_name = &nladdr;
		msg.msg_namelen = sizeof(nladdr);
		msg.msg_iov = &iov;
		msg.msg_iovlen = 1;

		status = recvmsg(rth->fd, &msg, 0);

		if (status < 0) {
			if (errno == EINTR)
				continue;
			perror("OVERRUN");
			continue;
		}
		if (status == 0) {
			fprintf(stderr, "EOF on netlink\n");
			return -1;
		}
		if (msg.msg_namelen != sizeof(nladdr)) {
			fprintf(stderr, "sender address length == %d\n", msg.msg_namelen);
			exit(1);
		}

		for (h = (struct nlmsghdr*)buf; NLMSG_OK(h, status);
		     h = NLMSG_NEXT(h, status)) {
			int err;

			if (h->nlmsg_pid != rth->local.nl_pid ||
			    h->nlmsg_seq != rth->dump) {
				/* Not part of our dump. */
				if (junk) {
					err = junk(&nladdr, h, arg2);
					if (err < 0)
						return err;
				}
				continue;
			}

			if (h->nlmsg_type == NLMSG_DONE)
				return 0;
			if (h->nlmsg_type == NLMSG_ERROR) {
				struct nlmsgerr *nlerr = (struct nlmsgerr*)NLMSG_DATA(h);

				if (h->nlmsg_len < NLMSG_LENGTH(sizeof(struct nlmsgerr))) {
					fprintf(stderr, "ERROR truncated\n");
				} else {
					errno = -nlerr->error;
					perror("RTNETLINK answers");
				}
				return -1;
			}
			err = filter(&nladdr, h, arg1);
			if (err < 0)
				return err;
		}
		if (msg.msg_flags & MSG_TRUNC) {
			fprintf(stderr, "Message truncated\n");
			continue;
		}
		if (status) {
			fprintf(stderr, "!!!Remnant of size %d\n", status);
			exit(1);
		}
	}
}
/*
 * Send one request and wait for its answer.
 *
 * n:       request; its sequence number is assigned here, and
 *          NLM_F_ACK is added when no answer buffer is supplied.
 * peer:    destination netlink pid.
 * groups:  destination group mask.
 * answer:  optional buffer that receives the matching reply.  The copy
 *          is h->nlmsg_len bytes long and this interface carries no
 *          buffer size, so the caller must provide enough room.
 * junk:    optional callback for messages that do not belong to this
 *          request (different pid or sequence number).
 *
 * Returns 0 on success (acknowledgement with error 0, or reply copied
 * to answer), a negative value returned by junk, or -1 on failure.
 * Exits the process on a malformed datagram.
 */
int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n, pid_t peer,
	      unsigned groups, struct nlmsghdr *answer,
	      int (*junk)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
	      void *jarg)
{
	int status;
	unsigned seq;
	struct nlmsghdr *h;
	struct sockaddr_nl nladdr;
	struct iovec iov;
	char buf[8192];
	struct msghdr msg;

	iov.iov_base = (void*)n;
	iov.iov_len = n->nlmsg_len;

	/* Set members by name; struct msghdr layout is not portable. */
	memset(&msg, 0, sizeof(msg));
	msg.msg_name = &nladdr;
	msg.msg_namelen = sizeof(nladdr);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	memset(&nladdr, 0, sizeof(nladdr));
	nladdr.nl_family = AF_NETLINK;
	nladdr.nl_pid = peer;
	nladdr.nl_groups = groups;

	n->nlmsg_seq = seq = ++rtnl->seq;
	if (answer == NULL)
		n->nlmsg_flags |= NLM_F_ACK;

	status = sendmsg(rtnl->fd, &msg, 0);
	if (status < 0) {
		perror("Cannot talk to rtnetlink");
		return -1;
	}

	iov.iov_base = buf;

	while (1) {
		iov.iov_len = sizeof(buf);
		status = recvmsg(rtnl->fd, &msg, 0);

		if (status < 0) {
			if (errno == EINTR)
				continue;
			perror("OVERRUN");
			continue;
		}
		if (status == 0) {
			fprintf(stderr, "EOF on netlink\n");
			return -1;
		}
		if (msg.msg_namelen != sizeof(nladdr)) {
			fprintf(stderr, "sender address length == %d\n", msg.msg_namelen);
			exit(1);
		}

		/*
		 * Compare as int: status can become negative after the
		 * aligned subtraction below, and an unsigned comparison
		 * would then keep the loop running past the data.
		 */
		for (h = (struct nlmsghdr*)buf; status >= (int)sizeof(*h); ) {
			int err;
			int len = h->nlmsg_len;
			int l = len - (int)sizeof(*h);

			if (l<0 || len>status) {
				if (msg.msg_flags & MSG_TRUNC) {
					fprintf(stderr, "Truncated message\n");
					return -1;
				}
				fprintf(stderr, "!!!malformed message: len=%d\n", len);
				exit(1);
			}

			if (h->nlmsg_pid != rtnl->local.nl_pid ||
			    h->nlmsg_seq != seq) {
				if (junk) {
					err = junk(&nladdr, h, jarg);
					if (err < 0)
						return err;
				}
				/*
				 * Step over the foreign message; continuing
				 * without advancing would re-examine it
				 * forever.
				 */
				goto next;
			}

			if (h->nlmsg_type == NLMSG_ERROR) {
				struct nlmsgerr *nlerr = (struct nlmsgerr*)NLMSG_DATA(h);

				if ((size_t)l < sizeof(struct nlmsgerr)) {
					fprintf(stderr, "ERROR truncated\n");
				} else {
					errno = -nlerr->error;
					if (errno == 0) {
						/* error 0 is a plain acknowledgement */
						if (answer)
							memcpy(answer, h, h->nlmsg_len);
						return 0;
					}
					perror("RTNETLINK answers");
				}
				return -1;
			}
			if (answer) {
				memcpy(answer, h, h->nlmsg_len);
				return 0;
			}

			fprintf(stderr, "Unexpected reply!!!\n");
next:
			status -= NLMSG_ALIGN(len);
			h = (struct nlmsghdr*)((char*)h + NLMSG_ALIGN(len));
		}
		if (msg.msg_flags & MSG_TRUNC) {
			fprintf(stderr, "Message truncated\n");
			continue;
		}
		if (status) {
			fprintf(stderr, "!!!Remnant of size %d\n", status);
			exit(1);
		}
	}
}
/*
 * Receive netlink messages in a loop and hand each one to handler.
 *
 * Returns the first negative value returned by handler, or -1 on EOF
 * or on a truncated message.  Receive errors are reported and the read
 * is retried.  Exits the process on a malformed datagram.
 */
int rtnl_listen(struct rtnl_handle *rtnl,
		int (*handler)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
		void *jarg)
{
	int status;
	struct nlmsghdr *h;
	struct sockaddr_nl nladdr;
	struct iovec iov;
	char buf[8192];
	struct msghdr msg;

	/* Set members by name; struct msghdr layout is not portable. */
	memset(&msg, 0, sizeof(msg));
	msg.msg_name = &nladdr;
	msg.msg_namelen = sizeof(nladdr);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	memset(&nladdr, 0, sizeof(nladdr));
	nladdr.nl_family = AF_NETLINK;
	nladdr.nl_pid = 0;
	nladdr.nl_groups = 0;

	iov.iov_base = buf;

	while (1) {
		iov.iov_len = sizeof(buf);
		status = recvmsg(rtnl->fd, &msg, 0);

		if (status < 0) {
			if (errno == EINTR)
				continue;
			perror("OVERRUN");
			continue;
		}
		if (status == 0) {
			fprintf(stderr, "EOF on netlink\n");
			return -1;
		}
		if (msg.msg_namelen != sizeof(nladdr)) {
			fprintf(stderr, "Sender address length == %d\n", msg.msg_namelen);
			exit(1);
		}

		/*
		 * Compare as int: status can become negative after the
		 * aligned subtraction below, and an unsigned comparison
		 * would then keep the loop running past the data.
		 */
		for (h = (struct nlmsghdr*)buf; status >= (int)sizeof(*h); ) {
			int err;
			int len = h->nlmsg_len;
			int l = len - (int)sizeof(*h);

			if (l<0 || len>status) {
				if (msg.msg_flags & MSG_TRUNC) {
					fprintf(stderr, "Truncated message\n");
					return -1;
				}
				fprintf(stderr, "!!!malformed message: len=%d\n", len);
				exit(1);
			}

			err = handler(&nladdr, h, jarg);
			if (err < 0)
				return err;

			status -= NLMSG_ALIGN(len);
			h = (struct nlmsghdr*)((char*)h + NLMSG_ALIGN(len));
		}
		if (msg.msg_flags & MSG_TRUNC) {
			fprintf(stderr, "Message truncated\n");
			continue;
		}
		if (status) {
			fprintf(stderr, "!!!Remnant of size %d\n", status);
			exit(1);
		}
	}
}
/*
 * Replay netlink messages stored in a file.
 *
 * Each record is a struct nlmsghdr followed by its payload, padded to
 * NLMSG_ALIGN.  Every complete record is passed to handler together
 * with a zeroed AF_NETLINK sender address.
 *
 * Returns 0 at a clean end of file, the first negative value returned
 * by handler, or -1 on a read error, a malformed length or a truncated
 * record.
 */
int rtnl_from_file(FILE *rtnl,
		   int (*handler)(struct sockaddr_nl *,struct nlmsghdr *n, void *),
		   void *jarg)
{
	size_t got;
	struct sockaddr_nl nladdr;
	char buf[8192];
	struct nlmsghdr *h = (void*)buf;

	memset(&nladdr, 0, sizeof(nladdr));
	nladdr.nl_family = AF_NETLINK;
	nladdr.nl_pid = 0;
	nladdr.nl_groups = 0;

	while (1) {
		int err, len;
		int l;

		/*
		 * fread() returns a size_t and never a negative value;
		 * errors have to be detected with ferror().
		 */
		got = fread(buf, 1, sizeof(*h), rtnl);
		if (got == 0) {
			if (ferror(rtnl)) {
				if (errno == EINTR) {
					clearerr(rtnl);
					continue;
				}
				perror("rtnl_from_file: fread");
				return -1;
			}
			return 0;
		}
		if (got < sizeof(*h)) {
			/* Partial header: the length field cannot be trusted. */
			fprintf(stderr, "rtnl-from_file: truncated message\n");
			return -1;
		}

		len = h->nlmsg_len;
		if (len < (int)sizeof(*h) || len > (int)sizeof(buf)) {
			fprintf(stderr, "!!!malformed message: len=%d @%ld\n",
				len, ftell(rtnl));
			return -1;
		}
		l = len - (int)sizeof(*h);

		got = fread(NLMSG_DATA(h), 1, NLMSG_ALIGN(l), rtnl);
		if (got < (size_t)l) {
			if (ferror(rtnl))
				perror("rtnl_from_file: fread");
			else
				fprintf(stderr, "rtnl-from_file: truncated message\n");
			return -1;
		}

		err = handler(&nladdr, h, jarg);
		if (err < 0)
			return err;
	}
}
/*
 * Append a 32-bit attribute to a netlink message.
 *
 * n:      message being built; nlmsg_len is updated.
 * maxlen: size of the buffer that holds n.
 * type:   attribute type.
 * data:   attribute value, copied as 4 raw bytes.
 *
 * Returns 0 on success, -1 if the attribute would not fit in maxlen.
 */
int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data)
{
	int attr_len = RTA_LENGTH(4);
	struct rtattr *attr;

	if (NLMSG_ALIGN(n->nlmsg_len) + attr_len > maxlen)
		return -1;

	/* The new attribute starts at the aligned end of the message. */
	attr = (struct rtattr*)((char*)n + NLMSG_ALIGN(n->nlmsg_len));
	attr->rta_len = attr_len;
	attr->rta_type = type;
	memcpy(RTA_DATA(attr), &data, 4);

	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + attr_len;
	return 0;
}
/*
 * Append an attribute with an arbitrary payload to a netlink message.
 *
 * n:      message being built; nlmsg_len is updated.
 * maxlen: size of the buffer that holds n.
 * type:   attribute type.
 * data:   payload bytes.
 * alen:   payload length in bytes.
 *
 * Returns 0 on success, -1 if the attribute would not fit in maxlen.
 */
int addattr_l(struct nlmsghdr *n, int maxlen, int type, void *data, int alen)
{
	int attr_len = RTA_LENGTH(alen);
	struct rtattr *attr;

	if (NLMSG_ALIGN(n->nlmsg_len) + attr_len > maxlen)
		return -1;

	/* The new attribute starts at the aligned end of the message. */
	attr = (struct rtattr*)((char*)n + NLMSG_ALIGN(n->nlmsg_len));
	attr->rta_len = attr_len;
	attr->rta_type = type;
	memcpy(RTA_DATA(attr), data, alen);

	n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + attr_len;
	return 0;
}
/*
 * Append a nested 32-bit attribute inside the attribute rta.
 *
 * rta:    enclosing attribute; rta_len is updated.
 * maxlen: size of the buffer that holds rta.
 * type:   type of the nested attribute.
 * data:   value, copied as 4 raw bytes.
 *
 * Returns 0 on success, -1 if the nested attribute would not fit.
 */
int rta_addattr32(struct rtattr *rta, int maxlen, int type, __u32 data)
{
	int len = RTA_LENGTH(4);
	struct rtattr *subrta;

	if (RTA_ALIGN(rta->rta_len) + len > maxlen)
		return -1;

	subrta = (struct rtattr*)(((char*)rta) + RTA_ALIGN(rta->rta_len));
	subrta->rta_type = type;
	subrta->rta_len = len;
	memcpy(RTA_DATA(subrta), &data, 4);

	/*
	 * An attribute length is aligned with RTA_ALIGN, the same macro
	 * the bounds check and the offset computation above use.
	 */
	rta->rta_len = RTA_ALIGN(rta->rta_len) + len;
	return 0;
}
/*
 * Append a nested attribute with an arbitrary payload inside rta.
 *
 * rta:    enclosing attribute; rta_len is updated.
 * maxlen: size of the buffer that holds rta.
 * type:   type of the nested attribute.
 * data:   payload bytes.
 * alen:   payload length in bytes.
 *
 * Returns 0 on success, -1 if the nested attribute would not fit.
 */
int rta_addattr_l(struct rtattr *rta, int maxlen, int type, void *data, int alen)
{
	struct rtattr *subrta;
	int len = RTA_LENGTH(alen);

	if (RTA_ALIGN(rta->rta_len) + len > maxlen)
		return -1;

	subrta = (struct rtattr*)(((char*)rta) + RTA_ALIGN(rta->rta_len));
	subrta->rta_type = type;
	subrta->rta_len = len;
	memcpy(RTA_DATA(subrta), data, alen);

	/*
	 * An attribute length is aligned with RTA_ALIGN, the same macro
	 * the bounds check and the offset computation above use.
	 */
	rta->rta_len = RTA_ALIGN(rta->rta_len) + len;
	return 0;
}
/*
 * Index a sequence of attributes by type.
 *
 * tb:  table with max+1 slots; tb[type] is set to the attribute of
 *      that type (a later attribute of the same type overwrites an
 *      earlier one).  Slots are not cleared here, so the caller must
 *      initialise the table.
 * max: highest type stored; attributes with a larger type are skipped.
 * rta: first attribute.
 * len: number of bytes of attribute data.
 *
 * Prints a diagnostic if bytes are left over after the last well-formed
 * attribute.  Always returns 0.
 */
int parse_rtattr(struct rtattr *tb[], int max, struct rtattr *rta, int len)
{
	for (; RTA_OK(rta, len); rta = RTA_NEXT(rta,len)) {
		if (rta->rta_type > max)
			continue;
		tb[rta->rta_type] = rta;
	}

	if (len != 0)
		fprintf(stderr, "!!!Deficit %d, rta_len=%d\n", len, rta->rta_len);
	return 0;
}

View File

@ -0,0 +1,91 @@
/*
* ll_addr.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/sockios.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include "utils.h"
/*
 * Format a link-layer address as text.
 *
 * addr: address bytes.
 * alen: number of address bytes.
 * type: ARPHRD_* hardware type.
 * buf:  output buffer.
 * blen: size of buf in bytes.
 *
 * Four-byte addresses of tunnel types (ARPHRD_TUNNEL, ARPHRD_SIT,
 * ARPHRD_IPGRE) are printed as dotted IPv4 by inet_ntop() and its
 * result is returned.  Everything else is printed as colon-separated
 * two-digit lower-case hex bytes.  The output never exceeds blen bytes
 * and is NUL-terminated whenever blen > 0; it is truncated if the
 * buffer is too small.  An empty address yields an empty string.
 */
const char *ll_addr_n2a(unsigned char *addr, int alen, int type, char *buf, int blen)
{
	int i;
	int l;

	if (alen == 4 &&
	    (type == ARPHRD_TUNNEL || type == ARPHRD_SIT || type == ARPHRD_IPGRE)) {
		return inet_ntop(AF_INET, addr, buf, blen);
	}

	if (blen <= 0)
		return buf;
	buf[0] = '\0';

	/*
	 * l is the offset the next byte would be written at.  Stop as
	 * soon as it reaches blen, so snprintf() is never handed a zero
	 * or negative remaining size.
	 */
	l = 0;
	for (i = 0; i < alen && l < blen; i++) {
		if (i == 0) {
			snprintf(buf + l, blen - l, "%02x", addr[i]);
			l += 2;
		} else {
			snprintf(buf + l, blen - l, ":%02x", addr[i]);
			l += 3;
		}
	}
	return buf;
}
/*
 * Parse a textual link-layer address.
 *
 * lladdr: output buffer for the address bytes.
 * len:    size of lladdr in bytes.
 * arg:    text to parse.  If it contains a '.', it is parsed as an
 *         IPv4 address by get_addr_1() (declared outside this block).
 *         Otherwise it is read as hex bytes separated by ':'; in that
 *         case the string is modified in place (each ':' is
 *         overwritten with a NUL).
 *
 * Returns the number of bytes stored, or -1 on error.  An address with
 * more components than fit in len is rejected instead of reporting
 * more bytes than were written.
 */
int ll_addr_a2n(unsigned char *lladdr, int len, char *arg)
{
	if (strchr(arg, '.')) {
		inet_prefix pfx;

		if (get_addr_1(&pfx, arg, AF_INET)) {
			fprintf(stderr, "\"%s\" is invalid lladdr.\n", arg);
			return -1;
		}
		if (len < 4)
			return -1;
		memcpy(lladdr, pfx.data, 4);
		return 4;
	} else {
		int i;

		for (i=0; i<len; i++) {
			unsigned int temp;	/* %x requires unsigned int */
			char *cp = strchr(arg, ':');

			if (cp) {
				*cp = 0;
				cp++;
			}
			if (sscanf(arg, "%x", &temp) != 1) {
				fprintf(stderr, "\"%s\" is invalid lladdr.\n", arg);
				return -1;
			}
			if (temp > 255) {
				fprintf(stderr, "\"%s\" is invalid lladdr.\n", arg);
				return -1;
			}
			lladdr[i] = temp;
			if (!cp)
				return i+1;
			arg = cp;
		}
		/* Input continues past the end of lladdr (or len <= 0). */
		fprintf(stderr, "\"%s\" is invalid lladdr.\n", arg);
		return -1;
	}
}

View File

@ -0,0 +1,169 @@
/*
* ll_map.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>
#include "libnetlink.h"
#include "ll_map.h"
/*
 * One remembered network interface.  Entries are filled in by
 * ll_remember_index() from RTM_NEWLINK messages and chained per bucket.
 */
struct idxmap
{
struct idxmap * next; /* next entry in the same bucket */
int index; /* interface index (ifi_index) */
int type; /* ifi_type of the interface */
int alen; /* payload length of IFLA_ADDRESS; may exceed sizeof(addr) */
unsigned flags; /* ifi_flags of the interface */
unsigned char addr[8]; /* first bytes of the link-layer address */
char name[16]; /* interface name taken from IFLA_IFNAME */
};
/* Buckets selected by the low four bits of the interface index. */
static struct idxmap *idxmap[16];
/*
 * Callback that records an interface from an RTM_NEWLINK message in
 * the idxmap table (creating or updating its entry).
 *
 * who: sender address (unused).
 * n:   netlink message.
 * arg: callback argument (unused).
 *
 * Returns 0 for messages that are stored or ignored (other message
 * types, no IFLA_IFNAME attribute, allocation failure), and -1 if the
 * message is too short to hold a struct ifinfomsg.
 */
int ll_remember_index(struct sockaddr_nl *who, struct nlmsghdr *n, void *arg)
{
	int h;
	struct ifinfomsg *ifi = NLMSG_DATA(n);
	struct idxmap *im, **imp;
	struct rtattr *tb[IFLA_MAX+1];
	size_t nlen;

	if (n->nlmsg_type != RTM_NEWLINK)
		return 0;

	/* sizeof(*ifi): the message must hold the struct, not a pointer. */
	if (n->nlmsg_len < NLMSG_LENGTH(sizeof(*ifi)))
		return -1;

	memset(tb, 0, sizeof(tb));
	parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), IFLA_PAYLOAD(n));
	if (tb[IFLA_IFNAME] == NULL)
		return 0;

	h = ifi->ifi_index&0xF;

	for (imp=&idxmap[h]; (im=*imp)!=NULL; imp = &im->next)
		if (im->index == ifi->ifi_index)
			break;

	if (im == NULL) {
		im = malloc(sizeof(*im));
		if (im == NULL)
			return 0;
		/* imp points at the NULL tail link here: append. */
		im->next = *imp;
		im->index = ifi->ifi_index;
		*imp = im;
	}

	im->type = ifi->ifi_type;
	im->flags = ifi->ifi_flags;
	if (tb[IFLA_ADDRESS]) {
		int alen;

		im->alen = alen = RTA_PAYLOAD(tb[IFLA_ADDRESS]);
		if (alen > sizeof(im->addr))
			alen = sizeof(im->addr);
		memcpy(im->addr, RTA_DATA(tb[IFLA_ADDRESS]), alen);
	} else {
		im->alen = 0;
		memset(im->addr, 0, sizeof(im->addr));
	}

	/*
	 * Bound the copy by both the attribute payload and the size of
	 * im->name; the attribute comes from outside the process and
	 * need not be short or NUL-terminated.
	 */
	nlen = RTA_PAYLOAD(tb[IFLA_IFNAME]);
	if (nlen > sizeof(im->name) - 1)
		nlen = sizeof(im->name) - 1;
	memcpy(im->name, RTA_DATA(tb[IFLA_IFNAME]), nlen);
	im->name[nlen] = '\0';
	return 0;
}
/*
 * Translate an interface index into a name.
 *
 * Index 0 yields "*".  A remembered index yields the stored name
 * (pointing into the table, not into buf).  Otherwise "if<idx>" is
 * formatted into buf, which must hold at least 16 bytes.
 */
const char *ll_idx_n2a(int idx, char *buf)
{
	struct idxmap *im;

	if (idx == 0)
		return "*";

	im = idxmap[idx&0xF];
	while (im != NULL) {
		if (im->index == idx)
			return im->name;
		im = im->next;
	}

	snprintf(buf, 16, "if%d", idx);
	return buf;
}
/*
 * Like ll_idx_n2a(), but uses a static fallback buffer: for an index
 * that is not remembered, the returned "if<idx>" text is overwritten
 * by the next such call.
 */
const char *ll_index_to_name(int idx)
{
	static char fallback_name[16];

	return ll_idx_n2a(idx, fallback_name);
}
int ll_index_to_type(int idx)
{
struct idxmap *im;
if (idx == 0)
return -1;
for (im = idxmap[idx&0xF]; im; im = im->next)
if (im->index == idx)
return im->type;
return -1;
}
unsigned ll_index_to_flags(int idx)
{
struct idxmap *im;
if (idx == 0)
return 0;
for (im = idxmap[idx&0xF]; im; im = im->next)
if (im->index == idx)
return im->flags;
return 0;
}
/*
 * Translate an interface name into its index.
 *
 * Returns the index, or 0 if name is NULL or not remembered.  The last
 * successful lookup is cached, so repeating the same name does not
 * scan the table again.
 */
int ll_name_to_index(char *name)
{
	static char ncache[16];
	static int icache;
	int bucket;

	if (name == NULL)
		return 0;
	if (icache && strcmp(name, ncache) == 0)
		return icache;

	for (bucket = 0; bucket < 16; bucket++) {
		struct idxmap *im = idxmap[bucket];

		while (im != NULL) {
			if (strcmp(im->name, name) == 0) {
				/* name equals im->name, which fits in ncache */
				strcpy(ncache, name);
				icache = im->index;
				return im->index;
			}
			im = im->next;
		}
	}
	return 0;
}
/*
 * Fill the interface table: request a dump of all links and feed the
 * replies to ll_remember_index().
 *
 * Returns 0 on success.  Exits the process if the request cannot be
 * sent or the dump is cut short.
 */
int ll_init_map(struct rtnl_handle *rth)
{
	int rc;

	rc = rtnl_wilddump_request(rth, AF_UNSPEC, RTM_GETLINK);
	if (rc < 0) {
		perror("Cannot send dump request");
		exit(1);
	}

	rc = rtnl_dump_filter(rth, ll_remember_index, &idxmap, NULL, NULL);
	if (rc < 0) {
		fprintf(stderr, "Dump terminated\n");
		exit(1);
	}

	return 0;
}

View File

@ -0,0 +1,127 @@
/*
* ll_proto.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/sockios.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
#include "utils.h"
/*
 * Table of link-layer protocol ids and their names.
 *
 * __PF(f,n) expands to the entry { ETH_P_f, "n" }.  Entries wrapped in
 * #ifdef are only present when the headers define that constant.
 * Both lookup functions scan the table from the top, so for an id
 * listed twice (ETH_P_IP as "ip" and "ipv4") the first name is the one
 * printed, while either name is accepted on input.
 */
#define __PF(f,n) { ETH_P_##f, #n },
static struct {
int id;
char *name;
} llproto_names[] = {
__PF(LOOP,loop)
__PF(PUP,pup)
#ifdef ETH_P_PUPAT
__PF(PUPAT,pupat)
#endif
__PF(IP,ip)
__PF(X25,x25)
__PF(ARP,arp)
__PF(BPQ,bpq)
#ifdef ETH_P_IEEEPUP
__PF(IEEEPUP,ieeepup)
#endif
#ifdef ETH_P_IEEEPUPAT
__PF(IEEEPUPAT,ieeepupat)
#endif
__PF(DEC,dec)
__PF(DNA_DL,dna_dl)
__PF(DNA_RC,dna_rc)
__PF(DNA_RT,dna_rt)
__PF(LAT,lat)
__PF(DIAG,diag)
__PF(CUST,cust)
__PF(SCA,sca)
__PF(RARP,rarp)
__PF(ATALK,atalk)
__PF(AARP,aarp)
__PF(IPX,ipx)
__PF(IPV6,ipv6)
#ifdef ETH_P_PPP_DISC
__PF(PPP_DISC,ppp_disc)
#endif
#ifdef ETH_P_PPP_SES
__PF(PPP_SES,ppp_ses)
#endif
#ifdef ETH_P_ATMMPOA
__PF(ATMMPOA,atmmpoa)
#endif
#ifdef ETH_P_ATMFATE
__PF(ATMFATE,atmfate)
#endif
__PF(802_3,802_3)
__PF(AX25,ax25)
__PF(ALL,all)
__PF(802_2,802_2)
__PF(SNAP,snap)
__PF(DDCMP,ddcmp)
__PF(WAN_PPP,wan_ppp)
__PF(PPP_MP,ppp_mp)
__PF(LOCALTALK,localtalk)
__PF(PPPTALK,ppptalk)
__PF(TR_802_2,tr_802_2)
__PF(MOBITEX,mobitex)
__PF(CONTROL,control)
__PF(IRDA,irda)
#ifdef ETH_P_ECONET
__PF(ECONET,econet)
#endif
/* Entries written out by hand rather than through __PF. */
{ 0x8100, "802.1Q" },
{ ETH_P_IP, "ipv4" },
};
#undef __PF
/*
 * Translate a link-layer protocol id into a name.
 *
 * id:  protocol id in network byte order.
 * buf: fallback buffer of len bytes.
 *
 * Returns the first matching name from llproto_names, or the id
 * formatted as "[<decimal>]" in buf if there is none.
 */
char * ll_proto_n2a(unsigned short id, char *buf, int len)
{
	const size_t count = sizeof(llproto_names)/sizeof(llproto_names[0]);
	size_t k;

	id = ntohs(id);

	for (k = 0; k < count; k++) {
		if (llproto_names[k].id != id)
			continue;
		return llproto_names[k].name;
	}

	snprintf(buf, len, "[%d]", id);
	return buf;
}
/*
 * Translate a link-layer protocol name or number into an id.
 *
 * id:  receives the protocol id in network byte order.
 * buf: text; compared case-insensitively with the names in
 *      llproto_names, otherwise parsed by get_u16() (declared outside
 *      this block; a non-zero return is treated as failure).
 *
 * Returns 0 on success, -1 if the text is neither a known name nor a
 * number get_u16() accepts.
 */
int ll_proto_a2n(unsigned short *id, char *buf)
{
	const size_t count = sizeof(llproto_names)/sizeof(llproto_names[0]);
	size_t k;

	for (k = 0; k < count; k++) {
		if (strcasecmp(llproto_names[k].name, buf) != 0)
			continue;
		*id = htons(llproto_names[k].id);
		return 0;
	}

	if (get_u16(id, buf, 0) != 0)
		return -1;

	*id = htons(*id);
	return 0;
}

View File

@ -0,0 +1,128 @@
/*
* ll_types.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/sockios.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <string.h>
/*
 * Translate an ARPHRD_* hardware type into a name.
 *
 * type: hardware type.
 * buf:  fallback buffer of len bytes.
 *
 * Returns the first matching name from the table below, or the type
 * formatted as "[<decimal>]" in buf if there is none.
 */
char * ll_type_n2a(int type, char *buf, int len)
{
/* __PF(f,n) expands to the entry { ARPHRD_f, "n" }. */
#define __PF(f,n) { ARPHRD_##f, #n },
static struct {
	int type;
	char *name;
} arphrd_names[] = {
{ 0, "generic" },
__PF(ETHER,ether)
__PF(EETHER,eether)
__PF(AX25,ax25)
__PF(PRONET,pronet)
__PF(CHAOS,chaos)
#ifdef ARPHRD_IEEE802_TR
__PF(IEEE802,ieee802)
#else
__PF(IEEE802,tr)
#endif
__PF(ARCNET,arcnet)
__PF(APPLETLK,atalk)
__PF(DLCI,dlci)
#ifdef ARPHRD_ATM
__PF(ATM,atm)
#endif
__PF(METRICOM,metricom)
#ifdef ARPHRD_IEEE1394
__PF(IEEE1394,ieee1394)
#endif
__PF(SLIP,slip)
__PF(CSLIP,cslip)
__PF(SLIP6,slip6)
__PF(CSLIP6,cslip6)
__PF(RSRVD,rsrvd)
__PF(ADAPT,adapt)
__PF(ROSE,rose)
__PF(X25,x25)
#ifdef ARPHRD_HWX25
__PF(HWX25,hwx25)
#endif
__PF(PPP,ppp)
__PF(HDLC,hdlc)
__PF(LAPB,lapb)
#ifdef ARPHRD_DDCMP
__PF(DDCMP,ddcmp)
#endif
#ifdef ARPHRD_RAWHDLC
__PF(RAWHDLC,rawhdlc)
#endif
__PF(TUNNEL,ipip)
__PF(TUNNEL6,tunnel6)
__PF(FRAD,frad)
__PF(SKIP,skip)
__PF(LOOPBACK,loopback)
__PF(LOCALTLK,ltalk)
__PF(FDDI,fddi)
__PF(BIF,bif)
__PF(SIT,sit)
__PF(IPDDP,ip/ddp)
__PF(IPGRE,gre)
__PF(PIMREG,pimreg)
__PF(HIPPI,hippi)
__PF(ASH,ash)
__PF(ECONET,econet)
__PF(IRDA,irda)
__PF(FCPP,fcpp)
__PF(FCAL,fcal)
__PF(FCPL,fcpl)
__PF(FCFABRIC,fcfb0)
__PF(FCFABRIC+1,fcfb1)
__PF(FCFABRIC+2,fcfb2)
__PF(FCFABRIC+3,fcfb3)
__PF(FCFABRIC+4,fcfb4)
__PF(FCFABRIC+5,fcfb5)
__PF(FCFABRIC+6,fcfb6)
__PF(FCFABRIC+7,fcfb7)
__PF(FCFABRIC+8,fcfb8)
__PF(FCFABRIC+9,fcfb9)
__PF(FCFABRIC+10,fcfb10)
__PF(FCFABRIC+11,fcfb11)
__PF(FCFABRIC+12,fcfb12)
#ifdef ARPHRD_IEEE802_TR
__PF(IEEE802_TR,tr)
#endif
#ifdef ARPHRD_IEEE80211
__PF(IEEE80211,ieee802.11)
#endif
#ifdef ARPHRD_VOID
__PF(VOID,void)
#endif
};
#undef __PF

	const size_t count = sizeof(arphrd_names)/sizeof(arphrd_names[0]);
	size_t k;

	/* Scan from the top so the first entry for a type wins. */
	for (k = 0; k < count; k++) {
		if (arphrd_names[k].type != type)
			continue;
		return arphrd_names[k].name;
	}

	snprintf(buf, len, "[%d]", type);
	return buf;
}

View File

@ -0,0 +1,388 @@
/*
* rt_names.c rtnetlink names DB.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <syslog.h>
#include <fcntl.h>
#include <string.h>
#include <sys/time.h>
/*
 * Load "<id> <name>" lines from a text file into a name table.
 *
 * file: path of the database file; a file that cannot be opened is
 *       silently ignored.
 * tab:  table indexed by id; matching entries are replaced by a newly
 *       allocated copy of the name.
 * size: number of entries in tab; ids outside 0..size-1 are skipped.
 *
 * The id may be decimal or hexadecimal with a "0x" prefix.  Blank
 * lines and lines starting with '#' are ignored.  Reading stops with a
 * diagnostic at the first line that cannot be parsed.
 */
static void rtnl_tab_initialize(char *file, char **tab, int size)
{
	char buf[512];
	FILE *fp;

	fp = fopen(file, "r");
	if (!fp)
		return;

	while (fgets(buf, sizeof(buf), fp)) {
		char *p = buf;
		int id;
		char namebuf[512];
		char *copy;
		size_t n;

		while (*p == ' ' || *p == '\t')
			p++;
		if (*p == '#' || *p == '\n' || *p == 0)
			continue;
		if (sscanf(p, "0x%x %s\n", &id, namebuf) != 2 &&
		    sscanf(p, "0x%x %s #", &id, namebuf) != 2 &&
		    sscanf(p, "%d %s\n", &id, namebuf) != 2 &&
		    sscanf(p, "%d %s #", &id, namebuf) != 2) {
			fprintf(stderr, "Database %s is corrupted at %s\n",
				file, p);
			/* leave through the common exit so fp is closed */
			break;
		}

		/* id == size would write one element past the table. */
		if (id<0 || id>=size)
			continue;

		n = strlen(namebuf) + 1;
		copy = malloc(n);
		if (!copy)
			continue;
		memcpy(copy, namebuf, n);
		tab[id] = copy;
	}
	fclose(fp);
}
/*
 * Routing protocol names indexed by protocol id.  A NULL entry means
 * no name is known for that id.  The entries below are built in; more
 * can be loaded from /etc/iproute2/rt_protos.
 */
static char * rtnl_rtprot_tab[256] = {
"none",
"redirect",
"kernel",
"boot",
"static",
NULL,
NULL,
NULL,
"gated",
"ra",
"mrt",
"zebra",
"bird",
};
/* Non-zero once rtnl_rtprot_initialize() has run. */
static int rtnl_rtprot_init;
/*
 * Load /etc/iproute2/rt_protos into rtnl_rtprot_tab.  The flag is set
 * first, so the file is read at most once even if it is missing.
 */
static void rtnl_rtprot_initialize(void)
{
rtnl_rtprot_init = 1;
rtnl_tab_initialize("/etc/iproute2/rt_protos",
rtnl_rtprot_tab, 256);
}
/*
 * Translate a routing protocol id into a name.
 *
 * For ids 0..255 the table name is returned, loading the database
 * file on first need.  Ids without a name, and ids outside that range,
 * are formatted in decimal into buf (len bytes).
 */
char * rtnl_rtprot_n2a(int id, char *buf, int len)
{
	if (id >= 0 && id < 256) {
		if (!rtnl_rtprot_tab[id] && !rtnl_rtprot_init)
			rtnl_rtprot_initialize();
		if (rtnl_rtprot_tab[id])
			return rtnl_rtprot_tab[id];
	}

	snprintf(buf, len, "%d", id);
	return buf;
}
/*
 * Translate a routing protocol name or number into an id.
 *
 * id:  receives the result.
 * arg: a name from the table, or a number 0..255 in any base that
 *      strtoul() recognises with base 0.
 *
 * Returns 0 on success, -1 if arg is neither.  The last name that was
 * found is cached together with its id.
 */
int rtnl_rtprot_a2n(__u32 *id, char *arg)
{
	static char *cache = NULL;
	static unsigned long res;
	unsigned long val;
	char *end;
	int i;

	if (cache && strcmp(cache, arg) == 0) {
		*id = res;
		return 0;
	}

	if (!rtnl_rtprot_init)
		rtnl_rtprot_initialize();

	for (i=0; i<256; i++) {
		if (rtnl_rtprot_tab[i] &&
		    strcmp(rtnl_rtprot_tab[i], arg) == 0) {
			cache = rtnl_rtprot_tab[i];
			res = i;
			*id = res;
			return 0;
		}
	}

	/*
	 * Parse into a local: 'res' belongs to the cached name, and
	 * overwriting it here would make the next cache hit return the
	 * wrong id.
	 */
	val = strtoul(arg, &end, 0);
	if (!end || end == arg || *end || val > 255)
		return -1;
	*id = val;
	return 0;
}
/*
 * Route scope names indexed by scope id.  A NULL entry means no name
 * is known for that id.  Only id 0 is set statically; the remaining
 * built-in names are installed by rtnl_rtscope_initialize().
 */
static char * rtnl_rtscope_tab[256] = {
"global",
};
/* Non-zero once rtnl_rtscope_initialize() has run. */
static int rtnl_rtscope_init;
/*
 * Install the built-in high-numbered scope names, then load
 * /etc/iproute2/rt_scopes, whose entries replace the built-in ones
 * for the ids it lists.
 */
static void rtnl_rtscope_initialize(void)
{
rtnl_rtscope_init = 1;
rtnl_rtscope_tab[255] = "nowhere";
rtnl_rtscope_tab[254] = "host";
rtnl_rtscope_tab[253] = "link";
rtnl_rtscope_tab[200] = "site";
rtnl_tab_initialize("/etc/iproute2/rt_scopes",
rtnl_rtscope_tab, 256);
}
/*
 * Translate a route scope id into a name.
 *
 * For ids 0..255 the table name is returned, running the initialiser
 * on first need.  Ids without a name, and ids outside that range, are
 * formatted in decimal into buf (len bytes).
 */
char * rtnl_rtscope_n2a(int id, char *buf, int len)
{
	if (id >= 0 && id < 256) {
		if (!rtnl_rtscope_tab[id] && !rtnl_rtscope_init)
			rtnl_rtscope_initialize();
		if (rtnl_rtscope_tab[id])
			return rtnl_rtscope_tab[id];
	}

	snprintf(buf, len, "%d", id);
	return buf;
}
/*
 * Translate a route scope name or number into an id.
 *
 * id:  receives the result.
 * arg: a name from the table, or a number 0..255 in any base that
 *      strtoul() recognises with base 0.
 *
 * Returns 0 on success, -1 if arg is neither.  The last name that was
 * found is cached together with its id.
 */
int rtnl_rtscope_a2n(__u32 *id, char *arg)
{
	static char *cache = NULL;
	static unsigned long res;
	unsigned long val;
	char *end;
	int i;

	if (cache && strcmp(cache, arg) == 0) {
		*id = res;
		return 0;
	}

	if (!rtnl_rtscope_init)
		rtnl_rtscope_initialize();

	for (i=0; i<256; i++) {
		if (rtnl_rtscope_tab[i] &&
		    strcmp(rtnl_rtscope_tab[i], arg) == 0) {
			cache = rtnl_rtscope_tab[i];
			res = i;
			*id = res;
			return 0;
		}
	}

	/*
	 * Parse into a local: 'res' belongs to the cached name, and
	 * overwriting it here would make the next cache hit return the
	 * wrong id.
	 */
	val = strtoul(arg, &end, 0);
	if (!end || end == arg || *end || val > 255)
		return -1;
	*id = val;
	return 0;
}
/*
 * Routing realm names indexed by realm id.  A NULL entry means no
 * name is known for that id.  Only id 0 is built in; more can be
 * loaded from /etc/iproute2/rt_realms.
 */
static char * rtnl_rtrealm_tab[256] = {
"unknown",
};
/* Non-zero once rtnl_rtrealm_initialize() has run. */
static int rtnl_rtrealm_init;
/*
 * Load /etc/iproute2/rt_realms into rtnl_rtrealm_tab.  The flag is set
 * first, so the file is read at most once even if it is missing.
 */
static void rtnl_rtrealm_initialize(void)
{
rtnl_rtrealm_init = 1;
rtnl_tab_initialize("/etc/iproute2/rt_realms",
rtnl_rtrealm_tab, 256);
}
/*
 * Translate a routing realm id into a name.
 *
 * For ids 0..255 the table name is returned, loading the database
 * file on first need.  Ids without a name, and ids outside that range,
 * are formatted in decimal into buf (len bytes).
 */
char * rtnl_rtrealm_n2a(int id, char *buf, int len)
{
	if (id >= 0 && id < 256) {
		if (!rtnl_rtrealm_tab[id] && !rtnl_rtrealm_init)
			rtnl_rtrealm_initialize();
		if (rtnl_rtrealm_tab[id])
			return rtnl_rtrealm_tab[id];
	}

	snprintf(buf, len, "%d", id);
	return buf;
}
/*
 * Translate a routing realm name or number into an id.
 *
 * id:  receives the result.
 * arg: a name from the table, or a number 0..255 in any base that
 *      strtoul() recognises with base 0.
 *
 * Returns 0 on success, -1 if arg is neither.  The last name that was
 * found is cached together with its id.
 */
int rtnl_rtrealm_a2n(__u32 *id, char *arg)
{
	static char *cache = NULL;
	static unsigned long res;
	unsigned long val;
	char *end;
	int i;

	if (cache && strcmp(cache, arg) == 0) {
		*id = res;
		return 0;
	}

	if (!rtnl_rtrealm_init)
		rtnl_rtrealm_initialize();

	for (i=0; i<256; i++) {
		if (rtnl_rtrealm_tab[i] &&
		    strcmp(rtnl_rtrealm_tab[i], arg) == 0) {
			cache = rtnl_rtrealm_tab[i];
			res = i;
			*id = res;
			return 0;
		}
	}

	/*
	 * Parse into a local: 'res' belongs to the cached name, and
	 * overwriting it here would make the next cache hit return the
	 * wrong id.
	 */
	val = strtoul(arg, &end, 0);
	if (!end || end == arg || *end || val > 255)
		return -1;
	*id = val;
	return 0;
}
/*
 * Routing table names indexed by table id.  A NULL entry means no
 * name is known for that id.  Only id 0 is set statically; the
 * remaining built-in names are installed by rtnl_rttable_initialize().
 */
static char * rtnl_rttable_tab[256] = {
"unspec",
};
/* Non-zero once rtnl_rttable_initialize() has run. */
static int rtnl_rttable_init;
/*
 * Install the built-in names for tables 255 and 254, then load
 * /etc/iproute2/rt_tables, whose entries replace the built-in ones
 * for the ids it lists.
 */
static void rtnl_rttable_initialize(void)
{
rtnl_rttable_init = 1;
rtnl_rttable_tab[255] = "local";
rtnl_rttable_tab[254] = "main";
rtnl_tab_initialize("/etc/iproute2/rt_tables",
rtnl_rttable_tab, 256);
}
/*
 * Translate a routing table id into a name.
 *
 * For ids 0..255 the table name is returned, running the initialiser
 * on first need.  Ids without a name, and ids outside that range, are
 * formatted in decimal into buf (len bytes).
 */
char * rtnl_rttable_n2a(int id, char *buf, int len)
{
	if (id >= 0 && id < 256) {
		if (!rtnl_rttable_tab[id] && !rtnl_rttable_init)
			rtnl_rttable_initialize();
		if (rtnl_rttable_tab[id])
			return rtnl_rttable_tab[id];
	}

	snprintf(buf, len, "%d", id);
	return buf;
}
/*
 * Translate a routing table name or number into an id.
 *
 * id:  receives the result.
 * arg: a name from the table, or a number 0..255 in any base that
 *      strtoul() recognises with base 0.
 *
 * Returns 0 on success, -1 if arg is neither.  The last name that was
 * found is cached together with its id.
 */
int rtnl_rttable_a2n(__u32 *id, char *arg)
{
	static char *cache = NULL;
	static unsigned long res;
	unsigned long val;
	char *end;
	int i;

	if (cache && strcmp(cache, arg) == 0) {
		*id = res;
		return 0;
	}

	if (!rtnl_rttable_init)
		rtnl_rttable_initialize();

	for (i=0; i<256; i++) {
		if (rtnl_rttable_tab[i] &&
		    strcmp(rtnl_rttable_tab[i], arg) == 0) {
			cache = rtnl_rttable_tab[i];
			res = i;
			*id = res;
			return 0;
		}
	}

	/*
	 * Keep the full unsigned long for the range check: squeezing the
	 * value into an int first lets "-1" and values above UINT_MAX
	 * slip past "> 255".
	 */
	val = strtoul(arg, &end, 0);
	if (!end || end == arg || *end || val > 255)
		return -1;
	*id = val;
	return 0;
}
/* DS field value -> name table; only slot 0 is seeded at compile time. */
static char * rtnl_rtdsfield_tab[256] = {
	[0] = "0",
};

/* Raised by rtnl_rtdsfield_initialize() so the load is attempted only once. */
static int rtnl_rtdsfield_init;

/*
 * Mark the DS field table as initialized and hand it, together with the
 * rt_dsfield configuration path, to rtnl_tab_initialize().
 */
static void rtnl_rtdsfield_initialize(void)
{
	char **table = rtnl_rtdsfield_tab;

	rtnl_rtdsfield_init = 1;
	rtnl_tab_initialize("/etc/iproute2/rt_dsfield", table, 256);
}
/*
 * Translate a DS field value to its name.
 *
 * Out-of-range ids are formatted as decimal; in-range ids without a table
 * entry are formatted as two-digit hex with a 0x prefix. Both fallbacks
 * write into buf (at most len bytes) and return buf.
 */
char * rtnl_dsfield_n2a(int id, char *buf, int len)
{
	char *name;

	if (id < 0 || id >= 256) {
		snprintf(buf, len, "%d", id);
		return buf;
	}

	name = rtnl_rtdsfield_tab[id];
	/* Empty slot and nothing loaded yet: load, then look again. */
	if (name == NULL && !rtnl_rtdsfield_init) {
		rtnl_rtdsfield_initialize();
		name = rtnl_rtdsfield_tab[id];
	}
	if (name != NULL)
		return name;

	snprintf(buf, len, "0x%02x", id);
	return buf;
}
/*
 * Translate a DS field name or hexadecimal number to its value.
 *
 * id:  receives the value on success.
 * arg: name from the table, or a hexadecimal number 0..ff.
 *
 * Returns 0 on success, -1 if arg is neither a known name nor a valid number.
 *
 * The last successful *name* lookup is cached in (cache, res). The numeric
 * fallback parses into a separate local so that it can never overwrite the
 * value belonging to the cached name.
 */
int rtnl_dsfield_a2n(__u32 *id, char *arg)
{
	static char *cache = NULL;
	static unsigned long res;
	unsigned long num;
	char *end;
	int i;

	if (cache && strcmp(cache, arg) == 0) {
		*id = res;
		return 0;
	}

	if (!rtnl_rtdsfield_init)
		rtnl_rtdsfield_initialize();

	for (i = 0; i < 256; i++) {
		if (rtnl_rtdsfield_tab[i] &&
		    strcmp(rtnl_rtdsfield_tab[i], arg) == 0) {
			cache = rtnl_rtdsfield_tab[i];
			res = i;
			*id = res;
			return 0;
		}
	}

	/* Not a known name: numbers are always read as base 16 here. */
	num = strtoul(arg, &end, 16);
	if (!end || end == arg || *end || num > 255)
		return -1;
	*id = num;
	return 0;
}

View File

@ -0,0 +1,528 @@
/*
* utils.c
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*
* Changes:
*
* Rani Assaf <rani@magic.metawire.com> 980929: resolve addresses
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <limits.h>
#include <syslog.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <string.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <resolv.h>
#include <linux/pkt_sched.h>
#include "utils.h"
/*
 * Parse arg as a signed integer in the given base (strtol semantics).
 *
 * Returns 0 and stores the value in *val on success. Returns -1, leaving
 * *val untouched, if arg is NULL or empty, contains anything but one
 * complete number, or the number does not fit in an int.
 */
int get_integer(int *val, char *arg, int base)
{
	long res;
	char *ptr;

	if (!arg || !*arg)
		return -1;

	errno = 0;
	res = strtol(arg, &ptr, base);

	/* No digits consumed, or trailing characters after the number. */
	if (!ptr || ptr == arg || *ptr)
		return -1;

	/*
	 * strtol() saturates at LONG_MIN/LONG_MAX on overflow; where long is
	 * no wider than int that would otherwise pass the range test below.
	 */
	if (errno == ERANGE)
		return -1;

	if (res > INT_MAX || res < INT_MIN)
		return -1;

	*val = res;
	return 0;
}
/*
 * Parse arg as an unsigned integer in the given base (strtoul semantics).
 *
 * Returns 0 and stores the value in *val on success. Returns -1, leaving
 * *val untouched, if arg is NULL or empty, contains anything but one
 * complete number, or the number does not fit in an unsigned int.
 */
int get_unsigned(unsigned *val, char *arg, int base)
{
	unsigned long res;
	char *ptr;

	if (!arg || !*arg)
		return -1;

	errno = 0;
	res = strtoul(arg, &ptr, base);

	/* No digits consumed, or trailing characters after the number. */
	if (!ptr || ptr == arg || *ptr)
		return -1;

	/*
	 * strtoul() saturates at ULONG_MAX on overflow; where long is no
	 * wider than int that would otherwise pass the range test below.
	 */
	if (errno == ERANGE)
		return -1;

	if (res > UINT_MAX)
		return -1;

	*val = res;
	return 0;
}
/*
 * Parse arg as a 32-bit unsigned integer in the given base (strtoul
 * semantics).
 *
 * Returns 0 and stores the value in *val on success. Returns -1, leaving
 * *val untouched, if arg is NULL or empty, contains anything but one
 * complete number, or the number exceeds 0xFFFFFFFF.
 */
int get_u32(__u32 *val, char *arg, int base)
{
	unsigned long res;
	char *ptr;

	if (!arg || !*arg)
		return -1;

	errno = 0;
	res = strtoul(arg, &ptr, base);

	/* No digits consumed, or trailing characters after the number. */
	if (!ptr || ptr == arg || *ptr)
		return -1;

	/*
	 * strtoul() saturates at ULONG_MAX on overflow; with a 32-bit long
	 * that equals 0xFFFFFFFF and would otherwise be accepted below.
	 */
	if (errno == ERANGE)
		return -1;

	if (res > 0xFFFFFFFFUL)
		return -1;

	*val = res;
	return 0;
}
/*
 * Parse arg as a 16-bit unsigned integer in the given base (strtoul
 * semantics). Returns 0 and stores the value on success; returns -1 for a
 * NULL/empty string, a malformed number, or a value above 0xFFFF.
 */
int get_u16(__u16 *val, char *arg, int base)
{
	char *tail = NULL;
	unsigned long parsed;

	if (arg == NULL || arg[0] == '\0')
		return -1;

	parsed = strtoul(arg, &tail, base);
	if (tail == NULL || tail == arg)	/* nothing was consumed */
		return -1;
	if (*tail != '\0')			/* trailing characters */
		return -1;
	if (parsed > 0xFFFF)
		return -1;

	*val = parsed;
	return 0;
}
/*
 * Parse arg as an 8-bit unsigned integer in the given base (strtoul
 * semantics). Returns 0 and stores the value on success; returns -1 for a
 * NULL/empty string, a malformed number, or a value above 0xFF.
 */
int get_u8(__u8 *val, char *arg, int base)
{
	char *tail = NULL;
	unsigned long parsed;
	int well_formed;

	if (arg == NULL || arg[0] == '\0')
		return -1;

	parsed = strtoul(arg, &tail, base);
	well_formed = (tail != NULL && tail != arg && *tail == '\0');
	if (!well_formed || parsed > 0xFF)
		return -1;

	*val = parsed;
	return 0;
}
/*
 * Parse arg as a 16-bit signed integer in the given base (strtol
 * semantics). Returns 0 and stores the value on success; returns -1 for a
 * NULL/empty string, a malformed number, or a value outside
 * -0x8000..0x7FFF.
 */
int get_s16(__s16 *val, char *arg, int base)
{
	char *tail = NULL;
	long parsed;

	if (arg == NULL || arg[0] == '\0')
		return -1;

	parsed = strtol(arg, &tail, base);
	if (tail == NULL || tail == arg)	/* nothing was consumed */
		return -1;
	if (*tail != '\0')			/* trailing characters */
		return -1;
	if (parsed < -0x8000 || parsed > 0x7FFF)
		return -1;

	*val = parsed;
	return 0;
}
/*
 * Parse arg as an 8-bit signed integer in the given base (strtol
 * semantics). Returns 0 and stores the value on success; returns -1 for a
 * NULL/empty string, a malformed number, or a value outside -0x80..0x7F.
 */
int get_s8(__s8 *val, char *arg, int base)
{
	char *tail = NULL;
	long parsed;
	int well_formed;

	if (arg == NULL || arg[0] == '\0')
		return -1;

	parsed = strtol(arg, &tail, base);
	well_formed = (tail != NULL && tail != arg && *tail == '\0');
	if (!well_formed || parsed < -0x80 || parsed > 0x7F)
		return -1;

	*val = parsed;
	return 0;
}
/*
 * Parse a textual address into *addr without printing diagnostics.
 *
 * addr:   result; zeroed first, then family/bytelen/bitlen/data are filled.
 * name:   "default", "all" or "any" (wildcard), an IPv6 literal (anything
 *         containing ':'), a DECnet address when family is AF_DECnet, or a
 *         dotted IPv4 address with one to four decimal components.
 * family: AF_UNSPEC to accept IPv4 or IPv6, or the required family.
 *
 * Returns 0 on success, -1 if the text is malformed or does not belong to
 * the requested family. bitlen is set to -1 on success. On failure *addr
 * may have been partially written.
 */
int get_addr_1(inet_prefix *addr, char *name, int family)
{
	char *cp;
	unsigned char *ap = (unsigned char*)addr->data;
	unsigned octet;
	int i;

	memset(addr, 0, sizeof(*addr));

	if (strcmp(name, "default") == 0 ||
	    strcmp(name, "all") == 0 ||
	    strcmp(name, "any") == 0) {
		if (family == AF_DECnet)
			return -1;
		addr->family = family;
		addr->bytelen = (family == AF_INET6 ? 16 : 4);
		addr->bitlen = -1;
		return 0;
	}

	if (strchr(name, ':')) {
		addr->family = AF_INET6;
		if (family != AF_UNSPEC && family != AF_INET6)
			return -1;
		if (inet_pton(AF_INET6, name, addr->data) <= 0)
			return -1;
		addr->bytelen = 16;
		addr->bitlen = -1;
		return 0;
	}

	if (family == AF_DECnet) {
		struct dn_naddr dna;
		addr->family = AF_DECnet;
		if (dnet_pton(AF_DECnet, name, &dna) <= 0)
			return -1;
		memcpy(addr->data, dna.a_addr, 2);
		addr->bytelen = 2;
		addr->bitlen = -1;
		return 0;
	}

	addr->family = AF_INET;
	if (family != AF_UNSPEC && family != AF_INET)
		return -1;
	addr->bytelen = 4;
	addr->bitlen = -1;

	/*
	 * Dotted decimal, missing trailing components stay zero. Each
	 * component is accumulated in a wide variable so that values above
	 * 255 are rejected instead of silently wrapping inside one byte.
	 */
	octet = 0;
	for (cp = name, i = 0; *cp; cp++) {
		if (*cp <= '9' && *cp >= '0') {
			octet = 10*octet + (*cp-'0');
			if (octet > 255)
				return -1;
			ap[i] = octet;
			continue;
		}
		if (*cp == '.' && ++i <= 3) {
			octet = 0;
			continue;
		}
		return -1;
	}
	return 0;
}
/*
 * Parse "ADDRESS[/PREFIXLEN]" into *dst without printing diagnostics.
 *
 * dst:    result; bitlen holds the prefix length (the full address width
 *         when no "/len" is given, 0 for the wildcard names).
 * arg:    text to parse. A '/' inside it is overwritten with NUL while the
 *         address part is parsed and restored before returning, so arg
 *         must be writable.
 * family: AF_UNSPEC or the required address family.
 *
 * Returns 0 on success, -1 if the address or the prefix length is invalid.
 */
int get_prefix_1(inet_prefix *dst, char *arg, int family)
{
	int err;
	unsigned plen;
	char *slash;

	memset(dst, 0, sizeof(*dst));

	if (strcmp(arg, "default") == 0 ||
	    strcmp(arg, "any") == 0 ||
	    strcmp(arg, "all") == 0) {
		if (family == AF_DECnet)
			return -1;
		dst->family = family;
		dst->bytelen = 0;
		dst->bitlen = 0;
		return 0;
	}

	slash = strchr(arg, '/');
	if (slash)
		*slash = 0;

	err = get_addr_1(dst, arg, family);
	if (err == 0) {
		switch(dst->family) {
		case AF_INET6:
			dst->bitlen = 128;
			break;
		case AF_DECnet:
			dst->bitlen = 16;
			break;
		default:
		case AF_INET:
			dst->bitlen = 32;
		}
		if (slash) {
			/* plen is unsigned, so it must go through the unsigned parser. */
			if (get_unsigned(&plen, slash+1, 0) || plen > dst->bitlen) {
				err = -1;
				goto done;
			}
			dst->bitlen = plen;
		}
	}
done:
	if (slash)
		*slash = '/';
	return err;
}
/*
 * Parse an address into *dst, or terminate the program.
 *
 * Prints an error to stderr and exits with status 1 if family is AF_PACKET
 * or if get_addr_1() rejects arg. Returns 0 otherwise.
 */
int get_addr(inet_prefix *dst, char *arg, int family)
{
	int parsed_ok;

	if (family == AF_PACKET) {
		fprintf(stderr, "Error: \"%s\" may be inet address, but it is not allowed in this context.\n", arg);
		exit(1);
	}

	parsed_ok = (get_addr_1(dst, arg, family) == 0);
	if (parsed_ok)
		return 0;

	fprintf(stderr, "Error: an inet address is expected rather than \"%s\".\n", arg);
	exit(1);
}
/*
 * Parse an address prefix into *dst, or terminate the program.
 *
 * Prints an error to stderr and exits with status 1 if family is AF_PACKET
 * or if get_prefix_1() rejects arg. Returns 0 otherwise.
 */
int get_prefix(inet_prefix *dst, char *arg, int family)
{
	int parsed_ok;

	if (family == AF_PACKET) {
		fprintf(stderr, "Error: \"%s\" may be inet prefix, but it is not allowed in this context.\n", arg);
		exit(1);
	}

	parsed_ok = (get_prefix_1(dst, arg, family) == 0);
	if (parsed_ok)
		return 0;

	fprintf(stderr, "Error: an inet prefix is expected rather than \"%s\".\n", arg);
	exit(1);
}
/*
 * Parse name as an AF_INET address and return the first 32-bit word of the
 * parsed address data. Prints an error and exits with status 1 if
 * get_addr_1() rejects the text.
 */
__u32 get_addr32(char *name)
{
	inet_prefix addr;

	if (get_addr_1(&addr, name, AF_INET) == 0)
		return addr.data[0];

	fprintf(stderr, "Error: an IP address is expected rather than \"%s\"\n", name);
	exit(1);
}
/* Report a truncated command line on stderr and exit with status -1. */
void incomplete_command()
{
	fputs("Command line is not complete. Try option \"help\"\n", stderr);
	exit(-1);
}
/*
 * Report an invalid argument on stderr and exit with status -1.
 * msg explains what is wrong; arg is the offending text.
 */
void invarg(char *msg, char *arg)
{
	FILE *err = stderr;

	fprintf(err, "Error: argument \"%s\" is wrong: %s\n", arg, msg);
	exit(-1);
}
/*
 * Report that keyword key was given twice (arg being the second value) on
 * stderr and exit with status -1.
 */
void duparg(char *key, char *arg)
{
	FILE *err = stderr;

	fprintf(err, "Error: duplicate \"%s\": \"%s\" is the second value.\n", key, arg);
	exit(-1);
}
/*
 * Report that either key is duplicated or arg is unexpected on stderr and
 * exit with status -1.
 */
void duparg2(char *key, char *arg)
{
	FILE *err = stderr;

	fprintf(err, "Error: either \"%s\" is duplicate, or \"%s\" is a garbage.\n", key, arg);
	exit(-1);
}
/*
 * Test whether cmd is a (possibly complete) prefix of pattern.
 *
 * Returns 0 when it is, -1 when cmd is longer than pattern, and otherwise
 * the non-zero memcmp() result of comparing the first strlen(cmd) bytes.
 */
int matches(char *cmd, char *pattern)
{
	size_t cmd_len = strlen(cmd);

	if (cmd_len > strlen(pattern))
		return -1;
	return memcmp(pattern, cmd, cmd_len);
}
/*
 * Compare the leading `bits` bits of two addresses.
 *
 * Returns 0 when they agree, -1 when a difference is found in the whole
 * 32-bit words, and 1 when the difference is in the trailing partial word.
 */
int inet_addr_match(inet_prefix *a, inet_prefix *b, int bits)
{
	__u32 *lhs = a->data;
	__u32 *rhs = b->data;
	int full_words = bits >> 0x05;
	int tail_bits = bits & 0x1f;

	if (full_words && memcmp(lhs, rhs, full_words << 2))
		return -1;

	if (tail_bits) {
		/* The mask is built in host order and converted, since it is
		 * applied to the address words as they are stored. */
		__u32 mask = htonl((0xffffffff) << (0x20 - tail_bits));

		if ((lhs[full_words] ^ rhs[full_words]) & mask)
			return 1;
	}
	return 0;
}
int __iproute2_hz_internal;

/*
 * Determine the HZ value to use.
 *
 * Order of preference:
 *   1. environment variable HZ (falls back to the HZ macro if it parses as 0);
 *   2. the fourth hex field of the psched file, but only when the third
 *      field equals 1000000. The file path is $PROC_NET_PSCHED, else
 *      $PROC_ROOT/net/psched, else /proc/net/psched;
 *   3. the compile-time HZ macro.
 */
int __get_hz(void)
{
	char name[1024];
	const char *env;
	int hz = 0;
	FILE *fp;

	env = getenv("HZ");
	if (env) {
		int from_env = atoi(env);

		return from_env ? from_env : HZ;
	}

	if ((env = getenv("PROC_NET_PSCHED")) != NULL)
		snprintf(name, sizeof(name)-1, "%s", env);
	else if ((env = getenv("PROC_ROOT")) != NULL)
		snprintf(name, sizeof(name)-1, "%s/net/psched", env);
	else
		strcpy(name, "/proc/net/psched");

	fp = fopen(name, "r");
	if (fp == NULL)
		return HZ;

	{
		unsigned nom, denom;

		if (fscanf(fp, "%*08x%*08x%08x%08x", &nom, &denom) == 2 &&
		    nom == 1000000)
			hz = denom;
	}
	fclose(fp);

	return hz ? hz : HZ;
}
/*
 * Format a binary address of family af as text.
 *
 * AF_INET/AF_INET6 go through inet_ntop(), AF_IPX through ipx_ntop() and
 * AF_DECnet through dnet_ntop() (the first two bytes of addr are copied
 * into a dn_naddr first). Any other family yields the literal "???".
 * The len argument is not used here.
 */
const char *rt_addr_n2a(int af, int len, void *addr, char *buf, int buflen)
{
	if (af == AF_INET || af == AF_INET6)
		return inet_ntop(af, addr, buf, buflen);

	if (af == AF_IPX)
		return ipx_ntop(af, addr, buf, buflen);

	if (af == AF_DECnet) {
		struct dn_naddr dna = { 2, { 0, 0, }};

		memcpy(dna.a_addr, addr, 2);
		return dnet_ntop(af, &dna, buf, buflen);
	}

	return "???";
}
#ifdef RESOLVE_HOSTNAMES
/* One cached reverse lookup; name is NULL when the lookup failed. */
struct namerec
{
	struct namerec *next;
	inet_prefix addr;
	char *name;
};

/* Hash table of cached lookups, chained through namerec.next. */
static struct namerec *nht[256];

/*
 * Reverse-resolve a binary address to a host name, with caching.
 *
 * addr: binary address, len bytes long.
 * len:  address length in bytes.
 * af:   address family. An IPv4-mapped IPv6 address is looked up as the
 *       embedded IPv4 address.
 *
 * Returns the cached or freshly resolved host name, or NULL if the address
 * does not resolve, len is unusable, or memory is exhausted. Failed
 * lookups are cached too. Cache entries are never released.
 */
char *resolve_address(char *addr, int len, int af)
{
	struct namerec *n;
	struct hostent *h_ent;
	const unsigned char *bytes;
	unsigned hash;
	int i;
	static int notfirst;

	if (af == AF_INET6 && ((__u32*)addr)[0] == 0 &&
	    ((__u32*)addr)[1] == 0 && ((__u32*)addr)[2] == htonl(0xffff)) {
		af = AF_INET;
		addr += 12;
		len = 4;
	}

	/* The address is copied into the cache record below; it has to fit. */
	if (len <= 0 || (size_t)len > sizeof(n->addr.data))
		return NULL;

	/*
	 * Hash the last (up to) four bytes. They are read as unsigned so
	 * the result stays within 0..255, and short addresses never read
	 * in front of the buffer.
	 */
	bytes = (const unsigned char *)addr;
	hash = 0;
	for (i = (len < 4 ? 0 : len - 4); i < len; i++)
		hash ^= bytes[i];

	for (n = nht[hash]; n; n = n->next) {
		if (n->addr.family == af &&
		    n->addr.bytelen == len &&
		    memcmp(n->addr.data, addr, len) == 0)
			return n->name;
	}

	if ((n = malloc(sizeof(*n))) == NULL)
		return NULL;
	n->addr.family = af;
	n->addr.bytelen = len;
	n->name = NULL;
	memcpy(n->addr.data, addr, len);
	n->next = nht[hash];
	nht[hash] = n;

	if (++notfirst == 1)
		sethostent(1);
	/* Push out pending output before the lookup is started. */
	fflush(stdout);

	if ((h_ent = gethostbyaddr(addr, len, af)) != NULL)
		n->name = strdup(h_ent->h_name);

	/* Even if we fail, "negative" entry is remembered. */
	return n->name;
}
#endif
/*
 * Format an address for display.
 *
 * When built with RESOLVE_HOSTNAMES and resolve_hosts is set, the host name
 * from resolve_address() is returned if there is one. A non-positive len is
 * first replaced by the default length of the family (4, 16, 10 or 2 bytes
 * for AF_INET, AF_INET6, AF_IPX, AF_DECnet). In every other case the
 * numeric form from rt_addr_n2a() is returned.
 */
const char *format_host(int af, int len, void *addr, char *buf, int buflen)
{
#ifdef RESOLVE_HOSTNAMES
	if (resolve_hosts) {
		char *n;

		if (len <= 0) {
			if (af == AF_INET)
				len = 4;
			else if (af == AF_INET6)
				len = 16;
			else if (af == AF_IPX)
				len = 10;
#ifdef AF_DECnet
			/* I see no reasons why gethostbyname
			   may not work for DECnet */
			else if (af == AF_DECnet)
				len = 2;
#endif
		}
		if (len > 0) {
			n = resolve_address(addr, len, af);
			if (n != NULL)
				return n;
		}
	}
#endif
	return rt_addr_n2a(af, len, addr, buf, buflen);
}
/*
 * Format len bytes of str as colon-separated lowercase hex ("aa:bb:cc").
 *
 * buf/blen: output buffer and its size in bytes.
 *
 * Returns buf. The result is always NUL-terminated when blen > 0: an empty
 * input yields an empty string, and when the buffer is too small the output
 * stops after the last byte that fits completely (no dangling ':').
 */
__u8* hexstring_n2a(const __u8 *str, int len, __u8 *buf, int blen)
{
	__u8 *ptr = buf;
	int i;

	if (blen > 0)
		*ptr = 0;

	for (i=0; i<len; i++) {
		/* Two hex digits plus the terminating NUL. */
		if (blen < 3)
			break;
		sprintf((char *)ptr, "%02x", str[i]);
		ptr += 2;
		blen -= 2;

		/*
		 * A separator is written only when the next byte is certain
		 * to follow it: ':' + two digits + NUL need four bytes.
		 */
		if (i == len-1 || blen < 4)
			break;
		*ptr++ = ':';
		blen--;
	}
	return buf;
}
/*
 * Parse colon-separated hex groups ("aa:bb:cc") into bytes.
 *
 * str:      NUL-terminated input; each group is a hex number 0..ff, an
 *           empty group counts as 0.
 * buf/blen: output buffer and its size; groups beyond blen are parsed and
 *           validated but not stored, unused trailing bytes are zeroed.
 *
 * Returns buf on success, NULL if a non-hex character is met or a group
 * exceeds 0xff.
 */
__u8* hexstring_a2n(const __u8 *str, __u8 *buf, int blen)
{
	int cnt = 0;

	for (;;) {
		unsigned acc;
		char ch;

		acc = 0;
		while ((ch = *str) != ':' && ch != 0) {
			if (ch >= '0' && ch <= '9')
				ch -= '0';
			else if (ch >= 'a' && ch <= 'f')
				ch -= 'a'-10;
			else if (ch >= 'A' && ch <= 'F')
				ch -= 'A'-10;
			else
				return NULL;
			acc = (acc<<4) + ch;
			/*
			 * Checked per digit: a long group must not be able to
			 * wrap the accumulator back into the valid range.
			 */
			if (acc > 255)
				return NULL;
			str++;
		}

		if (cnt < blen) {
			buf[cnt] = acc;
			cnt++;
		}
		if (ch == 0)
			break;
		++str;
	}
	if (cnt < blen)
		memset(buf+cnt, 0, blen-cnt);
	return buf;
}

View File

@ -0,0 +1,37 @@
# Object lists, one per program built in this directory.
SSOBJ=ss.o ssfilter.o
NSTATOBJ=nstat.o
IFSTATOBJ=ifstat.o
RTACCTOBJ=rtacct.o
ARPDOBJ=arpd.o
RTSTATOBJ=rtstat.o
# Union of all objects; only used by the clean target.
ALLOBJ=$(SSOBJ) $(NSTATOBJ) $(IFSTATOBJ) $(RTACCTOBJ) $(ARPDOBJ) $(RTSTATOBJ)
# Programs built by "all" and copied by "install".
TARGETS=ss nstat ifstat rtacct arpd rtstat
all: $(TARGETS)
# ss has no explicit recipe here; it is linked by make's built-in rule.
ss: $(SSOBJ) $(LIBUTIL)
nstat: $(NSTATOBJ)
$(CC) $(CFLAGS) $(LDFLAGS) -o nstat $(NSTATOBJ) -lm
ifstat: $(IFSTATOBJ)
$(CC) $(CFLAGS) $(LDFLAGS) -o ifstat $(IFSTATOBJ) $(LIBNETLINK) -lm
rtacct: $(RTACCTOBJ)
$(CC) $(CFLAGS) $(LDFLAGS) -o rtacct $(RTACCTOBJ) $(LIBNETLINK) -lm
# arpd additionally links against the db library (-ldb).
arpd: $(ARPDOBJ)
$(CC) $(CFLAGS) $(LDFLAGS) -o arpd $(ARPDOBJ) $(LIBNETLINK) -ldb
rtstat: $(RTSTATOBJ)
$(CC) $(CFLAGS) $(LDFLAGS) -o rtstat $(RTSTATOBJ)
# ssfilter.c is generated from the bison grammar and removed again by clean.
ssfilter.c: ssfilter.y
bison ssfilter.y -o ssfilter.c
# Installs the stripped (-s) binaries under $(DESTDIR)$(SBINDIR).
install: all
install -m 0755 -s $(TARGETS) $(DESTDIR)$(SBINDIR)
clean:
rm -f $(ALLOBJ) $(TARGETS) ssfilter.c

View File

@ -0,0 +1,846 @@
/*
* arpd.c ARP helper daemon.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*/
#include <stdio.h>
#include <syslog.h>
#include <malloc.h>
#include <string.h>
#include <unistd.h>
#include <stdlib.h>
#include <netdb.h>
#include <db.h>
#include <sys/ioctl.h>
#include <sys/poll.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/uio.h>
#include <sys/socket.h>
#include <sys/time.h>
#include <time.h>
#include <signal.h>
#include <linux/if.h>
#include <linux/if_arp.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <linux/if_packet.h>
#include <linux/filter.h>
#include "libnetlink.h"
#include "utils.h"
/* Defined here but never set in the code shown; presumably satisfies an
 * extern used by the shared utility code -- TODO confirm. */
int resolve_hosts;
/* Database handle, opened in main() with dbopen(). */
DB *dbase;
/* Database file name; overridden by option -b. */
char *dbname = "/var/lib/arpd/arpd.db";
/* Interfaces named on the command line: count, indices and names
 * (ifnum == 0 means "all interfaces", see handle_if()). */
int ifnum;
int *ifvec;
char **ifnames;
/* Database key: interface index plus IPv4 address. */
struct dbkey
{
__u32 iface;
__u32 addr;
};
/*
 * Negative ("host did not answer") records, as written by
 * prepare_neg_entry(): byte 0 is the 0xFF marker, byte 1 a counter,
 * bytes 2..5 a timestamp stored most significant byte first.
 */
#define IS_NEG(x) (((__u8*)(x))[0] == 0xFF)
#define NEG_TIME(x) (((x)[2]<<24)|((x)[3]<<16)|((x)[4]<<8)|(x)[5])
#define NEG_AGE(x) ((__u32)time(NULL) - NEG_TIME((__u8*)x))
#define NEG_VALID(x) (NEG_AGE(x) < negative_timeout)
#define NEG_CNT(x) (((__u8*)(x))[1])
/* Netlink handle (neighbour messages). */
struct rtnl_handle rth;
/* poll() set: [0] is the packet socket, [1] the netlink socket (see main()). */
struct pollfd pset[2];
/* UDP socket used for interface ioctls and to pick a source address in send_probe(). */
int udp_sock = -1;
/* Flags raised by the signal handlers and consumed by the main loop. */
volatile int do_exit;
volatile int do_sync;
volatile int do_stats;
/* Event counters, logged by send_stats(). */
struct {
unsigned long arp_new;
unsigned long arp_change;
unsigned long app_recv;
unsigned long app_success;
unsigned long app_bad;
unsigned long app_neg;
unsigned long app_suppressed;
unsigned long kern_neg;
unsigned long kern_new;
unsigned long kern_change;
unsigned long probes_sent;
unsigned long probes_suppressed;
} stats;
/* Option -a. */
int active_probing;
/* Option -n; compared against NEG_AGE(), which is derived from time(). */
int negative_timeout = 60;
/* Option -k. */
int no_kernel_broadcasts;
/* Rate limiter parameters for queue_active_probe(), which accounts elapsed
 * time in milliseconds; set from options -R and -B. */
int broadcast_rate = 1000;
int broadcast_burst = 3000;
/* Print the command synopsis on stderr and exit with status 1. */
void usage(void)
{
	fputs("Usage: arpd [ -lk ] [ -a N ] [ -b dbase ] [ -f file ] [ interfaces ]\n",
	      stderr);
	exit(1);
}
/*
 * Decide whether an interface is served by this daemon.
 *
 * Returns 1 if no interface list was given or ifindex is on the list,
 * 0 otherwise.
 */
int handle_if(int ifindex)
{
	int idx = 0;

	if (!ifnum)
		return 1;

	while (idx < ifnum) {
		if (ifvec[idx] == ifindex)
			return 1;
		idx++;
	}
	return 0;
}
/* Set once the per-interface sysctls have been changed (and need undoing). */
int sysctl_adjusted;

/*
 * Write the neighbour sysctls for every interface named on the command
 * line.
 *
 * With active probing enabled, mcast_solicit is set to 0 when kernel
 * broadcasts are disabled, else to 1 for active_probing >= 2 and to
 * 3 - active_probing otherwise. app_solicit is always set, to 1 for
 * active_probing <= 1 and to active_probing otherwise.
 *
 * Does nothing when no interfaces were given. Files that cannot be opened
 * are skipped silently, as are interface names too long to form a path.
 */
void do_sysctl_adjustments(void)
{
	int i;

	if (!ifnum)
		return;

	for (i=0; i<ifnum; i++) {
		char path[128];
		FILE *fp;
		int n;

		if (active_probing) {
			/* Interface names come from argv: bound the path. */
			n = snprintf(path, sizeof(path),
				     "/proc/sys/net/ipv4/neigh/%s/mcast_solicit", ifnames[i]);
			if (n > 0 && (size_t)n < sizeof(path) &&
			    (fp = fopen(path, "w")) != NULL) {
				if (no_kernel_broadcasts)
					fputs("0\n", fp);
				else
					fprintf(fp, "%d\n", active_probing>=2 ? 1 : 3-active_probing);
				fclose(fp);
			}
		}

		n = snprintf(path, sizeof(path),
			     "/proc/sys/net/ipv4/neigh/%s/app_solicit", ifnames[i]);
		if (n > 0 && (size_t)n < sizeof(path) &&
		    (fp = fopen(path, "w")) != NULL) {
			fprintf(fp, "%d\n", active_probing<=1 ? 1 : active_probing);
			fclose(fp);
		}
	}
	sysctl_adjusted = 1;
}
/*
 * Reset the neighbour sysctls touched by do_sysctl_adjustments():
 * mcast_solicit back to 3 (only when active probing is enabled) and
 * app_solicit back to 0, for every interface named on the command line.
 *
 * Does nothing unless adjustments were applied. Files that cannot be
 * opened are skipped silently, as are interface names too long to form a
 * path.
 */
void undo_sysctl_adjustments(void)
{
	int i;

	if (!sysctl_adjusted)
		return;

	for (i=0; i<ifnum; i++) {
		char path[128];
		FILE *fp;
		int n;

		if (active_probing) {
			/* Interface names come from argv: bound the path. */
			n = snprintf(path, sizeof(path),
				     "/proc/sys/net/ipv4/neigh/%s/mcast_solicit", ifnames[i]);
			if (n > 0 && (size_t)n < sizeof(path) &&
			    (fp = fopen(path, "w")) != NULL) {
				fputs("3\n", fp);
				fclose(fp);
			}
		}

		n = snprintf(path, sizeof(path),
			     "/proc/sys/net/ipv4/neigh/%s/app_solicit", ifnames[i]);
		if (n > 0 && (size_t)n < sizeof(path) &&
		    (fp = fopen(path, "w")) != NULL) {
			fputs("0\n", fp);
			fclose(fp);
		}
	}
	sysctl_adjusted = 0;
}
/*
 * Broadcast one ARP request for addr on the interface ifindex.
 *
 * The interface must report hardware type ARPHRD_ETHER. The sender IP is
 * whatever local address the UDP socket obtains after being bound to the
 * device and connected towards addr; the sender hardware address is the
 * interface's own.
 *
 * Returns 0 on success (and counts the probe), -1 on any failure.
 */
int send_probe(int ifindex, __u32 addr)
{
	struct ifreq ifr;
	struct sockaddr_in dst;
	socklen_t len;
	unsigned char buf[256];
	struct arphdr *ah = (struct arphdr*)buf;
	unsigned char *p = (unsigned char *)(ah+1);
	struct sockaddr_ll sll;

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_ifindex = ifindex;
	if (ioctl(udp_sock, SIOCGIFNAME, &ifr))
		return -1;
	if (ioctl(udp_sock, SIOCGIFHWADDR, &ifr))
		return -1;
	if (ifr.ifr_hwaddr.sa_family != ARPHRD_ETHER)
		return -1;
	if (setsockopt(udp_sock, SOL_SOCKET, SO_BINDTODEVICE, ifr.ifr_name, strlen(ifr.ifr_name)+1) < 0)
		return -1;

	memset(&dst, 0, sizeof(dst));
	dst.sin_family = AF_INET;
	dst.sin_port = htons(1025);
	dst.sin_addr.s_addr = addr;
	if (connect(udp_sock, (struct sockaddr*)&dst, sizeof(dst)) < 0)
		return -1;
	/* dst is reused: it now receives the local address chosen above. */
	len = sizeof(dst);
	if (getsockname(udp_sock, (struct sockaddr*)&dst, &len) < 0)
		return -1;

	ah->ar_hrd = htons(ifr.ifr_hwaddr.sa_family);
	ah->ar_pro = htons(ETH_P_IP);
	ah->ar_hln = 6;
	ah->ar_pln = 4;
	ah->ar_op = htons(ARPOP_REQUEST);

	/* ARP payload: sender hw, sender IP, target hw, target IP. */
	memcpy(p, ifr.ifr_hwaddr.sa_data, ah->ar_hln);
	p += ah->ar_hln;

	memcpy(p, &dst.sin_addr, 4);
	p+=4;

	/*
	 * Destination link-layer address: every field not set explicitly
	 * must be zero, and sll_halen must describe sll_addr.
	 */
	memset(&sll, 0, sizeof(sll));
	sll.sll_family = AF_PACKET;
	sll.sll_ifindex = ifindex;
	sll.sll_protocol = htons(ETH_P_ARP);
	sll.sll_halen = ah->ar_hln;
	memset(sll.sll_addr, 0xFF, sizeof(sll.sll_addr));

	/* The target hw field is filled with the same all-ones bytes. */
	memcpy(p, &sll.sll_addr, ah->ar_hln);
	p+=ah->ar_hln;

	memcpy(p, &addr, 4);
	p+=4;

	if (sendto(pset[0].fd, buf, p-buf, 0, (struct sockaddr*)&sll, sizeof(sll)) < 0)
		return -1;
	stats.probes_sent++;
	return 0;
}
/*
 * Rate-limited front end to send_probe().
 *
 * Be very tough on sending probes: with the default settings 1 per second
 * with burst of 3. This is a token bucket counted in milliseconds: the
 * bucket holds at most broadcast_burst, gains one unit per elapsed
 * millisecond and each probe costs broadcast_rate.
 *
 * Elapsed time is measured from the previous call, whatever its outcome,
 * so that no interval is ever credited more than once.
 *
 * Returns 0 if a probe was sent, -1 if it was suppressed by the limiter or
 * send_probe() failed (both are counted as suppressed).
 */
int queue_active_probe(int ifindex, __u32 addr)
{
	static long long prev_ms;
	static int started;
	static int buckets;
	struct timeval now;
	long long now_ms;

	gettimeofday(&now, NULL);
	now_ms = (long long)now.tv_sec*1000 + now.tv_usec/1000;

	if (started) {
		long long level = now_ms - prev_ms;

		/* A clock stepped backwards earns nothing. */
		if (level < 0)
			level = 0;
		level += buckets;
		if (level > broadcast_burst)
			level = broadcast_burst;
		buckets = level;
	} else {
		buckets = broadcast_burst;
		started = 1;
	}
	prev_ms = now_ms;

	if (buckets >= broadcast_rate && !send_probe(ifindex, addr)) {
		buckets -= broadcast_rate;
		return 0;
	}
	stats.probes_suppressed++;
	return -1;
}
/*
 * Send an RTM_NEWNEIGH request that gives the kernel a link-layer address
 * for an IPv4 neighbour, in state NUD_STALE.
 *
 * ifindex/addr: interface and IPv4 address of the neighbour.
 * lla/llalen:   link-layer address and its length.
 *
 * Returns non-zero when rtnl_send() reports a result <= 0, zero otherwise.
 */
int respond_to_kernel(int ifindex, __u32 addr, char *lla, int llalen)
{
	struct {
		struct nlmsghdr n;
		struct ndmsg ndm;
		char buf[256];
	} req;
	int sent;

	/* Only the two headers are cleared; attributes are appended below. */
	memset(&req.n, 0, sizeof(req.n));
	memset(&req.ndm, 0, sizeof(req.ndm));

	req.n.nlmsg_type = RTM_NEWNEIGH;
	req.n.nlmsg_flags = NLM_F_REQUEST;
	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ndmsg));

	req.ndm.ndm_family = AF_INET;
	req.ndm.ndm_ifindex = ifindex;
	req.ndm.ndm_state = NUD_STALE;
	req.ndm.ndm_type = RTN_UNICAST;

	addattr_l(&req.n, sizeof(req), NDA_DST, &addr, 4);
	addattr_l(&req.n, sizeof(req), NDA_LLADDR, lla, llalen);

	sent = rtnl_send(&rth, (char*)&req, req.n.nlmsg_len);
	return sent <= 0;
}
/*
 * Fill a 6-byte negative record: 0xFF marker, a zeroed counter byte, then
 * stamp stored most significant byte first.
 */
void prepare_neg_entry(__u8 *ndata, __u32 stamp)
{
	int byte;

	ndata[0] = 0xFF;
	ndata[1] = 0;
	for (byte = 0; byte < 4; byte++)
		ndata[2 + byte] = stamp >> (24 - 8*byte);
}
/*
 * Process one netlink message from the kernel.
 *
 * NLMSG_DONE syncs the database and applies the sysctl adjustments.
 * RTM_GETNEIGH (kernel asks for help) is answered from the database or
 * triggers an active probe; RTM_NEWNEIGH (kernel reports a neighbour
 * change) updates the database. Everything else is ignored.
 *
 * Returns -1 only when the message is too short to hold an ndmsg;
 * 0 in every other case, including messages that were ignored.
 */
int do_one_request(struct nlmsghdr *n)
{
struct ndmsg *ndm = NLMSG_DATA(n);
int len = n->nlmsg_len;
struct rtattr * tb[NDA_MAX+1];
struct dbkey key;
DBT dbkey, dbdat;
/* Set when an existing, still valid negative record should have its probe counter bumped. */
int do_acct = 0;
if (n->nlmsg_type == NLMSG_DONE) {
dbase->sync(dbase, 0);
/* Now we have at least mirror of kernel db, so that
* may start real resolution.
*/
do_sysctl_adjustments();
return 0;
}
if (n->nlmsg_type != RTM_GETNEIGH && n->nlmsg_type != RTM_NEWNEIGH)
return 0;
len -= NLMSG_LENGTH(sizeof(*ndm));
if (len < 0)
return -1;
/* Only plain IPv4 unicast entries on handled interfaces, with no flags
 * and at least one state bit other than NUD_NOARP, are of interest. */
if (ndm->ndm_family != AF_INET ||
(ifnum && !handle_if(ndm->ndm_ifindex)) ||
ndm->ndm_flags ||
ndm->ndm_type != RTN_UNICAST ||
!(ndm->ndm_state&~NUD_NOARP))
return 0;
memset(tb, 0, sizeof(tb));
parse_rtattr(tb, NDA_MAX, NDA_RTA(ndm), len);
if (!tb[NDA_DST])
return 0;
key.iface = ndm->ndm_ifindex;
memcpy(&key.addr, RTA_DATA(tb[NDA_DST]), 4);
dbkey.data = &key;
dbkey.size = sizeof(key);
/* A failed lookup is represented as dbdat.data == NULL from here on. */
if (dbase->get(dbase, &dbkey, &dbdat, 0) != 0) {
dbdat.data = 0;
dbdat.size = 0;
}
if (n->nlmsg_type == RTM_GETNEIGH) {
if (!(n->nlmsg_flags&NLM_F_REQUEST))
return 0;
if (!(ndm->ndm_state&(NUD_PROBE|NUD_INCOMPLETE))) {
stats.app_bad++;
return 0;
}
if (ndm->ndm_state&NUD_PROBE) {
/* If we get this, kernel still has some valid
* address, but unicast probing failed and host
* is either dead or changed its mac address.
* Kernel is going to initiate broadcast resolution.
* OK, we invalidate our information as well.
*/
if (dbdat.data && !IS_NEG(dbdat.data))
stats.app_neg++;
dbase->del(dbase, &dbkey, 0);
} else {
/* If we get this kernel does not have any information.
* If we have something tell this to kernel. */
stats.app_recv++;
if (dbdat.data && !IS_NEG(dbdat.data)) {
stats.app_success++;
respond_to_kernel(key.iface, key.addr, dbdat.data, dbdat.size);
return 0;
}
/* We have nothing to tell. */
/* If we have recent negative entry, be silent. */
if (dbdat.data && NEG_VALID(dbdat.data)) {
if (NEG_CNT(dbdat.data) >= active_probing) {
stats.app_suppressed++;
return 0;
}
do_acct = 1;
}
}
/* Probe ourselves; on success count the probe in the negative record.
 * NOTE(review): the counter is incremented in the buffer returned by
 * get() before it is handed back to put() -- confirm the db library
 * allows that. */
if (active_probing &&
queue_active_probe(ndm->ndm_ifindex, key.addr) == 0 &&
do_acct) {
NEG_CNT(dbdat.data)++;
dbase->put(dbase, &dbkey, &dbdat, 0);
}
} else if (n->nlmsg_type == RTM_NEWNEIGH) {
if (n->nlmsg_flags&NLM_F_REQUEST)
return 0;
if (ndm->ndm_state&NUD_FAILED) {
/* Kernel was not able to resolve. Host is dead.
* Create negative entry if it is not present
* or renew it if it is too old. */
if (!dbdat.data ||
!IS_NEG(dbdat.data) ||
!NEG_VALID(dbdat.data)) {
__u8 ndata[6];
stats.kern_neg++;
prepare_neg_entry(ndata, time(NULL));
dbdat.data = ndata;
dbdat.size = sizeof(ndata);
dbase->put(dbase, &dbkey, &dbdat, 0);
}
} else if (tb[NDA_LLADDR]) {
if (dbdat.data && !IS_NEG(dbdat.data)) {
/* NOTE(review): compares dbdat.size bytes without checking that the
 * attribute payload is that long -- confirm sizes always agree. */
if (memcmp(RTA_DATA(tb[NDA_LLADDR]), dbdat.data, dbdat.size) == 0)
return 0;
stats.kern_change++;
} else {
stats.kern_new++;
}
dbdat.data = RTA_DATA(tb[NDA_LLADDR]);
dbdat.size = RTA_PAYLOAD(tb[NDA_LLADDR]);
dbase->put(dbase, &dbkey, &dbdat, 0);
}
}
return 0;
}
/*
 * Ask the kernel for a dump of its IPv4 neighbour table. The replies
 * arrive on the netlink socket and are handled like any other kernel
 * message. A request that cannot be sent is reported on stderr.
 */
void load_initial_table(void)
{
	if (rtnl_wilddump_request(&rth, AF_INET, RTM_GETNEIGH) < 0)
		perror("Cannot send dump request");
}
/*
 * Read one datagram from the netlink socket (non-blocking) and feed every
 * message in it to do_one_request().
 *
 * Datagrams whose sender address has an unexpected size or a non-zero
 * nl_pid are dropped. Processing of a datagram stops at the first
 * malformed message or the first do_one_request() failure.
 */
void get_kern_msg(void)
{
	int status;
	struct nlmsghdr *h;
	struct sockaddr_nl nladdr;
	struct iovec iov;
	char buf[8192];
	struct msghdr msg;

	memset(&nladdr, 0, sizeof(nladdr));
	iov.iov_base = buf;
	iov.iov_len = sizeof(buf);

	/* Fields are set by name: the member order of msghdr is not portable. */
	memset(&msg, 0, sizeof(msg));
	msg.msg_name = (void*)&nladdr;
	msg.msg_namelen = sizeof(nladdr);
	msg.msg_iov = &iov;
	msg.msg_iovlen = 1;

	status = recvmsg(rth.fd, &msg, MSG_DONTWAIT);

	if (status <= 0)
		return;

	if (msg.msg_namelen != sizeof(nladdr))
		return;

	if (nladdr.nl_pid)
		return;

	/*
	 * status is compared as a signed value: the aligned step below can
	 * take it below zero, which must end the loop.
	 */
	for (h = (struct nlmsghdr*)buf; status >= (int)sizeof(*h); ) {
		int len = h->nlmsg_len;

		if (len < (int)sizeof(*h) || len > status)
			return;

		if (do_one_request(h) < 0)
			return;

		status -= NLMSG_ALIGN(len);
		h = (struct nlmsghdr*)((char*)h + NLMSG_ALIGN(len));
	}
}
/*
 * Receive one ARP packet from the packet socket (non-blocking) and record
 * the sender's IP -> hardware address mapping in the database.
 *
 * Only IPv4 ARP requests and replies on handled interfaces are accepted.
 * Packets with sender IP 0 are ignored. The database is written only when
 * the mapping is new or differs from the stored one.
 */
void get_arp_pkt(void)
{
	unsigned char buf[1024];
	struct sockaddr_ll sll;
	socklen_t sll_len = sizeof(sll);
	struct arphdr *a = (struct arphdr*)buf;
	struct dbkey key;
	DBT dbkey, dbdat;
	int n;

	n = recvfrom(pset[0].fd, buf, sizeof(buf), MSG_DONTWAIT, (struct sockaddr*)&sll, &sll_len);
	if (n < 0) {
		if (errno != EINTR && errno != EAGAIN)
			syslog(LOG_ERR, "recvfrom: %m");
		return;
	}

	if (ifnum && !handle_if(sll.sll_ifindex))
		return;

	/* Sanity checks */
	if ((size_t)n < sizeof(*a) ||
	    (a->ar_op != htons(ARPOP_REQUEST) &&
	     a->ar_op != htons(ARPOP_REPLY)) ||
	    a->ar_pln != 4 ||
	    a->ar_pro != htons(ETH_P_IP) ||
	    a->ar_hln != sll.sll_halen ||
	    sizeof(*a) + 2*4 + 2*a->ar_hln > (size_t)n)
		return;

	/* The sender IP follows the sender hardware address. */
	key.iface = sll.sll_ifindex;
	memcpy(&key.addr, (char*)(a+1) + a->ar_hln, 4);

	/* DAD message, ignore. */
	if (key.addr == 0)
		return;

	dbkey.data = &key;
	dbkey.size = sizeof(key);

	if (dbase->get(dbase, &dbkey, &dbdat, 0) == 0 && !IS_NEG(dbdat.data)) {
		/* Unchanged only if length and bytes both agree. */
		if (dbdat.size == a->ar_hln &&
		    memcmp(dbdat.data, a+1, dbdat.size) == 0)
			return;
		stats.arp_change++;
	} else {
		stats.arp_new++;
	}

	dbdat.data = a+1;
	dbdat.size = a->ar_hln;
	dbase->put(dbase, &dbkey, &dbdat, 0);
}
void catch_signal(int sig, void (*handler)(int))
{
struct sigaction sa;
memset(&sa, 0, sizeof(sa));
sa.sa_handler = handler;
#ifdef SA_INTERRUPT
sa.sa_flags = SA_INTERRUPT;
#endif
sigaction(sig, &sa, NULL);
}
#include <setjmp.h>
/* Jump target set by sigsetjmp() in main(); the signal handlers jump to it. */
sigjmp_buf env;
/* Non-zero while main() allows the signal handlers to siglongjmp(). */
volatile int in_poll;
/* Signal handler: request termination; jump back to the main loop if it allows that. */
void sig_exit(int signo)
{
	do_exit = 1;
	if (!in_poll)
		return;
	siglongjmp(env, 1);
}
/* Signal handler: request a database sync; jump back to the main loop if it allows that. */
void sig_sync(int signo)
{
	do_sync = 1;
	if (!in_poll)
		return;
	siglongjmp(env, 1);
}
/*
 * Signal handler: request a database sync and a statistics report; jump
 * back to the main loop if it allows that.
 */
void sig_stats(int signo)
{
	do_sync = 1;
	do_stats = 1;
	if (!in_poll)
		return;
	siglongjmp(env, 1);
}
/*
 * Log the event counters through syslog at LOG_INFO, as two records, then
 * clear the pending-report flag.
 */
void send_stats(void)
{
	/* Counters fed by received ARP packets and by kernel requests. */
	syslog(LOG_INFO,
	       "arp_rcv: n%lu c%lu app_rcv: tot %lu hits %lu bad %lu neg %lu sup %lu",
	       stats.arp_new,
	       stats.arp_change,
	       stats.app_recv,
	       stats.app_success,
	       stats.app_bad,
	       stats.app_neg,
	       stats.app_suppressed);

	/* Counters fed by kernel notifications and by our own probes. */
	syslog(LOG_INFO,
	       "kern: n%lu c%lu neg %lu arp_send: %lu rlim %lu",
	       stats.kern_new,
	       stats.kern_change,
	       stats.kern_neg,
	       stats.probes_sent,
	       stats.probes_suppressed);

	do_stats = 0;
}
/*
 * arpd entry point.
 *
 * Parses the options, resolves the interface names given as arguments,
 * opens the database and then does one of:
 *   -f file : load "ifindex ip mac" lines into the database and exit;
 *   -l      : list the database on stdout and exit;
 *   default : open the packet and netlink sockets, request the kernel
 *             neighbour table, fork into the background and serve events
 *             until told to exit by a signal.
 *
 * Exits with status 0 on a normal end and -1 on errors (usage() exits
 * with 1).
 */
int main(int argc, char **argv)
{
int opt;
int do_list = 0;
char *do_load = NULL;
while ((opt = getopt(argc, argv, "h?b:lf:a:n:kR:B:")) != EOF) {
switch (opt) {
case 'b':
dbname = optarg;
break;
case 'f':
if (do_load) {
fprintf(stderr, "Duplicate option -f\n");
usage();
}
do_load = optarg;
break;
case 'l':
do_list = 1;
break;
case 'a':
active_probing = atoi(optarg);
break;
case 'n':
negative_timeout = atoi(optarg);
break;
case 'k':
no_kernel_broadcasts = 1;
break;
case 'R':
/* The argument is a rate per second; it is stored as milliseconds per probe. */
if ((broadcast_rate = atoi(optarg)) <= 0 ||
(broadcast_rate = 1000/broadcast_rate) <= 0) {
fprintf(stderr, "Invalid ARP rate\n");
exit(-1);
}
break;
case 'B':
/* The argument is multiplied by 1000 before being stored. */
if ((broadcast_burst = atoi(optarg)) <= 0 ||
(broadcast_burst = 1000*broadcast_burst) <= 0) {
fprintf(stderr, "Invalid ARP burst\n");
exit(-1);
}
break;
case 'h':
case '?':
default:
usage();
}
}
argc -= optind;
argv += optind;
/* Remaining arguments are interface names. */
if (argc > 0) {
ifnum = argc;
ifnames = argv;
ifvec = malloc(argc*sizeof(int));
if (!ifvec) {
perror("malloc");
exit(-1);
}
}
if ((udp_sock = socket(AF_INET, SOCK_DGRAM, 0)) < 0) {
perror("socket");
exit(-1);
}
/* Translate every interface name into its index. */
if (ifnum) {
int i;
struct ifreq ifr;
memset(&ifr, 0, sizeof(ifr));
for (i=0; i<ifnum; i++) {
/* NOTE(review): strncpy leaves ifr_name unterminated when the name is
 * IFNAMSIZ characters or longer. */
strncpy(ifr.ifr_name, ifnames[i], IFNAMSIZ);
if (ioctl(udp_sock, SIOCGIFINDEX, &ifr)) {
perror("ioctl(SIOCGIFINDEX)");
exit(-1);;
}
ifvec[i] = ifr.ifr_ifindex;
}
}
dbase = dbopen(dbname, O_CREAT|O_RDWR, 0644, DB_HASH, NULL);
if (dbase == NULL) {
perror("db_open");
exit(-1);
}
/* -f: import entries from a text file ("-" or "--" means stdin). */
if (do_load) {
char buf[128];
FILE *fp;
struct dbkey k;
DBT dbkey, dbdat;
dbkey.data = &k;
dbkey.size = sizeof(k);
if (strcmp(do_load, "-") == 0 || strcmp(do_load, "--") == 0) {
fp = stdin;
} else if ((fp = fopen(do_load, "r")) == NULL) {
perror("fopen");
goto do_abort;
}
/* NOTE(review): fp is not closed on the do_abort paths below. */
buf[sizeof(buf)-1] = 0;
while (fgets(buf, sizeof(buf)-1, fp)) {
__u8 b1[6];
char ipbuf[128];
char macbuf[128];
/* Lines starting with '#' are comments. */
if (buf[0] == '#')
continue;
if (sscanf(buf, "%u%s%s", &k.iface, ipbuf, macbuf) != 3) {
fprintf(stderr, "Wrong format of input file \"%s\"\n", do_load);
goto do_abort;
}
/* Negative entries, as printed by -l, are not imported. */
if (strncmp(macbuf, "FAILED:", 7) == 0)
continue;
if (!inet_aton(ipbuf, (struct in_addr*)&k.addr)) {
fprintf(stderr, "Invalid IP address: \"%s\"\n", ipbuf);
goto do_abort;
}
dbdat.data = hexstring_a2n(macbuf, b1, 6);
if (dbdat.data == NULL)
goto do_abort;
dbdat.size = 6;
if (dbase->put(dbase, &dbkey, &dbdat, 0)) {
perror("hash->put");
goto do_abort;
}
}
dbase->sync(dbase, 0);
if (fp != stdin)
fclose(fp);
}
/* -l: dump the database, restricted to the handled interfaces. */
if (do_list) {
DBT dbkey, dbdat;
printf("%-8s %-15s %s\n", "#Ifindex", "IP", "MAC");
while (dbase->seq(dbase, &dbkey, &dbdat, R_NEXT) == 0) {
struct dbkey *key = dbkey.data;
if (handle_if(key->iface)) {
if (!IS_NEG(dbdat.data)) {
__u8 b1[18];
printf("%-8d %-15s %s\n",
key->iface,
inet_ntoa(*(struct in_addr*)&key->addr),
hexstring_n2a(dbdat.data, 6, b1, 18));
} else {
printf("%-8d %-15s FAILED: %dsec ago\n",
key->iface,
inet_ntoa(*(struct in_addr*)&key->addr),
NEG_AGE(dbdat.data));
}
}
}
}
/* Load and list are one-shot operations. */
if (do_load || do_list)
goto out;
/* Daemon mode from here on. */
pset[0].fd = socket(PF_PACKET, SOCK_DGRAM, 0);
if (pset[0].fd < 0) {
perror("socket");
exit(-1);
}
/* Bind the packet socket to ARP; to one interface only if exactly one was given. */
if (1) {
struct sockaddr_ll sll;
memset(&sll, 0, sizeof(sll));
sll.sll_family = AF_PACKET;
sll.sll_protocol = htons(ETH_P_ARP);
sll.sll_ifindex = (ifnum == 1 ? ifvec[0] : 0);
if (bind(pset[0].fd, (struct sockaddr*)&sll, sizeof(sll)) < 0) {
perror("bind");
goto do_abort;
}
}
if (rtnl_open(&rth, RTMGRP_NEIGH) < 0) {
perror("rtnl_open");
goto do_abort;
}
pset[1].fd = rth.fd;
load_initial_table();
/* Detach: the parent exits, the child moves to "/", points stdio at
 * /dev/null and starts a new session. */
if (1) {
int fd;
pid_t pid = fork();
if (pid > 0)
_exit(0);
if (pid < 0) {
perror("arpd: fork");
goto do_abort;
}
chdir("/");
fd = open("/dev/null", O_RDWR);
if (fd >= 0) {
dup2(fd, 0);
dup2(fd, 1);
dup2(fd, 2);
if (fd > 2)
close(fd);
}
setsid();
}
openlog("arpd", LOG_PID | LOG_CONS, LOG_DAEMON);
catch_signal(SIGINT, sig_exit);
catch_signal(SIGTERM, sig_exit);
catch_signal(SIGHUP, sig_sync);
catch_signal(SIGUSR1, sig_stats);
#define EVENTS (POLLIN|POLLPRI|POLLERR|POLLHUP)
pset[0].events = EVENTS;
pset[0].revents = 0;
pset[1].events = EVENTS;
pset[1].revents = 0;
/* The signal handlers siglongjmp() back to this point while in_poll is
 * set, which restarts the loop below so the flags are examined at once. */
sigsetjmp(env, 1);
for (;;) {
in_poll = 1;
if (do_exit)
break;
if (do_sync) {
/* Jumps are disabled for the duration of the database sync. */
in_poll = 0;
dbase->sync(dbase, 0);
do_sync = 0;
in_poll = 1;
}
if (do_stats)
send_stats();
if (poll(pset, 2, 30000) > 0) {
/* Jumps stay disabled while events are being processed. */
in_poll = 0;
if (pset[0].revents&EVENTS)
get_arp_pkt();
if (pset[1].revents&EVENTS)
get_kern_msg();
} else {
/* Timeout or error: schedule a database sync. */
do_sync = 1;
}
}
undo_sysctl_adjustments();
out:
dbase->close(dbase);
exit(0);
do_abort:
dbase->close(dbase);
exit(-1);
}

View File

@ -0,0 +1,729 @@
/*
* ifstat.c handy utility to read net interface statistics
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include <time.h>
#include <sys/time.h>
#include <fnmatch.h>
#include <sys/file.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/poll.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <signal.h>
#include <math.h>
#include <libnetlink.h>
#include <linux/netdevice.h>
#include <SNAPSHOT.h>
/* Run-time settings; they are not modified in the part of the file shown here. */
int dump_zeros = 0;
int reset_history = 0;
int ignore_history = 0;
int no_output = 0;
int no_update = 0;
int scan_interval = 0;
int time_constant = 0;
int show_errors = 0;
double W;
/* fnmatch() patterns selecting interfaces; an empty list matches all (see match()). */
char **patterns;
int npatterns;
/* Text of the "#..." header line of the raw tables read and written. */
char info_source[128];
/* Set when two loaded tables carry different header lines. */
int source_mismatch;
/*
 * Number of counters per interface.
 * NOTE(review): get_nlmsg() copies sizeof(ival) bytes out of the IFLA_STATS
 * attribute -- confirm that the attribute payload really has the size and
 * layout of struct net_device_stats on every target.
 */
#define MAXS (sizeof(struct net_device_stats)/sizeof(unsigned long))
/* Statistics of one interface; entries form a singly linked list. */
struct ifstat_ent
{
struct ifstat_ent *next;
char *name;
int ifindex;
/* Counters, their rates, and the counters as last read from the kernel. */
unsigned long long val[MAXS];
double rate[MAXS];
unsigned long ival[MAXS];
};
/* Current table (from the kernel or from a raw table file). */
struct ifstat_ent *kern_db;
/* History table consulted by dump_raw_db(). */
struct ifstat_ent *hist_db;
/*
 * Test an interface name against the selection patterns.
 *
 * Returns 1 if there are no patterns or fnmatch() accepts id for at least
 * one of them, 0 otherwise.
 */
int match(char *id)
{
	int idx = 0;

	if (!npatterns)
		return 1;

	while (idx < npatterns) {
		if (fnmatch(patterns[idx], id, 0) == 0)
			return 1;
		idx++;
	}
	return 0;
}
/*
 * Netlink dump callback: turn one RTM_NEWLINK message into an ifstat_ent
 * and push it onto the front of kern_db.
 *
 * Messages of other types, interfaces that are not up and messages
 * lacking a name or statistics attribute are skipped. Returns -1 if the
 * message is too short for an ifinfomsg, 0 otherwise. Aborts when memory
 * runs out.
 */
int get_nlmsg(struct sockaddr_nl *who, struct nlmsghdr *m, void *arg)
{
	struct ifinfomsg *ifi = NLMSG_DATA(m);
	struct rtattr * tb[IFLA_MAX+1];
	int len = m->nlmsg_len;
	struct ifstat_ent *n;
	int i;

	if (m->nlmsg_type != RTM_NEWLINK)
		return 0;

	len -= NLMSG_LENGTH(sizeof(*ifi));
	if (len < 0)
		return -1;

	if (!(ifi->ifi_flags&IFF_UP))
		return 0;

	memset(tb, 0, sizeof(tb));
	parse_rtattr(tb, IFLA_MAX, IFLA_RTA(ifi), len);
	if (tb[IFLA_IFNAME] == NULL || tb[IFLA_STATS] == NULL)
		return 0;

	n = malloc(sizeof(*n));
	if (!n)
		abort();
	n->ifindex = ifi->ifi_index;
	/* The name is used unconditionally later on, so treat failure like malloc's. */
	n->name = strdup(RTA_DATA(tb[IFLA_IFNAME]));
	if (!n->name)
		abort();
	/* NOTE(review): copies sizeof(n->ival) bytes without looking at the
	 * attribute's payload length -- confirm the two always agree. */
	memcpy(&n->ival, RTA_DATA(tb[IFLA_STATS]), sizeof(n->ival));
	memset(&n->rate, 0, sizeof(n->rate));
	for (i=0; i<MAXS; i++)
		n->val[i] = n->ival[i];
	n->next = kern_db;
	kern_db = n;
	return 0;
}
/*
 * Fill kern_db from a netlink link dump.
 *
 * get_nlmsg() pushes each entry onto the front of the list, so the list is
 * reversed afterwards to restore the order of the dump. Exits with status 1
 * if the netlink socket cannot be opened or the dump fails.
 */
void load_info(void)
{
	struct ifstat_ent *pending;
	struct rtnl_handle rth;

	if (rtnl_open(&rth, 0) < 0)
		exit(1);

	if (rtnl_wilddump_request(&rth, AF_INET, RTM_GETLINK) < 0) {
		perror("Cannot send dump request");
		exit(1);
	}

	if (rtnl_dump_filter(&rth, get_nlmsg, NULL, NULL, NULL) < 0) {
		fprintf(stderr, "Dump terminated\n");
		exit(1);
	}

	rtnl_close(&rth);

	/* Reverse the list in place. */
	pending = kern_db;
	kern_db = NULL;
	while (pending != NULL) {
		struct ifstat_ent *entry = pending;

		pending = pending->next;
		entry->next = kern_db;
		kern_db = entry;
	}
}
/*
 * Cut the space-terminated field starting at *cursor out of the line: the
 * separating space is overwritten with NUL and *cursor moves past it.
 * Aborts if no space is left in the line.
 */
static char *raw_table_field(char **cursor)
{
	char *field = *cursor;
	char *sep = strchr(field, ' ');

	if (sep == NULL)
		abort();
	*sep = 0;
	*cursor = sep + 1;
	return field;
}

/*
 * Read a raw table from fp and prepend its entries, in file order, to
 * kern_db.
 *
 * A line starting with '#' is a header: its text is stored in info_source,
 * and source_mismatch is set if a different header was stored before.
 * Every other line is "ifindex name " followed by MAXS pairs of
 * "value rate ", each field terminated by a space. Malformed lines and
 * allocation failure abort the program.
 */
void load_raw_table(FILE *fp)
{
	char buf[4096];
	struct ifstat_ent *loaded = NULL;	/* entries in reverse file order */
	struct ifstat_ent *n;

	while (fgets(buf, sizeof(buf), fp) != NULL) {
		char *pos = buf;
		char *field;
		int i;

		if (buf[0] == '#') {
			/* Drop the final character of the line (the newline). */
			buf[strlen(buf)-1] = 0;
			if (info_source[0] && strcmp(info_source, buf+1))
				source_mismatch = 1;
			strncpy(info_source, buf+1, sizeof(info_source)-1);
			continue;
		}

		n = malloc(sizeof(*n));
		if (n == NULL)
			abort();

		field = raw_table_field(&pos);
		if (sscanf(field, "%d", &n->ifindex) != 1)
			abort();

		n->name = strdup(raw_table_field(&pos));

		for (i=0; i<MAXS; i++) {
			unsigned rate;

			field = raw_table_field(&pos);
			if (sscanf(field, "%llu", n->val+i) != 1)
				abort();
			n->ival[i] = (unsigned long)n->val[i];

			field = raw_table_field(&pos);
			if (sscanf(field, "%u", &rate) != 1)
				abort();
			n->rate[i] = rate;
		}

		n->next = loaded;
		loaded = n;
	}

	/* Move the entries over, which puts them back into file order. */
	while (loaded != NULL) {
		n = loaded;
		loaded = loaded->next;
		n->next = kern_db;
		kern_db = n;
	}
}
/*
 * Write kern_db to fp in raw table format: a "#info_source" header, then
 * one "ifindex name value rate value rate ..." line per interface.
 *
 * Interfaces rejected by match() are left out unless to_hist is set; in
 * that case they are still written, using the values of the entry with the
 * same ifindex from hist_db when one is found. The search through hist_db
 * resumes after the last entry found.
 */
void dump_raw_db(FILE *fp, int to_hist)
{
	struct ifstat_ent *cur;
	struct ifstat_ent *hist_pos = hist_db;

	fprintf(fp, "#%s\n", info_source);

	for (cur = kern_db; cur != NULL; cur = cur->next) {
		unsigned long long *vals = cur->val;
		double *rates = cur->rate;
		int col;

		if (!match(cur->name)) {
			struct ifstat_ent *old;

			if (!to_hist)
				continue;

			old = hist_pos;
			while (old != NULL && old->ifindex != cur->ifindex)
				old = old->next;
			if (old != NULL) {
				vals = old->val;
				rates = old->rate;
				hist_pos = old->next;
			}
		}

		fprintf(fp, "%d %s ", cur->ifindex, cur->name);
		for (col = 0; col < MAXS; col++)
			fprintf(fp, "%llu %u ", vals[col], (unsigned)rates[col]);
		fprintf(fp, "\n");
	}
}
/*
 * Print counter vals[i] followed by rate rates[i] as two columns.
 *
 * The counter is scaled to "M" above 2^30 and to "K" above 2^20; the rate
 * is scaled to "M" above 2^20 and to "K" above 2^10.  Scaled values are
 * truncated, not rounded.
 */
void format_rate(FILE *fp, unsigned long long *vals, double *rates, int i)
{
	const unsigned long long count = vals[i];
	const double rate = rates[i];
	char temp[64];

	if (count > 1024*1024*1024) {
		fprintf(fp, "%7lluM ", count/(1024*1024));
	} else if (count > 1024*1024) {
		fprintf(fp, "%7lluK ", count/1024);
	} else {
		fprintf(fp, "%8llu ", count);
	}

	if (rate <= 1024) {
		fprintf(fp, "%-6u ", (unsigned)rate);
		return;
	}

	if (rate > 1024*1024)
		sprintf(temp, "%uM", (unsigned)(rate/(1024*1024)));
	else
		sprintf(temp, "%uK", (unsigned)(rate/1024));
	fprintf(fp, "%-6s ", temp);
}
/*
 * Print two counters, vals[i] and vals[k], as a pair of columns.
 *
 * Both are scaled to "M" above 2^30 and to "K" above 2^20 (truncating).
 * The first is printed right-aligned, the second left-aligned.
 */
void format_pair(FILE *fp, unsigned long long *vals, int i, int k)
{
	const unsigned long long first = vals[i];
	const unsigned long long second = vals[k];
	char temp[64];

	if (first > 1024*1024*1024) {
		fprintf(fp, "%7lluM ", first/(1024*1024));
	} else if (first > 1024*1024) {
		fprintf(fp, "%7lluK ", first/1024);
	} else {
		fprintf(fp, "%8llu ", first);
	}

	if (second <= 1024*1024) {
		fprintf(fp, "%-6u ", (unsigned)second);
		return;
	}

	if (second > 1024*1024*1024)
		sprintf(temp, "%uM", (unsigned)(second/(1024*1024)));
	else
		sprintf(temp, "%uK", (unsigned)(second/1024));
	fprintf(fp, "%-6s ", temp);
}
/* Emit one header row: a 15-wide lead column, then four "label/unit" cells. */
static void ifstat_head_row(FILE *fp, const char *lead, const char *const *cells)
{
	int c;

	fprintf(fp, "%-15s ", lead);
	for (c = 0; c < 3; c++)
		fprintf(fp, "%8s/%-6s ", cells[2*c], cells[2*c + 1]);
	fprintf(fp, "%8s/%-6s\n", cells[6], cells[7]);
}

/*
 * Print the "#<info_source>" line and the column headers.
 *
 * The traffic row is always printed.  Without show_errors a single
 * summary error row follows; with show_errors four detailed error rows
 * follow instead.
 */
void print_head(FILE *fp)
{
	static const char *const traffic[8] = {
		"RX Pkts", "Rate", "TX Pkts", "Rate",
		"RX Data", "Rate", "TX Data", "Rate",
	};
	static const char *const summary[8] = {
		"RX Errs", "Drop", "TX Errs", "Drop",
		"RX Over", "Rate", "TX Coll", "Rate",
	};
	static const char *const detail[4][8] = {
		{ "RX Errs", "Rate", "RX Drop", "Rate",
		  "RX Over", "Rate", "RX Leng", "Rate" },
		{ "RX Crc", "Rate", "RX Frm", "Rate",
		  "RX Fifo", "Rate", "RX Miss", "Rate" },
		{ "TX Errs", "Rate", "TX Drop", "Rate",
		  "TX Coll", "Rate", "TX Carr", "Rate" },
		{ "TX Abrt", "Rate", "TX Fifo", "Rate",
		  "TX Hear", "Rate", "TX Wind", "Rate" },
	};
	int row;

	fprintf(fp, "#%s\n", info_source);
	ifstat_head_row(fp, "Interface", traffic);

	if (!show_errors) {
		ifstat_head_row(fp, "", summary);
		return;
	}

	for (row = 0; row < 4; row++)
		ifstat_head_row(fp, "", detail[row]);
}
/*
 * Print the statistics rows of one interface.
 *
 * vals supplies the counters to show (absolute or incremental, chosen by
 * the caller); rates always come from n->rate.  The row layout mirrors
 * the headers produced by print_head().
 */
void print_one_if(FILE *fp, struct ifstat_ent *n, unsigned long long *vals)
{
	/* Counter indices shown on each of the four detailed error rows. */
	static const int detail_idx[4][4] = {
		{  4,  6, 11, 10 },
		{ 12, 13, 14, 15 },
		{  5,  7,  9, 17 },
		{ 16, 18, 19, 20 },
	};
	int row, col;

	fprintf(fp, "%-15s ", n->name);
	for (col = 0; col < 4; col++)
		format_rate(fp, vals, n->rate, col);
	fprintf(fp, "\n");

	if (!show_errors) {
		fprintf(fp, "%-15s ", "");
		format_pair(fp, vals, 4, 6);
		format_pair(fp, vals, 5, 7);
		format_rate(fp, vals, n->rate, 11);
		format_rate(fp, vals, n->rate, 9);
		fprintf(fp, "\n");
		return;
	}

	for (row = 0; row < 4; row++) {
		fprintf(fp, "%-15s ", "");
		for (col = 0; col < 4; col++)
			format_rate(fp, vals, n->rate, detail_idx[row][col]);
		fprintf(fp, "\n");
	}
}
/*
 * Print the header and then the absolute counters of every interface in
 * kern_db that match() accepts.
 */
void dump_kern_db(FILE *fp)
{
	struct ifstat_ent *ent = kern_db;

	print_head(fp);

	while (ent != NULL) {
		if (match(ent->name))
			print_one_if(fp, ent, ent->val);
		ent = ent->next;
	}
}
/*
 * Print the header and then, for every interface in kern_db accepted by
 * match(), the counters relative to the entry with the same ifindex in
 * hist_db.  Interfaces with no history entry are shown with their
 * absolute counters.
 */
void dump_incr_db(FILE *fp)
{
	struct ifstat_ent *cur;
	struct ifstat_ent *hist_pos = hist_db;

	print_head(fp);

	for (cur = kern_db; cur != NULL; cur = cur->next) {
		unsigned long long delta[MAXS];
		struct ifstat_ent *old;
		int k;

		for (k = 0; k < MAXS; k++)
			delta[k] = cur->val[k];

		/* The history cursor advances even for filtered interfaces. */
		old = hist_pos;
		while (old != NULL && old->ifindex != cur->ifindex)
			old = old->next;
		if (old != NULL) {
			for (k = 0; k < MAXS; k++)
				delta[k] -= old->val[k];
			hist_pos = old->next;
		}

		if (match(cur->name))
			print_one_if(fp, cur, delta);
	}
}
/* Number of forked client handlers that have not been reaped yet. */
static int children;

/* SIGCHLD handler; it intentionally performs no work. */
void sigchild(int signo)
{
	(void)signo;
}
/*
 * Fold a fresh kernel snapshot into the accumulated database.
 *
 * interval is the time since the previous snapshot in milliseconds.  For
 * every interface in kern_db that is also present in the new snapshot the
 * 64-bit counters are advanced by the raw counter increment and the
 * rates are updated from the new sample.
 *
 * All snapshot entries are released before returning, including those
 * that did not match any interface in kern_db.
 */
void update_db(int interval)
{
	struct ifstat_ent *n, *h;

	/* Park the accumulated list while load_info() fills kern_db anew. */
	n = kern_db;
	kern_db = NULL;
	load_info();
	h = kern_db;
	kern_db = n;

	for (n = kern_db; n; n = n->next) {
		struct ifstat_ent *h1;

		for (h1 = h; h1; h1 = h1->next) {
			int i;

			if (h1->ifindex != n->ifindex)
				continue;

			/*
			 * If any raw counter went backwards, restart the
			 * increments from zero.
			 */
			for (i = 0; i < MAXS; i++) {
				if ((long)(h1->ival[i] - n->ival[i]) < 0) {
					memset(n->ival, 0, sizeof(n->ival));
					break;
				}
			}

			for (i = 0; i < MAXS; i++) {
				double sample;
				unsigned long incr = h1->ival[i] - n->ival[i];

				n->val[i] += incr;
				n->ival[i] = h1->ival[i];
				/* Scale in floating point so incr*1000 cannot wrap. */
				sample = (double)incr * 1000 / interval;
				if (interval >= scan_interval) {
					n->rate[i] += W*(sample-n->rate[i]);
				} else if (interval >= 1000) {
					if (interval >= time_constant) {
						n->rate[i] = sample;
					} else {
						double w = W*(double)interval/scan_interval;
						n->rate[i] += w*(sample-n->rate[i]);
					}
				}
			}

			/* Release the snapshot entries up to and including h1. */
			while (h != h1) {
				struct ifstat_ent *tmp = h;

				h = h->next;
				free(tmp->name);
				free(tmp);
			}
			h = h1->next;
			free(h1->name);
			free(h1);
			break;
		}
	}

	/* Release snapshot entries that matched nothing in kern_db. */
	while (h) {
		struct ifstat_ent *tmp = h;

		h = h->next;
		free(tmp->name);
		free(tmp);
	}
}
/* Difference a - b between two struct timeval values, in milliseconds. */
#define T_DIFF(a,b) (((a).tv_sec-(b).tv_sec)*1000 + ((a).tv_usec-(b).tv_usec)/1000)

/*
 * Daemon main loop; never returns.
 *
 * fd is the listening socket.  The statistics are refreshed once per
 * scan_interval (milliseconds).  For each accepted client a child is
 * forked (at most 5 at a time) which brings the database up to date,
 * writes it to the client in raw format and exits.
 */
void server_loop(int fd)
{
	struct timeval snaptime;
	struct pollfd p;

	p.fd = fd;
	p.events = p.revents = POLLIN;

	snprintf(info_source, sizeof(info_source),
		 "%d.%lu sampling_interval=%d time_const=%d",
		 getpid(), (unsigned long)random(), scan_interval/1000, time_constant/1000);

	load_info();
	/* The initial load is the first snapshot; time the next one from here. */
	gettimeofday(&snaptime, NULL);

	for (;;) {
		int status;
		int tdiff;
		struct timeval now;

		gettimeofday(&now, NULL);
		tdiff = T_DIFF(now, snaptime);
		if (tdiff >= scan_interval) {
			update_db(tdiff);
			snaptime = now;
			tdiff = 0;
		}

		/* Sleep only for what is left of the current scan interval. */
		if (poll(&p, 1, scan_interval - tdiff) > 0
		    && (p.revents&POLLIN)) {
			int clnt = accept(fd, NULL, NULL);

			if (clnt >= 0) {
				pid_t pid;

				if (children >= 5) {
					close(clnt);
				} else if ((pid = fork()) != 0) {
					/* Parent, or fork failure (pid < 0). */
					if (pid>0)
						children++;
					close(clnt);
				} else {
					/* Child: serve this one client and exit. */
					FILE *fp = fdopen(clnt, "w");

					if (fp) {
						if (tdiff > 0)
							update_db(tdiff);
						dump_raw_db(fp, 0);
					}
					exit(0);
				}
			}
		}

		while (children && waitpid(-1, &status, WNOHANG) > 0)
			children--;
	}
}
/*
 * Check who is on the other end of the connected UNIX socket fd.
 *
 * Returns 0 when the peer credentials could be read and the peer uid is
 * either our own uid or 0; returns -1 otherwise.
 */
int verify_forging(int fd)
{
	struct ucred cred;
	socklen_t olen = sizeof(cred);

	if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, (void*)&cred, &olen) ||
	    olen < sizeof(cred))
		return -1;
	if (cred.uid == getuid() || cred.uid == 0)
		return 0;
	return -1;
}
static void usage(void) __attribute__((noreturn));

/* Print the command synopsis to stderr and exit with status -1. */
static void usage(void)
{
	/* The option list mirrors the getopt() string in main(), -e included. */
	fprintf(stderr,
"Usage: ifstat [ -h?vVzrnased:t: ] [ PATTERN [ PATTERN ] ]\n"
		);
	exit(-1);
}
/*
 * ifstat entry point.
 *
 * With -d SECS the program binds an abstract UNIX socket named
 * "ifstat<uid>", forks into the background and runs server_loop().
 *
 * Otherwise it loads the history file (unless both -a and -s are given),
 * obtains the current statistics from a running daemon or, failing that,
 * from the kernel, prints absolute or incremental counters, and finally
 * rewrites the history file unless -s was given.
 */
int main(int argc, char *argv[])
{
	char hist_name[128];
	struct sockaddr_un sun;
	FILE *hist_fp = NULL;
	int ch;
	int fd;

	while ((ch = getopt(argc, argv, "h?vVzrnasd:t:e")) != EOF) {
		switch(ch) {
		case 'z':
			dump_zeros = 1;
			break;
		case 'r':
			reset_history = 1;
			break;
		case 'a':
			ignore_history = 1;
			break;
		case 's':
			no_update = 1;
			break;
		case 'n':
			no_output = 1;
			break;
		case 'e':
			show_errors = 1;
			break;
		case 'd':
			scan_interval = 1000*atoi(optarg);
			break;
		case 't':
			if (sscanf(optarg, "%d", &time_constant) != 1 ||
			    time_constant <= 0) {
				fprintf(stderr, "ifstat: invalid time constant divisor\n");
				exit(-1);
			}
			break;
		case 'v':
		case 'V':
			printf("ifstat utility, iproute2-ss%s\n", SNAPSHOT);
			exit(0);
		case 'h':
		case '?':
		default:
			usage();
		}
	}

	argc -= optind;
	argv += optind;

	/* Abstract-namespace socket: sun_path starts with a NUL byte. */
	sun.sun_family = AF_UNIX;
	sun.sun_path[0] = 0;
	sprintf(sun.sun_path+1, "ifstat%d", getuid());

	if (scan_interval > 0) {
		/* Daemon mode. */
		if (time_constant == 0)
			time_constant = 60;
		time_constant *= 1000;
		W = 1 - 1/exp(log(10)*(double)scan_interval/time_constant);
		if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
			perror("ifstat: socket");
			exit(-1);
		}
		if (bind(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) < 0) {
			perror("ifstat: bind");
			exit(-1);
		}
		if (listen(fd, 5) < 0) {
			perror("ifstat: listen");
			exit(-1);
		}
		if (fork())
			exit(0);
		chdir("/");
		close(0); close(1); close(2); setsid();
		signal(SIGPIPE, SIG_IGN);
		signal(SIGCHLD, sigchild);
		server_loop(fd);
		exit(0);
	}

	patterns = argv;
	npatterns = argc;

	/* The environment value is data, never a format string. */
	if (getenv("IFSTAT_HISTORY"))
		snprintf(hist_name, sizeof(hist_name), "%s", getenv("IFSTAT_HISTORY"));
	else
		sprintf(hist_name, "/tmp/.ifstat.u%d", getuid());

	if (reset_history)
		unlink(hist_name);

	if (!ignore_history || !no_update) {
		struct stat stb;

		fd = open(hist_name, O_RDWR|O_CREAT|O_NOFOLLOW, 0600);
		if (fd < 0) {
			perror("ifstat: open history file");
			exit(-1);
		}
		if ((hist_fp = fdopen(fd, "r+")) == NULL) {
			perror("ifstat: fdopen history file");
			exit(-1);
		}
		if (flock(fileno(hist_fp), LOCK_EX)) {
			perror("ifstat: flock history file");
			exit(-1);
		}
		if (fstat(fileno(hist_fp), &stb) != 0) {
			perror("ifstat: fstat history file");
			exit(-1);
		}
		if (stb.st_nlink != 1 || stb.st_uid != getuid()) {
			fprintf(stderr, "ifstat: something is so wrong with history file, that I prefer not to proceed.\n");
			exit(-1);
		}
		if (!ignore_history) {
			FILE *tfp;
			/* Stays -1 when /proc/uptime cannot be read. */
			long uptime = -1;

			if ((tfp = fopen("/proc/uptime", "r")) != NULL) {
				if (fscanf(tfp, "%ld", &uptime) != 1)
					uptime = -1;
				fclose(tfp);
			}
			/* History written before the last boot is useless. */
			if (uptime >= 0 && time(NULL) >= stb.st_mtime+uptime) {
				fprintf(stderr, "ifstat: history is aged out, resetting\n");
				ftruncate(fileno(hist_fp), 0);
			}
		}

		load_raw_table(hist_fp);

		hist_db = kern_db;
		kern_db = NULL;
	}

	/* Try our own daemon first, then the one run by uid 0. */
	if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0 &&
	    (connect(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) == 0
	     || (strcpy(sun.sun_path+1, "ifstat0"),
		 connect(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) == 0))
	    && verify_forging(fd) == 0) {
		FILE *sfp = fdopen(fd, "r");

		if (sfp == NULL) {
			perror("ifstat: fdopen daemon socket");
			exit(-1);
		}
		load_raw_table(sfp);
		if (hist_db && source_mismatch) {
			fprintf(stderr, "ifstat: history is stale, ignoring it.\n");
			hist_db = NULL;
		}
		fclose(sfp);
	} else {
		if (fd >= 0)
			close(fd);
		if (hist_db && info_source[0] && strcmp(info_source, "kernel")) {
			fprintf(stderr, "ifstat: history is stale, ignoring it.\n");
			hist_db = NULL;
			info_source[0] = 0;
		}
		load_info();
		if (info_source[0] == 0)
			strcpy(info_source, "kernel");
	}

	if (!no_output) {
		if (ignore_history || hist_db == NULL)
			dump_kern_db(stdout);
		else
			dump_incr_db(stdout);
	}
	if (!no_update) {
		ftruncate(fileno(hist_fp), 0);
		rewind(hist_fp);
		dump_raw_db(hist_fp, 1);
		fflush(hist_fp);
	}
	exit(0);
}

View File

@ -0,0 +1,53 @@
#! /bin/bash

# Collect a snapshot of the networking state from /proc and from `ss`,
# pack it into a gzipped tarball and mail it, uuencoded, to the address
# given at the prompt.

echo -n "Send network configuration summary to [ENTER means kuznet@ms2.inr.ac.ru] "
IFS="" read mail || exit 1
[ -z "$mail" ] && mail=kuznet@ms2.inr.ac.ru

# mktemp creates a uniquely named directory atomically, so there is no
# window between checking for a free name and creating it.
tmppath=$(mktemp -d /tmp/netbug.XXXXXXXX) || exit 1
netbug=$(basename "$tmppath")

trap 'rm -rf "$tmppath" "$tmppath.tar.gz"' 0 SIGINT

mkdir "$tmppath/net" || exit 1

cat /proc/slabinfo > "$tmppath/slabinfo"
cat /proc/net/netstat > "$tmppath/net/netstat"
cat /proc/net/unix > "$tmppath/net/unix"
cat /proc/net/packet > "$tmppath/net/packet"
cat /proc/net/netlink > "$tmppath/net/netlink"
cat /proc/net/psched > "$tmppath/net/psched"
cat /proc/net/softnet_stat > "$tmppath/net/softnet_stat"
cat /proc/net/sockstat > "$tmppath/net/sockstat"
cat /proc/net/tcp > "$tmppath/net/tcp"
cat /proc/net/udp > "$tmppath/net/udp"
cat /proc/net/raw > "$tmppath/net/raw"
cat /proc/net/snmp > "$tmppath/net/snmp"

ss -aioem -D "$tmppath/tcpdiag"

# The IPv6 tables are collected only when /proc/net/tcp6 exists.
if [ -e /proc/net/tcp6 ]; then
	cat /proc/net/sockstat6 > "$tmppath/net/sockstat6"
	cat /proc/net/tcp6 > "$tmppath/net/tcp6"
	cat /proc/net/udp6 > "$tmppath/net/udp6"
	cat /proc/net/raw6 > "$tmppath/net/raw6"
	cat /proc/net/snmp6 > "$tmppath/net/snmp6"
fi

cd /tmp || exit 1
tar c "$netbug" | gzip -9c > "$netbug.tar.gz"
uuencode "$netbug.tar.gz" "$netbug.tar.gz" | mail -s "$netbug" "$mail"

echo "Sending to <$mail>; subject is $netbug"

exit 0

View File

@ -0,0 +1,614 @@
/*
* nstat.c handy utility to read counters /proc/net/netstat and snmp
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include <time.h>
#include <sys/time.h>
#include <fnmatch.h>
#include <sys/file.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/poll.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <signal.h>
#include <math.h>
#include <SNAPSHOT.h>
int dump_zeros = 0;
int reset_history = 0;
int ignore_history = 0;
int no_output = 0;
int no_update = 0;
int scan_interval = 0;
int time_constant = 0;
double W;
char **patterns;
int npatterns;
char info_source[128];
int source_mismatch;
/*
 * Open a procfs-style file read-only and return the descriptor.
 *
 * If the environment variable named by env is set, its value is the path
 * to open.  Otherwise the path is "<root>/<name>", where <root> is the
 * value of $PROC_ROOT or "/proc" when that is unset.
 *
 * Returns the result of open(): a descriptor, or -1 on failure.
 */
int generic_proc_open(char *env, char *name)
{
	char store[128];
	char *p = getenv(env);

	if (!p) {
		const char *root = getenv("PROC_ROOT");

		if (!root)
			root = "/proc";
		snprintf(store, sizeof(store), "%s/%s", root, name);
		p = store;
	}
	/* Open p, so that an explicit override is honoured. */
	return open(p, O_RDONLY);
}
/*
 * Open the netstat table: the path in $PROC_NET_NETSTAT if set, otherwise
 * "net/netstat" under the proc root.  Returns what generic_proc_open()
 * returns.
 */
int net_netstat_open(void)
{
	int fd = generic_proc_open("PROC_NET_NETSTAT", "net/netstat");

	return fd;
}
/*
 * Open the SNMP table: the path in $PROC_NET_SNMP if set, otherwise
 * "net/snmp" under the proc root.  Returns what generic_proc_open()
 * returns.
 */
int net_snmp_open(void)
{
	int fd = generic_proc_open("PROC_NET_SNMP", "net/snmp");

	return fd;
}
/*
 * Open the IPv6 SNMP table: the path in $PROC_NET_SNMP6 if set, otherwise
 * "net/snmp6" under the proc root.  Returns what generic_proc_open()
 * returns.
 */
int net_snmp6_open(void)
{
	int fd = generic_proc_open("PROC_NET_SNMP6", "net/snmp6");

	return fd;
}
/* One named counter, kept on a singly linked list. */
struct nstat_ent
{
struct nstat_ent *next;
char *id; /* counter name, allocated with strdup() */
unsigned long long val; /* accumulated value */
unsigned long ival; /* raw value as last read */
double rate; /* averaged rate maintained by update_db() */
};
/* Current counters. */
struct nstat_ent *kern_db;
/* Counters loaded from the history file; NULL when there is none. */
struct nstat_ent *hist_db;
/* Entries with these names are dropped when the tables are loaded. */
char *useless_numbers[] = {
	"IpForwarding", "IpDefaultTTL",
	"TcpRtoAlgorithm", "TcpRtoMin", "TcpRtoMax",
	"TcpMaxConn", "TcpCurrEstab"
};

/* Return 1 if id is listed in useless_numbers, 0 otherwise. */
int useless_number(char *id)
{
	char **entry = useless_numbers;
	char **end = useless_numbers +
		sizeof(useless_numbers)/sizeof(useless_numbers[0]);

	while (entry < end) {
		if (!strcmp(id, *entry))
			return 1;
		entry++;
	}
	return 0;
}
/*
 * Return 1 if id should be shown: either no patterns were given, or id
 * matches at least one of them under fnmatch().  Return 0 otherwise.
 */
int match(char *id)
{
	int idx = 0;

	if (npatterns == 0)
		return 1;

	while (idx < npatterns) {
		if (fnmatch(patterns[idx], id, 0) == 0)
			return 1;
		idx++;
	}
	return 0;
}
/*
 * Parse a "name value [rate]" table from fp and prepend the entries, in
 * file order, to kern_db.
 *
 * A line starting with '#' names the data source: it is compared with the
 * current info_source (setting source_mismatch when they differ) and then
 * stored in info_source.  Names listed in useless_numbers are skipped.
 *
 * A line without at least a name and a value, or memory exhaustion,
 * aborts the process.
 */
void load_good_table(FILE *fp)
{
	char buf[4096];
	struct nstat_ent *db = NULL;
	struct nstat_ent *n;

	while (fgets(buf, sizeof(buf), fp) != NULL) {
		int nr;
		unsigned long long val;
		double rate;
		char idbuf[256];

		if (buf[0] == '#') {
			/* Strip the newline only if there is one. */
			buf[strcspn(buf, "\n")] = 0;
			if (info_source[0] && strcmp(info_source, buf+1))
				source_mismatch = 1;
			strncpy(info_source, buf+1, sizeof(info_source)-1);
			info_source[sizeof(info_source)-1] = 0;
			continue;
		}

		/* The field width keeps the name inside idbuf. */
		nr = sscanf(buf, "%255s%llu%lg", idbuf, &val, &rate);
		if (nr < 2)
			abort();
		if (nr < 3)
			rate = 0;
		if (useless_number(idbuf))
			continue;

		if ((n = malloc(sizeof(*n))) == NULL)
			abort();
		n->id = strdup(idbuf);
		if (n->id == NULL)
			abort();
		n->ival = (unsigned long)val;
		n->val = val;
		n->rate = rate;

		/* Collected in reverse; the loop below restores file order. */
		n->next = db;
		db = n;
	}

	while (db) {
		n = db;
		db = db->next;
		n->next = kern_db;
		kern_db = n;
	}
}
/*
 * Parse a table made of line pairs
 *     "Prefix: Name1 Name2 ...\n"
 *     "Prefix: val1 val2 ...\n"
 * from fp.  Each counter is stored under the id "<Prefix><Name>" and the
 * entries are prepended to kern_db, except those listed in
 * useless_numbers, which are freed.
 *
 * Malformed input (missing ':' or value line, an id that does not fit the
 * id buffer, more values than names) or memory exhaustion aborts the
 * process.
 */
void load_ugly_table(FILE *fp)
{
	char buf[4096];
	struct nstat_ent *db = NULL;
	struct nstat_ent *n;

	while (fgets(buf, sizeof(buf), fp) != NULL) {
		char idbuf[256];
		int off;
		char *p;

		p = strchr(buf, ':');
		if (!p)
			abort();
		*p = 0;
		off = strlen(buf);
		if (off >= (int)sizeof(idbuf))
			abort();
		strcpy(idbuf, buf);

		/* Step over ": "; do not run past the end of a bare "Prefix:". */
		p += p[1] ? 2 : 1;

		/* Header line: one new entry per name, pushed onto db. */
		while (*p) {
			char *next;

			if ((next = strchr(p, ' ')) != NULL)
				*next++ = 0;
			else if ((next = strchr(p, '\n')) != NULL)
				*next++ = 0;
			if (off + strlen(p) >= sizeof(idbuf))
				abort();
			strcpy(idbuf+off, p);
			n = malloc(sizeof(*n));
			if (!n)
				abort();
			n->id = strdup(idbuf);
			if (!n->id)
				abort();
			n->rate = 0;
			n->next = db;
			db = n;
			/* No terminator found: this was the last name. */
			if (!next)
				break;
			p = next;
		}

		/*
		 * Value line: db holds the names last-first, so the values
		 * are consumed from the right-hand end of the line.
		 */
		n = db;
		if (fgets(buf, sizeof(buf), fp) == NULL)
			abort();
		do {
			if (!n)
				abort();
			p = strrchr(buf, ' ');
			if (!p)
				abort();
			*p = 0;
			if (sscanf(p+1, "%lu", &n->ival) != 1)
				abort();
			n->val = n->ival;
			/* Trick to skip "dummy" trailing ICMP MIB in 2.4 */
			if (strcmp(idbuf, "IcmpOutAddrMaskReps") == 0)
				idbuf[5] = 0;
			else
				n = n->next;
		} while (p > buf + off + 2);
	}

	while (db) {
		n = db;
		db = db->next;
		if (useless_number(n->id)) {
			free(n->id);
			free(n);
		} else {
			n->next = kern_db;
			kern_db = n;
		}
	}
}
/*
 * Load the SNMP table into kern_db with load_ugly_table().  Nothing is
 * loaded when the table cannot be opened as a stream.
 */
void load_snmp(void)
{
	int fd = net_snmp_open();
	FILE *fp = fdopen(fd, "r");

	if (fp == NULL)
		return;

	load_ugly_table(fp);
	fclose(fp);
}
/*
 * Load the IPv6 SNMP table into kern_db with load_good_table().  Nothing
 * is loaded when the table cannot be opened as a stream.
 */
void load_snmp6(void)
{
	int fd = net_snmp6_open();
	FILE *fp = fdopen(fd, "r");

	if (fp == NULL)
		return;

	load_good_table(fp);
	fclose(fp);
}
/*
 * Load the netstat table into kern_db with load_ugly_table().  Nothing is
 * loaded when the table cannot be opened as a stream.
 */
void load_netstat(void)
{
	int fd = net_netstat_open();
	FILE *fp = fdopen(fd, "r");

	if (fp == NULL)
		return;

	load_ugly_table(fp);
	fclose(fp);
}
/*
 * Write kern_db to fp: a "#<info_source>" line, then one line per
 * counter with its name, value and rate.
 *
 * Counters whose value and rate are both zero are skipped unless
 * dump_zeros is set.  Counters rejected by match() are skipped unless
 * to_hist is set; in that case the value of the same-named entry in
 * hist_db (if one is found) is written instead of the current value.
 */
void dump_kern_db(FILE *fp, int to_hist)
{
	struct nstat_ent *cur;
	struct nstat_ent *hist_pos = hist_db;

	fprintf(fp, "#%s\n", info_source);

	for (cur = kern_db; cur != NULL; cur = cur->next) {
		unsigned long long shown = cur->val;

		if (!dump_zeros && !shown && !cur->rate)
			continue;

		if (!match(cur->id)) {
			struct nstat_ent *old;

			if (!to_hist)
				continue;

			/* The search resumes after the previous hit. */
			old = hist_pos;
			while (old != NULL && strcmp(old->id, cur->id) != 0)
				old = old->next;
			if (old != NULL) {
				shown = old->val;
				hist_pos = old->next;
			}
		}

		fprintf(fp, "%-32s%-16llu%6.1f\n", cur->id, shown, cur->rate);
	}
}
/*
 * Write to fp the increment of every counter in kern_db relative to the
 * same-named entry in hist_db.
 *
 * When the current value is below the historical one the increment is
 * reported as 0 and the line is tagged " (overflow)".  Zero lines are
 * skipped unless dump_zeros is set; counters rejected by match() are
 * skipped.
 */
void dump_incr_db(FILE *fp)
{
	struct nstat_ent *cur;
	struct nstat_ent *hist_pos = hist_db;

	fprintf(fp, "#%s\n", info_source);

	for (cur = kern_db; cur != NULL; cur = cur->next) {
		unsigned long long delta = cur->val;
		struct nstat_ent *old;
		int ovfl = 0;

		old = hist_pos;
		while (old != NULL && strcmp(old->id, cur->id) != 0)
			old = old->next;
		if (old != NULL) {
			if (delta < old->val) {
				ovfl = 1;
				delta = 0;
			} else {
				delta -= old->val;
			}
			hist_pos = old->next;
		}

		if (!dump_zeros && !delta && !cur->rate)
			continue;
		if (!match(cur->id))
			continue;

		fprintf(fp, "%-32s%-16llu%6.1f%s\n", cur->id, delta,
			cur->rate, ovfl?" (overflow)":"");
	}
}
/* Number of forked client handlers that have not been reaped yet. */
static int children;

/* SIGCHLD handler; it intentionally performs no work. */
void sigchild(int signo)
{
	(void)signo;
}
/*
 * Fold a fresh reading of the netstat, snmp6 and snmp tables into the
 * accumulated database.
 *
 * interval is the time since the previous reading in milliseconds.  For
 * every counter in kern_db that is also present in the new reading the
 * accumulated value is advanced by the raw increment and the rate is
 * updated from the new sample.
 *
 * All entries of the new reading are released before returning,
 * including those that did not match any counter in kern_db.
 */
void update_db(int interval)
{
	struct nstat_ent *n, *h;

	/* Park the accumulated list while the loaders fill kern_db anew. */
	n = kern_db;
	kern_db = NULL;

	load_netstat();
	load_snmp6();
	load_snmp();

	h = kern_db;
	kern_db = n;

	for (n = kern_db; n; n = n->next) {
		struct nstat_ent *h1;

		for (h1 = h; h1; h1 = h1->next) {
			double sample;
			unsigned long incr;

			if (strcmp(h1->id, n->id) != 0)
				continue;

			incr = h1->ival - n->ival;
			n->val += incr;
			n->ival = h1->ival;
			/* Scale in floating point so incr*1000 cannot wrap. */
			sample = (double)incr * 1000 / interval;
			if (interval >= scan_interval) {
				n->rate += W*(sample-n->rate);
			} else if (interval >= 1000) {
				if (interval >= time_constant) {
					n->rate = sample;
				} else {
					double w = W*(double)interval/scan_interval;
					n->rate += w*(sample-n->rate);
				}
			}

			/* Release the new entries up to and including h1. */
			while (h != h1) {
				struct nstat_ent *tmp = h;

				h = h->next;
				free(tmp->id);
				free(tmp);
			}
			h = h1->next;
			free(h1->id);
			free(h1);
			break;
		}
	}

	/* Release new entries that matched nothing in kern_db. */
	while (h) {
		struct nstat_ent *tmp = h;

		h = h->next;
		free(tmp->id);
		free(tmp);
	}
}
/* Difference a - b between two struct timeval values, in milliseconds. */
#define T_DIFF(a,b) (((a).tv_sec-(b).tv_sec)*1000 + ((a).tv_usec-(b).tv_usec)/1000)

/*
 * Daemon main loop; never returns.
 *
 * fd is the listening socket.  The counters are refreshed once per
 * scan_interval (milliseconds).  For each accepted client a child is
 * forked (at most 5 at a time) which brings the database up to date,
 * writes it to the client and exits.
 */
void server_loop(int fd)
{
	struct timeval snaptime;
	struct pollfd p;

	p.fd = fd;
	p.events = p.revents = POLLIN;

	snprintf(info_source, sizeof(info_source),
		 "%d.%lu sampling_interval=%d time_const=%d",
		 getpid(), (unsigned long)random(), scan_interval/1000, time_constant/1000);

	load_netstat();
	load_snmp6();
	load_snmp();
	/* The initial load is the first reading; time the next one from here. */
	gettimeofday(&snaptime, NULL);

	for (;;) {
		int status;
		int tdiff;
		struct timeval now;

		gettimeofday(&now, NULL);
		tdiff = T_DIFF(now, snaptime);
		if (tdiff >= scan_interval) {
			update_db(tdiff);
			snaptime = now;
			tdiff = 0;
		}

		/* Sleep only for what is left of the current scan interval. */
		if (poll(&p, 1, scan_interval - tdiff) > 0
		    && (p.revents&POLLIN)) {
			int clnt = accept(fd, NULL, NULL);

			if (clnt >= 0) {
				pid_t pid;

				if (children >= 5) {
					close(clnt);
				} else if ((pid = fork()) != 0) {
					/* Parent, or fork failure (pid < 0). */
					if (pid>0)
						children++;
					close(clnt);
				} else {
					/* Child: serve this one client and exit. */
					FILE *fp = fdopen(clnt, "w");

					if (fp) {
						if (tdiff > 0)
							update_db(tdiff);
						dump_kern_db(fp, 0);
					}
					exit(0);
				}
			}
		}

		while (children && waitpid(-1, &status, WNOHANG) > 0)
			children--;
	}
}
/*
 * Check who is on the other end of the connected UNIX socket fd.
 *
 * Returns 0 when the peer credentials could be read and the peer uid is
 * either our own uid or 0; returns -1 otherwise.
 */
int verify_forging(int fd)
{
	struct ucred cred;
	socklen_t olen = sizeof(cred);

	if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, (void*)&cred, &olen) ||
	    olen < sizeof(cred))
		return -1;
	if (cred.uid == getuid() || cred.uid == 0)
		return 0;
	return -1;
}
static void usage(void) __attribute__((noreturn));

/* Print the command synopsis to stderr and exit with status -1. */
static void usage(void)
{
	static const char synopsis[] =
"Usage: nstat [ -h?vVzrnasd:t: ] [ PATTERN [ PATTERN ] ]\n";

	fputs(synopsis, stderr);
	exit(-1);
}
/*
 * nstat entry point.
 *
 * With -d SECS the program binds an abstract UNIX socket named
 * "nstat<uid>", forks into the background and runs server_loop().
 *
 * Otherwise it loads the history file (unless both -a and -s are given),
 * obtains the current counters from a running daemon or, failing that,
 * from the proc tables, prints absolute or incremental values, and
 * finally rewrites the history file unless -s was given.
 */
int main(int argc, char *argv[])
{
	char hist_name[128];
	struct sockaddr_un sun;
	FILE *hist_fp = NULL;
	int ch;
	int fd;

	while ((ch = getopt(argc, argv, "h?vVzrnasd:t:")) != EOF) {
		switch(ch) {
		case 'z':
			dump_zeros = 1;
			break;
		case 'r':
			reset_history = 1;
			break;
		case 'a':
			ignore_history = 1;
			break;
		case 's':
			no_update = 1;
			break;
		case 'n':
			no_output = 1;
			break;
		case 'd':
			scan_interval = 1000*atoi(optarg);
			break;
		case 't':
			if (sscanf(optarg, "%d", &time_constant) != 1 ||
			    time_constant <= 0) {
				fprintf(stderr, "nstat: invalid time constant divisor\n");
				exit(-1);
			}
			break;
		case 'v':
		case 'V':
			printf("nstat utility, iproute2-ss%s\n", SNAPSHOT);
			exit(0);
		case 'h':
		case '?':
		default:
			usage();
		}
	}

	argc -= optind;
	argv += optind;

	/* Abstract-namespace socket: sun_path starts with a NUL byte. */
	sun.sun_family = AF_UNIX;
	sun.sun_path[0] = 0;
	sprintf(sun.sun_path+1, "nstat%d", getuid());

	if (scan_interval > 0) {
		/* Daemon mode. */
		if (time_constant == 0)
			time_constant = 60;
		time_constant *= 1000;
		W = 1 - 1/exp(log(10)*(double)scan_interval/time_constant);
		if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
			perror("nstat: socket");
			exit(-1);
		}
		if (bind(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) < 0) {
			perror("nstat: bind");
			exit(-1);
		}
		if (listen(fd, 5) < 0) {
			perror("nstat: listen");
			exit(-1);
		}
		if (fork())
			exit(0);
		chdir("/");
		close(0); close(1); close(2); setsid();
		signal(SIGPIPE, SIG_IGN);
		signal(SIGCHLD, sigchild);
		server_loop(fd);
		exit(0);
	}

	patterns = argv;
	npatterns = argc;

	/* The environment value is data, never a format string. */
	if (getenv("NSTAT_HISTORY"))
		snprintf(hist_name, sizeof(hist_name), "%s", getenv("NSTAT_HISTORY"));
	else
		sprintf(hist_name, "/tmp/.nstat.u%d", getuid());

	if (reset_history)
		unlink(hist_name);

	if (!ignore_history || !no_update) {
		struct stat stb;

		fd = open(hist_name, O_RDWR|O_CREAT|O_NOFOLLOW, 0600);
		if (fd < 0) {
			perror("nstat: open history file");
			exit(-1);
		}
		if ((hist_fp = fdopen(fd, "r+")) == NULL) {
			perror("nstat: fdopen history file");
			exit(-1);
		}
		if (flock(fileno(hist_fp), LOCK_EX)) {
			perror("nstat: flock history file");
			exit(-1);
		}
		if (fstat(fileno(hist_fp), &stb) != 0) {
			perror("nstat: fstat history file");
			exit(-1);
		}
		if (stb.st_nlink != 1 || stb.st_uid != getuid()) {
			fprintf(stderr, "nstat: something is so wrong with history file, that I prefer not to proceed.\n");
			exit(-1);
		}
		if (!ignore_history) {
			FILE *tfp;
			/* Stays -1 when /proc/uptime cannot be read. */
			long uptime = -1;

			if ((tfp = fopen("/proc/uptime", "r")) != NULL) {
				if (fscanf(tfp, "%ld", &uptime) != 1)
					uptime = -1;
				fclose(tfp);
			}
			/* History written before the last boot is useless. */
			if (uptime >= 0 && time(NULL) >= stb.st_mtime+uptime) {
				fprintf(stderr, "nstat: history is aged out, resetting\n");
				ftruncate(fileno(hist_fp), 0);
			}
		}

		load_good_table(hist_fp);

		hist_db = kern_db;
		kern_db = NULL;
	}

	/* Try our own daemon first, then the one run by uid 0. */
	if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0 &&
	    (connect(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) == 0
	     || (strcpy(sun.sun_path+1, "nstat0"),
		 connect(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) == 0))
	    && verify_forging(fd) == 0) {
		FILE *sfp = fdopen(fd, "r");

		if (sfp == NULL) {
			perror("nstat: fdopen daemon socket");
			exit(-1);
		}
		load_good_table(sfp);
		if (hist_db && source_mismatch) {
			fprintf(stderr, "nstat: history is stale, ignoring it.\n");
			hist_db = NULL;
		}
		fclose(sfp);
	} else {
		if (fd >= 0)
			close(fd);
		if (hist_db && info_source[0] && strcmp(info_source, "kernel")) {
			fprintf(stderr, "nstat: history is stale, ignoring it.\n");
			hist_db = NULL;
			info_source[0] = 0;
		}
		load_netstat();
		load_snmp6();
		load_snmp();
		if (info_source[0] == 0)
			strcpy(info_source, "kernel");
	}

	if (!no_output) {
		if (ignore_history || hist_db == NULL)
			dump_kern_db(stdout, 0);
		else
			dump_incr_db(stdout);
	}
	if (!no_update) {
		ftruncate(fileno(hist_fp), 0);
		rewind(hist_fp);
		dump_kern_db(hist_fp, 1);
		fflush(hist_fp);
	}
	exit(0);
}

View File

@ -0,0 +1,625 @@
/*
* rtacct.c Applet to display contents of /proc/net/rt_acct.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <errno.h>
#include <time.h>
#include <sys/time.h>
#include <fnmatch.h>
#include <sys/file.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/poll.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <signal.h>
#include <math.h>
#include "rt_names.h"
#include <SNAPSHOT.h>
/* -r: unlink the history file before it is opened. */
int reset_history = 0;
/* -a: print absolute values instead of increments since the history. */
int ignore_history = 0;
/* -n: print nothing (the dump functions still update the history). */
int no_output = 0;
/* -s: map the history file privately so that it is not modified. */
int no_update = 0;
/* -d SECS, stored in milliseconds; a positive value selects daemon mode. */
int scan_interval = 0;
/* -t SECS; converted to milliseconds (default 60 s) in daemon mode. */
int time_constant = 0;
/* -z: also print realms whose values and rates are all zero. */
int dump_zeros = 0;
/* -M HEX: when non-zero, the table is mapped from /dev/mem at this offset. */
unsigned long magic_number = 0;
/* Averaging weight for rates, computed in main() for daemon mode. */
double W;
/*
 * Open a procfs-style file read-only and return the descriptor.
 *
 * If the environment variable named by env is set, its value is the path
 * to open.  Otherwise the path is "<root>/<name>", where <root> is the
 * value of $PROC_ROOT or "/proc" when that is unset.
 *
 * Returns the result of open(): a descriptor, or -1 on failure.
 */
int generic_proc_open(char *env, char *name)
{
	char store[1024];
	char *p = getenv(env);

	if (!p) {
		const char *root = getenv("PROC_ROOT");

		if (!root)
			root = "/proc";
		snprintf(store, sizeof(store), "%s/%s", root, name);
		p = store;
	}
	/* Open p, so that an explicit override is honoured. */
	return open(p, O_RDONLY);
}
/*
 * Open the realm accounting table: the path in $PROC_NET_RTACCT if set,
 * otherwise "net/rt_acct" under the proc root.  Returns what
 * generic_proc_open() returns.
 */
int net_rtacct_open(void)
{
	int fd = generic_proc_open("PROC_NET_RTACCT", "net/rt_acct");

	return fd;
}
/*
 * Bitmap of the realms to display: realm r is selected when bit (r & 0x1f)
 * of rmap[r >> 5] is set.  That indexing touches only the first 8 words
 * of the array.
 */
__u32 rmap[256/4];
/* Accounting state: four counters for each of the 256 realms. */
struct rtacct_data
{
__u32 ival[256*4]; /* raw 32-bit counters as last read */
unsigned long long val[256*4]; /* accumulated 64-bit counters */
double rate[256*4]; /* averaged rates maintained by update_db() */
__u8 signature[128]; /* data source id: "kernel" or the daemon's id string */
};
struct rtacct_data kern_db_static;
/* Current data. */
struct rtacct_data *kern_db = &kern_db_static;
/* The mmap()ed history file; NULL when history is not used. */
struct rtacct_data *hist_db;
/*
 * Read exactly tot bytes from fd into buf.
 *
 * Reads interrupted by a signal are retried.  Any other read error, or
 * end of file before tot bytes have arrived, terminates the process with
 * exit(-1).
 */
void nread(int fd, char *buf, int tot)
{
	int done;

	for (done = 0; done < tot; ) {
		int got = read(fd, buf + done, tot - done);

		if (got > 0) {
			done += got;
			continue;
		}
		if (got < 0 && errno == EINTR)
			continue;
		exit(-1);
	}
}
/*
 * Obtain the kernel's realm accounting table.
 *
 * Normally 256*16 bytes are read from the rt_acct proc file into tbl (or
 * tbl is zeroed when the file cannot be opened) and tbl is returned.
 *
 * When magic_number is set, 4096 bytes of /dev/mem at that offset are
 * mapped read-only on the first call; that mapping is returned on this
 * and every later call, and tbl is left untouched.  Failure to open or
 * map /dev/mem terminates the process.
 */
__u32 *read_kern_table(__u32 *tbl)
{
	static __u32 *tbl_ptr;
	int fd;

	if (!magic_number) {
		fd = net_rtacct_open();
		if (fd < 0) {
			memset(tbl, 0, 256*16);
			return tbl;
		}
		nread(fd, (char*)tbl, 256*16);
		close(fd);
		return tbl;
	}

	/* The mapping is established once and then reused. */
	if (tbl_ptr != NULL)
		return tbl_ptr;

	fd = open("/dev/mem", O_RDONLY);
	if (fd < 0) {
		perror("magic open");
		exit(-1);
	}

	tbl_ptr = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, magic_number);
	if (tbl_ptr == MAP_FAILED) {
		perror("magic mmap");
		exit(-1);
	}
	close(fd);

	return tbl_ptr;
}
void format_rate(FILE *fp, double rate)
{
char temp[64];
if (rate > 1024*1024) {
sprintf(temp, "%uM", (unsigned)rint(rate/(1024*1024)));
fprintf(fp, " %-10s", temp);
} else if (rate > 1024) {
sprintf(temp, "%uK", (unsigned)rint(rate/1024));
fprintf(fp, " %-10s", temp);
} else
fprintf(fp, " %-10u", (unsigned)rate);
}
/*
 * Print val as a right-aligned 10-character column preceded by a space.
 *
 * Values above 2^30 are shown in "M" and values above 2^20 in "K"
 * (truncating); the unit letter follows the column.
 */
void format_count(FILE *fp, unsigned long long val)
{
	if (val <= 1024*1024) {
		fprintf(fp, " %10llu", val);
		return;
	}

	if (val > 1024*1024*1024)
		fprintf(fp, " %10lluM", val/(1024*1024));
	else
		fprintf(fp, " %10lluK", val/1024);
}
/*
 * Print the absolute counters and rates of every selected realm.
 *
 * Realms whose four values and four rates are all zero are skipped unless
 * dump_zeros is set.  When a history mapping exists, the values of each
 * realm that is not skipped are copied into it.  With no_output set
 * nothing is printed, but the history is still updated.
 */
void dump_abs_db(FILE *fp)
{
	char b1[16];
	int realm;

	if (!no_output) {
		fprintf(fp, "#%s\n", kern_db->signature);
		fprintf(fp, "%-10s %-10s %-10s %-10s %-10s \n",
			"Realm", "BytesTo", "PktsTo", "BytesFrom", "PktsFrom");
		fprintf(fp, "%-10s %-10s %-10s %-10s %-10s \n",
			"", "BPSTo", "PPSTo", "BPSFrom", "PPSFrom");
	}

	for (realm = 0; realm < 256; realm++) {
		unsigned long long *val;
		double *rate;
		int i;

		if (!(rmap[realm>>5] & (1<<(realm&0x1f))))
			continue;

		val = &kern_db->val[realm*4];
		rate = &kern_db->rate[realm*4];

		if (!dump_zeros) {
			int active = 0;

			for (i = 0; i < 4; i++) {
				if (val[i] || rate[i])
					active = 1;
			}
			if (!active)
				continue;
		}

		if (hist_db)
			memcpy(&hist_db->val[realm*4], val, sizeof(*val)*4);

		if (no_output)
			continue;

		fprintf(fp, "%-10s", rtnl_rtrealm_n2a(realm, b1, sizeof(b1)));
		for (i = 0; i < 4; i++)
			format_count(fp, val[i]);
		fprintf(fp, "\n%-10s", "");
		for (i = 0; i < 4; i++)
			format_rate(fp, rate[i]);
		fprintf(fp, "\n");
	}
}
/*
 * Print, for every selected realm, the counters relative to the history
 * mapping together with the current rates, then store the current
 * counters in the history.
 *
 * If any of a realm's four counters is below its historical value, all
 * four are shown as absolute values.  Realms whose shown values and rates
 * are all zero are skipped unless dump_zeros is set.  With no_output set
 * nothing is printed, but the history is still updated.
 */
void dump_incr_db(FILE *fp)
{
	char b1[16];
	int realm;

	if (!no_output) {
		fprintf(fp, "#%s\n", kern_db->signature);
		fprintf(fp, "%-10s %-10s %-10s %-10s %-10s \n",
			"Realm", "BytesTo", "PktsTo", "BytesFrom", "PktsFrom");
		fprintf(fp, "%-10s %-10s %-10s %-10s %-10s \n",
			"", "BPSTo", "PPSTo", "BPSFrom", "PPSFrom");
	}

	for (realm = 0; realm < 256; realm++) {
		unsigned long long rval[4];
		unsigned long long *val;
		unsigned long long *old;
		double *rate;
		int ovfl = 0;
		int i;

		if (!(rmap[realm>>5] & (1<<(realm&0x1f))))
			continue;

		val = &kern_db->val[realm*4];
		rate = &kern_db->rate[realm*4];
		old = &hist_db->val[realm*4];

		for (i = 0; i < 4; i++) {
			if (val[i] < old[i]) {
				ovfl = 1;
				rval[i] = val[i];
			} else {
				rval[i] = val[i] - old[i];
			}
		}
		/* One counter went backwards: fall back to absolute values. */
		if (ovfl)
			memcpy(rval, val, sizeof(rval));

		if (hist_db)
			memcpy(&hist_db->val[realm*4], val, sizeof(*val)*4);

		if (no_output)
			continue;

		if (!dump_zeros) {
			int active = 0;

			for (i = 0; i < 4; i++) {
				if (rval[i] || rate[i])
					active = 1;
			}
			if (!active)
				continue;
		}

		fprintf(fp, "%-10s", rtnl_rtrealm_n2a(realm, b1, sizeof(b1)));
		for (i = 0; i < 4; i++)
			format_count(fp, rval[i]);
		fprintf(fp, "\n%-10s", "");
		for (i = 0; i < 4; i++)
			format_rate(fp, rate[i]);
		fprintf(fp, "\n");
	}
}
/* Number of forked client handlers that have not been reaped yet. */
static int children;

/* SIGCHLD handler; it intentionally performs no work. */
void sigchild(int signo)
{
	(void)signo;
}
/*
 * Server side only: read kernel data, update tables, calculate rates.
 *
 * interval is the time since the previous reading in milliseconds.
 * Slots that are zero in the kernel table and have no accumulated value
 * or rate are skipped.
 */
void update_db(int interval)
{
	int i;
	__u32 *ival;
	__u32 _ival[256*4];

	ival = read_kern_table(_ival);

	for (i = 0; i < 256*4; i++) {
		double sample;
		/* Unsigned 32-bit subtraction copes with counter wrap-around. */
		__u32 incr = ival[i] - kern_db->ival[i];

		if (ival[i] == 0 && incr == 0 &&
		    kern_db->val[i] == 0 && kern_db->rate[i] == 0)
			continue;

		kern_db->val[i] += incr;
		kern_db->ival[i] = ival[i];
		/*
		 * Scale in floating point: incr*1000 evaluated in 32 bits
		 * wraps once incr exceeds about 4.29 million.
		 */
		sample = (double)incr * 1000 / interval;
		if (interval >= scan_interval) {
			kern_db->rate[i] += W*(sample-kern_db->rate[i]);
		} else if (interval >= 1000) {
			if (interval >= time_constant) {
				kern_db->rate[i] = sample;
			} else {
				double w = W*(double)interval/scan_interval;
				kern_db->rate[i] += w*(sample-kern_db->rate[i]);
			}
		}
	}
}
/*
 * Write the whole *kern_db structure to fd.
 *
 * Writes interrupted by a signal are retried; any other write error
 * makes the function give up silently.
 */
void send_db(int fd)
{
	const char *base = (const char *)kern_db;
	int sent = 0;

	while (sent < sizeof(*kern_db)) {
		int n = write(fd, base + sent, sizeof(*kern_db) - sent);

		if (n >= 0) {
			sent += n;
			continue;
		}
		if (errno != EINTR)
			return;
	}
}
/* Difference a - b between two struct timeval values, in milliseconds. */
#define T_DIFF(a,b) (((a).tv_sec-(b).tv_sec)*1000 + ((a).tv_usec-(b).tv_usec)/1000)
/*
 * Initialise dat from a raw kernel table: the raw counters are copied
 * into dat->ival (unless ival already is dat->ival), every accumulated
 * value is set to the corresponding raw counter, and all rates are
 * cleared.
 */
void pad_kern_table(struct rtacct_data *dat, __u32 *ival)
{
	int slot;

	if (dat->ival != ival)
		memcpy(dat->ival, ival, sizeof(dat->ival));

	memset(dat->rate, 0, sizeof(dat->rate));

	for (slot = 0; slot < 256*4; slot++)
		dat->val[slot] = ival[slot];
}
/*
 * Daemon main loop; never returns.
 *
 * fd is the listening socket.  The table is refreshed once per
 * scan_interval (milliseconds).  For each accepted client a child is
 * forked (at most 5 at a time) which brings the database up to date,
 * sends it to the client as a binary struct rtacct_data and exits.
 */
void server_loop(int fd)
{
	struct timeval snaptime;
	struct pollfd p;

	p.fd = fd;
	p.events = p.revents = POLLIN;

	snprintf((char *)kern_db->signature, sizeof(kern_db->signature),
		 "%d.%lu sampling_interval=%d time_const=%d",
		 getpid(), (unsigned long)random(), scan_interval/1000, time_constant/1000);

	pad_kern_table(kern_db, read_kern_table(kern_db->ival));
	/* The initial read is the first reading; time the next one from here. */
	gettimeofday(&snaptime, NULL);

	for (;;) {
		int status;
		int tdiff;
		struct timeval now;

		gettimeofday(&now, NULL);
		tdiff = T_DIFF(now, snaptime);
		if (tdiff >= scan_interval) {
			update_db(tdiff);
			snaptime = now;
			tdiff = 0;
		}

		/* Sleep only for what is left of the current scan interval. */
		if (poll(&p, 1, scan_interval - tdiff) > 0
		    && (p.revents&POLLIN)) {
			int clnt = accept(fd, NULL, NULL);

			if (clnt >= 0) {
				pid_t pid;

				if (children >= 5) {
					close(clnt);
				} else if ((pid = fork()) != 0) {
					/* Parent, or fork failure (pid < 0). */
					if (pid>0)
						children++;
					close(clnt);
				} else {
					/* Child: serve this one client and exit. */
					if (tdiff > 0)
						update_db(tdiff);
					send_db(clnt);
					exit(0);
				}
			}
		}

		while (children && waitpid(-1, &status, WNOHANG) > 0)
			children--;
	}
}
/*
 * Check who is on the other end of the connected UNIX socket fd.
 *
 * Returns 0 when the peer credentials could be read and the peer uid is
 * either our own uid or 0; returns -1 otherwise.
 */
int verify_forging(int fd)
{
	struct ucred cred;
	socklen_t olen = sizeof(cred);

	if (getsockopt(fd, SOL_SOCKET, SO_PEERCRED, (void*)&cred, &olen) ||
	    olen < sizeof(cred))
		return -1;
	if (cred.uid == getuid() || cred.uid == 0)
		return 0;
	return -1;
}
static void usage(void) __attribute__((noreturn));

/* Print the command synopsis to stderr and exit with status -1. */
static void usage(void)
{
	static const char synopsis[] =
"Usage: rtacct [ -h?vVzrnasd:t: ] [ ListOfRealms ]\n";

	fputs(synopsis, stderr);
	exit(-1);
}
/*
 * rtacct entry point.
 *
 * Non-option arguments name the realms to display; with none, all realms
 * are selected and all-zero realms are always suppressed.
 *
 * With -d SECS the program binds an abstract UNIX socket named
 * "rtacct<uid>", forks into the background and runs server_loop().
 *
 * Otherwise it maps the history file (unless both -a and -s are given),
 * obtains the current table from a running daemon or, failing that, from
 * the kernel, and prints absolute or incremental counters.  The history
 * is updated through the mapping by the dump functions.
 */
int main(int argc, char *argv[])
{
	char hist_name[128];
	struct sockaddr_un sun;
	int ch;
	int fd;

	while ((ch = getopt(argc, argv, "h?vVzrM:nasd:t:")) != EOF) {
		switch(ch) {
		case 'z':
			dump_zeros = 1;
			break;
		case 'r':
			reset_history = 1;
			break;
		case 'a':
			ignore_history = 1;
			break;
		case 's':
			no_update = 1;
			break;
		case 'n':
			no_output = 1;
			break;
		case 'd':
			scan_interval = 1000*atoi(optarg);
			break;
		case 't':
			if (sscanf(optarg, "%d", &time_constant) != 1 ||
			    time_constant <= 0) {
				fprintf(stderr, "rtacct: invalid time constant divisor\n");
				exit(-1);
			}
			break;
		case 'v':
		case 'V':
			printf("rtacct utility, iproute2-ss%s\n", SNAPSHOT);
			exit(0);
		case 'M':
			/* Some secret undocumented option, nobody
			 * is expected to ask about its sense. See?
			 */
			sscanf(optarg, "%lx", &magic_number);
			break;
		case 'h':
		case '?':
		default:
			usage();
		}
	}

	argc -= optind;
	argv += optind;

	if (argc) {
		while (argc > 0) {
			__u32 realm;

			if (rtnl_rtrealm_a2n(&realm, argv[0])) {
				fprintf(stderr, "Warning: realm \"%s\" does not exist.\n", argv[0]);
				exit(-1);
			}
			rmap[realm>>5] |= (1<<(realm&0x1f));
			argc--; argv++;
		}
	} else {
		memset(rmap, ~0, sizeof(rmap));
		/* Always suppress zeros. */
		dump_zeros = 0;
	}

	/* Abstract-namespace socket: sun_path starts with a NUL byte. */
	sun.sun_family = AF_UNIX;
	sun.sun_path[0] = 0;
	sprintf(sun.sun_path+1, "rtacct%d", getuid());

	if (scan_interval > 0) {
		/* Daemon mode. */
		if (time_constant == 0)
			time_constant = 60;
		time_constant *= 1000;
		W = 1 - 1/exp(log(10)*(double)scan_interval/time_constant);
		if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) < 0) {
			perror("rtacct: socket");
			exit(-1);
		}
		if (bind(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) < 0) {
			perror("rtacct: bind");
			exit(-1);
		}
		if (listen(fd, 5) < 0) {
			perror("rtacct: listen");
			exit(-1);
		}
		if (fork())
			exit(0);
		chdir("/");
		close(0); close(1); close(2); setsid();
		signal(SIGPIPE, SIG_IGN);
		signal(SIGCHLD, sigchild);
		server_loop(fd);
		exit(0);
	}

	/* The environment value is data, never a format string. */
	if (getenv("RTACCT_HISTORY"))
		snprintf(hist_name, sizeof(hist_name), "%s", getenv("RTACCT_HISTORY"));
	else
		sprintf(hist_name, "/tmp/.rtacct.u%d", getuid());

	if (reset_history)
		unlink(hist_name);

	if (!ignore_history || !no_update) {
		struct stat stb;

		fd = open(hist_name, O_RDWR|O_CREAT|O_NOFOLLOW, 0600);
		if (fd < 0) {
			perror("rtacct: open history file");
			exit(-1);
		}
		if (flock(fd, LOCK_EX)) {
			perror("rtacct: flock history file");
			exit(-1);
		}
		if (fstat(fd, &stb) != 0) {
			perror("rtacct: fstat history file");
			exit(-1);
		}
		if (stb.st_nlink != 1 || stb.st_uid != getuid()) {
			fprintf(stderr, "rtacct: something is so wrong with history file, that I prefer not to proceed.\n");
			exit(-1);
		}
		/* A file of the wrong size is filled from the current kern_db. */
		if (stb.st_size != sizeof(*hist_db))
			write(fd, kern_db, sizeof(*hist_db));

		/* With -s the mapping is private, so the file is not changed. */
		hist_db = mmap(NULL, sizeof(*hist_db),
			       PROT_READ|PROT_WRITE,
			       no_update ? MAP_PRIVATE : MAP_SHARED,
			       fd, 0);

		if ((unsigned long)hist_db == ~0UL) {
			perror("mmap");
			exit(-1);
		}

		if (!ignore_history) {
			FILE *tfp;
			/* Stays -1 when /proc/uptime cannot be read. */
			long uptime = -1;

			if ((tfp = fopen("/proc/uptime", "r")) != NULL) {
				if (fscanf(tfp, "%ld", &uptime) != 1)
					uptime = -1;
				fclose(tfp);
			}

			/* History written before the last boot is useless. */
			if (uptime >= 0 && time(NULL) >= stb.st_mtime+uptime) {
				fprintf(stderr, "rtacct: history is aged out, resetting\n");
				memset(hist_db, 0, sizeof(*hist_db));
			}
		}

		close(fd);
	}

	/* Try our own daemon first, then the one run by uid 0. */
	if ((fd = socket(AF_UNIX, SOCK_STREAM, 0)) >= 0 &&
	    (connect(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) == 0
	     || (strcpy(sun.sun_path+1, "rtacct0"),
		 connect(fd, (struct sockaddr*)&sun, 2+1+strlen(sun.sun_path+1)) == 0))
	    && verify_forging(fd) == 0) {
		nread(fd, (char*)kern_db, sizeof(*kern_db));
		if (hist_db && hist_db->signature[0] &&
		    strcmp(kern_db->signature, hist_db->signature)) {
			fprintf(stderr, "rtacct: history is stale, ignoring it.\n");
			hist_db = NULL;
		}
		close(fd);
	} else {
		if (fd >= 0)
			close(fd);

		if (hist_db && hist_db->signature[0] &&
		    strcmp(hist_db->signature, "kernel")) {
			fprintf(stderr, "rtacct: history is stale, ignoring it.\n");
			hist_db = NULL;
		}

		pad_kern_table(kern_db, read_kern_table(kern_db->ival));
		strcpy(kern_db->signature, "kernel");
	}

	if (ignore_history || hist_db == NULL)
		dump_abs_db(stdout);
	else
		dump_incr_db(stdout);

	exit(0);
}

View File

@ -0,0 +1,172 @@
/* rtstat.c: A program for route cache monitoring
*
* Copyright 2001 by Robert Olsson <robert.olsson@its.uu.se>
* Uppsala University, Sweden
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* Additional credits:
* Martin Josefsson <gandalf@wlug.westbo.se> 010828 bug fix
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <getopt.h>
/* Version string printed in the first line of usage(). */
#define VERSION "0.33 010829"
/* getopt() state variables; normally already declared by <unistd.h>/<getopt.h>. */
extern char *optarg;
extern int optind, opterr, optopt;
/* Stream that scan_line() reads the statistics from; main() opens it. */
FILE *fp;

/*
 * Route cache counters.  rt_size is the first column of the most recently
 * parsed line.  Every other counter has two slots, indexed by the sample
 * number passed to scan_line(), so that two samples can be compared.
 */
unsigned rt_size, in_hit[2], in_slow_tot[2], in_slow_mc[2],
	in_no_rt[2], in_brd[2], in_martian_dst[2], in_martian_src[2],
	out_hit[2], out_slow_tot[2], out_slow_mc[2];

/*
 * Read (and summarize for SMP) the different stats vars.
 *
 * Clears slot `i` of every counter, then reads `fp` from its current
 * position: each line is expected to hold eleven hexadecimal fields, the
 * first being the cache size and the remaining ten the counters, which are
 * summed over all lines (presumably one line per CPU).
 *
 * Parsing stops at the first line that does not yield all eleven fields
 * (EOF, truncated record, or non-hex text).  Such a line contributes
 * nothing: neither rt_size nor the counters are touched by it.  This avoids
 * both adding stale values and looping forever on unparsable input.
 *
 * i: sample slot to fill, 0 or 1 (not range-checked).
 */
void scan_line(int i)
{
	unsigned size;
	unsigned temp[10];

	in_hit[i] = 0;
	in_slow_tot[i] = 0;
	in_slow_mc[i] = 0;
	in_no_rt[i] = 0;
	in_brd[i] = 0;
	in_martian_dst[i] = 0;
	in_martian_src[i] = 0;
	out_hit[i] = 0;
	out_slow_tot[i] = 0;
	out_slow_mc[i] = 0;

	/* Only a fully converted record (11 fields) is accounted. */
	while (fscanf(fp, "%x %x %x %x %x %x %x %x %x %x %x\n",
		      &size,
		      &temp[0], /* in_hit */
		      &temp[1], /* in_slow_tot */
		      &temp[2], /* in_slow_mc */
		      &temp[3], /* in_no_rt */
		      &temp[4], /* in_brd */
		      &temp[5], /* in_martian_dst */
		      &temp[6], /* in_martian_src */
		      &temp[7], /* out_hit */
		      &temp[8], /* out_slow_tot */
		      &temp[9]  /* out_slow_mc */
		      ) == 11) {
		rt_size = size;
		in_hit[i] += temp[0];
		in_slow_tot[i] += temp[1];
		in_slow_mc[i] += temp[2];
		in_no_rt[i] += temp[3];
		in_brd[i] += temp[4];
		in_martian_dst[i] += temp[5];
		in_martian_src[i] += temp[6];
		out_hit[i] += temp[7];
		out_slow_tot[i] += temp[8];
		out_slow_mc[i] += temp[9];
	}
}
/* Write the one-line column heading for the statistics table to stdout. */
void print_hdr_line(void)
{
	static const char heading[] =
		" size IN: hit tot mc no_rt bcast madst masrc OUT: hit tot mc\n";

	fputs(heading, stdout);
}
/*
 * Print the program version, the option summary and a legend for every
 * output column, then terminate the process.
 *
 * The option summary and legend go to stderr; the sample column heading is
 * produced by print_hdr_line().
 *
 * exit_code: status handed to exit().
 *
 * This function never returns; the int return type is kept only so that
 * existing callers and declarations remain valid.
 */
int usage(int exit_code)
{
	static const char *const legend[] = {
		"size == route cache size\n",
		"hit == IN: total number of cache hits per sec\n",
		"tot == IN: total number of cache misses per sec\n",
		"mc == IN: multicast cache misses per sec\n",
		"no_rt == IN: route table misses per sec\n",
		"bcast == IN: broadcast cache misses per sec\n",
		"madst == IN: dst martians per sec\n",
		"masrc == IN: src martians per sec\n",
		"hit == OUT: total number of cache hits per sec\n",
		"tot == OUT: total number of cache misses per sec\n",
		"mc == OUT: multicast cache misses per sec\n",
	};
	size_t k;

	fprintf(stderr, "rtstat Version %s\n", VERSION);
	fputs(" -help\n", stderr);
	fputs(" -i interval\n", stderr);
	fputs(" -s subject [0-2]\n", stderr);
	fputs("\n", stderr);

	print_hdr_line();

	fputs("\n", stderr);
	for (k = 0; k < sizeof legend / sizeof legend[0]; k++)
		fputs(legend[k], stderr);

	exit(exit_code);
}
/*
 * rtstat entry point: periodically sample /proc/net/rt_cache_stat and print
 * the per-second rate of every counter.
 *
 * Options:
 *   -h, -?       print usage and exit
 *   -i interval  seconds between the two samples of each row (default 2;
 *                values below 1 are raised to 1)
 *   -s n         heading mode: 0 = never, 1 = once at start,
 *                2 (default) = at start and again on every 20th row
 *
 * A non-numeric argument to -i or -s is rejected via usage(1) rather than
 * silently ignored.  The sampling loop never terminates on its own.
 */
int main(int argc, char **argv)
{
	int c, interval = 2, hdr = 2;
	unsigned i;	/* row counter; unsigned so wrap-around is well defined */
	unsigned secs;

	while ((c = getopt(argc, argv, "h?s:i:")) != EOF) {
		switch (c) {
		case '?':
		case 'h':
			usage(0);
			break;
		case 'i':
			/* %d matches the int destination; reject garbage. */
			if (sscanf(optarg, "%d", &interval) != 1)
				usage(1);
			break;
		case 's':
			if (sscanf(optarg, "%d", &hdr) != 1)
				usage(1);
			break;
		default:
			usage(1);
		}
	}

	if (interval < 1)
		interval = 1;
	secs = (unsigned)interval;

	fp = fopen("/proc/net/rt_cache_stat", "r");
	if (fp == NULL) {
		perror("fopen");
		exit(-1);
	}

	if (hdr > 0)
		print_hdr_line();

	for (i = 1; ; i++) {
		if (hdr > 1 && (i % 20) == 0)
			print_hdr_line();

		/* Take two samples `interval` seconds apart. */
		scan_line(0);
		sleep(secs);
		rewind(fp);
		scan_line(1);
		rewind(fp);

		printf("%5u %9u %7u %5u %5u %5u %5u %5u %9u %7u %6u\n",
		       rt_size,
		       (in_hit[1] - in_hit[0]) / secs,
		       (in_slow_tot[1] - in_slow_tot[0]) / secs,
		       (in_slow_mc[1] - in_slow_mc[0]) / secs,
		       (in_no_rt[1] - in_no_rt[0]) / secs,
		       (in_brd[1] - in_brd[0]) / secs,
		       (in_martian_dst[1] - in_martian_dst[0]) / secs,
		       (in_martian_src[1] - in_martian_src[0]) / secs,
		       (out_hit[1] - out_hit[0]) / secs,
		       (out_slow_tot[1] - out_slow_tot[0]) / secs,
		       (out_slow_mc[1] - out_slow_mc[0]) / secs);
		/* Make each row visible immediately when stdout is a pipe/file. */
		fflush(stdout);
	}
	/* not reached: the loop above has no exit */
}
/*
* Compile:
gcc -g -O2 -Wall -o rtstat rtstat.c
*/

2672
misc/ss.c

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,21 @@
#ifndef SSFILTER_H
#define SSFILTER_H

#include <stdio.h>	/* FILE, needed by the ssfilter_parse() prototype */

/*
 * SSF_* integer codes.
 * NOTE(review): presumably these are the values stored in
 * struct ssfilter.type (condition / boolean operator / port comparison
 * nodes) -- confirm against the parser and ss.c, which are not visible here.
 */
#define SSF_DCOND 0
#define SSF_SCOND 1
#define SSF_OR 2
#define SSF_AND 3
#define SSF_NOT 4
#define SSF_D_GE 5
#define SSF_D_LE 6
#define SSF_S_GE 7
#define SSF_S_LE 8
#define SSF_S_AUTO 9

/*
 * One node of a filter expression: an integer type tag plus two links to
 * other nodes.
 */
struct ssfilter
{
	int type;		/* presumably one of the SSF_* codes -- TODO confirm */
	struct ssfilter *post;	/* link to another node; role depends on type -- TODO confirm */
	struct ssfilter *pred;	/* link to another node; role depends on type -- TODO confirm */
};

/*
 * Filter parser entry point, defined elsewhere.  Takes an output slot for
 * the resulting filter, an argument vector and a stdio stream.
 * NOTE(review): return-value convention is not visible from this header.
 */
int ssfilter_parse(struct ssfilter **f, int argc, char **argv, FILE *fp);

/* Host-condition parser, defined elsewhere; returns an opaque pointer. */
void *parse_hostcond(char*);

#endif /* SSFILTER_H */

Some files were not shown because too many files have changed in this diff Show More