diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 8a45569..13500a3 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -33,7 +33,7 @@ #define SIOCGETSGCNT (SIOCPROTOPRIVATE+1) #define SIOCGETRPF (SIOCPROTOPRIVATE+2) -#define MAXVIFS 32 +#define MAXVIFS 256 /* NOTE(review): vifbitmap_t is a single unsigned long and VIFM_* shift an int 1 by the vif index -- confirm they are never used with indexes this large */ typedef unsigned long vifbitmap_t; /* User mode code depends on this lot */ typedef unsigned short vifi_t; #define ALL_VIFS ((vifi_t)(-1)) @@ -41,7 +41,7 @@ typedef unsigned short vifi_t; /* * Same idea as select */ - + #define VIFM_SET(n,m) ((m)|=(1<<(n))) #define VIFM_CLR(n,m) ((m)&=~(1<<(n))) #define VIFM_ISSET(n,m) ((m)&(1<<(n))) @@ -53,7 +53,7 @@ typedef unsigned short vifi_t; * Passed by mrouted for an MRT_ADD_VIF - again we use the * mrouted 3.6 structures for compatibility */ - + struct vifctl { vifi_t vifc_vifi; /* Index of VIF */ unsigned char vifc_flags; /* VIFF_ flags */ @@ -66,11 +66,12 @@ struct vifctl { #define VIFF_TUNNEL 0x1 /* IPIP tunnel */ #define VIFF_SRCRT 0x2 /* NI */ #define VIFF_REGISTER 0x4 /* register vif */ +#define VIFF_NBMA 0x10 /* NBMA vif: peer selected by the vif's remote address */ /* * Cache manipulation structures for mrouted and PIMd */ - + struct mfcctl { struct in_addr mfcc_origin; /* Origin of mcast */ @@ -83,10 +84,10 @@ struct mfcctl int mfcc_expire; }; -/* +/* * Group count retrieval for mrouted */ - + struct sioc_sg_req { struct in_addr src; @@ -113,7 +114,7 @@ struct sioc_vif_req * This is the format the mroute daemon expects to see IGMP control * data. 
Magically happens to be like an IP packet as per the original */ - + struct igmpmsg { __u32 unused1,unused2; @@ -190,7 +191,7 @@ struct vif_device #define VIFF_STATIC 0x8000 -struct mfc_cache +struct mfc_cache { struct mfc_cache *next; /* Next entry on cache line */ __be32 mfc_mcastgrp; /* Group the entry belongs to */ @@ -224,7 +225,7 @@ struct mfc_cache #define MFC_HASH(a,b) (((((__force u32)(__be32)a)>>24)^(((__force u32)(__be32)b)>>26))&(MFC_LINES-1)) #else #define MFC_HASH(a,b) ((((__force u32)(__be32)a)^(((__force u32)(__be32)b)>>2))&(MFC_LINES-1)) -#endif +#endif #endif diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 1466644..5adea03 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -116,6 +116,31 @@ static struct net_protocol pim_protocol; static struct timer_list ipmr_expire_timer; +static __be32 ipmr_get_skb_nbma(struct sk_buff *skb) /* 4-byte address filled in by dev_parse_header(), else INADDR_ANY */ +{ + union { + char addr[MAX_ADDR_LEN]; + __be32 inaddr; + } u; + + if (dev_parse_header(skb, u.addr) != 4) + return INADDR_ANY; + + return u.inaddr; +} + +static int ip_mr_match_vif_skb(struct vif_device *vif, struct sk_buff *skb) /* nonzero if skb->dev is the vif's device and, for VIFF_NBMA vifs, the skb's NBMA address equals vif->remote */ +{ + if (vif->dev != skb->dev) + return 0; + + if (vif->flags & VIFF_NBMA) + return ipmr_get_skb_nbma(skb) == vif->remote; + + return 1; +} + + /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */ static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v) @@ -468,6 +493,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) return err; } break; + case VIFF_NBMA: case 0: dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr); if (!dev) @@ -502,7 +528,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock) v->pkt_in = 0; v->pkt_out = 0; v->link = dev->ifindex; - if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER)) + if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER|VIFF_NBMA)) v->link = dev->iflink; /* And finish update writing critical data */ @@ -1191,12 +1217,15 @@ static inline int ipmr_forward_finish(struct sk_buff *skb) { struct ip_options * opt = 
&(IPCB(skb)->opt); - IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); + IP_INC_STATS_BH(dev_net(skb->dev), IPSTATS_MIB_OUTFORWDATAGRAMS); if (unlikely(opt->optlen)) ip_forward_options(skb); - return dst_output(skb); + if (skb->dst != NULL) + return dst_output(skb); + else + return dev_queue_xmit(skb); /* no dst: ipmr_queue_xmit clears skb->dst for VIFF_NBMA vifs, so queue directly on skb->dev */ } /* @@ -1208,7 +1237,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) const struct iphdr *iph = ip_hdr(skb); struct vif_device *vif = &vif_table[vifi]; struct net_device *dev; - struct rtable *rt; + struct net_device *fromdev = skb->dev; + struct rtable *rt = NULL; int encap = 0; if (vif->dev == NULL) @@ -1236,6 +1266,19 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) if (ip_route_output_key(&init_net, &rt, &fl)) goto out_free; encap = sizeof(struct iphdr); + dev = rt->u.dst.dev; + } else if (vif->flags&VIFF_NBMA) { + /* FIXME: we should take the tunnel source address from the + * tunnel device binding, if it exists. */ + struct flowi fl = { .oif = vif->link, + .nl_u = { .ip4_u = + { .daddr = vif->remote, + .tos = RT_TOS(iph->tos) } }, + .proto = IPPROTO_GRE }; + if (ip_route_output_key(&init_net, &rt, &fl)) + goto out_free; + encap = LL_RESERVED_SPACE(rt->u.dst.dev); + dev = vif->dev; } else { struct flowi fl = { .oif = vif->link, .nl_u = { .ip4_u = @@ -1244,34 +1287,39 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) .proto = IPPROTO_IPIP }; if (ip_route_output_key(&init_net, &rt, &fl)) goto out_free; + dev = rt->u.dst.dev; } - dev = rt->u.dst.dev; + if (!(vif->flags & VIFF_NBMA)) { + if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { + /* Do not fragment multicasts. Alas, IPv4 does not + allow to send ICMP, so that packets will disappear + to blackhole. + */ - if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) { - /* Do not fragment multicasts. 
Alas, IPv4 does not - allow to send ICMP, so that packets will disappear - to blackhole. - */ - - IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); - ip_rt_put(rt); - goto out_free; + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS); + goto out_free_rt; + } } encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len; - if (skb_cow(skb, encap)) { - ip_rt_put(rt); - goto out_free; - } + if (skb_cow(skb, encap)) + goto out_free_rt; vif->pkt_out++; vif->bytes_out += skb->len; dst_release(skb->dst); - skb->dst = &rt->u.dst; + if (vif->flags & VIFF_NBMA) { + ip_rt_put(rt); + skb->dst = NULL; + rt = NULL; + } else { + skb->dst = &rt->u.dst; + } ip_decrease_ttl(ip_hdr(skb)); + skb->dev = dev; /* FIXME: forward and output firewalls used to be called here. * What do we do with netfilter? -- RR */ @@ -1280,6 +1328,10 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) /* FIXME: extra output firewall step used to be here. --RR */ vif->dev->stats.tx_packets++; vif->dev->stats.tx_bytes += skb->len; + } else if (vif->flags & VIFF_NBMA) { + if (dev_hard_header(skb, dev, ntohs(skb->protocol), + &vif->remote, NULL, 4) < 0) + goto out_free_rt; } IPCB(skb)->flags |= IPSKB_FORWARDED; @@ -1295,20 +1347,29 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi) * not mrouter) cannot join to more than one interface - it will * result in receiving multiple packets. 
*/ - NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev, + NF_HOOK(PF_INET, NF_INET_FORWARD, skb, fromdev, dev, ipmr_forward_finish); return; +out_free_rt: + if (rt != NULL) + ip_rt_put(rt); out_free: kfree_skb(skb); return; } -static int ipmr_find_vif(struct net_device *dev) +static int ipmr_find_vif(struct net_device *dev, __be32 nbma_origin) /* highest vif index on dev that matches nbma_origin, or -1 if none */ { int ct; for (ct=maxvif-1; ct>=0; ct--) { - if (vif_table[ct].dev == dev) + if (vif_table[ct].dev != dev) + continue; + + if (vif_table[ct].flags & VIFF_NBMA) { + if (vif_table[ct].remote == nbma_origin) + break; + } else if (nbma_origin == INADDR_ANY) /* non-NBMA vifs match only when no NBMA origin is given; NOTE(review): callers pass ipmr_get_skb_nbma(skb) -- confirm it is INADDR_ANY for skbs arriving on plain vifs */ break; } return ct; @@ -1328,7 +1389,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local /* * Wrong interface: drop packet and (maybe) send PIM assert. */ - if (vif_table[vif].dev != skb->dev) { + if (!ip_mr_match_vif_skb(&vif_table[vif], skb)) { int true_vifi; if (skb->rtable->fl.iif == 0) { @@ -1347,7 +1408,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local } cache->mfc_un.res.wrong_if++; - true_vifi = ipmr_find_vif(skb->dev); + true_vifi = ipmr_find_vif(skb->dev, ipmr_get_skb_nbma(skb)); if (true_vifi >= 0 && mroute_do_assert && /* pimsm uses asserts, when switching from RPT to SPT, @@ -1454,7 +1515,7 @@ int ip_mr_input(struct sk_buff *skb) skb = skb2; } - vif = ipmr_find_vif(skb->dev); + vif = ipmr_find_vif(skb->dev, ipmr_get_skb_nbma(skb)); if (vif >= 0) { int err = ipmr_cache_unresolved(vif, skb); read_unlock(&mrt_lock); @@ -1634,7 +1695,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait) } dev = skb->dev; - if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) { + if (dev == NULL || (vif = ipmr_find_vif(dev, INADDR_ANY)) < 0) { read_unlock(&mrt_lock); return -ENODEV; }