aboutsummaryrefslogtreecommitdiffstats
path: root/core/linux-headers/linux-nbma-mroute-v4-2.6.29.diff
blob: a79adc2815f815db3d35ea9a6fa8fc3faf7961aa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 8a45569..13500a3 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -33,7 +33,7 @@
 #define SIOCGETSGCNT	(SIOCPROTOPRIVATE+1)
 #define SIOCGETRPF	(SIOCPROTOPRIVATE+2)
 
-#define MAXVIFS		32	
+#define MAXVIFS		256
 typedef unsigned long vifbitmap_t;	/* User mode code depends on this lot */
 typedef unsigned short vifi_t;
 #define ALL_VIFS	((vifi_t)(-1))
@@ -41,7 +41,7 @@ typedef unsigned short vifi_t;
 /*
  *	Same idea as select
  */
- 
+
 #define VIFM_SET(n,m)	((m)|=(1<<(n)))
 #define VIFM_CLR(n,m)	((m)&=~(1<<(n)))
 #define VIFM_ISSET(n,m)	((m)&(1<<(n)))
@@ -53,7 +53,7 @@ typedef unsigned short vifi_t;
  *	Passed by mrouted for an MRT_ADD_VIF - again we use the
  *	mrouted 3.6 structures for compatibility
  */
- 
+
 struct vifctl {
 	vifi_t	vifc_vifi;		/* Index of VIF */
 	unsigned char vifc_flags;	/* VIFF_ flags */
@@ -66,11 +66,12 @@ struct vifctl {
 #define VIFF_TUNNEL	0x1	/* IPIP tunnel */
 #define VIFF_SRCRT	0x2	/* NI */
 #define VIFF_REGISTER	0x4	/* register vif	*/
+#define VIFF_NBMA	0x10
 
 /*
  *	Cache manipulation structures for mrouted and PIMd
  */
- 
+
 struct mfcctl
 {
 	struct in_addr mfcc_origin;		/* Origin of mcast	*/
@@ -83,10 +84,10 @@ struct mfcctl
 	int	     mfcc_expire;
 };
 
-/* 
+/*
  *	Group count retrieval for mrouted
  */
- 
+
 struct sioc_sg_req
 {
 	struct in_addr src;
@@ -113,7 +114,7 @@ struct sioc_vif_req
  *	This is the format the mroute daemon expects to see IGMP control
  *	data. Magically happens to be like an IP packet as per the original
  */
- 
+
 struct igmpmsg
 {
 	__u32 unused1,unused2;
@@ -190,7 +191,7 @@ struct vif_device
 
 #define VIFF_STATIC 0x8000
 
-struct mfc_cache 
+struct mfc_cache
 {
 	struct mfc_cache *next;			/* Next entry on cache line 	*/
 	__be32 mfc_mcastgrp;			/* Group the entry belongs to 	*/
@@ -224,7 +225,7 @@ struct mfc_cache
 #define MFC_HASH(a,b)	(((((__force u32)(__be32)a)>>24)^(((__force u32)(__be32)b)>>26))&(MFC_LINES-1))
 #else
 #define MFC_HASH(a,b)	((((__force u32)(__be32)a)^(((__force u32)(__be32)b)>>2))&(MFC_LINES-1))
-#endif		
+#endif
 
 #endif
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 1466644..5adea03 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -116,6 +116,31 @@ static struct net_protocol pim_protocol;
 
 static struct timer_list ipmr_expire_timer;
 
+static __be32 ipmr_get_skb_nbma(struct sk_buff *skb)
+{
+	union {
+		char addr[MAX_ADDR_LEN];
+		__be32 inaddr;
+	} u;
+
+	if (dev_parse_header(skb, u.addr) != 4)
+		return INADDR_ANY;
+
+	return u.inaddr;
+}
+
+static int ip_mr_match_vif_skb(struct vif_device *vif, struct sk_buff *skb)
+{
+	if (vif->dev != skb->dev)
+		return 0;
+
+	if (vif->flags & VIFF_NBMA)
+		return ipmr_get_skb_nbma(skb) == vif->remote;
+
+	return 1;
+}
+
+
 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 
 static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
@@ -468,6 +493,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
 			return err;
 		}
 		break;
+	case VIFF_NBMA:
 	case 0:
 		dev = ip_dev_find(&init_net, vifc->vifc_lcl_addr.s_addr);
 		if (!dev)
@@ -502,7 +528,7 @@ static int vif_add(struct vifctl *vifc, int mrtsock)
 	v->pkt_in = 0;
 	v->pkt_out = 0;
 	v->link = dev->ifindex;
-	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
+	if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER|VIFF_NBMA))
 		v->link = dev->iflink;
 
 	/* And finish update writing critical data */
@@ -1191,12 +1217,15 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
 {
 	struct ip_options * opt	= &(IPCB(skb)->opt);
 
-	IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
+	IP_INC_STATS_BH(dev_net(skb->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
 
 	if (unlikely(opt->optlen))
 		ip_forward_options(skb);
 
-	return dst_output(skb);
+	if (skb->dst != NULL)
+		return dst_output(skb);
+	else
+		return dev_queue_xmit(skb);
 }
 
 /*
@@ -1208,7 +1237,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 	const struct iphdr *iph = ip_hdr(skb);
 	struct vif_device *vif = &vif_table[vifi];
 	struct net_device *dev;
-	struct rtable *rt;
+	struct net_device *fromdev = skb->dev;
+	struct rtable *rt = NULL;
 	int    encap = 0;
 
 	if (vif->dev == NULL)
@@ -1236,6 +1266,19 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 		if (ip_route_output_key(&init_net, &rt, &fl))
 			goto out_free;
 		encap = sizeof(struct iphdr);
+		dev = rt->u.dst.dev;
+	} else if (vif->flags&VIFF_NBMA) {
+		/* Fixme, we should take tunnel source address from the
+		 * tunnel device binding if it exists */
+		struct flowi fl = { .oif = vif->link,
+				    .nl_u = { .ip4_u =
+					      { .daddr = vif->remote,
+						.tos = RT_TOS(iph->tos) } },
+				    .proto = IPPROTO_GRE };
+		if (ip_route_output_key(&init_net, &rt, &fl))
+			goto out_free;
+		encap = LL_RESERVED_SPACE(rt->u.dst.dev);
+		dev = vif->dev;
 	} else {
 		struct flowi fl = { .oif = vif->link,
 				    .nl_u = { .ip4_u =
@@ -1244,34 +1287,39 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 				    .proto = IPPROTO_IPIP };
 		if (ip_route_output_key(&init_net, &rt, &fl))
 			goto out_free;
+		dev = rt->u.dst.dev;
 	}
 
-	dev = rt->u.dst.dev;
+	if (!(vif->flags & VIFF_NBMA)) {
+		if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
+			/* Do not fragment multicasts. Alas, IPv4 does not
+			   allow to send ICMP, so that packets will disappear
+			   to blackhole.
+			*/
 
-	if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
-		/* Do not fragment multicasts. Alas, IPv4 does not
-		   allow to send ICMP, so that packets will disappear
-		   to blackhole.
-		 */
-
-		IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
-		ip_rt_put(rt);
-		goto out_free;
+			IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+			goto out_free_rt;
+		}
 	}
 
 	encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
 
-	if (skb_cow(skb, encap)) {
-		ip_rt_put(rt);
-		goto out_free;
-	}
+	if (skb_cow(skb, encap))
+		goto out_free_rt;
 
 	vif->pkt_out++;
 	vif->bytes_out += skb->len;
 
 	dst_release(skb->dst);
-	skb->dst = &rt->u.dst;
+	if (vif->flags & VIFF_NBMA) {
+		ip_rt_put(rt);
+		skb->dst = NULL;
+		rt = NULL;
+	} else {
+		skb->dst = &rt->u.dst;
+	}
 	ip_decrease_ttl(ip_hdr(skb));
+	skb->dev = dev;
 
 	/* FIXME: forward and output firewalls used to be called here.
 	 * What do we do with netfilter? -- RR */
@@ -1280,6 +1328,10 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 		/* FIXME: extra output firewall step used to be here. --RR */
 		vif->dev->stats.tx_packets++;
 		vif->dev->stats.tx_bytes += skb->len;
+	} else if (vif->flags & VIFF_NBMA) {
+		if (dev_hard_header(skb, dev, ntohs(skb->protocol),
+				    &vif->remote, NULL, 4) < 0)
+			goto out_free_rt;
 	}
 
 	IPCB(skb)->flags |= IPSKB_FORWARDED;
@@ -1295,20 +1347,29 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
 	 * not mrouter) cannot join to more than one interface - it will
 	 * result in receiving multiple packets.
 	 */
-	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
+	NF_HOOK(PF_INET, NF_INET_FORWARD, skb, fromdev, dev,
 		ipmr_forward_finish);
 	return;
 
+out_free_rt:
+	if (rt != NULL)
+		ip_rt_put(rt);
 out_free:
 	kfree_skb(skb);
 	return;
 }
 
-static int ipmr_find_vif(struct net_device *dev)
+static int ipmr_find_vif(struct net_device *dev, __be32 nbma_origin)
 {
 	int ct;
 	for (ct=maxvif-1; ct>=0; ct--) {
-		if (vif_table[ct].dev == dev)
+		if (vif_table[ct].dev != dev)
+			continue;
+
+		if (vif_table[ct].flags & VIFF_NBMA) {
+			if (vif_table[ct].remote == nbma_origin)
+				break;
+		} else if (nbma_origin == INADDR_ANY)
 			break;
 	}
 	return ct;
@@ -1328,7 +1389,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
 	/*
 	 * Wrong interface: drop packet and (maybe) send PIM assert.
 	 */
-	if (vif_table[vif].dev != skb->dev) {
+	if (!ip_mr_match_vif_skb(&vif_table[vif], skb)) {
 		int true_vifi;
 
 		if (skb->rtable->fl.iif == 0) {
@@ -1347,7 +1408,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
 		}
 
 		cache->mfc_un.res.wrong_if++;
-		true_vifi = ipmr_find_vif(skb->dev);
+		true_vifi = ipmr_find_vif(skb->dev, ipmr_get_skb_nbma(skb));
 
 		if (true_vifi >= 0 && mroute_do_assert &&
 		    /* pimsm uses asserts, when switching from RPT to SPT,
@@ -1454,7 +1515,7 @@ int ip_mr_input(struct sk_buff *skb)
 			skb = skb2;
 		}
 
-		vif = ipmr_find_vif(skb->dev);
+		vif = ipmr_find_vif(skb->dev, ipmr_get_skb_nbma(skb));
 		if (vif >= 0) {
 			int err = ipmr_cache_unresolved(vif, skb);
 			read_unlock(&mrt_lock);
@@ -1634,7 +1695,7 @@ int ipmr_get_route(struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 		}
 
 		dev = skb->dev;
-		if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
+		if (dev == NULL || (vif = ipmr_find_vif(dev, INADDR_ANY)) < 0) {
 			read_unlock(&mrt_lock);
 			return -ENODEV;
 		}