/*
 * Source path: root/usr/src/uts/common/inet/ip_ire.h
 * Source blob: 4efc702266c6fea0afb7ca14742deb1fddc4b612
 */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* Copyright (c) 1990 Mentat Inc. */

#ifndef	_INET_IP_IRE_H
#define	_INET_IP_IRE_H

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#ifdef	__cplusplus
extern "C" {
#endif

/* Number of bits in the IPv6 link-local prefix. */
#define	IPV6_LL_PREFIXLEN	10

/* Table sizes used for IPv4. */
#define	IP_CACHE_TABLE_SIZE	256
#define	IP_MASK_TABLE_SIZE	(IP_ABITS + 1)		/* 33 ptrs */

/* Table sizes used for IPv6. */
#define	IP6_CACHE_TABLE_SIZE	256
#define	IP6_FTABLE_HASH_SIZE	32	/* size of each hash table in ptrs */
#define	IP6_MASK_TABLE_SIZE	(IPV6_ABITS + 1)	/* 129 ptrs */

/*
 * Common modulo hash: XOR the address with copies of itself shifted right
 * by 8, 16 and 24 bits, then reduce the result to a bucket index.
 *
 * table_size must be a power of 2 (ip_ire_init() makes sure of this for
 * the cache table), which is why the reduction can be a mask with
 * (table_size - 1) rather than a modulus.  The hash table is probably much
 * smaller than 2^32 buckets, so only the low-order bits survive the mask;
 * the XOR folding makes those bits capture information from the whole
 * address.
 *
 * Note that addr is evaluated more than once.
 */
#define	IRE_ADDR_HASH(addr, table_size)					\
	((((addr) >> 24) ^ ((addr) >> 16) ^ ((addr) >> 8) ^ (addr)) &	\
	((table_size) - 1))

/*
 * IPv6 counterpart of IRE_ADDR_HASH.  Only the bottom 32 bits of the
 * address (s6_addr32[3]) are hashed; taking all of the bytes in that word
 * into account is what makes the hash byte-order neutral.
 */
#define	IRE_ADDR_HASH_V6(addr, table_size)				\
	(IRE_ADDR_HASH((addr).s6_addr32[3], (table_size)))

/* Byte idx of addr with the corresponding byte of mask applied. */
#define	IRE_MASKED_BYTE_V6(addr, mask, idx)				\
	((addr).s6_addr8[idx] & (mask).s6_addr8[idx])

/*
 * Hash a masked IPv6 address into an ftable bucket index.  The masked
 * bytes 8, 9, 10, 13, 14 and 15 are XORed together and the result is
 * reduced with (table_size - 1); this assumes that the ftable size is a
 * power of 2.
 *
 * NOTE(review): bytes 11 and 12 are not part of the hash -- presumably
 * because they are the fixed 0xff/0xfe filler of EUI-64 derived interface
 * identifiers; confirm that this is intentional.
 */
#define	IRE_ADDR_MASK_HASH_V6(addr, mask, table_size)			\
	((IRE_MASKED_BYTE_V6(addr, mask, 8) ^				\
	IRE_MASKED_BYTE_V6(addr, mask, 9) ^				\
	IRE_MASKED_BYTE_V6(addr, mask, 10) ^				\
	IRE_MASKED_BYTE_V6(addr, mask, 13) ^				\
	IRE_MASKED_BYTE_V6(addr, mask, 14) ^				\
	IRE_MASKED_BYTE_V6(addr, mask, 15)) & ((table_size) - 1))

/*
 * Match parameter definitions for the IRE lookup routines.  Every flag is
 * a distinct bit; MATCH_IRE_DSTONLY has no bits set.
 */
#define	MATCH_IRE_DSTONLY	0x00000	/* Match just the address */
#define	MATCH_IRE_TYPE		0x00001	/* Match IRE type */
#define	MATCH_IRE_SRC		0x00002	/* Match IRE source address */
#define	MATCH_IRE_MASK		0x00004	/* Match IRE mask */
/* 0x00008 is unused */
#define	MATCH_IRE_GW		0x00010	/* Match IRE gateway */
#define	MATCH_IRE_IPIF		0x00020	/* Match IRE ipif */
#define	MATCH_IRE_RECURSIVE	0x00040	/* Do recursive lookup if necessary */
#define	MATCH_IRE_DEFAULT	0x00080	/* Return default route if no route */
					/* found. */

/*
 * During lookup, if we hit an ire with RTF_REJECT or RTF_BLACKHOLE, return
 * that ire.  No recursive lookup should be done.
 */
#define	MATCH_IRE_RJ_BHOLE	0x00100

#define	MATCH_IRE_IHANDLE	0x00200	/* Match IRE on ihandle */

/* Match IRE ire_marks with IRE_MARK_HIDDEN. */
#define	MATCH_IRE_MARK_HIDDEN	0x00400

/*
 * MATCH_IRE_ILL_GROUP: match the IRE on the ill or the ill_group.
 * Used whenever we are not specific about which interface (ill) the packet
 * should be sent out.  This implies that the packets will be subjected to
 * load balancing and might go out on any interface in the group.  When
 * there is only one interface in the group, MATCH_IRE_ILL_GROUP becomes
 * MATCH_IRE_ILL.  Most of the code uses MATCH_IRE_ILL_GROUP;
 * MATCH_IRE_ILL is used in the very few cases where we want to disable
 * load balancing.
 */
#define	MATCH_IRE_ILL_GROUP	0x00800

/*
 * MATCH_IRE_ILL: match the IRE on the ill only.
 * Used whenever we want to specifically match an IRE whose
 * ire_ipif->ipif_ill or (ill_t *)ire_stq->q_ptr matches a given ill.
 * When MATCH_IRE_ILL is used to locate an IRE_CACHE, it implies that the
 * packet will not be load balanced.  This is normally used by in.mpathd
 * to send out failure detection probes.
 */
#define	MATCH_IRE_ILL		0x01000

/*
 * MATCH_IRE_PARENT: match the parent ire, if any, even if the ire itself
 * is not matched.
 * Used whenever we unconditionally want to get the parent IRE (sire) while
 * recursively searching IREs for an offsubnet destination.  With this
 * flag, even if no IRE_CACHETABLE or IRE_INTERFACE is found to help
 * resolving IRE_OFFSUBNET in lookup routines, the IRE_OFFSUBNET sire, if
 * any, is returned to the caller.
 */
#define	MATCH_IRE_PARENT	0x02000

/*
 * Match IREs in the specified zone, i.e. don't match IRE_LOCALs from other
 * zones or shared IREs.
 */
#define	MATCH_IRE_ZONEONLY	0x04000

/* Match IRE ire_marks with IRE_MARK_PRIVATE_ADDR. */
#define	MATCH_IRE_MARK_PRIVATE_ADDR	0x08000

/* Match gateway security attributes. */
#define	MATCH_IRE_SECATTR	0x10000

/*
 * ire_ftable_lookup() can return an IRE_CACHE entry only if it is
 * ND_REACHABLE.
 */
#define	MATCH_IRE_COMPLETE	0x20000

/*
 * Any ire to nce association is long term, and the refhold and refrele
 * may be done by different threads.  So all cases of making or breaking
 * an ire to nce association should effectively use the NOTR variants.
 *
 * ndp_lookup() and ndp_add_v4()/ndp_add_v6() implicitly do an
 * NCE_REFHOLD.  Wherever an ire to nce association is made after calling
 * one of them, we effectively want to end up with an NCE_REFHOLD_NOTR.
 * This macro converts the one into the other: it first takes an
 * NCE_REFHOLD_NOTR and then does an NCE_REFRELE, which cancels the
 * implicit NCE_REFHOLD of ndp_lookup()/ndp_add_*().  What is left is a
 * single NCE_REFHOLD_NOTR.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to
 * exactly one statement and can be used as the unbraced body of an
 * if/else.  Invoke it with a trailing semicolon.  nce is evaluated twice.
 */
#define	NCE_REFHOLD_TO_REFHOLD_NOTR(nce) do {	\
	NCE_REFHOLD_NOTR(nce);			\
	NCE_REFRELE(nce);			\
} while (0)

/*
 * Find the next ire_t entry in the ire_next chain, starting at ire itself,
 * that is not CONDEMNED.  If ire is already not condemned it is left
 * unchanged; ire is set to NULL if the end of the list is reached.
 * Caller must hold the ire_bucket lock.
 *
 * ire must be a modifiable lvalue and is evaluated more than once.  The
 * body is wrapped in do { } while (0) so that the macro expands to exactly
 * one statement and can be used as the unbraced body of an if/else.
 * Invoke it with a trailing semicolon.
 */
#define	IRE_FIND_NEXT_ORIGIN(ire) do {					\
	while ((ire) != NULL &&						\
	    ((ire)->ire_marks & IRE_MARK_CONDEMNED) != 0)		\
		(ire) = (ire)->ire_next;				\
} while (0)


/*
 * Counters filled in for ire_cache_count().
 */
typedef struct {
	/* Total number of IRE_CACHE */
	int	icc_total;
	/* # off/no PMTU unused since last reclaim */
	int	icc_unused;
	/* # offlink without PMTU information */
	int	icc_offlink;
	/* # offlink with PMTU information */
	int	icc_pmtu;
	/* # onlink */
	int	icc_onlink;
} ire_cache_count_t;

/*
 * Argument structure for ire_cache_reclaim().  Each icr_* count is a
 * fraction: 1 means reclaim all, N means reclaim 1/Nth of all entries and
 * 0 means reclaim none.
 *
 * No netstack_hold is taken for icr_ipst.  As for other netstack_t
 * references, a netstack_hold is only done in particular cases, such as
 * the references from open streams (ill_t and conn_t's pointers).
 * Internally within IP we rely on IP's ability to clean up e.g. ire_t's
 * when an ill goes away.
 */
typedef struct {
	/* Fraction for unused since last reclaim */
	int	icr_unused;
	/* Fraction for offlink without PMTU info */
	int	icr_offlink;
	/* Fraction for offlink with PMTU info */
	int	icr_pmtu;
	/* Fraction for onlink */
	int	icr_onlink;
	/* Does not have a netstack_hold */
	ip_stack_t *icr_ipst;
} ire_cache_reclaim_t;

/*
 * Increment counter x of ire_stats by one.  The update is done with
 * atomic_add_64() so that the accounting of ires stays accurate; without
 * that, leaks could not be determined correctly.
 */
#define	BUMP_IRE_STATS(ire_stats, x)	\
	atomic_add_64(&((ire_stats).x), 1)

#ifdef _KERNEL
/*
 * Everything from here to the matching #endif is declared only when
 * _KERNEL is defined.
 *
 * The two structures below are forward-declared because the prototypes
 * that follow only use pointers to them (struct nce_s * and
 * const struct ts_label_s *), so their full definitions are not needed
 * in this header.
 */
struct ts_label_s;
struct nce_s;

/*
 * NOTE(review): names suggest prefix-length to mask conversion; the
 * reverse direction appears to be ip_mask_to_plen*() below.  Confirm
 * against the implementation.
 */
extern	ipaddr_t	ip_plen_to_mask(uint_t);
extern	in6_addr_t	*ip_plen_to_mask_v6(uint_t, in6_addr_t *);

extern	int	ip_ire_advise(queue_t *, mblk_t *, cred_t *);
extern	int	ip_ire_delete(queue_t *, mblk_t *, cred_t *);
extern	boolean_t ip_ire_clookup_and_delete(ipaddr_t, ipif_t *, ip_stack_t *);
extern	void	ip_ire_clookup_and_delete_v6(const in6_addr_t *,
    ip_stack_t *);

extern	int	ip_ire_report(queue_t *, mblk_t *, caddr_t, cred_t *);
extern	int	ip_ire_report_v6(queue_t *, mblk_t *, caddr_t, cred_t *);
extern	void	ire_report_ftable(ire_t *, char *);

extern	void	ip_ire_req(queue_t *, mblk_t *);

extern	int	ip_mask_to_plen(ipaddr_t);
extern	int	ip_mask_to_plen_v6(const in6_addr_t *);

extern	ire_t	*ipif_to_ire(const ipif_t *);
extern	ire_t	*ipif_to_ire_v6(const ipif_t *);

/*
 * NOTE(review): ire_add() takes a trailing boolean_t that ire_add_v6()
 * does not have; confirm the asymmetry is intentional.
 */
extern	int	ire_add(ire_t **, queue_t *, mblk_t *, ipsq_func_t, boolean_t);
extern	void	ire_add_then_send(queue_t *, ire_t *, mblk_t *);
extern	int	ire_add_v6(ire_t **, queue_t *, mblk_t *, ipsq_func_t);
extern	int	ire_atomic_start(irb_t *irb_ptr, ire_t *ire, queue_t *q,
    mblk_t *mp, ipsq_func_t func);
extern	void	ire_atomic_end(irb_t *irb_ptr, ire_t *ire);

/*
 * ire_cache_count() and ire_cache_reclaim() are the routines named in the
 * comments on ire_cache_count_t and ire_cache_reclaim_t earlier in this
 * header.  NOTE(review): presumably the char * argument carries a pointer
 * to the corresponding structure; confirm in the callers.
 */
extern	void	ire_cache_count(ire_t *, char *);
extern	ire_t	*ire_cache_lookup(ipaddr_t, zoneid_t,
    const struct ts_label_s *, ip_stack_t *);
extern	ire_t	*ire_cache_lookup_v6(const in6_addr_t *, zoneid_t,
    const struct ts_label_s *, ip_stack_t *);
extern	void	ire_cache_reclaim(ire_t *, char *);

/*
 * NOTE(review): the fifth parameter of ire_create_mp() is a uint_t passed
 * by value, whereas ire_create() and ire_init() take a uint_t * in the
 * same position; confirm this difference is intentional.
 */
extern	ire_t	*ire_create_mp(uchar_t *, uchar_t *, uchar_t *, uchar_t *,
    uint_t, struct nce_s *, queue_t *, queue_t *, ushort_t, ipif_t *, ipaddr_t,
    uint32_t, uint32_t, uint32_t, const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *,
    ip_stack_t *);
extern	ire_t	*ire_create(uchar_t *, uchar_t *, uchar_t *, uchar_t *,
    uint_t *, struct nce_s *, queue_t *, queue_t *, ushort_t, ipif_t *,
    ipaddr_t, uint32_t, uint32_t, uint32_t, const iulp_t *, tsol_gc_t *,
    tsol_gcgrp_t *, ip_stack_t *);

extern	ire_t	**ire_check_and_create_bcast(ipif_t *, ipaddr_t,
    ire_t **, int);
extern	ire_t	**ire_create_bcast(ipif_t *, ipaddr_t, ire_t **);
/* Same argument list as ire_create(), preceded by the ire_t * to use. */
extern	ire_t	*ire_init(ire_t *, uchar_t *, uchar_t *, uchar_t *, uchar_t *,
    uint_t *, struct nce_s *, queue_t *, queue_t *, ushort_t, ipif_t *,
    ipaddr_t, uint32_t, uint32_t, uint32_t, const iulp_t *, tsol_gc_t *,
    tsol_gcgrp_t *, ip_stack_t *);

extern	boolean_t ire_init_common(ire_t *, uint_t *, struct nce_s *, queue_t *,
    queue_t *, ushort_t, ipif_t *, uint32_t, uint32_t, uint32_t, uchar_t,
    const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *);

/*
 * IPv6 creation routines: addresses are passed as const in6_addr_t *.
 * ire_create_mp_v6() has no uint_t * parameter after the four addresses,
 * unlike ire_create_v6().
 */
extern	ire_t	*ire_create_v6(const in6_addr_t *, const in6_addr_t *,
    const in6_addr_t *, const in6_addr_t *, uint_t *, struct nce_s *, queue_t *,
    queue_t *, ushort_t, ipif_t *,
    const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *,
    tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *);

extern	ire_t	*ire_create_mp_v6(const in6_addr_t *, const in6_addr_t *,
    const in6_addr_t *, const in6_addr_t *, struct nce_s *, queue_t *,
    queue_t *, ushort_t, ipif_t *,
    const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *,
    tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *);


extern	void	ire_clookup_delete_cache_gw(ipaddr_t, zoneid_t,
    ip_stack_t *);
extern	void	ire_clookup_delete_cache_gw_v6(const in6_addr_t *, zoneid_t,
    ip_stack_t *);

/*
 * NOTE(review): the int parameters of the lookup routines in this header
 * are presumably where the MATCH_IRE_* flags are passed; confirm which
 * position carries the flags before relying on it.
 */
extern	ire_t	*ire_ctable_lookup(ipaddr_t, ipaddr_t, int, const ipif_t *,
    zoneid_t, const struct ts_label_s *, int, ip_stack_t *);

extern	ire_t	*ire_ctable_lookup_v6(const in6_addr_t *, const in6_addr_t *,
    int, const ipif_t *, zoneid_t, const struct ts_label_s *, int,
    ip_stack_t *);

extern	void	ire_delete(ire_t *);
extern	void	ire_delete_cache_gw(ire_t *, char *);
extern	void	ire_delete_cache_gw_v6(ire_t *, char *);
extern	void	ire_delete_cache_v6(ire_t *, char *);
extern	void	ire_delete_v6(ire_t *);

extern	void	ire_expire(ire_t *, char *);

extern	void	ire_flush_cache_v4(ire_t *, int);
extern	void	ire_flush_cache_v6(ire_t *, int);

/*
 * NOTE(review): only the IPv6 ftable lookup is declared here; the
 * MATCH_IRE_COMPLETE comment refers to ire_ftable_lookup(), which is
 * presumably declared in another header.
 */
extern	ire_t	*ire_ftable_lookup_v6(const in6_addr_t *, const in6_addr_t *,
    const in6_addr_t *, int, const ipif_t *, ire_t **, zoneid_t,
    uint32_t, const struct ts_label_s *, int, ip_stack_t *);

extern	ire_t	*ire_ihandle_lookup_onlink(ire_t *);
extern	ire_t	*ire_ihandle_lookup_offlink(ire_t *, ire_t *);
extern	ire_t	*ire_ihandle_lookup_offlink_v6(ire_t *, ire_t *);

extern	boolean_t	ire_local_same_ill_group(ire_t *, ire_t *);
extern	boolean_t	ire_local_ok_across_zones(ire_t *, zoneid_t, void *,
    const struct ts_label_s *, ip_stack_t *);

extern	ire_t 	*ire_lookup_local(zoneid_t, ip_stack_t *);
extern	ire_t 	*ire_lookup_local_v6(zoneid_t, ip_stack_t *);

extern  ire_t	*ire_lookup_multi(ipaddr_t, zoneid_t, ip_stack_t *);
extern  ire_t	*ire_lookup_multi_v6(const in6_addr_t *, zoneid_t,
    ip_stack_t *);

extern	void	ire_refrele(ire_t *);
extern	void	ire_refrele_notr(ire_t *);
extern	ire_t	*ire_route_lookup(ipaddr_t, ipaddr_t, ipaddr_t, int,
    const ipif_t *, ire_t **, zoneid_t, const struct ts_label_s *, int,
    ip_stack_t *);

extern	ire_t	*ire_route_lookup_v6(const in6_addr_t *, const in6_addr_t *,
    const in6_addr_t *, int, const ipif_t *, ire_t **, zoneid_t,
    const struct ts_label_s *, int, ip_stack_t *);

extern ill_t	*ire_to_ill(const ire_t *);

/*
 * Walkers: each takes a pfv_t callback and a void * argument.
 * NOTE(review): the (ire_t *, char *) routines declared above (e.g.
 * ire_expire(), ire_cache_count()) are presumably used as such callbacks;
 * confirm in the callers.
 */
extern	void	ire_walk(pfv_t, void *, ip_stack_t *);
extern	void	ire_walk_ill(uint_t, uint_t, pfv_t, void *, ill_t *);
extern	void	ire_walk_ill_v4(uint_t, uint_t, pfv_t, void *, ill_t *);
extern	void	ire_walk_ill_v6(uint_t, uint_t, pfv_t, void *, ill_t *);
extern	void	ire_walk_v4(pfv_t, void *, zoneid_t, ip_stack_t *);
extern  void	ire_walk_ill_tables(uint_t match_flags, uint_t ire_type,
    pfv_t func, void *arg, size_t ftbl_sz, size_t htbl_sz,
    irb_t **ipftbl, size_t ctbl_sz, irb_t *ipctbl, ill_t *ill,
    zoneid_t zoneid, ip_stack_t *);
extern	void	ire_walk_v6(pfv_t, void *, zoneid_t, ip_stack_t *);

extern boolean_t	ire_multirt_lookup(ire_t **, ire_t **, uint32_t,
    const struct ts_label_s *, ip_stack_t *);
extern boolean_t	ire_multirt_need_resolve(ipaddr_t,
    const struct ts_label_s *, ip_stack_t *);
extern boolean_t	ire_multirt_lookup_v6(ire_t **, ire_t **, uint32_t,
    const struct ts_label_s *, ip_stack_t *);
extern boolean_t	ire_multirt_need_resolve_v6(const in6_addr_t *,
    const struct ts_label_s *, ip_stack_t *);

extern ire_t	*ipif_lookup_multi_ire(ipif_t *, ipaddr_t);
extern ire_t	*ipif_lookup_multi_ire_v6(ipif_t *, const in6_addr_t *);

extern ire_t	*ire_get_next_bcast_ire(ire_t *, ire_t *);
extern ire_t	*ire_get_next_default_ire(ire_t *, ire_t *);

extern  void	ire_arpresolve(ire_t *,  ill_t *);
extern  void	ire_freemblk(ire_t *);
extern boolean_t	ire_match_args(ire_t *, ipaddr_t, ipaddr_t, ipaddr_t,
    int, const ipif_t *, zoneid_t, uint32_t, const struct ts_label_s *, int);
extern  int	ire_nce_init(ire_t *, struct nce_s *);
/*
 * Takes the same leading (uint_t, uint_t) pair as the ire_walk_ill*()
 * routines, named match_flags and ire_type in ire_walk_ill_tables().
 */
extern  boolean_t	ire_walk_ill_match(uint_t, uint_t, ire_t *, ill_t *,
    zoneid_t, ip_stack_t *);

#endif /* _KERNEL */

#ifdef	__cplusplus
}
#endif

#endif	/* _INET_IP_IRE_H */