1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
|
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2007 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
/* Copyright (c) 1990 Mentat Inc. */
#ifndef _INET_IP_IRE_H
#define _INET_IP_IRE_H
#pragma ident "%Z%%M% %I% %E% SMI"
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Sizing constants for the IRE tables.
 *
 * Both cache table sizes (256) and the IPv6 ftable hash size (32) are
 * powers of two, which is what the hash macros in this file require of any
 * table_size they are given, since they reduce with "& (table_size - 1)".
 * The mask table sizes are one more than the address width in bits
 * (IP_ABITS / IPV6_ABITS, defined elsewhere) -- presumably one slot per
 * possible prefix length, 0 through the address width; confirm against
 * the table users.
 */
#define IPV6_LL_PREFIXLEN 10 /* Number of bits in the link-local prefix */
#define IP_CACHE_TABLE_SIZE 256
#define IP_MASK_TABLE_SIZE (IP_ABITS + 1) /* 33 ptrs */
#define IP6_FTABLE_HASH_SIZE 32 /* size of each hash table in ptrs */
#define IP6_CACHE_TABLE_SIZE 256
#define IP6_MASK_TABLE_SIZE (IPV6_ABITS + 1) /* 129 ptrs */
/*
 * Common modulo hash of a 32-bit address.
 *
 * The address is XOR-ed with itself shifted right by 8, 16 and 24 bits,
 * which folds all four bytes into the low-order byte, and the result is
 * then reduced to a bucket index. ip_ire_init() makes sure that the cache
 * table size is always a power of 2, which is why "& (table_size - 1)" can
 * be used in place of "% table_size".
 *
 * The folding matters because the hash table is much smaller than 2^32
 * buckets: only the low-order bits survive the mask, so they have to
 * capture as much of the whole address as possible.
 *
 * Note that addr is evaluated four times; do not pass an expression with
 * side effects.
 */
#define	IRE_ADDR_HASH(addr, table_size) \
	(((table_size) - 1) & \
	    (((addr) >> 24) ^ ((addr) >> 16) ^ ((addr) >> 8) ^ (addr)))
/*
 * IPv6 flavor of IRE_ADDR_HASH: hash only the bottom 32 bits of the
 * address, i.e. the s6_addr32[3] word.
 *
 * Because IRE_ADDR_HASH XORs together all four bytes of that word, the low
 * byte of the result does not depend on the order in which the bytes are
 * stored; for table sizes up to 256 the hash is therefore byte-order
 * neutral.
 */
#define	IRE_ADDR_HASH_V6(addr, table_size) \
	(IRE_ADDR_HASH((addr).s6_addr32[3], (table_size)))
/*
 * Hash of an IPv6 address under a netmask, for tables whose size is a
 * power of 2 (the reduction uses "& (table_size - 1)").
 *
 * The hash is the XOR of bytes 8, 9, 10, 13, 14 and 15 of (addr & mask);
 * the upper 64 bits of the address do not take part. Bytes 11 and 12 are
 * skipped as well.
 * NOTE(review): presumably because those two bytes are the fixed 0xff/0xfe
 * filler in EUI-64 derived interface IDs and so add no entropy -- confirm
 * before changing the byte selection.
 */
#define	IRE_ADDR_MASK_HASH_V6(addr, mask, table_size) \
	(((table_size) - 1) & \
	    (((mask).s6_addr8[8] & (addr).s6_addr8[8]) ^ \
	    ((mask).s6_addr8[9] & (addr).s6_addr8[9]) ^ \
	    ((mask).s6_addr8[10] & (addr).s6_addr8[10]) ^ \
	    ((mask).s6_addr8[13] & (addr).s6_addr8[13]) ^ \
	    ((mask).s6_addr8[14] & (addr).s6_addr8[14]) ^ \
	    ((mask).s6_addr8[15] & (addr).s6_addr8[15])))
/*
 * Match parameter definitions for the IRE lookup routines. Every non-zero
 * flag is a distinct bit, so several criteria can be OR-ed together.
 */
#define MATCH_IRE_DSTONLY 0x0000 /* Match just the address */
#define MATCH_IRE_TYPE 0x0001 /* Match IRE type */
#define MATCH_IRE_SRC 0x0002 /* Match IRE source address */
#define MATCH_IRE_MASK 0x0004 /* Match IRE mask */
/* 0x0008 is unused */
#define MATCH_IRE_GW 0x0010 /* Match IRE gateway */
#define MATCH_IRE_IPIF 0x0020 /* Match IRE ipif */
#define MATCH_IRE_RECURSIVE 0x0040 /* Do recursive lookup if necessary */
#define MATCH_IRE_DEFAULT 0x0080 /* Return default route if no route */
/* is found. */
#define MATCH_IRE_RJ_BHOLE 0x0100 /* During lookup if we hit an ire */
/* with RTF_REJECT or RTF_BLACKHOLE, */
/* return the ire. No recursive */
/* lookup should be done. */
#define MATCH_IRE_IHANDLE 0x0200 /* Match IRE on ihandle */
#define MATCH_IRE_MARK_HIDDEN 0x0400 /* Match IRE ire_marks with */
/* IRE_MARK_HIDDEN. */
/*
 * MATCH_IRE_ILL is used whenever we want to specifically match an IRE
 * whose ire_ipif->ipif_ill or (ill_t *)ire_stq->q_ptr matches a given
 * ill. When MATCH_IRE_ILL is used to locate an IRE_CACHE, it implies
 * that the packet will not be load balanced. This is normally used
 * by in.mpathd to send out failure detection probes.
 *
 * MATCH_IRE_ILL_GROUP is used whenever we are not specific about which
 * interface (ill) the packet should be sent out on. This implies that the
 * packets will be subjected to load balancing and may go out on any
 * interface in the group. When there is only one interface in the group,
 * MATCH_IRE_ILL_GROUP becomes MATCH_IRE_ILL. Most of the code uses
 * MATCH_IRE_ILL_GROUP; MATCH_IRE_ILL is used in the very few cases where
 * we want to disable load balancing.
 *
 * MATCH_IRE_PARENT is used whenever we unconditionally want to get the
 * parent IRE (sire) while recursively searching IREs for an offsubnet
 * destination. With this flag, even if no IRE_CACHETABLE or IRE_INTERFACE
 * is found to help resolve the IRE_OFFSUBNET in the lookup routines, the
 * IRE_OFFSUBNET sire, if any, is returned to the caller.
 */
#define MATCH_IRE_ILL_GROUP 0x0800 /* Match IRE on ill or the ill_group. */
#define MATCH_IRE_ILL 0x1000 /* Match IRE on the ill only */
#define MATCH_IRE_PARENT 0x2000 /* Match parent ire, if any, */
/* even if ire is not matched. */
#define MATCH_IRE_ZONEONLY 0x4000 /* Match IREs in specified zone, i.e. */
/* don't match IRE_LOCALs from other */
/* zones or shared IREs */
#define MATCH_IRE_MARK_PRIVATE_ADDR 0x8000 /* Match IRE ire_marks with */
/* IRE_MARK_PRIVATE_ADDR. */
#define MATCH_IRE_SECATTR 0x10000 /* Match gateway security attributes */
#define MATCH_IRE_COMPLETE 0x20000 /* ire_ftable_lookup() can return */
/* IRE_CACHE entry only if it is */
/* ND_REACHABLE */
/*
 * Any ire to nce association is long term, and the refhold and refrele
 * may be done by different threads. So every place that makes or breaks
 * an ire to nce association should effectively use the NOTR variants.
 * To understand the *effectively* part, read on.
 *
 * ndp_lookup() and ndp_add_v4()/ndp_add_v6() implicitly do an
 * NCE_REFHOLD. So wherever we make an ire to nce association after
 * calling these functions, we effectively want to end up with an
 * NCE_REFHOLD_NOTR. This macro converts the one into the other: it takes
 * an NCE_REFHOLD_NOTR and then does an NCE_REFRELE, which cancels the
 * implicit NCE_REFHOLD from ndp_lookup()/ndp_add_*(). What is left is a
 * single NCE_REFHOLD_NOTR. The NOTR hold is deliberately taken before the
 * release so the two steps never run in the opposite order.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely as the unbraced body of an
 * if/else. Invoke it with a trailing semicolon. nce is evaluated twice.
 */
#define	NCE_REFHOLD_TO_REFHOLD_NOTR(nce) do { \
	NCE_REFHOLD_NOTR(nce); \
	NCE_REFRELE(nce); \
	/* CONSTCOND: the constant loop condition is intentional */ \
} while (0)
/*
 * Advance ire along its ire_next chain to the first entry that is not
 * marked IRE_MARK_CONDEMNED. The search starts at ire itself, so ire is
 * left unchanged if it is not condemned. ire is set to NULL if the end of
 * the list is reached without finding such an entry; a NULL ire stays
 * NULL.
 *
 * ire must be a modifiable lvalue; it is evaluated several times.
 * Caller must hold the ire_bucket lock.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and can be used safely as the unbraced body of an
 * if/else. Invoke it with a trailing semicolon.
 */
#define	IRE_FIND_NEXT_ORIGIN(ire) do { \
	while ((ire) != NULL && \
	    ((ire)->ire_marks & IRE_MARK_CONDEMNED) != 0) \
		(ire) = (ire)->ire_next; \
	/* CONSTCOND: the constant loop condition is intentional */ \
} while (0)
/*
 * Counters structure for ire_cache_count(): a total plus a per-class
 * breakdown of the IRE_CACHE entries.
 * NOTE(review): ire_cache_count() is declared as taking (ire_t *, char *);
 * presumably a pointer to this structure is what is passed as the char *
 * argument -- confirm against the caller.
 */
typedef struct {
int icc_total; /* Total number of IRE_CACHE */
int icc_unused; /* # off/no PMTU unused since last reclaim */
int icc_offlink; /* # offlink without PMTU information */
int icc_pmtu; /* # offlink with PMTU information */
int icc_onlink; /* # onlink */
} ire_cache_count_t;
/*
 * Structure for ire_cache_reclaim(). Each icr_* fraction field says how
 * much of one class of cache entry to reclaim: 0 means reclaim none,
 * 1 means reclaim all, and N means reclaim 1/Nth of all entries.
 *
 * The "Does not have a netstack_hold" comment on icr_ipst (and on other
 * netstack_t references) refers to the fact that we only do netstack_hold
 * in particular cases, such as the references from open streams (ill_t
 * and conn_t's pointers). Internally within IP we rely on IP's ability to
 * clean up e.g. ire_t's when an ill goes away.
 */
typedef struct {
int icr_unused; /* Fraction for unused since last reclaim */
int icr_offlink; /* Fraction for offlink without PMTU info */
int icr_pmtu; /* Fraction for offlink with PMTU info */
int icr_onlink; /* Fraction for onlink */
ip_stack_t *icr_ipst; /* Does not have a netstack_hold */
} ire_cache_reclaim_t;
/*
 * Increment by one the counter named x inside the statistics structure
 * ire_stats. The update goes through atomic_add_64() so that the
 * accounting of ires stays accurate; without that, leaks cannot be
 * determined correctly.
 */
#define	BUMP_IRE_STATS(ire_stats, x) \
	atomic_add_64(&((ire_stats).x), 1)
#ifdef _KERNEL
/*
 * Forward declarations: the prototypes below only use pointers to these
 * structures, so their full definitions are not needed here.
 */
struct ts_label_s;
struct nce_s;
/*
 * Kernel-only IRE entry points.
 *
 * Many routines come in an IPv4 and an IPv6 (_v6 suffix) flavor; the
 * lookup routines differ in taking ipaddr_t versus const in6_addr_t *
 * addresses.
 *
 * NOTE(review): several routines share the shape
 * void f(ire_t *, char *) (ire_report_ftable, ire_cache_count,
 * ire_cache_reclaim, ire_delete_cache_gw*, ire_delete_cache_v6,
 * ire_expire); presumably so that they can be handed to the ire_walk*()
 * routines as the pfv_t callback -- confirm against the callers.
 */
extern ipaddr_t ip_plen_to_mask(uint_t);
extern in6_addr_t *ip_plen_to_mask_v6(uint_t, in6_addr_t *);
extern int ip_ire_advise(queue_t *, mblk_t *, cred_t *);
extern int ip_ire_delete(queue_t *, mblk_t *, cred_t *);
extern boolean_t ip_ire_clookup_and_delete(ipaddr_t, ipif_t *, ip_stack_t *);
extern void ip_ire_clookup_and_delete_v6(const in6_addr_t *,
ip_stack_t *);
extern int ip_ire_report(queue_t *, mblk_t *, caddr_t, cred_t *);
extern int ip_ire_report_v6(queue_t *, mblk_t *, caddr_t, cred_t *);
extern void ire_report_ftable(ire_t *, char *);
extern void ip_ire_req(queue_t *, mblk_t *);
extern int ip_mask_to_plen(ipaddr_t);
extern int ip_mask_to_plen_v6(const in6_addr_t *);
extern ire_t *ipif_to_ire(const ipif_t *);
extern ire_t *ipif_to_ire_v6(const ipif_t *);
extern int ire_add(ire_t **, queue_t *, mblk_t *, ipsq_func_t, boolean_t);
extern void ire_add_then_send(queue_t *, ire_t *, mblk_t *);
extern int ire_add_v6(ire_t **, queue_t *, mblk_t *, ipsq_func_t);
extern int ire_atomic_start(irb_t *irb_ptr, ire_t *ire, queue_t *q,
mblk_t *mp, ipsq_func_t func);
extern void ire_atomic_end(irb_t *irb_ptr, ire_t *ire);
extern void ire_cache_count(ire_t *, char *);
extern ire_t *ire_cache_lookup(ipaddr_t, zoneid_t,
const struct ts_label_s *, ip_stack_t *);
extern ire_t *ire_cache_lookup_v6(const in6_addr_t *, zoneid_t,
const struct ts_label_s *, ip_stack_t *);
extern void ire_cache_reclaim(ire_t *, char *);
/*
 * NOTE(review): the create/init family is not uniform in its fifth
 * argument. ire_create_mp() takes a uint_t by value, while ire_create(),
 * ire_init() and ire_create_v6() take a uint_t *, and ire_create_mp_v6()
 * has no corresponding argument at all. Check the definitions before
 * copying a call from one variant to another.
 */
extern ire_t *ire_create_mp(uchar_t *, uchar_t *, uchar_t *, uchar_t *,
uint_t, struct nce_s *, queue_t *, queue_t *, ushort_t, ipif_t *, ipaddr_t,
uint32_t, uint32_t, uint32_t, const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *,
ip_stack_t *);
extern ire_t *ire_create(uchar_t *, uchar_t *, uchar_t *, uchar_t *,
uint_t *, struct nce_s *, queue_t *, queue_t *, ushort_t, ipif_t *,
ipaddr_t, uint32_t, uint32_t, uint32_t, const iulp_t *, tsol_gc_t *,
tsol_gcgrp_t *, ip_stack_t *);
extern ire_t **ire_check_and_create_bcast(ipif_t *, ipaddr_t,
ire_t **, int);
extern ire_t **ire_create_bcast(ipif_t *, ipaddr_t, ire_t **);
extern ire_t *ire_init(ire_t *, uchar_t *, uchar_t *, uchar_t *, uchar_t *,
uint_t *, struct nce_s *, queue_t *, queue_t *, ushort_t, ipif_t *,
ipaddr_t, uint32_t, uint32_t, uint32_t, const iulp_t *, tsol_gc_t *,
tsol_gcgrp_t *, ip_stack_t *);
extern boolean_t ire_init_common(ire_t *, uint_t *, struct nce_s *, queue_t *,
queue_t *, ushort_t, ipif_t *, uint32_t, uint32_t, uint32_t, uchar_t,
const iulp_t *, tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *);
extern ire_t *ire_create_v6(const in6_addr_t *, const in6_addr_t *,
const in6_addr_t *, const in6_addr_t *, uint_t *, struct nce_s *, queue_t *,
queue_t *, ushort_t, ipif_t *,
const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *,
tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *);
extern ire_t *ire_create_mp_v6(const in6_addr_t *, const in6_addr_t *,
const in6_addr_t *, const in6_addr_t *, struct nce_s *, queue_t *,
queue_t *, ushort_t, ipif_t *,
const in6_addr_t *, uint32_t, uint32_t, uint_t, const iulp_t *,
tsol_gc_t *, tsol_gcgrp_t *, ip_stack_t *);
extern void ire_clookup_delete_cache_gw(ipaddr_t, zoneid_t,
ip_stack_t *);
extern void ire_clookup_delete_cache_gw_v6(const in6_addr_t *, zoneid_t,
ip_stack_t *);
extern ire_t *ire_ctable_lookup(ipaddr_t, ipaddr_t, int, const ipif_t *,
zoneid_t, const struct ts_label_s *, int, ip_stack_t *);
extern ire_t *ire_ctable_lookup_v6(const in6_addr_t *, const in6_addr_t *,
int, const ipif_t *, zoneid_t, const struct ts_label_s *, int,
ip_stack_t *);
extern void ire_delete(ire_t *);
extern void ire_delete_cache_gw(ire_t *, char *);
extern void ire_delete_cache_gw_v6(ire_t *, char *);
extern void ire_delete_cache_v6(ire_t *, char *);
extern void ire_delete_v6(ire_t *);
extern void ire_expire(ire_t *, char *);
extern void ire_flush_cache_v4(ire_t *, int);
extern void ire_flush_cache_v6(ire_t *, int);
extern ire_t *ire_ftable_lookup_v6(const in6_addr_t *, const in6_addr_t *,
const in6_addr_t *, int, const ipif_t *, ire_t **, zoneid_t,
uint32_t, const struct ts_label_s *, int, ip_stack_t *);
extern ire_t *ire_ihandle_lookup_onlink(ire_t *);
extern ire_t *ire_ihandle_lookup_offlink(ire_t *, ire_t *);
extern ire_t *ire_ihandle_lookup_offlink_v6(ire_t *, ire_t *);
extern boolean_t ire_local_same_ill_group(ire_t *, ire_t *);
extern boolean_t ire_local_ok_across_zones(ire_t *, zoneid_t, void *,
const struct ts_label_s *, ip_stack_t *);
extern ire_t *ire_lookup_local(zoneid_t, ip_stack_t *);
extern ire_t *ire_lookup_local_v6(zoneid_t, ip_stack_t *);
extern ire_t *ire_lookup_multi(ipaddr_t, zoneid_t, ip_stack_t *);
extern ire_t *ire_lookup_multi_v6(const in6_addr_t *, zoneid_t,
ip_stack_t *);
extern void ire_refrele(ire_t *);
extern void ire_refrele_notr(ire_t *);
extern ire_t *ire_route_lookup(ipaddr_t, ipaddr_t, ipaddr_t, int,
const ipif_t *, ire_t **, zoneid_t, const struct ts_label_s *, int,
ip_stack_t *);
extern ire_t *ire_route_lookup_v6(const in6_addr_t *, const in6_addr_t *,
const in6_addr_t *, int, const ipif_t *, ire_t **, zoneid_t,
const struct ts_label_s *, int, ip_stack_t *);
extern ill_t *ire_to_ill(const ire_t *);
extern void ire_walk(pfv_t, void *, ip_stack_t *);
extern void ire_walk_ill(uint_t, uint_t, pfv_t, void *, ill_t *);
extern void ire_walk_ill_v4(uint_t, uint_t, pfv_t, void *, ill_t *);
extern void ire_walk_ill_v6(uint_t, uint_t, pfv_t, void *, ill_t *);
extern void ire_walk_v4(pfv_t, void *, zoneid_t, ip_stack_t *);
extern void ire_walk_ill_tables(uint_t match_flags, uint_t ire_type,
pfv_t func, void *arg, size_t ftbl_sz, size_t htbl_sz,
irb_t **ipftbl, size_t ctbl_sz, irb_t *ipctbl, ill_t *ill,
zoneid_t zoneid, ip_stack_t *);
extern void ire_walk_v6(pfv_t, void *, zoneid_t, ip_stack_t *);
extern boolean_t ire_multirt_lookup(ire_t **, ire_t **, uint32_t,
const struct ts_label_s *, ip_stack_t *);
extern boolean_t ire_multirt_need_resolve(ipaddr_t,
const struct ts_label_s *, ip_stack_t *);
extern boolean_t ire_multirt_lookup_v6(ire_t **, ire_t **, uint32_t,
const struct ts_label_s *, ip_stack_t *);
extern boolean_t ire_multirt_need_resolve_v6(const in6_addr_t *,
const struct ts_label_s *, ip_stack_t *);
extern ire_t *ipif_lookup_multi_ire(ipif_t *, ipaddr_t);
extern ire_t *ipif_lookup_multi_ire_v6(ipif_t *, const in6_addr_t *);
extern ire_t *ire_get_next_bcast_ire(ire_t *, ire_t *);
extern ire_t *ire_get_next_default_ire(ire_t *, ire_t *);
extern void ire_arpresolve(ire_t *, ill_t *);
extern void ire_freemblk(ire_t *);
extern boolean_t ire_match_args(ire_t *, ipaddr_t, ipaddr_t, ipaddr_t,
int, const ipif_t *, zoneid_t, uint32_t, const struct ts_label_s *, int);
extern int ire_nce_init(ire_t *, struct nce_s *);
extern boolean_t ire_walk_ill_match(uint_t, uint_t, ire_t *, ill_t *,
zoneid_t, ip_stack_t *);
#endif /* _KERNEL */
#ifdef __cplusplus
}
#endif
#endif /* _INET_IP_IRE_H */
|