summaryrefslogtreecommitdiff
path: root/usr/src/uts/common/io/vnic/vnic_dev.c
blob: 26d2200b0dffd6af8f4b534b7ea60bbe6d6e4d08 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/types.h>
#include <sys/cred.h>
#include <sys/sysmacros.h>
#include <sys/conf.h>
#include <sys/cmn_err.h>
#include <sys/list.h>
#include <sys/ksynch.h>
#include <sys/kmem.h>
#include <sys/stream.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>
#include <sys/stat.h>
#include <sys/modhash.h>
#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/dlpi.h>
#include <sys/mac.h>
#include <sys/mac_provider.h>
#include <sys/mac_client.h>
#include <sys/mac_client_priv.h>
#include <sys/mac_ether.h>
#include <sys/dls.h>
#include <sys/pattr.h>
#include <sys/time.h>
#include <sys/vlan.h>
#include <sys/vnic.h>
#include <sys/vnic_impl.h>
#include <sys/mac_flow_impl.h>
#include <inet/ip_impl.h>

/*
 * Note that for best performance, the VNIC is a passthrough design.
 * Each VNIC corresponds to a MAC client of the underlying MAC (lower MAC).
 * This MAC client is opened by the VNIC driver at VNIC creation,
 * and closed when the VNIC is deleted.
 * When a MAC client of the VNIC itself opens a VNIC, the MAC layer
 * (upper MAC) detects that the MAC being opened is a VNIC. Instead
 * of allocating a new MAC client, it asks the VNIC driver to return
 * the lower MAC client handle associated with the VNIC, and that handle
 * is returned to the upper MAC client directly. This allows access
 * by upper MAC clients of the VNIC to have direct access to the lower
 * MAC client for the control path and data path.
 *
 * Due to this passthrough, some of the entry points exported by the
 * VNIC driver are never directly invoked. These entry points include
 * vnic_m_start, vnic_m_stop, vnic_m_promisc, vnic_m_multicst, etc.
 */

/*
 * Forward declarations of the file-local MAC entry points (referenced by
 * the vnic_m_callbacks table below) and of the lower-MAC notification
 * callback.
 */
static int vnic_m_start(void *);
static void vnic_m_stop(void *);
static int vnic_m_promisc(void *, boolean_t);
static int vnic_m_multicst(void *, boolean_t, const uint8_t *);
static int vnic_m_unicst(void *, const uint8_t *);
static int vnic_m_stat(void *, uint_t, uint64_t *);
static void vnic_m_ioctl(void *, queue_t *, mblk_t *);
static int vnic_m_setprop(void *, const char *, mac_prop_id_t, uint_t,
    const void *);
static void vnic_m_propinfo(void *, const char *, mac_prop_id_t,
    mac_prop_info_handle_t);
static mblk_t *vnic_m_tx(void *, mblk_t *);
static boolean_t vnic_m_capab_get(void *, mac_capab_t, void *);
static void vnic_notify_cb(void *, mac_notify_type_t);

/*
 * Driver-wide state, set up in vnic_dev_init() and torn down in
 * vnic_dev_fini():
 *   vnic_cache - kmem cache from which vnic_t structures are allocated
 *   vnic_lock  - held while vnic_hash is searched or modified and while
 *                vnic_count is updated
 *   vnic_count - number of VNICs currently present in vnic_hash
 */
static kmem_cache_t	*vnic_cache;
static krwlock_t	vnic_lock;
static uint_t		vnic_count;

/* MTU range accepted for anchor VNICs (those without a lower MAC) */
#define	ANCHOR_VNIC_MIN_MTU	576
#define	ANCHOR_VNIC_MAX_MTU	9000

/* hash of VNICs (vnic_t's), keyed by VNIC id */
static mod_hash_t	*vnic_hash;
#define	VNIC_HASHSZ	64
#define	VNIC_HASH_KEY(vnic_id)	((mod_hash_key_t)(uintptr_t)vnic_id)

/* optional entry points supplied in vnic_m_callbacks */
#define	VNIC_M_CALLBACK_FLAGS	\
	(MC_IOCTL | MC_GETCAPAB | MC_SETPROP | MC_PROPINFO)

/*
 * Entry points handed to the MAC layer by vnic_dev_create() through
 * mac->m_callbacks. The initializer is positional: the first member is the
 * flag word, followed by the entry points. Slots left NULL are not
 * implemented by this driver.
 */
static mac_callbacks_t vnic_m_callbacks = {
	VNIC_M_CALLBACK_FLAGS,
	vnic_m_stat,
	vnic_m_start,
	vnic_m_stop,
	vnic_m_promisc,
	vnic_m_multicst,
	vnic_m_unicst,
	vnic_m_tx,
	NULL,
	vnic_m_ioctl,
	vnic_m_capab_get,
	NULL,
	NULL,
	vnic_m_setprop,
	NULL,
	vnic_m_propinfo
};

/*
 * One-time set-up of the driver-wide state: the global lock, the VNIC
 * counter, the vnic_t allocation cache and the id-keyed VNIC hash.
 */
void
vnic_dev_init(void)
{
	/* lock and counter first; nothing is in the hash yet */
	rw_init(&vnic_lock, NULL, RW_DEFAULT, NULL);
	vnic_count = 0;

	/* allocator for vnic_t structures */
	vnic_cache = kmem_cache_create("vnic_cache",
	    sizeof (vnic_t), 0, NULL, NULL, NULL, NULL, NULL, 0);

	/* VNIC id -> vnic_t lookup table */
	vnic_hash = mod_hash_create_idhash("vnic_hash",
	    VNIC_HASHSZ, mod_hash_null_valdtor);
}

/*
 * Release everything vnic_dev_init() created. All VNICs must already
 * have been deleted.
 */
void
vnic_dev_fini(void)
{
	ASSERT(vnic_count == 0);

	/* tear down in the reverse order of vnic_dev_init() */
	mod_hash_destroy_idhash(vnic_hash);
	kmem_cache_destroy(vnic_cache);
	rw_destroy(&vnic_lock);
}

/*
 * Return the number of VNICs currently tracked by the driver. The counter
 * is read without taking vnic_lock.
 */
uint_t
vnic_dev_count(void)
{
	return (vnic_count);
}

/*
 * Translate a diagnostic code reported by the MAC layer into the
 * corresponding VNIC ioctl diagnostic code. Codes without a VNIC
 * equivalent map to VNIC_IOC_DIAG_NONE.
 */
static vnic_ioc_diag_t
vnic_mac2vnic_diag(mac_diag_t diag)
{
	vnic_ioc_diag_t vdiag = VNIC_IOC_DIAG_NONE;

	switch (diag) {
	case MAC_DIAG_MACADDR_NIC:
		vdiag = VNIC_IOC_DIAG_MACADDR_NIC;
		break;
	case MAC_DIAG_MACADDR_INUSE:
		vdiag = VNIC_IOC_DIAG_MACADDR_INUSE;
		break;
	case MAC_DIAG_MACADDR_INVALID:
		vdiag = VNIC_IOC_DIAG_MACADDR_INVALID;
		break;
	case MAC_DIAG_MACADDRLEN_INVALID:
		vdiag = VNIC_IOC_DIAG_MACADDRLEN_INVALID;
		break;
	case MAC_DIAG_MACFACTORYSLOTINVALID:
		vdiag = VNIC_IOC_DIAG_MACFACTORYSLOTINVALID;
		break;
	case MAC_DIAG_MACFACTORYSLOTUSED:
		vdiag = VNIC_IOC_DIAG_MACFACTORYSLOTUSED;
		break;
	case MAC_DIAG_MACFACTORYSLOTALLUSED:
		vdiag = VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED;
		break;
	case MAC_DIAG_MACFACTORYNOTSUP:
		vdiag = VNIC_IOC_DIAG_MACFACTORYNOTSUP;
		break;
	case MAC_DIAG_MACPREFIX_INVALID:
		vdiag = VNIC_IOC_DIAG_MACPREFIX_INVALID;
		break;
	case MAC_DIAG_MACPREFIXLEN_INVALID:
		vdiag = VNIC_IOC_DIAG_MACPREFIXLEN_INVALID;
		break;
	case MAC_DIAG_MACNO_HWRINGS:
		vdiag = VNIC_IOC_DIAG_NO_HWRINGS;
		break;
	default:
		/* no VNIC-level equivalent; keep VNIC_IOC_DIAG_NONE */
		break;
	}

	return (vdiag);
}

/*
 * Select the MAC address for a VNIC according to vnic_addr_type and add it
 * as a unicast address on the VNIC's lower MAC client (vnic->vn_mch).
 *
 * For address types that pick an address here (random, factory, auto,
 * primary) the chosen address is written to mac_addr_arg and its length to
 * *addr_len_ptr_arg. For the factory and auto types the reserved slot is
 * passed through addr_slot. VNIC_MAC_ADDR_TYPE_AUTO is resolved to either
 * FACTORY or RANDOM; the resolved type is stored in vnic->vn_addr_type
 * before mac_unicast_add() is called.
 *
 * Returns 0 on success or an errno value. On several failure paths *diag is
 * set to a VNIC diagnostic code. If mac_unicast_add() fails after a factory
 * address was reserved, the reservation is released.
 */
static int
vnic_unicast_add(vnic_t *vnic, vnic_mac_addr_type_t vnic_addr_type,
    int *addr_slot, uint_t prefix_len, int *addr_len_ptr_arg,
    uint8_t *mac_addr_arg, uint16_t flags, vnic_ioc_diag_t *diag,
    uint16_t vid, boolean_t req_hwgrp_flag)
{
	mac_diag_t mac_diag;
	uint16_t mac_flags = 0;
	int err;
	uint_t addr_len;

	if (flags & VNIC_IOC_CREATE_NODUPCHECK)
		mac_flags |= MAC_UNICAST_NODUPCHECK;

	switch (vnic_addr_type) {
	case VNIC_MAC_ADDR_TYPE_FIXED:
	case VNIC_MAC_ADDR_TYPE_VRID:
		/*
		 * The MAC address value to assign to the VNIC
		 * is already provided in mac_addr_arg. addr_len_ptr_arg
		 * already contains the MAC address length.
		 */
		break;

	case VNIC_MAC_ADDR_TYPE_RANDOM:
		/*
		 * Random MAC address. There are two sub-cases:
		 *
		 * 1 - If mac_len == 0, a new MAC address is generated.
		 *	The length of the MAC address to generate depends
		 *	on the type of MAC used. The prefix to use for the MAC
		 *	address is stored in the most significant bytes
		 *	of the mac_addr argument, and its length is specified
		 *	by the mac_prefix_len argument. This prefix can
		 *	correspond to a IEEE OUI in the case of Ethernet,
		 *	for example.
		 *
		 * 2 - If mac_len > 0, the address was already picked
		 *	randomly, and is now passed back during VNIC
		 *	re-creation. The mac_addr argument contains the MAC
		 *	address that was generated. We distinguish this
		 *	case from the fixed MAC address case, since we
		 *	want the user consumers to know, when they query
		 *	the list of VNICs, that a VNIC was assigned a
		 *	random MAC address vs assigned a fixed address
		 *	specified by the user.
		 */

		/*
		 * If it's a pre-generated address, we're done. mac_addr_arg
		 * and addr_len_ptr_arg already contain the MAC address
		 * value and length.
		 */
		if (*addr_len_ptr_arg > 0)
			break;

		/* generate a new random MAC address */
		if ((err = mac_addr_random(vnic->vn_mch,
		    prefix_len, mac_addr_arg, &mac_diag)) != 0) {
			*diag = vnic_mac2vnic_diag(mac_diag);
			return (err);
		}
		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
		break;

	case VNIC_MAC_ADDR_TYPE_FACTORY:
		err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
		if (err != 0) {
			/* other errno values leave *diag untouched */
			if (err == EINVAL)
				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTINVALID;
			if (err == EBUSY)
				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTUSED;
			if (err == ENOSPC)
				*diag = VNIC_IOC_DIAG_MACFACTORYSLOTALLUSED;
			return (err);
		}

		mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
		    mac_addr_arg, &addr_len, NULL, NULL);
		*addr_len_ptr_arg = addr_len;
		break;

	case VNIC_MAC_ADDR_TYPE_AUTO:
		/* first try to allocate a factory MAC address */
		err = mac_addr_factory_reserve(vnic->vn_mch, addr_slot);
		if (err == 0) {
			mac_addr_factory_value(vnic->vn_lower_mh, *addr_slot,
			    mac_addr_arg, &addr_len, NULL, NULL);
			/* from here on, treat the VNIC as a factory one */
			vnic_addr_type = VNIC_MAC_ADDR_TYPE_FACTORY;
			*addr_len_ptr_arg = addr_len;
			break;
		}

		/*
		 * Allocating a factory MAC address failed, generate a
		 * random MAC address instead.
		 */
		if ((err = mac_addr_random(vnic->vn_mch,
		    prefix_len, mac_addr_arg, &mac_diag)) != 0) {
			*diag = vnic_mac2vnic_diag(mac_diag);
			return (err);
		}
		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
		vnic_addr_type = VNIC_MAC_ADDR_TYPE_RANDOM;
		break;
	case VNIC_MAC_ADDR_TYPE_PRIMARY:
		/*
		 * We get the address here since we copy it in the
		 * vnic's vn_addr.
		 * We can't ask for hardware resources since we
		 * don't currently support hardware classification
		 * for these MAC clients.
		 */
		if (req_hwgrp_flag) {
			*diag = VNIC_IOC_DIAG_NO_HWRINGS;
			return (ENOTSUP);
		}
		mac_unicast_primary_get(vnic->vn_lower_mh, mac_addr_arg);
		*addr_len_ptr_arg = mac_addr_len(vnic->vn_lower_mh);
		mac_flags |= MAC_UNICAST_VNIC_PRIMARY;
		break;
	}

	/* record the (possibly resolved) address type on the VNIC */
	vnic->vn_addr_type = vnic_addr_type;

	err = mac_unicast_add(vnic->vn_mch, mac_addr_arg, mac_flags,
	    &vnic->vn_muh, vid, &mac_diag);
	if (err != 0) {
		if (vnic_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
			/* release factory MAC address */
			mac_addr_factory_release(vnic->vn_mch, *addr_slot);
		}
		*diag = vnic_mac2vnic_diag(mac_diag);
	}

	return (err);
}

/*
 * Create a new VNIC upon request from administrator.
 * Returns 0 on success, an errno on failure.
 *
 * vnic_id is the datalink id of the new VNIC and linkid the datalink id of
 * the lower link. When VNIC_IOC_CREATE_ANCHOR is set in flags, the VNIC is
 * an anchor VNIC: no lower MAC is opened and no MAC address is assigned.
 *
 * For a non-anchor VNIC, *vnic_addr_type, *mac_len, mac_addr and *mac_slot
 * are in/out: they describe the requested address and are updated to
 * reflect the address that was actually assigned. *diag is reset to
 * VNIC_IOC_DIAG_NONE on entry and may be set to a more specific code on
 * failure.
 *
 * On any failure, every resource acquired so far is released and the
 * vnic_t is returned to the cache.
 */
/* ARGSUSED */
int
vnic_dev_create(datalink_id_t vnic_id, datalink_id_t linkid,
    vnic_mac_addr_type_t *vnic_addr_type, int *mac_len, uchar_t *mac_addr,
    int *mac_slot, uint_t mac_prefix_len, uint16_t vid, vrid_t vrid,
    int af, mac_resource_props_t *mrp, uint32_t flags, vnic_ioc_diag_t *diag,
    cred_t *credp)
{
	vnic_t *vnic;
	mac_register_t *mac;
	int err;
	boolean_t is_anchor = ((flags & VNIC_IOC_CREATE_ANCHOR) != 0);
	char vnic_name[MAXNAMELEN];
	const mac_info_t *minfop;
	uint32_t req_hwgrp_flag = B_FALSE;

	*diag = VNIC_IOC_DIAG_NONE;

	rw_enter(&vnic_lock, RW_WRITER);

	/* does a VNIC with the same id already exist? */
	err = mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
	    (mod_hash_val_t *)&vnic);
	if (err == 0) {
		rw_exit(&vnic_lock);
		return (EEXIST);
	}

	vnic = kmem_cache_alloc(vnic_cache, KM_NOSLEEP);
	if (vnic == NULL) {
		rw_exit(&vnic_lock);
		return (ENOMEM);
	}

	/*
	 * Start from an all-zero vnic_t: the bail path relies on unset
	 * handles being NULL.
	 */
	bzero(vnic, sizeof (*vnic));

	vnic->vn_id = vnic_id;
	vnic->vn_link_id = linkid;
	vnic->vn_vrid = vrid;
	vnic->vn_af = af;

	if (!is_anchor) {
		if (linkid == DATALINK_INVALID_LINKID) {
			err = EINVAL;
			goto bail;
		}

		/*
		 * Open the lower MAC and assign its initial bandwidth and
		 * MAC address. We do this here during VNIC creation and
		 * do not wait until the upper MAC client open so that we
		 * can validate the VNIC creation parameters (bandwidth,
		 * MAC address, etc) and reserve a factory MAC address if
		 * one was requested.
		 */
		err = mac_open_by_linkid(linkid, &vnic->vn_lower_mh);
		if (err != 0)
			goto bail;

		/*
		 * VNIC(vlan) over VNICs(vlans) is not supported.
		 */
		if (mac_is_vnic(vnic->vn_lower_mh)) {
			err = EINVAL;
			goto bail;
		}

		/* only ethernet support for now */
		minfop = mac_info(vnic->vn_lower_mh);
		if (minfop->mi_nativemedia != DL_ETHER) {
			err = ENOTSUP;
			goto bail;
		}

		(void) dls_mgmt_get_linkinfo(vnic_id, vnic_name, NULL, NULL,
		    NULL);
		err = mac_client_open(vnic->vn_lower_mh, &vnic->vn_mch,
		    vnic_name, MAC_OPEN_FLAGS_IS_VNIC);
		if (err != 0)
			goto bail;

		if (mrp != NULL) {
			/* remember whether hardware rings were requested */
			if ((mrp->mrp_mask & MRP_RX_RINGS) != 0 ||
			    (mrp->mrp_mask & MRP_TX_RINGS) != 0) {
				req_hwgrp_flag = B_TRUE;
			}
			err = mac_client_set_resources(vnic->vn_mch, mrp);
			if (err != 0)
				goto bail;
		}
		/* assign a MAC address to the VNIC */

		err = vnic_unicast_add(vnic, *vnic_addr_type, mac_slot,
		    mac_prefix_len, mac_len, mac_addr, flags, diag, vid,
		    req_hwgrp_flag);
		if (err != 0) {
			/* make sure bail does not try to remove the address */
			vnic->vn_muh = NULL;
			if (diag != NULL && req_hwgrp_flag)
				*diag = VNIC_IOC_DIAG_NO_HWRINGS;
			goto bail;
		}

		/* register to receive notification from underlying MAC */
		vnic->vn_mnh = mac_notify_add(vnic->vn_lower_mh, vnic_notify_cb,
		    vnic);

		/* report the address actually assigned back to the caller */
		*vnic_addr_type = vnic->vn_addr_type;
		vnic->vn_addr_len = *mac_len;
		vnic->vn_vid = vid;

		bcopy(mac_addr, vnic->vn_addr, vnic->vn_addr_len);

		if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY)
			vnic->vn_slot_id = *mac_slot;

		/*
		 * Set the initial VNIC capabilities. If the VNIC is created
		 * over MACs which does not support native vlan, disable
		 * VNIC's hardware checksum capability if its VID is not 0,
		 * since the underlying MAC would get the hardware checksum
		 * offset wrong in case of VLAN packets.
		 */
		if (vid == 0 || !mac_capab_get(vnic->vn_lower_mh,
		    MAC_CAPAB_NO_NATIVEVLAN, NULL)) {
			if (!mac_capab_get(vnic->vn_lower_mh, MAC_CAPAB_HCKSUM,
			    &vnic->vn_hcksum_txflags))
				vnic->vn_hcksum_txflags = 0;
		} else {
			vnic->vn_hcksum_txflags = 0;
		}
	}

	/* register with the MAC module */
	if ((mac = mac_alloc(MAC_VERSION)) == NULL) {
		/*
		 * err must be set explicitly here: for a regular VNIC it is
		 * still 0 from the successful vnic_unicast_add() call, and
		 * for an anchor VNIC it still holds the mod_hash_find()
		 * status, which is not an errno. Without this the caller
		 * would be told the create succeeded (or be handed a bogus
		 * error code) although the vnic is freed below.
		 */
		err = ENOMEM;
		goto bail;
	}

	mac->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
	mac->m_driver = vnic;
	mac->m_dip = vnic_get_dip();
	mac->m_instance = (uint_t)-1;
	mac->m_src_addr = vnic->vn_addr;
	mac->m_callbacks = &vnic_m_callbacks;

	if (!is_anchor) {
		/*
		 * If this is a VNIC based VLAN, then we check for the
		 * margin unless it has been created with the force
		 * flag. If we are configuring a VLAN over an etherstub,
		 * we don't check the margin even if force is not set.
		 */
		if (vid == 0 || (flags & VNIC_IOC_CREATE_FORCE) != 0) {
			if (vid != VLAN_ID_NONE)
				vnic->vn_force = B_TRUE;
			/*
			 * As the current margin size of the underlying mac is
			 * used to determine the margin size of the VNIC
			 * itself, request the underlying mac not to change
			 * to a smaller margin size.
			 */
			err = mac_margin_add(vnic->vn_lower_mh,
			    &vnic->vn_margin, B_TRUE);
			ASSERT(err == 0);
		} else {
			vnic->vn_margin = VLAN_TAGSZ;
			err = mac_margin_add(vnic->vn_lower_mh,
			    &vnic->vn_margin, B_FALSE);
			if (err != 0) {
				mac_free(mac);
				if (diag != NULL)
					*diag = VNIC_IOC_DIAG_MACMARGIN_INVALID;
				goto bail;
			}
		}

		mac_sdu_get(vnic->vn_lower_mh, &mac->m_min_sdu,
		    &mac->m_max_sdu);
	} else {
		vnic->vn_margin = VLAN_TAGSZ;
		mac->m_min_sdu = ANCHOR_VNIC_MIN_MTU;
		mac->m_max_sdu = ANCHOR_VNIC_MAX_MTU;
	}

	mac->m_margin = vnic->vn_margin;

	err = mac_register(mac, &vnic->vn_mh);
	mac_free(mac);
	if (err != 0) {
		/* undo the margin request made above */
		VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
		    vnic->vn_margin) == 0);
		goto bail;
	}

	/* Set the VNIC's MAC in the client */
	if (!is_anchor)
		mac_set_upper_mac(vnic->vn_mch, vnic->vn_mh, mrp);

	err = dls_devnet_create(vnic->vn_mh, vnic->vn_id, crgetzoneid(credp));
	if (err != 0) {
		VERIFY(is_anchor || mac_margin_remove(vnic->vn_lower_mh,
		    vnic->vn_margin) == 0);
		(void) mac_unregister(vnic->vn_mh);
		goto bail;
	}

	/* add new VNIC to hash table */
	err = mod_hash_insert(vnic_hash, VNIC_HASH_KEY(vnic_id),
	    (mod_hash_val_t)vnic);
	ASSERT(err == 0);
	vnic_count++;

	/* from now on vnic_notify_cb() delivers notifications */
	vnic->vn_enabled = B_TRUE;
	rw_exit(&vnic_lock);

	return (0);

bail:
	rw_exit(&vnic_lock);
	if (!is_anchor) {
		/* release whatever was acquired, newest first */
		if (vnic->vn_mnh != NULL)
			(void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
		if (vnic->vn_muh != NULL)
			(void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
		if (vnic->vn_mch != NULL)
			mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
		if (vnic->vn_lower_mh != NULL)
			mac_close(vnic->vn_lower_mh);
	}

	kmem_cache_free(vnic_cache, vnic);
	return (err);
}

/*
 * Modify the properties of an existing VNIC.
 *
 * Only the existence of the VNIC is checked: returns ENOENT when vnic_id
 * is unknown and 0 otherwise. No property is changed.
 */
/* ARGSUSED */
int
vnic_dev_modify(datalink_id_t vnic_id, uint_t modify_mask,
    vnic_mac_addr_type_t mac_addr_type, uint_t mac_len, uchar_t *mac_addr,
    uint_t mac_slot, mac_resource_props_t *mrp)
{
	vnic_t *found = NULL;
	int rc = 0;

	rw_enter(&vnic_lock, RW_WRITER);
	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
	    (mod_hash_val_t *)&found) != 0)
		rc = ENOENT;
	rw_exit(&vnic_lock);

	return (rc);
}

/*
 * Delete the VNIC identified by vnic_id.
 *
 * Returns ENOENT if no such VNIC exists, the error from
 * dls_devnet_destroy() or mac_disable() if either of them fails, and 0
 * once the VNIC has been torn down. If mac_disable() fails, the devnet
 * entry that was just destroyed is created again before returning.
 */
/* ARGSUSED */
int
vnic_dev_delete(datalink_id_t vnic_id, uint32_t flags, cred_t *credp)
{
	vnic_t *vnic = NULL;
	mod_hash_val_t val;
	datalink_id_t tmpid;
	int rc;

	rw_enter(&vnic_lock, RW_WRITER);

	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(vnic_id),
	    (mod_hash_val_t *)&vnic) != 0) {
		rw_exit(&vnic_lock);
		return (ENOENT);
	}

	if ((rc = dls_devnet_destroy(vnic->vn_mh, &tmpid, B_TRUE)) != 0) {
		rw_exit(&vnic_lock);
		return (rc);
	}

	ASSERT(vnic_id == tmpid);

	/*
	 * We cannot unregister the MAC yet. Unregistering would
	 * free up mac_impl_t which should not happen at this time.
	 * So disable mac_impl_t by calling mac_disable(). This will prevent
	 * any new claims on mac_impl_t.
	 */
	if ((rc = mac_disable(vnic->vn_mh)) != 0) {
		/* put back the devnet entry destroyed above */
		(void) dls_devnet_create(vnic->vn_mh, vnic_id,
		    crgetzoneid(credp));
		rw_exit(&vnic_lock);
		return (rc);
	}

	/* stop vnic_notify_cb() from delivering further notifications */
	vnic->vn_enabled = B_FALSE;
	(void) mod_hash_remove(vnic_hash, VNIC_HASH_KEY(vnic_id), &val);
	ASSERT(vnic == (vnic_t *)val);
	vnic_count--;
	rw_exit(&vnic_lock);

	/*
	 * XXX-nicolas shouldn't have a void cast here, if it's
	 * expected that the function will never fail, then we should
	 * have an ASSERT().
	 */
	(void) mac_unregister(vnic->vn_mh);

	/* anchor VNICs have no lower MAC and nothing more to release */
	if (vnic->vn_lower_mh != NULL) {
		/*
		 * Check if MAC address for the vnic was obtained from the
		 * factory MAC addresses. If yes, release it.
		 */
		if (vnic->vn_addr_type == VNIC_MAC_ADDR_TYPE_FACTORY) {
			(void) mac_addr_factory_release(vnic->vn_mch,
			    vnic->vn_slot_id);
		}
		(void) mac_margin_remove(vnic->vn_lower_mh, vnic->vn_margin);
		(void) mac_notify_remove(vnic->vn_mnh, B_TRUE);
		(void) mac_unicast_remove(vnic->vn_mch, vnic->vn_muh);
		mac_client_close(vnic->vn_mch, MAC_CLOSE_FLAGS_IS_VNIC);
		mac_close(vnic->vn_lower_mh);
	}

	kmem_cache_free(vnic_cache, vnic);
	return (0);
}

/*
 * Transmit entry point. Frees the whole message chain and returns NULL,
 * so nothing is handed back to the caller.
 */
/* ARGSUSED */
mblk_t *
vnic_m_tx(void *arg, mblk_t *mp_chain)
{
	/*
	 * This function could be invoked for an anchor VNIC when sending
	 * broadcast and multicast packets, and unicast packets which did
	 * not match any local known destination.
	 */
	freemsgchain(mp_chain);
	return (NULL);
}

/*
 * ioctl entry point. No ioctl is handled here: every request is
 * negatively acknowledged with ENOTSUP.
 */
/*ARGSUSED*/
static void
vnic_m_ioctl(void *arg, queue_t *q, mblk_t *mp)
{
	miocnak(q, mp, 0, ENOTSUP);
}

/*
 * Statistics entry point. Unlike most other entry points this one cannot
 * be passed through to the lower MAC client: it backs the per-VNIC kstats,
 * which have to be available whether or not the VNIC has MAC clients.
 *
 * Returns 0 and stores the value in *val for supported statistics, and
 * ENOTSUP for anchor VNICs and for any statistic not listed below.
 */
static int
vnic_m_stat(void *arg, uint_t stat, uint64_t *val)
{
	vnic_t *vn = arg;

	/* An anchor VNIC has no lower MAC and hence no statistics. */
	if (vn->vn_lower_mh == NULL)
		return (ENOTSUP);

	/*
	 * Only a subset of what real hardware would report is supported;
	 * anything else must yield ENOTSUP.
	 */
	switch (stat) {
	case MAC_STAT_LINK_STATE:
	case MAC_STAT_LINK_UP:
	case MAC_STAT_PROMISC:
	case MAC_STAT_IFSPEED:
	case MAC_STAT_MULTIRCV:
	case MAC_STAT_MULTIXMT:
	case MAC_STAT_BRDCSTRCV:
	case MAC_STAT_BRDCSTXMT:
	case MAC_STAT_OPACKETS:
	case MAC_STAT_OBYTES:
	case MAC_STAT_IERRORS:
	case MAC_STAT_OERRORS:
	case MAC_STAT_RBYTES:
	case MAC_STAT_IPACKETS:
		*val = mac_client_stat_get(vn->vn_mch, stat);
		return (0);
	default:
		break;
	}

	return (ENOTSUP);
}

/*
 * Invoked by the upper MAC to retrieve the lower MAC client handle
 * corresponding to a VNIC. A pointer to this function is obtained
 * by the upper MAC via capability query.
 *
 * XXX-nicolas Note: this currently causes all VNIC MAC clients to
 * receive the same MAC client handle for the same VNIC. This is ok
 * as long as we have only one VNIC MAC client which sends and
 * receives data, but we don't currently enforce this at the MAC layer.
 */
static void *
vnic_mac_client_handle(void *vnic_arg)
{
	vnic_t *vnic = vnic_arg;

	return (vnic->vn_mch);
}


/*
 * Capability query entry point. Returns B_TRUE when the VNIC has the
 * capability `cap' (filling in cap_data where the capability carries
 * data) and B_FALSE otherwise.
 */
/* ARGSUSED */
static boolean_t
vnic_m_capab_get(void *arg, mac_capab_t cap, void *cap_data)
{
	vnic_t *vn = arg;

	switch (cap) {
	case MAC_CAPAB_HCKSUM: {
		uint32_t *txflags = cap_data;

		/* expose only these checksum flags to the upper layer */
		*txflags = vn->vn_hcksum_txflags &
		    (HCKSUM_INET_FULL_V4 | HCKSUM_IPHDRCKSUM |
		    HCKSUM_INET_PARTIAL);
		return (B_TRUE);
	}
	case MAC_CAPAB_VNIC: {
		mac_capab_vnic_t *vcap = cap_data;

		/*
		 * An anchor VNIC has neither an underlying NIC nor a
		 * MAC client handle to offer.
		 */
		if (vn->vn_lower_mh == NULL)
			return (B_FALSE);

		if (vcap != NULL) {
			vcap->mcv_arg = vn;
			vcap->mcv_mac_client_handle = vnic_mac_client_handle;
		}
		return (B_TRUE);
	}
	case MAC_CAPAB_ANCHOR_VNIC:
		/* a VNIC is an anchor exactly when it has no lower MAC */
		if (vn->vn_lower_mh != NULL)
			return (B_FALSE);
		ASSERT(vn->vn_link_id == 0);
		return (B_TRUE);
	case MAC_CAPAB_NO_NATIVEVLAN:
		return (B_FALSE);
	case MAC_CAPAB_NO_ZCOPY:
		return (B_TRUE);
	case MAC_CAPAB_VRRP: {
		mac_capab_vrrp_t *vrrp = cap_data;

		/* only VNICs carrying a VRID advertise VRRP */
		if (vn->vn_vrid == 0)
			return (B_FALSE);
		if (vrrp != NULL)
			vrrp->mcv_af = vn->vn_af;
		return (B_TRUE);
	}
	default:
		break;
	}

	return (B_FALSE);
}

/* Start entry point: nothing to do, always reports success. */
/* ARGSUSED */
static int
vnic_m_start(void *arg)
{
	return (0);
}

/* Stop entry point: intentionally empty. */
/* ARGSUSED */
static void
vnic_m_stop(void *arg)
{
}

/* Promiscuous-mode entry point: nothing to do, always reports success. */
/* ARGSUSED */
static int
vnic_m_promisc(void *arg, boolean_t on)
{
	return (0);
}

/* Multicast entry point: nothing to do, always reports success. */
/* ARGSUSED */
static int
vnic_m_multicst(void *arg, boolean_t add, const uint8_t *addrp)
{
	return (0);
}

static int
vnic_m_unicst(void *arg, const uint8_t *macaddr)
{
	vnic_t *vnic = arg;

	return (mac_vnic_unicast_set(vnic->vn_mch, macaddr));
}

/*
 * Callback functions for set/get of properties
 */

/*
 * Set a property on a VNIC. Only MAC_PROP_MTU is handled, and only for
 * VNICs whose vn_link_id is DATALINK_INVALID_LINKID (etherstubs).
 *
 * Returns ENOTSUP for any other VNIC or property, EINVAL when the value
 * is too small to hold an MTU or lies outside
 * [ANCHOR_VNIC_MIN_MTU, ANCHOR_VNIC_MAX_MTU], and otherwise the result of
 * mac_maxsdu_update().
 */
/*ARGSUSED*/
static int
vnic_m_setprop(void *m_driver, const char *pr_name, mac_prop_id_t pr_num,
    uint_t pr_valsize, const void *pr_val)
{
	vnic_t		*vnic = m_driver;
	uint32_t	new_mtu;

	/* allow setting MTU only on an etherstub */
	if (vnic->vn_link_id != DATALINK_INVALID_LINKID)
		return (ENOTSUP);

	/* the MTU is the only settable property */
	if (pr_num != MAC_PROP_MTU)
		return (ENOTSUP);

	if (pr_valsize < sizeof (new_mtu))
		return (EINVAL);

	bcopy(pr_val, &new_mtu, sizeof (new_mtu));
	if (new_mtu < ANCHOR_VNIC_MIN_MTU || new_mtu > ANCHOR_VNIC_MAX_MTU)
		return (EINVAL);

	return (mac_maxsdu_update(vnic->vn_mh, new_mtu));
}

/*
 * Describe a property. For an etherstub (vn_link_id equal to
 * DATALINK_INVALID_LINKID) the permitted MTU range is reported; nothing
 * is reported for other VNICs or other properties.
 */
/* ARGSUSED */
static void vnic_m_propinfo(void *m_driver, const char *pr_name,
    mac_prop_id_t pr_num, mac_prop_info_handle_t prh)
{
	vnic_t		*vnic = m_driver;

	/* MTU setting allowed only on an etherstub */
	if (vnic->vn_link_id != DATALINK_INVALID_LINKID)
		return;

	if (pr_num == MAC_PROP_MTU) {
		mac_prop_info_set_range_uint32(prh,
		    ANCHOR_VNIC_MIN_MTU, ANCHOR_VNIC_MAX_MTU);
	}
}


/*
 * Fill in *info with the attributes of the VNIC whose id is given in
 * info->vn_vnic_id.
 *
 * Returns ENOENT if the link is not visible from the zone of credp or if
 * no such VNIC exists, and 0 otherwise.
 */
int
vnic_info(vnic_info_t *info, cred_t *credp)
{
	vnic_t		*vn;

	/* Make sure that the VNIC link is visible from the caller's zone. */
	if (!dls_devnet_islinkvisible(info->vn_vnic_id, crgetzoneid(credp)))
		return (ENOENT);

	rw_enter(&vnic_lock, RW_WRITER);

	if (mod_hash_find(vnic_hash, VNIC_HASH_KEY(info->vn_vnic_id),
	    (mod_hash_val_t *)&vn) != 0) {
		rw_exit(&vnic_lock);
		return (ENOENT);
	}

	/* link and VLAN/VRRP attributes */
	info->vn_link_id = vn->vn_link_id;
	info->vn_vid = vn->vn_vid;
	info->vn_force = vn->vn_force;
	info->vn_vrid = vn->vn_vrid;
	info->vn_af = vn->vn_af;

	/* MAC address attributes */
	info->vn_mac_addr_type = vn->vn_addr_type;
	info->vn_mac_len = vn->vn_addr_len;
	info->vn_mac_slot = vn->vn_slot_id;
	info->vn_mac_prefix_len = 0;
	bcopy(vn->vn_addr, info->vn_mac_addr, MAXMACADDRLEN);

	/* resource properties stay zeroed when there is no MAC client */
	bzero(&info->vn_resource_props, sizeof (mac_resource_props_t));
	if (vn->vn_mch != NULL)
		mac_resource_ctl_get(vn->vn_mch, &info->vn_resource_props);

	rw_exit(&vnic_lock);
	return (0);
}

/*
 * Notification callback registered on the lower MAC by vnic_dev_create().
 * Propagates link-state changes, and, for VNICs that use the primary MAC
 * address, unicast address changes, to the VNIC's own MAC.
 */
static void
vnic_notify_cb(void *arg, mac_notify_type_t type)
{
	vnic_t *vn = arg;

	/*
	 * Stay silent while the vnic is not fully initialized or is in
	 * the process of being torn down.
	 */
	if (!vn->vn_enabled)
		return;

	if (type == MAC_NOTE_LINK) {
		mac_link_update(vn->vn_mh,
		    mac_client_stat_get(vn->vn_mch, MAC_STAT_LINK_STATE));
		return;
	}

	/* every other notification type except UNICST is ignored */
	if (type != MAC_NOTE_UNICST)
		return;

	/*
	 * Only the VLAN VNIC needs to be notified with primary MAC
	 * address change.
	 */
	if (vn->vn_addr_type != VNIC_MAC_ADDR_TYPE_PRIMARY)
		return;

	/* refresh our copy of the unicast MAC address value */
	mac_unicast_primary_get(vn->vn_lower_mh, vn->vn_addr);

	/* notify its upper layer MAC about MAC address change */
	mac_unicst_update(vn->vn_mh, (const uint8_t *)vn->vn_addr);
}