diff options
Diffstat (limited to 'usr/src')
-rw-r--r-- | usr/src/cmd/mdb/common/modules/genunix/genunix.c | 7 | ||||
-rw-r--r-- | usr/src/cmd/mdb/common/modules/genunix/netstack.c | 30 | ||||
-rw-r--r-- | usr/src/cmd/mdb/common/modules/genunix/netstack.h | 3 | ||||
-rw-r--r-- | usr/src/cmd/mdb/common/modules/genunix/zone.c | 51 | ||||
-rw-r--r-- | usr/src/cmd/mdb/common/modules/genunix/zone.h | 4 | ||||
-rw-r--r-- | usr/src/uts/common/os/netstack.c | 33 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/apix/apix.c | 93 | ||||
-rw-r--r-- | usr/src/uts/i86pc/io/apix/apix_intr.c | 5 | ||||
-rw-r--r-- | usr/src/uts/i86pc/os/intr.c | 414 |
9 files changed, 576 insertions, 64 deletions
diff --git a/usr/src/cmd/mdb/common/modules/genunix/genunix.c b/usr/src/cmd/mdb/common/modules/genunix/genunix.c index d68260a8f8..48578a52f4 100644 --- a/usr/src/cmd/mdb/common/modules/genunix/genunix.c +++ b/usr/src/cmd/mdb/common/modules/genunix/genunix.c @@ -4059,6 +4059,9 @@ static const mdb_dcmd_t dcmds[] = { /* from netstack.c */ { "netstack", "", "show stack instances", netstack }, + { "netstackid2netstack", ":", + "translate a netstack id to its netstack_t", + netstackid2netstack }, /* from nvpair.c */ { NVPAIR_DCMD_NAME, NVPAIR_DCMD_USAGE, NVPAIR_DCMD_DESCR, @@ -4149,6 +4152,10 @@ static const mdb_dcmd_t dcmds[] = { pfiles_help }, /* from zone.c */ + { "zid2zone", ":", "find the zone_t with the given zone id", + zid2zone }, + { "zdid2zone", ":", "find the zone_t with the given zone debug id", + zdid2zone }, { "zone", "?[-r [-v]]", "display kernel zone(s)", zoneprt }, { "zsd", ":[-v] [zsd_key]", "display zone-specific-data entries for " "selected zones", zsd }, diff --git a/usr/src/cmd/mdb/common/modules/genunix/netstack.c b/usr/src/cmd/mdb/common/modules/genunix/netstack.c index 588bd6dbf3..d46bd85d1f 100644 --- a/usr/src/cmd/mdb/common/modules/genunix/netstack.c +++ b/usr/src/cmd/mdb/common/modules/genunix/netstack.c @@ -21,10 +21,9 @@ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
*/ -#pragma ident "%Z%%M% %I% %E% SMI" - #include <mdb/mdb_modapi.h> #include <mdb/mdb_ks.h> #include <mdb/mdb_ctf.h> @@ -121,3 +120,30 @@ netstack(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) return (DCMD_OK); } + +static int +netstackid_lookup_cb(uintptr_t addr, const netstack_t *ns, void *arg) +{ + netstackid_t nid = *(uintptr_t *)arg; + if (ns->netstack_stackid == nid) + mdb_printf("%p\n", addr); + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +int +netstackid2netstack(uintptr_t addr, uint_t flags, int argc, + const mdb_arg_t *argv) +{ + if (!(flags & DCMD_ADDRSPEC) || argc != 0) + return (DCMD_USAGE); + + if (mdb_walk("netstack", (mdb_walk_cb_t)netstackid_lookup_cb, &addr) == + -1) { + mdb_warn("failed to walk zone"); + return (DCMD_ERR); + } + + return (DCMD_OK); +} diff --git a/usr/src/cmd/mdb/common/modules/genunix/netstack.h b/usr/src/cmd/mdb/common/modules/genunix/netstack.h index 392565caca..f5773c36c1 100644 --- a/usr/src/cmd/mdb/common/modules/genunix/netstack.h +++ b/usr/src/cmd/mdb/common/modules/genunix/netstack.h @@ -26,8 +26,6 @@ #ifndef _NETSTACK_H #define _NETSTACK_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <mdb/mdb_modapi.h> #ifdef __cplusplus @@ -38,6 +36,7 @@ int netstack_walk_init(mdb_walk_state_t *); int netstack_walk_step(mdb_walk_state_t *); int netstack(uintptr_t, uint_t, int, const mdb_arg_t *); +int netstackid2netstack(uintptr_t, uint_t, int, const mdb_arg_t *); #ifdef __cplusplus } diff --git a/usr/src/cmd/mdb/common/modules/genunix/zone.c b/usr/src/cmd/mdb/common/modules/genunix/zone.c index 96f6b598ec..fc243061cb 100644 --- a/usr/src/cmd/mdb/common/modules/genunix/zone.c +++ b/usr/src/cmd/mdb/common/modules/genunix/zone.c @@ -20,6 +20,7 @@ */ /* * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. 
*/ #include <mdb/mdb_param.h> @@ -54,6 +55,56 @@ char *zone_status_names[] = { "dead" /* ZONE_IS_DEAD */ }; +static int +zid_lookup_cb(uintptr_t addr, const zone_t *zone, void *arg) +{ + zoneid_t zid = *(uintptr_t *)arg; + if (zone->zone_id == zid) + mdb_printf("%p\n", addr); + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +int +zid2zone(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + if (!(flags & DCMD_ADDRSPEC) || argc != 0) + return (DCMD_USAGE); + + if (mdb_walk("zone", (mdb_walk_cb_t)zid_lookup_cb, &addr) == -1) { + mdb_warn("failed to walk zone"); + return (DCMD_ERR); + } + + return (DCMD_OK); +} + +static int +zdid_lookup_cb(uintptr_t addr, const zone_t *zone, void *arg) +{ + zoneid_t zdid = *(uintptr_t *)arg; + if (zone->zone_did == zdid) + mdb_printf("%p\n", addr); + + return (WALK_NEXT); +} + +/*ARGSUSED*/ +int +zdid2zone(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) +{ + if (!(flags & DCMD_ADDRSPEC) || argc != 0) + return (DCMD_USAGE); + + if (mdb_walk("zone", (mdb_walk_cb_t)zdid_lookup_cb, &addr) == -1) { + mdb_warn("failed to walk zone"); + return (DCMD_ERR); + } + + return (DCMD_OK); +} + int zoneprt(uintptr_t addr, uint_t flags, int argc, const mdb_arg_t *argv) { diff --git a/usr/src/cmd/mdb/common/modules/genunix/zone.h b/usr/src/cmd/mdb/common/modules/genunix/zone.h index e0e5038527..94a383e41c 100644 --- a/usr/src/cmd/mdb/common/modules/genunix/zone.h +++ b/usr/src/cmd/mdb/common/modules/genunix/zone.h @@ -27,14 +27,14 @@ #ifndef _ZONE_H #define _ZONE_H -#pragma ident "%Z%%M% %I% %E% SMI" - #include <mdb/mdb_modapi.h> #ifdef __cplusplus extern "C" { #endif +extern int zid2zone(uintptr_t, uint_t, int argc, const mdb_arg_t *); +extern int zdid2zone(uintptr_t, uint_t, int argc, const mdb_arg_t *); extern int zoneprt(uintptr_t, uint_t, int argc, const mdb_arg_t *); extern int zone_walk_init(mdb_walk_state_t *); diff --git a/usr/src/uts/common/os/netstack.c b/usr/src/uts/common/os/netstack.c index b8467fbe13..93fd1a387d 
100644 --- a/usr/src/uts/common/os/netstack.c +++ b/usr/src/uts/common/os/netstack.c @@ -22,6 +22,7 @@ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. + * Copyright (c) 2012, Joyent, Inc. All rights reserved. */ #include <sys/param.h> @@ -205,6 +206,7 @@ void netstack_unregister(int moduleid) { netstack_t *ns; + boolean_t created = B_FALSE; ASSERT(moduleid >= 0 && moduleid < NS_MAX); @@ -223,7 +225,33 @@ netstack_unregister(int moduleid) nm_state_t *nms = &ns->netstack_m_state[moduleid]; mutex_enter(&ns->netstack_lock); - if (ns_reg[moduleid].nr_shutdown != NULL && + + /* + * We need to be careful here. We could actually have a netstack + * being created as we speak waiting for us to let go of this + * lock to proceed. It may have set NSS_CREATE_NEEDED, but not + * have gotten to the point of completing it yet. If + * NSS_CREATE_NEEDED, we can safely just remove it here and + * never create the module. However, if NSS_CREATE_INPROGRESS is + * set, we need to still flag this module for shutdown and + * deletion, just as though it had reached NSS_CREATE_COMPLETED. + * + * It is safe to do that because of two different guarantees + * that exist in the system. The first is that before we do a + * create, shutdown, or destroy, we ensure that nothing else is + * in progress in the system for this netstack and wait for it + * to complete. Secondly, because the zone is being created, we + * know that the following call to apply_all_netstack will block + * on the zone finishing its initialization. 
+ */ + if (nms->nms_flags & NSS_CREATE_NEEDED) + nms->nms_flags &= ~NSS_CREATE_NEEDED; + + if (nms->nms_flags & NSS_CREATE_INPROGRESS || + nms->nms_flags & NSS_CREATE_COMPLETED) + created = B_TRUE; + + if (ns_reg[moduleid].nr_shutdown != NULL && created && (nms->nms_flags & NSS_CREATE_COMPLETED) && (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) { nms->nms_flags |= NSS_SHUTDOWN_NEEDED; @@ -231,8 +259,7 @@ netstack_unregister(int moduleid) netstack_t *, ns, int, moduleid); } if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) && - ns_reg[moduleid].nr_destroy != NULL && - (nms->nms_flags & NSS_CREATE_COMPLETED) && + ns_reg[moduleid].nr_destroy != NULL && created && (nms->nms_flags & NSS_DESTROY_ALL) == 0) { nms->nms_flags |= NSS_DESTROY_NEEDED; DTRACE_PROBE2(netstack__destroy__needed, diff --git a/usr/src/uts/i86pc/io/apix/apix.c b/usr/src/uts/i86pc/io/apix/apix.c index 8c4ccb6a0a..81b88dc426 100644 --- a/usr/src/uts/i86pc/io/apix/apix.c +++ b/usr/src/uts/i86pc/io/apix/apix.c @@ -635,10 +635,10 @@ apix_send_eoi(void) /* * platform_intr_enter * - * Called at the beginning of the interrupt service routine to - * mask all level equal to and below the interrupt priority - * of the interrupting vector. An EOI should be given to - * the interrupt controller to enable other HW interrupts. + * Called at the beginning of the interrupt service routine, but unlike + * pcplusmp, does not mask interrupts. An EOI is given to the interrupt + * controller to enable other HW interrupts but interrupts are still + * masked by the IF flag. * * Return -1 for spurious interrupts * @@ -750,58 +750,30 @@ apix_intr_exit(int prev_ipl, int arg2) } /* - * Mask all interrupts below or equal to the given IPL. - * Any changes made to this function must also change X2APIC - * version of setspl. + * The pcplusmp setspl code uses the TPR to mask all interrupts at or below the + * given ipl, but apix never uses the TPR and we never mask a subset of the + * interrupts. 
They are either all blocked by the IF flag or all can come in. + * + * For setspl, we mask all interrupts for XC_HI_PIL, otherwise, interrupts can + * come in if currently enabled by the IF flag. This table shows the state of + * the IF flag when we leave this function. + * + * curr IF | ipl == 15 ipl != 15 + * --------+--------------------------- + * 0 | 0 0 + * 1 | 0 1 */ static void apix_setspl(int ipl) { - /* interrupts at ipl above this cannot be in progress */ - apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1; - /* - * Mask all interrupts for XC_HI_PIL (i.e set TPR to 0xf). - * Otherwise, enable all interrupts (i.e. set TPR to 0). + * Interrupts at ipl above this cannot be in progress, so the following + * mask is ok. */ - if (ipl != XC_HI_PIL) - ipl = 0; - -#if defined(__amd64) - setcr8((ulong_t)ipl); -#else - if (apic_have_32bit_cr8) - setcr8((ulong_t)ipl); - else - apicadr[APIC_TASK_REG] = ipl << APIC_IPL_SHIFT; -#endif - - /* - * this is a patch fix for the ALR QSMP P5 machine, so that interrupts - * have enough time to come in before the priority is raised again - * during the idle() loop. - */ - if (apic_setspl_delay) - (void) apic_reg_ops->apic_get_pri(); -} - -/* - * X2APIC version of setspl. - */ -static void -x2apix_setspl(int ipl) -{ - /* interrupts at ipl above this cannot be in progress */ apic_cpus[psm_get_cpu_id()].aci_ISR_in_progress &= (2 << ipl) - 1; - /* - * Mask all interrupts for XC_HI_PIL (i.e set TPR to 0xf). - * Otherwise, enable all interrupts (i.e. set TPR to 0). - */ - if (ipl != XC_HI_PIL) - ipl = 0; - - X2APIC_WRITE(APIC_TASK_REG, ipl << APIC_IPL_SHIFT); + if (ipl == XC_HI_PIL) + cli(); } int @@ -1112,6 +1084,10 @@ apix_post_cyclic_setup(void *arg) apic_redistribute_sample_interval, DDI_IPL_2); } +/* + * Called the first time we enable x2apic mode on this cpu. + * Update some of the function pointers to use x2apic routines. 
+ */ void x2apic_update_psm() { @@ -1120,14 +1096,17 @@ x2apic_update_psm() ASSERT(pops != NULL); /* - * The xxx_intr_exit() sets TPR and sends back EOI. The - * xxx_setspl() sets TPR. These two routines are not - * needed in new design. + * The pcplusmp module x2apic_update_psm function does this: * - * pops->psm_intr_exit = x2apic_intr_exit; - * pops->psm_setspl = x2apic_setspl; + * pops->psm_intr_exit = x2apic_intr_exit; + * pops->psm_setspl = x2apic_setspl; + * pops->psm_send_ipi = x2apic_send_ipi; + * + * Note the x2apic prefix vs. our apix prefix for setspl. + * The x2apic_intr_exit() sets TPR and sends back EOI. The + * x2apic_setspl() sets TPR. This functionality is not + * used in new design. */ - pops->psm_setspl = x2apix_setspl; pops->psm_send_ipi = x2apic_send_ipi; send_dirintf = pops->psm_send_ipi; @@ -2077,6 +2056,9 @@ apix_intx_get_pending(int irqno) return (pending); } +/* + * This function will mask the interrupt on the I/O APIC + */ static void apix_intx_set_mask(int irqno) { @@ -2106,6 +2088,9 @@ apix_intx_set_mask(int irqno) intr_restore(iflag); } +/* + * This function will clear the mask for the interrupt on the I/O APIC + */ static void apix_intx_clear_mask(int irqno) { diff --git a/usr/src/uts/i86pc/io/apix/apix_intr.c b/usr/src/uts/i86pc/io/apix/apix_intr.c index e5d072b525..d870a4d365 100644 --- a/usr/src/uts/i86pc/io/apix/apix_intr.c +++ b/usr/src/uts/i86pc/io/apix/apix_intr.c @@ -862,6 +862,9 @@ apix_dispatch_lowlevel(uint_t vector, uint_t oldipl) apix_intr_thread_epilog(cpu, oldipl); } +/* + * Interrupt service routine, called with interrupts disabled. + */ void apix_do_interrupt(struct regs *rp, trap_trace_rec_t *ttp) { @@ -904,7 +907,7 @@ apix_do_interrupt(struct regs *rp, trap_trace_rec_t *ttp) } /* - * Raise the interrupt priority. 
Send EOI to local APIC + * Send EOI to local APIC */ newipl = (*setlvl)(oldipl, (int *)&rp->r_trapno); #ifdef TRAPTRACE diff --git a/usr/src/uts/i86pc/os/intr.c b/usr/src/uts/i86pc/os/intr.c index 91d7afcf36..8acaf8fc73 100644 --- a/usr/src/uts/i86pc/os/intr.c +++ b/usr/src/uts/i86pc/os/intr.c @@ -21,6 +21,420 @@ /* * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2012, Joyent, Inc. All rights reserverd. + */ + +/* + * To understand the present state of interrupt handling on i86pc, we must + * first consider the history of interrupt controllers and our way of handling + * interrupts. + * + * History of Interrupt Controllers on i86pc + * ----------------------------------------- + * + * Intel 8259 and 8259A + * + * The first interrupt controller that attained widespread use on i86pc was + * the Intel 8259(A) Programmable Interrupt Controller that first saw use with + * the 8086. It took up to 8 interrupt sources and combined them into one + * output wire. Up to 8 8259s could be slaved together providing up to 64 IRQs. + * With the switch to the 8259A, level mode interrupts became possible. For a + * long time on i86pc the 8259A was the only way to handle interrupts and it + * had its own set of quirks. The 8259A and its corresponding interval timer + * the 8254 are programmed using outb and inb instructions. + * + * Intel Advanced Programmable Interrupt Controller (APIC) + * + * Starting around the time of the introduction of the P6 family + * microarchitecture (i686) Intel introduced a new interrupt controller. + * Instead of having the series of slaved 8259A devices, Intel opted to outfit + * each processor with a Local APIC (lapic) and to outfit the system with at + * least one, but potentially more, I/O APICs (ioapic). The lapics and ioapics + * initially communicated over a dedicated bus, but this has since been + * replaced. 
Each physical core and even hyperthread currently contains its + * own local apic, which is not shared. There are a few exceptions for + * hyperthreads, but that does not usually concern us. + * + * Instead of talking directly to 8259 for status, sending End Of Interrupt + * (EOI), etc. a microprocessor now communicates directly to the lapic. This + * also allows for each microprocessor to be able to have independent controls. + * The programming method is different from the 8259. Consumers map the lapic + * registers into uncacheable memory to read and manipulate the state. + * + * The number of addressable interrupt vectors was increased to 256. However + * vectors 0-31 are reserved for the processor exception handling, leaving the + * remaining vectors for general use. In addition to hardware generated + * interrupts, the lapic provides a way for generating inter-processor + * interrupts (IPI) which are the basis for CPU cross calls and CPU pokes. + * + * AMD ended up implementing the Intel APIC architecture in lieu of their work + * with Cyrix. + * + * Intel x2apic + * + * The x2apic is an extension to the lapic which started showing up around the + * same time as the Sandy Bridge chipsets. It provides a new programming mode + * as well as new features. The goal of the x2apic is to solve a few problems + * with the previous generation of lapic and the x2apic is backwards compatible + * with the previous programming and model. The only downside to using the + * backward compatibility is that you are not able to take advantage of the new + * x2apic features. + * + * o The APIC ID is increased from an 8-bit value to a 32-bit value. This + * increases the maximum number of addressable physical processors beyond + * 256. This new ID is assembled in a similar manner as the information that + * is obtainable by the extended cpuid topology leaves. + * + * o A new means of generating IPIs was introduced. 
+ * + * o Instead of memory mapping the registers, the x2apic only allows for + * programming it through a series of wrmsrs. This has important semantic + * side effects. Recall that the registers were previously all mapped to + * uncacheable memory which meant that all operations to the local apic were + * serializing instructions. With the switch to using wrmsrs this has been + * relaxed and these operations can no longer be assumed to be serializing + * instructions. + * + * Note that for the rest of this discussion we are only going to concern ourselves with the + * apic and x2apic which practically all of i86pc has been using now for + * quite some time. + * + * Interrupt Priority Levels + * ------------------------- + * + * On i86pc systems there are a total of fifteen interrupt priority levels + * (ipls) which range from 1-15. Level 0 is for normal processing and + * non-interrupt processing. To manipulate these values the family of spl + * functions (which date back to UNIX on the PDP-11) are used. Specifically, + * splr() to raise the priority level and splx() to lower it. One should not + * generally call setspl() directly. + * + * Both i86pc and the supported SPARC platforms honor the same conventions for + * the meaning behind these IPLs. The most important IPL is the platform's + * LOCK_LEVEL (0xa on i86pc). If a thread is above LOCK_LEVEL it _must_ not + * sleep on any synchronization object. The only allowed synchronization + * primitive is a mutex that has been specifically initialized to be a spin + * lock (see mutex_init(9F)). Another important level is DISP_LEVEL (0xb on + * i86pc). You must be at DISP_LEVEL if you want to control the dispatcher. + * The XC_HI_PIL is the highest level (0xf) and is used during cross-calls. + * + * Each interrupt that is registered in the system fires at a specific IPL. + * Generally most interrupts fire below LOCK_LEVEL. + * + * PSM Drivers + * ----------- + * + * We currently have three sets of PSM drivers available. 
uppc, pcplusmp, and + * apix. uppc (uni-processor PC) is the original driver that interacts with the + * 8259A and 8254. In general, it is not used anymore given the prevalence of + * the apic. + * + * The system prefers to use the apix driver over the pcplusmp driver. The apix + * driver requires HW support for an x2apic. If there is no x2apic HW, apix + * will not be used. In general we prefer using the apix driver over the + * pcplusmp driver because it gives us much more flexibility with respect to + * interrupts. In the apix driver each local apic has its own independent set + * of interrupts, whereas the pcplusmp driver only has a single global set of + * interrupts. This is why pcplusmp only supports a finite number of interrupts + * per IPL -- generally 16, often less. The apix driver supports using either + * the x2apic or the local apic programming modes. The programming mode does not + * change the number of interrupts available, just the number of processors + * that we can address. For the apix driver, the x2apic mode is enabled if the + * system supports interrupt re-mapping, otherwise the module manages the + * x2apic in local mode. + * + * When there is no x2apic present, we default back to the pcplusmp PSM driver. + * In general, this is not problematic unless you have more than 256 + * processors in the machine or you do not have enough interrupts available. + * + * Controlling Interrupt Generation on i86pc + * ----------------------------------------- + * + * There are two different ways to manipulate which interrupts will be + * generated on i86pc. Each offers different degrees of control. + * + * The first is through the flags register (eflags and rflags on i386 and amd64 + * respectively). The IF bit determines whether or not interrupts are enabled + * or disabled. This is manipulated in one of several ways. The most common way + * is through the cli and sti instructions. 
These clear the IF flag and set it, + * respectively, for the current processor. The other common way is through the + * use of the intr_clear and intr_restore functions. + * + * Assuming interrupts are not blocked by the IF flag, then the second form is + * through the Processor-Priority Register (PPR). The PPR is used to determine + * whether or not a pending interrupt should be delivered. If the ipl of the + * new interrupt is higher than the current value in the PPR, then the lapic + * will either deliver it immediately (if interrupts are not in progress) or it + * will deliver it once the current interrupt processing has issued an EOI. The + * highest unmasked interrupt will be the one delivered. + * + * The PPR register is based upon the max of the following two registers in the + * lapic: the TPR register (also known as CR8 on amd64) that can be used to + * mask interrupt levels, and the current vector. Because the pcplusmp module + * always sets TPR appropriately early in the do_interrupt path, we can usually + * just think that the PPR is the TPR. The pcplusmp module also issues an EOI + * once it has set the TPR, so higher priority interrupts can come in while + * we're servicing a lower priority interrupt. + * + * Handling Interrupts + * ------------------- + * + * Interrupts can be broken down into three categories based on priority and + * source: + * + * o High level interrupts + * o Low level hardware interrupts + * o Low level software interrupts + * + * High Level Interrupts + * + * High level interrupts encompass both hardware-sourced and software-sourced + * interrupts. Examples of high level hardware interrupts include the serial + * console. High level software-sourced interrupts are still delivered through + * the local apic through IPIs. This is primarily cross calls. + * + * When a high level interrupt comes in, we will raise the SPL and then pin the + * current lwp to the processor. 
We will use its lwp, but our own interrupt + * stack and process the high level interrupt in-situ. These handlers are + * designed to be very short in nature and cannot go to sleep, only block on a + * spin lock. If the interrupt has a lot of work to do, it must generate a + * low-priority software interrupt that will be processed later. + * + * Low level hardware interrupts + * + * Low level hardware interrupts start off like their high-level cousins. The + * current CPU contains a number of kernel threads (kthread_t) that can be used + * to process low level interrupts. These are shared between both low level + * hardware and software interrupts. Note that while we run with our + * kthread_t, we borrow the pinned thread's lwp_t until such a time as we hit a + * synchronization object. If we hit one and need to sleep, then the scheduler + * will instead create the rest of what we need. + * + * Low level software interrupts + * + * Low level software interrupts are handled in a similar way as hardware + * interrupts, but the notification vector is different. Each CPU has a bitmask + * of pending software interrupts. We can notify a CPU to process software + * interrupts through a specific trap vector as well as through several + * checks that are performed throughout the code. These checks will look at + * processing software interrupts as we lower our spl. + * + * We attempt to process the highest pending software interrupt that we can + * which is greater than our current IPL. If none currently exist, then we move + * on. We process a software interrupt in a similar fashion to a hardware + * interrupt. + * + * Traditional Interrupt Flow + * -------------------------- + * + * The following diagram tracks the flow of the traditional uppc and pcplusmp + * interrupt handlers. The apix driver has its own version of do_interrupt(). + * We come into the interrupt handler with all interrupts masked by the IF + * flag. 
This is because we set up the handler using an interrupt-gate, which + * is defined architectuarlly to have cleared the IF flag for us. + * + * +--------------+ +----------------+ +-----------+ + * | _interrupt() |--->| do_interrupt() |--->| *setlvl() | + * +--------------+ +----------------+ +-----------+ + * | | | + * | | | + * low-level| | | softint + * HW int | | +---------------------------------------+ + * +--------------+ | | | + * | intr_thread_ |<-----+ | hi-level int | + * | prolog() | | +----------+ | + * +--------------+ +--->| hilevel_ | Not on intr stack | + * | | intr_ |-----------------+ | + * | | prolog() | | | + * +------------+ +----------+ | | + * | switch_sp_ | | On intr v | + * | and_call() | | Stack +------------+ | + * +------------+ | | switch_sp_ | | + * | v | and_call() | | + * v +-----------+ +------------+ | + * +-----------+ | dispatch_ | | | + * | dispatch_ | +-------------------| hilevel() |<------------+ | + * | hardint() | | +-----------+ | + * +-----------+ | | + * | v | + * | +-----+ +----------------------+ +-----+ hi-level | + * +---->| sti |->| av_dispatch_autovect |->| cli |---------+ | + * +-----+ +----------------------+ +-----+ | | + * | | | | + * v | | | + * +----------+ | | | + * | for each | | | | + * | handler | | | | + * | *intr() | | v | + * +--------------+ +----------+ | +----------------+ | + * | intr_thread_ | low-level | | hilevel_intr_ | | + * | epilog() |<-------------------------------+ | epilog() | | + * +--------------+ +----------------+ | + * | | | | + * | +----------------------v v---------------------+ | + * | +------------+ | + * | +---------------------->| *setlvlx() | | + * | | +------------+ | + * | | | | + * | | v | + * | | +--------+ +------------------+ +-------------+ | + * | | | return |<----| softint pending? 
|----->| dosoftint() |<-----+ + * | | +--------+ no +------------------+ yes +-------------+ + * | | ^ | | + * | | | softint pil too low | | + * | | +--------------------------------------+ | + * | | v + * | | +-----------+ +------------+ +-----------+ + * | | | dispatch_ |<-----| switch_sp_ |<---------| *setspl() | + * | | | softint() | | and_call() | +-----------+ + * | | +-----------+ +------------+ + * | | | + * | | v + * | | +-----+ +----------------------+ +-----+ +------------+ + * | | | sti |->| av_dispatch_autovect |->| cli |->| dosoftint_ | + * | | +-----+ +----------------------+ +-----+ | epilog() | + * | | +------------+ + * | | | | + * | +----------------------------------------------------+ | + * v | + * +-----------+ | + * | interrupt | | + * | thread |<---------------------------------------------------+ + * | blocked | + * +-----------+ + * | + * v + * +----------------+ +------------+ +-----------+ +-------+ +---------+ + * | set_base_spl() |->| *setlvlx() |->| splhigh() |->| sti() |->| swtch() | + * +----------------+ +------------+ +-----------+ +-------+ +---------+ + * + * Calls made on Interrupt Stacks and Epilogue routines + * + * We use the switch_sp_and_call() assembly routine to switch our sp to the + * interrupt stacks and then call the appropriate dispatch function. In the + * case of interrupts which may block, softints and hardints, we always ensure + * that we are still on the interrupt thread when we call the epilog routine. + * This is not just important, it's necessary. If the interrupt thread blocked, + * we won't return from our switch_sp_and_call() function and instead we'll go + * through and set ourselves up to swtch() directly. + * + * New Interrupt Flow + * ------------------ + * + * The apix module has its own interrupt path. This is done for various + * reasons. The first is that rather than having global interrupt vectors, we + * now have per-cpu vectors. 
+ * + * The other substantial change is that the apix design does not use the TPR to + * mask interrupts below the current level. In fact, except for one special + * case, it does not use the TPR at all. Instead, it only uses the IF flag + * (cli/sti) to either block all interrupts or allow any interrupts to come in. + * The design is such that when interrupts are allowed to come in, if we are + * currently servicing a higher priority interupt, the new interrupt is treated + * as pending and serviced later. Specifically, in the pcplusmp module's + * apic_intr_enter function the code masks interrupts at or below the current + * IPL using the TPR before sending EOI, whereas the apix module's + * apix_intr_enter function simply sends EOI. + * + * The one special case where the apix code uses the TPR is when it calls + * through the apic_reg_ops function pointer apic_write_task_reg in + * apix_init_intr() to initially mask all levels and then finally to enable all + * levels. + * + * Recall that we come into the interrupt handler with all interrupts masked + * by the IF flag. This is because we set up the handler using an + * interrupt-gate which is defined architectuarlly to have cleared the IF flag + * for us. + * + * +--------------+ +---------------------+ + * | _interrupt() |--->| apix_do_interrupt() | + * +--------------+ +---------------------+ + * | + * hard int? +----+--------+ softint? + * | | (but no low-level looping) + * +-----------+ | + * | *setlvl() | | + * +---------+ +-----------+ +----------------------------------+ + * |apix_add_| check IPL | | + * |pending_ |<-------------+------+----------------------+ | + * |hardint()| low-level int| hi-level int| | + * +---------+ v v | + * | check IPL +-----------------+ +---------------+ | + * +--+-----+ | apix_intr_ | | apix_hilevel_ | | + * | | | thread_prolog() | | intr_prolog() | | + * | return +-----------------+ +---------------+ | + * | | | On intr | + * | +------------+ | stack? 
+------------+ | + * | | switch_sp_ | +---------| switch_sp_ | | + * | | and_call() | | | and_call() | | + * | +------------+ | +------------+ | + * | | | | | + * | +----------------+ +----------------+ | + * | | apix_dispatch_ | | apix_dispatch_ | | + * | | lowlevel() | | hilevel() | | + * | +----------------+ +----------------+ | + * | | | | + * | v v | + * | +-------------------------+ | + * | |apix_dispatch_by_vector()|----+ | + * | +-------------------------+ | | + * | !XC_HI_PIL| | | | | + * | +---+ +-------+ +---+ | | + * | |sti| |*intr()| |cli| | | + * | +---+ +-------+ +---+ | hi-level? | + * | +---------------------------+----+ | + * | v low-level? v | + * | +----------------+ +----------------+ | + * | | apix_intr_ | | apix_hilevel_ | | + * | | thread_epilog()| | intr_epilog() | | + * | +----------------+ +----------------+ | + * | | | | + * | v-----------------+--------------------------------+ | + * | +------------+ | + * | | *setlvlx() | +----------------------------------------------------+ + * | +------------+ | + * | | | +--------------------------------+ low + * v v v------+ v | level + * +------------------+ +------------------+ +-----------+ | pending? + * | apix_do_pending_ |----->| apix_do_pending_ |----->| apix_do_ |--+ + * | hilevel() | | hardint() | | softint() | | + * +------------------+ +------------------+ +-----------+ return + * | | | + * | while pending | while pending | while pending + * | hi-level | low-level | softint + * | | | + * +---------------+ +-----------------+ +-----------------+ + * | apix_hilevel_ | | apix_intr_ | | apix_do_ | + * | intr_prolog() | | thread_prolog() | | softint_prolog()| + * +---------------+ +-----------------+ +-----------------+ + * | On intr | | + * | stack? 
+------------+ +------------+ +------------+ + * +--------| switch_sp_ | | switch_sp_ | | switch_sp_ | + * | | and_call() | | and_call() | | and_call() | + * | +------------+ +------------+ +------------+ + * | | | | + * +------------------+ +------------------+ +------------------------+ + * | apix_dispatch_ | | apix_dispatch_ | | apix_dispatch_softint()| + * | pending_hilevel()| | pending_hardint()| +------------------------+ + * +------------------+ +------------------+ | | | | + * | | | | | | | | + * | +----------------+ | +----------------+ | | | | + * | | apix_hilevel_ | | | apix_intr_ | | | | | + * | | intr_epilog() | | | thread_epilog()| | | | | + * | +----------------+ | +----------------+ | | | | + * | | | | | | | | + * | +------------+ | +----------+ +------+ | | | + * | | *setlvlx() | | |*setlvlx()| | | | | + * | +------------+ | +----------+ | +----------+ | +---------+ + * | | +---+ |av_ | +---+ |apix_do_ | + * +---------------------------------+ |sti| |dispatch_ | |cli| |softint_ | + * | apix_dispatch_pending_autovect()| +---+ |softvect()| +---+ |epilog() | + * +---------------------------------+ +----------+ +---------+ + * |!XC_HI_PIL | | | | + * +---+ +-------+ +---+ +----------+ +-------+ + * |sti| |*intr()| |cli| |apix_post_| |*intr()| + * +---+ +-------+ +---+ |hardint() | +-------+ + * +----------+ */ #include <sys/cpuvar.h> |