summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKeith M Wesolowski <wesolows@foobazco.org>2014-11-06 15:43:53 +0000
committerKeith M Wesolowski <wesolows@foobazco.org>2014-11-06 15:43:53 +0000
commit30bf51611cf364907c3816b6ef16d018d9f9f605 (patch)
treeafca625a445b38e3439f968b76ccb93cc8766bf2
parent474a11108331eb3cfffdfd61d02a64fcc861f791 (diff)
parent08eb19132eebba77347f1e2d5adac3f8d1e552ca (diff)
downloadillumos-joyent-30bf51611cf364907c3816b6ef16d018d9f9f605.tar.gz
[illumos-gate merge]
commit 08eb19132eebba77347f1e2d5adac3f8d1e552ca 5284 ualarm and usleep need to be suppressed in XPG7 commit b59c4a48daf5a1863ecac763711b497b2f8321e4 4110 panic while reaping htable_cache
-rw-r--r--usr/src/head/unistd.h8
-rw-r--r--usr/src/man/man3c/ualarm.3c155
-rw-r--r--usr/src/man/man3c/usleep.3c126
-rw-r--r--usr/src/uts/i86pc/vm/htable.c382
4 files changed, 375 insertions, 296 deletions
diff --git a/usr/src/head/unistd.h b/usr/src/head/unistd.h
index a86fa99d06..826c777ba8 100644
--- a/usr/src/head/unistd.h
+++ b/usr/src/head/unistd.h
@@ -554,13 +554,13 @@ extern off_t tell(int);
extern int truncate(const char *, off_t);
#endif /* !defined(__XOPEN_OR_POSIX) || defined(_XPG4_2)... */
extern char *ttyname(int);
-#if !defined(__XOPEN_OR_POSIX) || defined(_XPG4_2) || defined(__EXTENSIONS__)
+#if (defined(_XPG4_2) && !defined(_XPG7)) || !defined(_STRICT_SYMBOLS)
extern useconds_t ualarm(useconds_t, useconds_t);
-#endif /* !defined(__XOPEN_OR_POSIX) || defined(_XPG4_2)... */
+#endif
extern int unlink(const char *);
-#if !defined(__XOPEN_OR_POSIX) || defined(_XPG4_2) || defined(__EXTENSIONS__)
+#if (defined(_XPG4_2) && !defined(_XPG7)) || !defined(_STRICT_SYMBOLS)
extern int usleep(useconds_t);
-#endif /* !defined(__XOPEN_OR_POSIX) || defined(_XPG4_2)... */
+#endif
#if !defined(__XOPEN_OR_POSIX) || defined(_XPG4_2) || defined(__EXTENSIONS__)
extern pid_t vfork(void) __RETURNS_TWICE;
#endif /* !defined(__XOPEN_OR_POSIX) || defined(_XPG4_2)... */
diff --git a/usr/src/man/man3c/ualarm.3c b/usr/src/man/man3c/ualarm.3c
index c7d3d02e34..269d5ebe8f 100644
--- a/usr/src/man/man3c/ualarm.3c
+++ b/usr/src/man/man3c/ualarm.3c
@@ -1,71 +1,108 @@
-'\" te
+.\" Copyright 2014 Garrett D'Amore <garrett@damore.org>
.\" Copyright (c) 1980 Regents of the University of California. All rights reserved. The Berkeley software License Agreement specifies the terms and conditions for redistribution. Copyright (c) 2002, Sun Microsystems, Inc. All Rights Reserved Portions Copyright (c) 1992,
.\" X/Open Company Limited All Rights Reserved
.\" Sun Microsystems, Inc. gratefully acknowledges The Open Group for permission to reproduce portions of its copyrighted documentation. Original documentation from The Open Group can be obtained online at
.\" http://www.opengroup.org/bookstore/.
.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html.
.\" This notice shall appear on any product containing this material.
-.TH UALARM 3C "Aug 14, 2002"
-.SH NAME
-ualarm \- schedule signal after interval in microseconds
-.SH SYNOPSIS
-.LP
-.nf
-#include <unistd.h>
-
-\fBuseconds_t\fR \fBualarm\fR(\fBuseconds_t\fR \fIuseconds\fR, \fBuseconds_t\fR \fIinterval\fR);
-.fi
-
-.SH DESCRIPTION
-.sp
-.LP
-The \fBualarm()\fR function causes the \fBSIGALRM\fR signal to be generated for
+.Dd "Aug 16, 2014"
+.Dt UALARM 3C
+.Os
+.Sh NAME
+.Nm ualarm
+.Nd schedule signal after interval in microseconds
+.Sh SYNOPSIS
+.In unistd.h
+.Ft useconds_t
+.Fn ualarm "useconds_t useconds" "useconds_t interval"
+.Sh DESCRIPTION
+The
+.Fn ualarm
+function causes the
+.Dv SIGALRM
+signal to be generated for
the calling process after the number of real-time microseconds specified by the
-\fIuseconds\fR argument has elapsed. When the \fIinterval\fR argument is
+.Fa useconds
+argument has elapsed. When the
+.Fa interval
+argument is
non-zero, repeated timeout notification occurs with a period in microseconds
specified by the \fIinterval\fR argument. If the notification signal,
-\fBSIGALRM\fR, is not caught or ignored, the calling process is terminated.
-.sp
-.LP
+.Dv SIGALRM ,
+is not caught or ignored, the calling process is terminated.
+.Lp
Because of scheduling delays, resumption of execution when the signal is caught
may be delayed an arbitrary amount of time.
-.sp
-.LP
-Interactions between \fBualarm()\fR and either \fBalarm\fR(2) or
-\fBsleep\fR(3C) are unspecified.
-.SH RETURN VALUES
-.sp
-.LP
-The \fBualarm()\fR function returns the number of microseconds remaining from
-the previous \fBualarm()\fR call. If no timeouts are pending or if
-\fBualarm()\fR has not previously been called, \fBualarm()\fR returns \fB0\fR.
-.SH ERRORS
-.sp
-.LP
+.Lp
+Interactions between
+.Fn ualarm
+and either
+.Xr alarm 2
+or
+.Xr sleep 3C
+are unspecified.
+.Sh RETURN VALUES
+The
+.Fn ualarm
+function returns the number of microseconds remaining from
+the previous
+.Fn ualarm
+call. If no timeouts are pending or if
+.Fn ualarm
+has not previously been called,
+.Fn ualarm
+returns 0.
+.Sh ERRORS
No errors are defined.
-.SH USAGE
-.sp
-.LP
-The \fBualarm()\fR function is a simplified interface to \fBsetitimer\fR(2),
-and uses the \fBITIMER_REAL\fR interval timer.
-.SH ATTRIBUTES
-.sp
-.LP
-See \fBattributes\fR(5) for descriptions of the following attributes:
-.sp
-
-.sp
-.TS
-box;
-c | c
-l | l .
-ATTRIBUTE TYPE ATTRIBUTE VALUE
-_
-Interface Stability Standard
-.TE
-
-.SH SEE ALSO
-.sp
-.LP
-\fBalarm\fR(2), \fBsetitimer\fR(2), \fBsighold\fR(3C), \fBsignal\fR(3C),
-\fBsleep\fR(3C), \fBusleep\fR(3C), \fBattributes\fR(5), \fBstandards\fR(5)
+.Sh USAGE
+The
+.Fn ualarm
+function is a simplified interface to
+.Xr setitimer 2 ,
+and uses the
+.Dv ITIMER_REAL
+interval timer. Its use has been deprecated in favor of the
+.Xr timer_create 3C
+family of functions.
+.Sh INTERFACE STABILITY
+.Sy Obsolete Standard .
+.Sh SEE ALSO
+.Xr alarm 2 ,
+.Xr setitimer 2 ,
+.Xr sighold 3C ,
+.Xr signal 3C ,
+.Xr sleep 3C ,
+.Xr timer_create 3C ,
+.Xr usleep 3C ,
+.Xr unistd.h 3HEAD ,
+.Xr standards 5
+.Sh STANDARDS
+The
+.Fn ualarm
+function is available in the following compilation environments. See
+.Xr standards 5 .
+.Lp
+.Bl -bullet -compact
+.\".It
+.\".St -p1003.1-90
+.\".It
+.\".St -p1003.1b-93
+.\".It
+.\".St -p1003.1c-95
+.\".It
+.\".St -p1003.1-2001
+.\".It
+.\".St -xpg3
+.\".It
+.\".St -xpg4
+.It
+.St -xpg4.2
+.It
+.St -susv2
+.It
+.St -susv3
+.El
+.Lp
+It is marked obsolete in
+.St -susv3 , and was removed from
+.St -p1003.1-2008 .
diff --git a/usr/src/man/man3c/usleep.3c b/usr/src/man/man3c/usleep.3c
index 025bc9b7db..bde0508ec2 100644
--- a/usr/src/man/man3c/usleep.3c
+++ b/usr/src/man/man3c/usleep.3c
@@ -1,4 +1,4 @@
-'\" te
+.\" Copyright 2014 Garrett D'Amore <garrett@damore.org>
.\" Copyright (c) 2008, Sun Microsystems, Inc. All Rights Reserved.
.\" Copyright (c) 1980 Regents of the University of California. All rights reserved. The Berkeley software License Agreement specifies the terms and conditions for redistribution.
.\" Portions Copyright (c) 1992, X/Open Company Limited All Rights Reserved.
@@ -6,68 +6,74 @@
.\" http://www.opengroup.org/bookstore/.
.\" The Institute of Electrical and Electronics Engineers and The Open Group, have given us permission to reprint portions of their documentation. In the following statement, the phrase "this text" refers to portions of the system documentation. Portions of this text are reprinted and reproduced in electronic form in the Sun OS Reference Manual, from IEEE Std 1003.1, 2004 Edition, Standard for Information Technology -- Portable Operating System Interface (POSIX), The Open Group Base Specifications Issue 6, Copyright (C) 2001-2004 by the Institute of Electrical and Electronics Engineers, Inc and The Open Group. In the event of any discrepancy between these versions and the original IEEE and The Open Group Standard, the original IEEE and The Open Group Standard is the referee document. The original Standard can be obtained online at http://www.opengroup.org/unix/online.html.
.\" This notice shall appear on any product containing this material.
-.TH USLEEP 3C "Feb 5, 2008"
-.SH NAME
-usleep \- suspend execution for interval in microseconds
-.SH SYNOPSIS
-.LP
-.nf
-#include <unistd.h>
-
-\fBint\fR \fBusleep\fR(\fBuseconds_t\fR \fIuseconds\fR);
-.fi
-
-.SH DESCRIPTION
-.sp
-.LP
-The \fBusleep()\fR function suspends the caller from execution for the number
-of microseconds specified by the \fIuseconds\fR argument. The actual suspension
+.Dd "Aug 16, 2014"
+.Dt USLEEP 3C
+.Os
+.Sh NAME
+.Nm usleep
+.Nd suspend execution for interval in microseconds
+.Sh SYNOPSIS
+.In unistd.h
+.Ft int
+.Fn usleep "useconds_t useconds"
+.Sh DESCRIPTION
+The
+.Fn usleep
+function suspends the caller from execution for the number
+of microseconds specified by the
+.Fa useconds
+argument. The actual suspension
time might be less than requested because any caught signal will terminate
-\fBusleep()\fR following execution of that signal's catching routine. The
+.Fn usleep
+following execution of that signal's catching routine. The
suspension time might be longer than requested by an arbitrary amount because
of the scheduling of other activity in the system.
-.sp
-.LP
-If the value of \fIuseconds\fR is 0, then the call has no effect.
-.sp
-.LP
-The use of the \fBusleep()\fR function has no effect on the action or blockage
+.Lp
+If the value of
+.Fa useconds
+is 0, then the call has no effect.
+.Lp
+The use of the
+.Fn usleep
+function has no effect on the action or blockage
of any signal. In a multithreaded process, only the invoking thread is
suspended from execution.
-.SH RETURN VALUES
-.sp
-.LP
-On completion, \fBusleep()\fR returns \fB0\fR. There are no error returns.
-.SH ERRORS
-.sp
-.LP
+.Sh RETURN VALUES
+On completion,
+.Fn usleep
+returns 0. There are no error returns.
+.Sh ERRORS
No errors are returned.
-.SH USAGE
-.sp
-.LP
-The \fBusleep()\fR function is included for its historical usage. The
-\fBnanosleep\fR(3C) function is preferred over this function.
-.SH ATTRIBUTES
-.sp
-.LP
-See \fBattributes\fR(5) for descriptions of the following attributes:
-.sp
-
-.sp
-.TS
-box;
-c | c
-l | l .
-ATTRIBUTE TYPE ATTRIBUTE VALUE
-_
-Interface Stability Committed
-_
-MT-Level Safe
-_
-Standard See \fBstandards\fR(5).
-.TE
-
-.SH SEE ALSO
-.sp
-.LP
-\fBnanosleep\fR(3C), \fBsleep\fR(3C), \fBattributes\fR(5), \fBstandards\fR(5)
+.Sh USAGE
+The
+.Fn usleep
+function is included for its historical usage and is Obsolete. The
+.Xr nanosleep 3C
+function is preferred over this function.
+.Sh INTERFACE STABILITY
+.Sy Obsolete Standard .
+.Sh MT-LEVEL
+.Sy Safe .
+.Sh SEE ALSO
+.Xr nanosleep 3C ,
+.Xr sleep 3C ,
+.Xr unistd.h 3HEAD ,
+.Xr standards 5
+.Sh STANDARDS
+The
+.Fn usleep
+function is available in the following compilation environments. See
+.Xr standards 5 .
+.Lp
+.Bl -bullet -compact
+.It
+.St -xpg4.2
+.It
+.St -susv2
+.It
+.St -susv3
+.El
+.Lp
+It is marked obsolete in
+.St -susv3 , and was removed from
+.St -p1003.1-2008 .
diff --git a/usr/src/uts/i86pc/vm/htable.c b/usr/src/uts/i86pc/vm/htable.c
index 4ed2a74413..665eb9d414 100644
--- a/usr/src/uts/i86pc/vm/htable.c
+++ b/usr/src/uts/i86pc/vm/htable.c
@@ -426,24 +426,147 @@ htable_adjust_reserve()
}
}
+/*
+ * Search the active htables for one to steal. Start at a different hash
+ * bucket every time to help spread the pain of stealing.
+ */
+static void
+htable_steal_active(hat_t *hat, uint_t cnt, uint_t threshold,
+ uint_t *stolen, htable_t **list)
+{
+ static uint_t h_seed = 0;
+ htable_t *higher, *ht;
+ uint_t h, e, h_start;
+ uintptr_t va;
+ x86pte_t pte;
+
+ h = h_start = h_seed++ % hat->hat_num_hash;
+ do {
+ higher = NULL;
+ HTABLE_ENTER(h);
+ for (ht = hat->hat_ht_hash[h]; ht; ht = ht->ht_next) {
+
+ /*
+ * Can we rule out reaping?
+ */
+ if (ht->ht_busy != 0 ||
+ (ht->ht_flags & HTABLE_SHARED_PFN) ||
+ ht->ht_level > 0 || ht->ht_valid_cnt > threshold ||
+ ht->ht_lock_cnt != 0)
+ continue;
+
+ /*
+ * Increment busy so the htable can't disappear. We
+ * drop the htable mutex to avoid deadlocks with
+ * hat_pageunload() and the hment mutex while we
+ * call hat_pte_unmap()
+ */
+ ++ht->ht_busy;
+ HTABLE_EXIT(h);
+
+ /*
+ * Try stealing.
+ * - unload and invalidate all PTEs
+ */
+ for (e = 0, va = ht->ht_vaddr;
+ e < HTABLE_NUM_PTES(ht) && ht->ht_valid_cnt > 0 &&
+ ht->ht_busy == 1 && ht->ht_lock_cnt == 0;
+ ++e, va += MMU_PAGESIZE) {
+ pte = x86pte_get(ht, e);
+ if (!PTE_ISVALID(pte))
+ continue;
+ hat_pte_unmap(ht, e, HAT_UNLOAD, pte, NULL);
+ }
+
+ /*
+ * Reacquire htable lock. If we didn't remove all
+ * mappings in the table, or another thread added a new
+ * mapping behind us, give up on this table.
+ */
+ HTABLE_ENTER(h);
+ if (ht->ht_busy != 1 || ht->ht_valid_cnt != 0 ||
+ ht->ht_lock_cnt != 0) {
+ --ht->ht_busy;
+ continue;
+ }
+
+ /*
+ * Steal it and unlink the page table.
+ */
+ higher = ht->ht_parent;
+ unlink_ptp(higher, ht, ht->ht_vaddr);
+
+ /*
+ * remove from the hash list
+ */
+ if (ht->ht_next)
+ ht->ht_next->ht_prev = ht->ht_prev;
+
+ if (ht->ht_prev) {
+ ht->ht_prev->ht_next = ht->ht_next;
+ } else {
+ ASSERT(hat->hat_ht_hash[h] == ht);
+ hat->hat_ht_hash[h] = ht->ht_next;
+ }
+
+ /*
+ * Break to outer loop to release the
+ * higher (ht_parent) pagetable. This
+ * spreads out the pain caused by
+ * pagefaults.
+ */
+ ht->ht_next = *list;
+ *list = ht;
+ ++*stolen;
+ break;
+ }
+ HTABLE_EXIT(h);
+ if (higher != NULL)
+ htable_release(higher);
+ if (++h == hat->hat_num_hash)
+ h = 0;
+ } while (*stolen < cnt && h != h_start);
+}
/*
- * This routine steals htables from user processes for htable_alloc() or
- * for htable_reap().
+ * Move hat to the end of the kas list
+ */
+static void
+move_victim(hat_t *hat)
+{
+ ASSERT(MUTEX_HELD(&hat_list_lock));
+
+ /* unlink victim hat */
+ if (hat->hat_prev)
+ hat->hat_prev->hat_next = hat->hat_next;
+ else
+ kas.a_hat->hat_next = hat->hat_next;
+
+ if (hat->hat_next)
+ hat->hat_next->hat_prev = hat->hat_prev;
+ else
+ kas.a_hat->hat_prev = hat->hat_prev;
+ /* relink at end of hat list */
+ hat->hat_next = NULL;
+ hat->hat_prev = kas.a_hat->hat_prev;
+ if (hat->hat_prev)
+ hat->hat_prev->hat_next = hat;
+ else
+ kas.a_hat->hat_next = hat;
+
+ kas.a_hat->hat_prev = hat;
+}
+
+/*
+ * This routine steals htables from user processes. Called by htable_reap
+ * (reap=TRUE) or htable_alloc (reap=FALSE).
*/
static htable_t *
-htable_steal(uint_t cnt)
+htable_steal(uint_t cnt, boolean_t reap)
{
hat_t *hat = kas.a_hat; /* list starts with khat */
htable_t *list = NULL;
htable_t *ht;
- htable_t *higher;
- uint_t h;
- uint_t h_start;
- static uint_t h_seed = 0;
- uint_t e;
- uintptr_t va;
- x86pte_t pte;
uint_t stolen = 0;
uint_t pass;
uint_t threshold;
@@ -463,19 +586,12 @@ htable_steal(uint_t cnt)
atomic_inc_32(&htable_dont_cache);
for (pass = 0; pass <= htable_steal_passes && stolen < cnt; ++pass) {
threshold = pass * mmu.ptes_per_table / htable_steal_passes;
- hat = kas.a_hat;
- for (;;) {
- /*
- * Clear the victim flag and move to next hat
- */
- mutex_enter(&hat_list_lock);
- if (hat != kas.a_hat) {
- hat->hat_flags &= ~HAT_VICTIM;
- cv_broadcast(&hat_list_cv);
- }
- hat = hat->hat_next;
+ mutex_enter(&hat_list_lock);
+ /* skip the first hat (kernel) */
+ hat = kas.a_hat->hat_next;
+ for (;;) {
/*
* Skip any hat that is already being stolen from.
*
@@ -493,54 +609,12 @@ htable_steal(uint_t cnt)
(HAT_VICTIM | HAT_SHARED | HAT_FREEING)) != 0)
hat = hat->hat_next;
- if (hat == NULL) {
- mutex_exit(&hat_list_lock);
+ if (hat == NULL)
break;
- }
/*
- * Are we finished?
- */
- if (stolen == cnt) {
- /*
- * Try to spread the pain of stealing,
- * move victim HAT to the end of the HAT list.
- */
- if (pass >= 1 && cnt == 1 &&
- kas.a_hat->hat_prev != hat) {
-
- /* unlink victim hat */
- if (hat->hat_prev)
- hat->hat_prev->hat_next =
- hat->hat_next;
- else
- kas.a_hat->hat_next =
- hat->hat_next;
- if (hat->hat_next)
- hat->hat_next->hat_prev =
- hat->hat_prev;
- else
- kas.a_hat->hat_prev =
- hat->hat_prev;
-
-
- /* relink at end of hat list */
- hat->hat_next = NULL;
- hat->hat_prev = kas.a_hat->hat_prev;
- if (hat->hat_prev)
- hat->hat_prev->hat_next = hat;
- else
- kas.a_hat->hat_next = hat;
- kas.a_hat->hat_prev = hat;
-
- }
-
- mutex_exit(&hat_list_lock);
- break;
- }
-
- /*
- * Mark the HAT as a stealing victim.
+ * Mark the HAT as a stealing victim so that it is
+ * not freed from under us, e.g. in as_free()
*/
hat->hat_flags |= HAT_VICTIM;
mutex_exit(&hat_list_lock);
@@ -559,116 +633,72 @@ htable_steal(uint_t cnt)
hat_exit(hat);
/*
- * Don't steal on first pass.
+ * Don't steal active htables on first pass.
*/
- if (pass == 0 || stolen == cnt)
- continue;
+ if (pass != 0 && (stolen < cnt))
+ htable_steal_active(hat, cnt, threshold,
+ &stolen, &list);
/*
- * Search the active htables for one to steal.
- * Start at a different hash bucket every time to
- * help spread the pain of stealing.
+ * do synchronous teardown for the reap case so that
+ * we can forget hat; at this time, hat is
+ * guaranteed to be around because HAT_VICTIM is set
+ * (see htable_free() for similar code)
*/
- h = h_start = h_seed++ % hat->hat_num_hash;
- do {
- higher = NULL;
- HTABLE_ENTER(h);
- for (ht = hat->hat_ht_hash[h]; ht;
- ht = ht->ht_next) {
-
- /*
- * Can we rule out reaping?
- */
- if (ht->ht_busy != 0 ||
- (ht->ht_flags & HTABLE_SHARED_PFN)||
- ht->ht_level > 0 ||
- ht->ht_valid_cnt > threshold ||
- ht->ht_lock_cnt != 0)
- continue;
-
- /*
- * Increment busy so the htable can't
- * disappear. We drop the htable mutex
- * to avoid deadlocks with
- * hat_pageunload() and the hment mutex
- * while we call hat_pte_unmap()
- */
- ++ht->ht_busy;
- HTABLE_EXIT(h);
-
- /*
- * Try stealing.
- * - unload and invalidate all PTEs
- */
- for (e = 0, va = ht->ht_vaddr;
- e < HTABLE_NUM_PTES(ht) &&
- ht->ht_valid_cnt > 0 &&
- ht->ht_busy == 1 &&
- ht->ht_lock_cnt == 0;
- ++e, va += MMU_PAGESIZE) {
- pte = x86pte_get(ht, e);
- if (!PTE_ISVALID(pte))
- continue;
- hat_pte_unmap(ht, e,
- HAT_UNLOAD, pte, NULL);
- }
-
- /*
- * Reacquire htable lock. If we didn't
- * remove all mappings in the table,
- * or another thread added a new mapping
- * behind us, give up on this table.
- */
- HTABLE_ENTER(h);
- if (ht->ht_busy != 1 ||
- ht->ht_valid_cnt != 0 ||
- ht->ht_lock_cnt != 0) {
- --ht->ht_busy;
- continue;
- }
-
- /*
- * Steal it and unlink the page table.
- */
- higher = ht->ht_parent;
- unlink_ptp(higher, ht, ht->ht_vaddr);
-
- /*
- * remove from the hash list
- */
- if (ht->ht_next)
- ht->ht_next->ht_prev =
- ht->ht_prev;
-
- if (ht->ht_prev) {
- ht->ht_prev->ht_next =
- ht->ht_next;
- } else {
- ASSERT(hat->hat_ht_hash[h] ==
- ht);
- hat->hat_ht_hash[h] =
- ht->ht_next;
- }
-
- /*
- * Break to outer loop to release the
- * higher (ht_parent) pagetable. This
- * spreads out the pain caused by
- * pagefaults.
- */
- ht->ht_next = list;
- list = ht;
- ++stolen;
- break;
+ for (ht = list; (ht) && (reap); ht = ht->ht_next) {
+ if (ht->ht_hat == NULL)
+ continue;
+ ASSERT(ht->ht_hat == hat);
+#if defined(__xpv) && defined(__amd64)
+ if (!(ht->ht_flags & HTABLE_VLP) &&
+ ht->ht_level == mmu.max_level) {
+ ptable_free(hat->hat_user_ptable);
+ hat->hat_user_ptable = PFN_INVALID;
}
- HTABLE_EXIT(h);
- if (higher != NULL)
- htable_release(higher);
- if (++h == hat->hat_num_hash)
- h = 0;
- } while (stolen < cnt && h != h_start);
+#endif
+ /*
+ * forget the hat
+ */
+ ht->ht_hat = NULL;
+ }
+
+ mutex_enter(&hat_list_lock);
+
+ /*
+ * Are we finished?
+ */
+ if (stolen == cnt) {
+ /*
+ * Try to spread the pain of stealing,
+ * move victim HAT to the end of the HAT list.
+ */
+ if (pass >= 1 && cnt == 1 &&
+ kas.a_hat->hat_prev != hat)
+ move_victim(hat);
+ /*
+ * We are finished
+ */
+ }
+
+ /*
+ * Clear the victim flag, hat can go away now (once
+ * the lock is dropped)
+ */
+ if (hat->hat_flags & HAT_VICTIM) {
+ ASSERT(hat != kas.a_hat);
+ hat->hat_flags &= ~HAT_VICTIM;
+ cv_broadcast(&hat_list_cv);
+ }
+
+ /* move on to the next hat */
+ hat = hat->hat_next;
}
+
+ mutex_exit(&hat_list_lock);
+
}
+ ASSERT(!MUTEX_HELD(&hat_list_lock));
+
atomic_dec_32(&htable_dont_cache);
return (list);
}
@@ -696,16 +726,22 @@ htable_reap(void *handle)
reap_cnt = MAX(MIN(physmem / 20, active_ptables / 20), 10);
/*
+ * Note: htable_dont_cache should be set at the time of
+ * invoking htable_free()
+ */
+ atomic_inc_32(&htable_dont_cache);
+ /*
* Let htable_steal() do the work, we just call htable_free()
*/
XPV_DISALLOW_MIGRATE();
- list = htable_steal(reap_cnt);
+ list = htable_steal(reap_cnt, B_TRUE);
XPV_ALLOW_MIGRATE();
while ((ht = list) != NULL) {
list = ht->ht_next;
HATSTAT_INC(hs_reaped);
htable_free(ht);
}
+ atomic_dec_32(&htable_dont_cache);
/*
* Free up excess reserves
@@ -801,7 +837,7 @@ htable_alloc(
*/
while (ht == NULL && can_steal_post_boot) {
kmem_reap();
- ht = htable_steal(1);
+ ht = htable_steal(1, B_FALSE);
HATSTAT_INC(hs_steals);
/*
@@ -846,7 +882,7 @@ htable_alloc(
hat->hat_user_ptable = ptable_alloc((uintptr_t)ht + 1);
if (hat->hat_user_ptable != PFN_INVALID)
break;
- stolen = htable_steal(1);
+ stolen = htable_steal(1, B_FALSE);
if (stolen == NULL)
panic("2nd steal ptable failed\n");
htable_free(stolen);
@@ -948,7 +984,7 @@ htable_free(htable_t *ht)
} else if (!(ht->ht_flags & HTABLE_VLP)) {
ptable_free(ht->ht_pfn);
#if defined(__amd64) && defined(__xpv)
- if (ht->ht_level == mmu.max_level) {
+ if (ht->ht_level == mmu.max_level && hat != NULL) {
ptable_free(hat->hat_user_ptable);
hat->hat_user_ptable = PFN_INVALID;
}