summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGordon Ross <gwr@racktopsystems.com>2022-02-12 10:42:08 -0500
committerToomas Soome <tsoome@me.com>2022-12-06 23:50:55 +0200
commit7f5d80fd842f21a48514cca6718c3bbdaf592316 (patch)
treedb18acc12360f68dd40d8bad5cd419a610d90591
parentf92d0ef50ade5e261591e634cd91553a9658cf72 (diff)
downloadillumos-joyent-7f5d80fd842f21a48514cca6718c3bbdaf592316.tar.gz
15184 SMB Service fails to restart
15185 SMB sessions linger after disconnect 15186 SMB use smb_llist_post in srv_oplock Reviewed by: Joyce McIntosh <jmcintosh@racktopsystems.com> Reviewed by: Albert Lee <alee@racktopsystems.com> Reviewed by: Matt Barden <mbarden@racktopsystems.com> Approved by: Dan McDonald <danmcd@mnx.io>
-rw-r--r--usr/src/uts/common/fs/smbsrv/smb_node.c24
-rw-r--r--usr/src/uts/common/fs/smbsrv/smb_server.c29
-rw-r--r--usr/src/uts/common/fs/smbsrv/smb_session.c42
-rw-r--r--usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c82
4 files changed, 108 insertions, 69 deletions
diff --git a/usr/src/uts/common/fs/smbsrv/smb_node.c b/usr/src/uts/common/fs/smbsrv/smb_node.c
index 67c348433d..62396ecbcb 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_node.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_node.c
@@ -188,33 +188,31 @@ smb_node_fini(void)
if (smb_node_cache == NULL)
return;
-#ifdef DEBUG
for (i = 0; i <= SMBND_HASH_MASK; i++) {
smb_llist_t *bucket;
smb_node_t *node;
/*
- * The following sequence is just intended for sanity check.
- * This will have to be modified when the code goes into
- * production.
- *
- * The SMB node hash table should be emtpy at this point. If the
- * hash table is not empty a panic will be triggered.
+ * The SMB node hash table should be empty at this point.
+ * If the hash table is not empty, clean it up.
*
- * The reason why SMB nodes are still remaining in the hash
- * table is problably due to a mismatch between calls to
- * smb_node_lookup() and smb_node_release(). You must track that
- * down.
+ * The reason why SMB nodes might remain in this table is
+ * generally forgotten references somewhere, perhaps on
+ * open files, etc. Those are defects.
*/
bucket = &smb_node_hash_table[i];
node = smb_llist_head(bucket);
while (node != NULL) {
+#ifdef DEBUG
cmn_err(CE_NOTE, "leaked node: 0x%p %s",
(void *)node, node->od_name);
- node = smb_llist_next(bucket, node);
+ cmn_err(CE_NOTE, "...bucket: 0x%p", bucket);
+ debug_enter("leaked_node");
+#endif
+ smb_llist_remove(bucket, node);
+ node = smb_llist_head(bucket);
}
}
-#endif
for (i = 0; i <= SMBND_HASH_MASK; i++) {
smb_llist_destructor(&smb_node_hash_table[i]);
diff --git a/usr/src/uts/common/fs/smbsrv/smb_server.c b/usr/src/uts/common/fs/smbsrv/smb_server.c
index ca2d584498..7f56792f7d 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_server.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_server.c
@@ -269,6 +269,9 @@ int smb_event_debug = 0;
static smb_llist_t smb_servers;
+/* for smb_server_destroy_session() */
+static smb_llist_t smb_server_session_zombies;
+
kmem_cache_t *smb_cache_request;
kmem_cache_t *smb_cache_session;
kmem_cache_t *smb_cache_user;
@@ -341,6 +344,9 @@ smb_server_g_init(void)
smb_llist_constructor(&smb_servers, sizeof (smb_server_t),
offsetof(smb_server_t, sv_lnd));
+ smb_llist_constructor(&smb_server_session_zombies,
+ sizeof (smb_session_t), offsetof(smb_session_t, s_lnd));
+
return (0);
errout:
@@ -1472,7 +1478,7 @@ smb_server_shutdown(smb_server_t *sv)
* with shutdown in spite of any leaked sessions.
* That's better than a server that won't reboot.
*/
- time = SEC_TO_TICK(10) + ddi_get_lbolt();
+ time = SEC_TO_TICK(5) + ddi_get_lbolt();
mutex_enter(&sv->sv_mutex);
while (sv->sv_session_list.ll_count != 0) {
if (cv_timedwait(&sv->sv_cv, &sv->sv_mutex, time) < 0)
@@ -1758,7 +1764,6 @@ smb_server_receiver(void *arg)
/* We stay in here until socket disconnect. */
smb_session_receiver(session);
- ASSERT(session->s_state == SMB_SESSION_STATE_SHUTDOWN);
smb_server_destroy_session(session);
}
@@ -2612,7 +2617,25 @@ smb_server_destroy_session(smb_session_t *session)
count = ll->ll_count;
smb_llist_exit(ll);
- smb_session_delete(session);
+ /*
+ * Normally, the session should have state SHUTDOWN here.
+ * If the session has any ofiles remaining, eg. due to
+ * forgotten ofile references or something, the state
+ * will be _DISCONNECTED or _TERMINATED. Keep such
+ * sessions in the list of zombies (for debugging).
+ */
+ if (session->s_state == SMB_SESSION_STATE_SHUTDOWN) {
+ smb_session_delete(session);
+ } else {
+#ifdef DEBUG
+ cmn_err(CE_NOTE, "Leaked session: 0x%p", (void *)session);
+ debug_enter("leaked session");
+#endif
+ smb_llist_enter(&smb_server_session_zombies, RW_WRITER);
+ smb_llist_insert_head(&smb_server_session_zombies, session);
+ smb_llist_exit(&smb_server_session_zombies);
+ }
+
if (count == 0) {
/* See smb_server_shutdown */
cv_signal(&sv->sv_cv);
diff --git a/usr/src/uts/common/fs/smbsrv/smb_session.c b/usr/src/uts/common/fs/smbsrv/smb_session.c
index 89ab25ae02..f9250c4f73 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_session.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_session.c
@@ -21,7 +21,7 @@
/*
* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2020 Tintri by DDN, Inc. All rights reserved.
- * Copyright 2020 RackTop Systems, Inc.
+ * Copyright 2022 RackTop Systems, Inc.
*/
#include <sys/atomic.h>
@@ -1194,6 +1194,8 @@ smb_session_disconnect_share(
smb_llist_exit(ll);
}
+int smb_session_logoff_maxwait = 2 * MILLISEC; /* 2 sec. */
+
/*
* Logoff all users associated with the specified session.
*
@@ -1207,6 +1209,8 @@ smb_session_logoff(smb_session_t *session)
{
smb_llist_t *ulist;
smb_user_t *user;
+ int count;
+ int timeleft = smb_session_logoff_maxwait;
SMB_SESSION_VALID(session);
@@ -1244,52 +1248,40 @@ top:
user = smb_llist_next(ulist, user);
}
- /* Needed below (Was the list empty?) */
- user = smb_llist_head(ulist);
+ count = smb_llist_get_count(ulist);
+ /* drop the lock and flush the dtor queue */
smb_llist_exit(ulist);
/*
- * It's possible for user objects to remain due to references
- * obtained via smb_server_lookup_ssnid(), when an SMB2
- * session setup is destroying a previous session.
- *
- * Wait for user objects to clear out (last refs. go away,
- * then smb_user_delete takes them out of the list). When
- * the last user object is removed, the session state is
- * set to SHUTDOWN and s_lock is signaled.
- *
- * Not all places that call smb_user_release necessarily
- * flush the delete queue, so after we wait for the list
- * to empty out, go back to the top and recheck the list
- * delete queue to make sure smb_user_delete happens.
+ * Wait (briefly) for user objects to go away.
+ * They might linger, eg. if some ofile ref has been
+ * forgotten, which holds, a tree and a user.
+ * See smb_session_destroy.
*/
- if (user == NULL) {
+ if (count == 0) {
/* User list is empty. */
smb_rwx_rwenter(&session->s_lock, RW_WRITER);
session->s_state = SMB_SESSION_STATE_SHUTDOWN;
smb_rwx_rwexit(&session->s_lock);
} else {
smb_rwx_rwenter(&session->s_lock, RW_READER);
- if (session->s_state != SMB_SESSION_STATE_SHUTDOWN) {
+ if (session->s_state != SMB_SESSION_STATE_SHUTDOWN &&
+ timeleft > 0) {
+ /* May be signaled in smb_user_delete */
(void) smb_rwx_cvwait(&session->s_lock,
MSEC_TO_TICK(200));
+ timeleft -= 200;
smb_rwx_rwexit(&session->s_lock);
goto top;
}
smb_rwx_rwexit(&session->s_lock);
}
- ASSERT(session->s_state == SMB_SESSION_STATE_SHUTDOWN);
/*
* User list should be empty now.
+ * (Checked in smb_session_destroy)
*/
-#ifdef DEBUG
- if (ulist->ll_count != 0) {
- cmn_err(CE_WARN, "user list not empty?");
- debug_enter("s_user_list");
- }
-#endif
/*
* User logoff happens first so we'll set preserve_opens
diff --git a/usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c b/usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c
index 5e5ea0ade0..76560f20a5 100644
--- a/usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c
+++ b/usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c
@@ -150,7 +150,7 @@ static void smb_oplock_wait_break_cancel(smb_request_t *sr);
* This is mostly the same as smb_oplock_ind_break() except:
* - The only CompletionStatus possible is STATUS_CANT_GRANT.
* - Instead of taskq_dispatch this appends the new SR to
- * the "post work" queue on the current SR.
+ * the "post work" queue on the current SR (if possible).
*
* Note called with the node ofile list rwlock held and
* the oplock mutex entered.
@@ -159,7 +159,13 @@ void
smb_oplock_ind_break_in_ack(smb_request_t *ack_sr, smb_ofile_t *ofile,
uint32_t NewLevel, boolean_t AckRequired)
{
- smb_request_t *new_sr;
+ smb_server_t *sv = ofile->f_server;
+ smb_node_t *node = ofile->f_node;
+ smb_request_t *sr = NULL;
+ boolean_t use_postwork = B_TRUE;
+
+ ASSERT(RW_READ_HELD(&node->n_ofile_list.ll_lock));
+ ASSERT(MUTEX_HELD(&node->n_oplock.ol_mutex));
/*
* This should happen only with SMB2 or later,
@@ -183,42 +189,59 @@ smb_oplock_ind_break_in_ack(smb_request_t *ack_sr, smb_ofile_t *ofile,
/*
* When called from Ack processing, we want to use a
- * request on the session doing the ack. If we can't
+ * request on the session doing the ack, so we can
+ * append "post work" to that session. If we can't
* allocate a request on that session (because it's
- * now disconnecting) just fall-back to the normal
- * oplock break code path which deals with that.
- * Once we have a request on the ack session, that
- * session won't go away until the request is done.
+ * now disconnecting) use a request from the server
+ * session like smb_oplock_ind_break does, and then
+ * use taskq_dispatch instead of postwork.
*/
- new_sr = smb_request_alloc(ack_sr->session, 0);
- if (new_sr == NULL) {
- smb_oplock_ind_break(ofile, NewLevel,
- AckRequired, STATUS_CANT_GRANT);
- smb_ofile_release(ofile);
+ sr = smb_request_alloc(ack_sr->session, 0);
+ if (sr == NULL) {
+ use_postwork = B_FALSE;
+ sr = smb_request_alloc(sv->sv_session, 0);
+ }
+ if (sr == NULL) {
+ /*
+ * Server must be shutting down. We took a
+ * hold on the ofile that must be released,
+ * but we can't release here because we're
+ * called with the node ofile list entered.
+ * See smb_ofile_release_LL.
+ */
+ smb_llist_post(&node->n_ofile_list, ofile,
+ smb_ofile_release_LL);
return;
}
- new_sr->sr_state = SMB_REQ_STATE_SUBMITTED;
- new_sr->smb2_async = B_TRUE;
- new_sr->user_cr = zone_kcred();
- new_sr->fid_ofile = ofile;
+ sr->sr_state = SMB_REQ_STATE_SUBMITTED;
+ sr->smb2_async = B_TRUE;
+ sr->user_cr = zone_kcred();
+ sr->fid_ofile = ofile;
if (ofile->f_tree != NULL) {
- new_sr->tid_tree = ofile->f_tree;
- smb_tree_hold_internal(ofile->f_tree);
+ sr->tid_tree = ofile->f_tree;
+ smb_tree_hold_internal(sr->tid_tree);
}
if (ofile->f_user != NULL) {
- new_sr->uid_user = ofile->f_user;
- smb_user_hold_internal(ofile->f_user);
+ sr->uid_user = ofile->f_user;
+ smb_user_hold_internal(sr->uid_user);
}
- new_sr->arg.olbrk.NewLevel = NewLevel;
- new_sr->arg.olbrk.AckRequired = AckRequired;
+ sr->arg.olbrk.NewLevel = NewLevel;
+ sr->arg.olbrk.AckRequired = AckRequired;
- /*
- * Using smb2_cmd_code to indicate what to call.
- * work func. will call smb_oplock_send_brk
- */
- new_sr->smb2_cmd_code = SMB2_OPLOCK_BREAK;
- smb2sr_append_postwork(ack_sr, new_sr);
+ if (use_postwork) {
+ /*
+ * Using smb2_cmd_code to indicate what to call.
+ * work func. will call smb_oplock_send_brk
+ */
+ sr->smb2_cmd_code = SMB2_OPLOCK_BREAK;
+ smb2sr_append_postwork(ack_sr, sr);
+ } else {
+ /* Will call smb_oplock_send_break */
+ sr->smb2_status = STATUS_CANT_GRANT;
+ (void) taskq_dispatch(sv->sv_worker_pool,
+ smb_oplock_async_break, sr, TQ_SLEEP);
+ }
}
/*
@@ -241,6 +264,9 @@ smb_oplock_ind_break(smb_ofile_t *ofile, uint32_t NewLevel,
smb_node_t *node = ofile->f_node;
smb_request_t *sr = NULL;
+ ASSERT(RW_READ_HELD(&node->n_ofile_list.ll_lock));
+ ASSERT(MUTEX_HELD(&node->n_oplock.ol_mutex));
+
/*
* See notes at smb_oplock_async_break re. CompletionStatus
* Check for any invalid codes here, so assert happens in