From 7f5d80fd842f21a48514cca6718c3bbdaf592316 Mon Sep 17 00:00:00 2001 From: Gordon Ross Date: Sat, 12 Feb 2022 10:42:08 -0500 Subject: 15184 SMB Service fails to restart 15185 SMB sessions linger after disconnect 15186 SMB use smb_llist_post in srv_oplock Reviewed by: Joyce McIntosh Reviewed by: Albert Lee Reviewed by: Matt Barden Approved by: Dan McDonald --- usr/src/uts/common/fs/smbsrv/smb_node.c | 24 ++++---- usr/src/uts/common/fs/smbsrv/smb_server.c | 29 +++++++++- usr/src/uts/common/fs/smbsrv/smb_session.c | 42 ++++++-------- usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c | 82 ++++++++++++++++++--------- 4 files changed, 108 insertions(+), 69 deletions(-) diff --git a/usr/src/uts/common/fs/smbsrv/smb_node.c b/usr/src/uts/common/fs/smbsrv/smb_node.c index 67c348433d..62396ecbcb 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_node.c +++ b/usr/src/uts/common/fs/smbsrv/smb_node.c @@ -188,33 +188,31 @@ smb_node_fini(void) if (smb_node_cache == NULL) return; -#ifdef DEBUG for (i = 0; i <= SMBND_HASH_MASK; i++) { smb_llist_t *bucket; smb_node_t *node; /* - * The following sequence is just intended for sanity check. - * This will have to be modified when the code goes into - * production. - * - * The SMB node hash table should be emtpy at this point. If the - * hash table is not empty a panic will be triggered. + * The SMB node hash table should be empty at this point. + * If the hash table is not empty, clean it up. * - * The reason why SMB nodes are still remaining in the hash - * table is problably due to a mismatch between calls to - * smb_node_lookup() and smb_node_release(). You must track that - * down. + * The reason why SMB nodes might remain in this table is + * generally forgotten references somewhere, perhaps on + * open files, etc. Those are defects. */ bucket = &smb_node_hash_table[i]; node = smb_llist_head(bucket); while (node != NULL) { +#ifdef DEBUG cmn_err(CE_NOTE, "leaked node: 0x%p %s", (void *)node, node->od_name); - node = smb_llist_next(bucket, node); + cmn_err(CE_NOTE, "...bucket: 0x%p", bucket); + debug_enter("leaked_node"); +#endif + smb_llist_remove(bucket, node); + node = smb_llist_head(bucket); } } -#endif for (i = 0; i <= SMBND_HASH_MASK; i++) { smb_llist_destructor(&smb_node_hash_table[i]); diff --git a/usr/src/uts/common/fs/smbsrv/smb_server.c b/usr/src/uts/common/fs/smbsrv/smb_server.c index ca2d584498..7f56792f7d 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_server.c +++ b/usr/src/uts/common/fs/smbsrv/smb_server.c @@ -269,6 +269,9 @@ int smb_event_debug = 0; static smb_llist_t smb_servers; +/* for smb_server_destroy_session() */ +static smb_llist_t smb_server_session_zombies; + kmem_cache_t *smb_cache_request; kmem_cache_t *smb_cache_session; kmem_cache_t *smb_cache_user; @@ -341,6 +344,9 @@ smb_server_g_init(void) smb_llist_constructor(&smb_servers, sizeof (smb_server_t), offsetof(smb_server_t, sv_lnd)); + smb_llist_constructor(&smb_server_session_zombies, + sizeof (smb_session_t), offsetof(smb_session_t, s_lnd)); + return (0); errout: @@ -1472,7 +1478,7 @@ smb_server_shutdown(smb_server_t *sv) * with shutdown in spite of any leaked sessions. * That's better than a server that won't reboot. */ - time = SEC_TO_TICK(10) + ddi_get_lbolt(); + time = SEC_TO_TICK(5) + ddi_get_lbolt(); mutex_enter(&sv->sv_mutex); while (sv->sv_session_list.ll_count != 0) { if (cv_timedwait(&sv->sv_cv, &sv->sv_mutex, time) < 0) @@ -1758,7 +1764,6 @@ smb_server_receiver(void *arg) /* We stay in here until socket disconnect. */ smb_session_receiver(session); - ASSERT(session->s_state == SMB_SESSION_STATE_SHUTDOWN); smb_server_destroy_session(session); } @@ -2612,7 +2617,25 @@ smb_server_destroy_session(smb_session_t *session) count = ll->ll_count; smb_llist_exit(ll); - smb_session_delete(session); + /* + * Normally, the session should have state SHUTDOWN here. + * If the session has any ofiles remaining, eg. due to + * forgotten ofile references or something, the state + * will be _DISCONNECTED or _TERMINATED. Keep such + * sessions in the list of zombies (for debugging). + */ + if (session->s_state == SMB_SESSION_STATE_SHUTDOWN) { + smb_session_delete(session); + } else { +#ifdef DEBUG + cmn_err(CE_NOTE, "Leaked session: 0x%p", (void *)session); + debug_enter("leaked session"); +#endif + smb_llist_enter(&smb_server_session_zombies, RW_WRITER); + smb_llist_insert_head(&smb_server_session_zombies, session); + smb_llist_exit(&smb_server_session_zombies); + } + if (count == 0) { /* See smb_server_shutdown */ cv_signal(&sv->sv_cv); diff --git a/usr/src/uts/common/fs/smbsrv/smb_session.c b/usr/src/uts/common/fs/smbsrv/smb_session.c index 89ab25ae02..f9250c4f73 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_session.c +++ b/usr/src/uts/common/fs/smbsrv/smb_session.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2020 Tintri by DDN, Inc. All rights reserved. - * Copyright 2020 RackTop Systems, Inc. + * Copyright 2022 RackTop Systems, Inc. */ #include @@ -1194,6 +1194,8 @@ smb_session_disconnect_share( smb_llist_exit(ll); } +int smb_session_logoff_maxwait = 2 * MILLISEC; /* 2 sec. */ + /* * Logoff all users associated with the specified session. * @@ -1207,6 +1209,8 @@ smb_session_logoff(smb_session_t *session) { smb_llist_t *ulist; smb_user_t *user; + int count; + int timeleft = smb_session_logoff_maxwait; SMB_SESSION_VALID(session); @@ -1244,52 +1248,40 @@ top: user = smb_llist_next(ulist, user); } - /* Needed below (Was the list empty?) */ - user = smb_llist_head(ulist); + count = smb_llist_get_count(ulist); + /* drop the lock and flush the dtor queue */ smb_llist_exit(ulist); /* - * It's possible for user objects to remain due to references - * obtained via smb_server_lookup_ssnid(), when an SMB2 - * session setup is destroying a previous session. - * - * Wait for user objects to clear out (last refs. go away, - * then smb_user_delete takes them out of the list). When - * the last user object is removed, the session state is - * set to SHUTDOWN and s_lock is signaled. - * - * Not all places that call smb_user_release necessarily - * flush the delete queue, so after we wait for the list - * to empty out, go back to the top and recheck the list - * delete queue to make sure smb_user_delete happens. + * Wait (briefly) for user objects to go away. + * They might linger, eg. if some ofile ref has been + * forgotten, which holds, a tree and a user. + * See smb_session_destroy. */ - if (user == NULL) { + if (count == 0) { /* User list is empty. */ smb_rwx_rwenter(&session->s_lock, RW_WRITER); session->s_state = SMB_SESSION_STATE_SHUTDOWN; smb_rwx_rwexit(&session->s_lock); } else { smb_rwx_rwenter(&session->s_lock, RW_READER); - if (session->s_state != SMB_SESSION_STATE_SHUTDOWN) { + if (session->s_state != SMB_SESSION_STATE_SHUTDOWN && + timeleft > 0) { + /* May be signaled in smb_user_delete */ (void) smb_rwx_cvwait(&session->s_lock, MSEC_TO_TICK(200)); + timeleft -= 200; smb_rwx_rwexit(&session->s_lock); goto top; } smb_rwx_rwexit(&session->s_lock); } - ASSERT(session->s_state == SMB_SESSION_STATE_SHUTDOWN); /* * User list should be empty now. + * (Checked in smb_session_destroy) */ -#ifdef DEBUG - if (ulist->ll_count != 0) { - cmn_err(CE_WARN, "user list not empty?"); - debug_enter("s_user_list"); - } -#endif /* * User logoff happens first so we'll set preserve_opens diff --git a/usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c b/usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c index 5e5ea0ade0..76560f20a5 100644 --- a/usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c +++ b/usr/src/uts/common/fs/smbsrv/smb_srv_oplock.c @@ -150,7 +150,7 @@ static void smb_oplock_wait_break_cancel(smb_request_t *sr); * This is mostly the same as smb_oplock_ind_break() except: * - The only CompletionStatus possible is STATUS_CANT_GRANT. * - Instead of taskq_dispatch this appends the new SR to - * the "post work" queue on the current SR. + * the "post work" queue on the current SR (if possible). * * Note called with the node ofile list rwlock held and * the oplock mutex entered. @@ -159,7 +159,13 @@ void smb_oplock_ind_break_in_ack(smb_request_t *ack_sr, smb_ofile_t *ofile, uint32_t NewLevel, boolean_t AckRequired) { - smb_request_t *new_sr; + smb_server_t *sv = ofile->f_server; + smb_node_t *node = ofile->f_node; + smb_request_t *sr = NULL; + boolean_t use_postwork = B_TRUE; + + ASSERT(RW_READ_HELD(&node->n_ofile_list.ll_lock)); + ASSERT(MUTEX_HELD(&node->n_oplock.ol_mutex)); /* * This should happen only with SMB2 or later, @@ -183,42 +189,59 @@ smb_oplock_ind_break_in_ack(smb_request_t *ack_sr, smb_ofile_t *ofile, /* * When called from Ack processing, we want to use a - * request on the session doing the ack. If we can't + * request on the session doing the ack, so we can + * append "post work" to that session. If we can't * allocate a request on that session (because it's - * now disconnecting) just fall-back to the normal - * oplock break code path which deals with that. - * Once we have a request on the ack session, that - * session won't go away until the request is done. + * now disconnecting) use a request from the server + * session like smb_oplock_ind_break does, and then + * use taskq_dispatch instead of postwork. */ - new_sr = smb_request_alloc(ack_sr->session, 0); - if (new_sr == NULL) { - smb_oplock_ind_break(ofile, NewLevel, - AckRequired, STATUS_CANT_GRANT); - smb_ofile_release(ofile); + sr = smb_request_alloc(ack_sr->session, 0); + if (sr == NULL) { + use_postwork = B_FALSE; + sr = smb_request_alloc(sv->sv_session, 0); + } + if (sr == NULL) { + /* + * Server must be shutting down. We took a + * hold on the ofile that must be released, + * but we can't release here because we're + * called with the node ofile list entered. + * See smb_ofile_release_LL. + */ + smb_llist_post(&node->n_ofile_list, ofile, + smb_ofile_release_LL); return; } - new_sr->sr_state = SMB_REQ_STATE_SUBMITTED; - new_sr->smb2_async = B_TRUE; - new_sr->user_cr = zone_kcred(); - new_sr->fid_ofile = ofile; + sr->sr_state = SMB_REQ_STATE_SUBMITTED; + sr->smb2_async = B_TRUE; + sr->user_cr = zone_kcred(); + sr->fid_ofile = ofile; if (ofile->f_tree != NULL) { - new_sr->tid_tree = ofile->f_tree; - smb_tree_hold_internal(ofile->f_tree); + sr->tid_tree = ofile->f_tree; + smb_tree_hold_internal(sr->tid_tree); } if (ofile->f_user != NULL) { - new_sr->uid_user = ofile->f_user; - smb_user_hold_internal(ofile->f_user); + sr->uid_user = ofile->f_user; + smb_user_hold_internal(sr->uid_user); } - new_sr->arg.olbrk.NewLevel = NewLevel; - new_sr->arg.olbrk.AckRequired = AckRequired; + sr->arg.olbrk.NewLevel = NewLevel; + sr->arg.olbrk.AckRequired = AckRequired; - /* - * Using smb2_cmd_code to indicate what to call. - * work func. will call smb_oplock_send_brk - */ - new_sr->smb2_cmd_code = SMB2_OPLOCK_BREAK; - smb2sr_append_postwork(ack_sr, new_sr); + if (use_postwork) { + /* + * Using smb2_cmd_code to indicate what to call. + * work func. will call smb_oplock_send_brk + */ + sr->smb2_cmd_code = SMB2_OPLOCK_BREAK; + smb2sr_append_postwork(ack_sr, sr); + } else { + /* Will call smb_oplock_send_break */ + sr->smb2_status = STATUS_CANT_GRANT; + (void) taskq_dispatch(sv->sv_worker_pool, + smb_oplock_async_break, sr, TQ_SLEEP); + } } /* @@ -241,6 +264,9 @@ smb_oplock_ind_break(smb_ofile_t *ofile, uint32_t NewLevel, smb_node_t *node = ofile->f_node; smb_request_t *sr = NULL; + ASSERT(RW_READ_HELD(&node->n_ofile_list.ll_lock)); + ASSERT(MUTEX_HELD(&node->n_oplock.ol_mutex)); + /* * See notes at smb_oplock_async_break re. CompletionStatus * Check for any invalid codes here, so assert happens in -- cgit v1.2.3