Diffstat (limited to 'source3/modules/vfs_aio_linux.c')
-rw-r--r--  source3/modules/vfs_aio_linux.c  354
1 file changed, 354 insertions, 0 deletions
diff --git a/source3/modules/vfs_aio_linux.c b/source3/modules/vfs_aio_linux.c
new file mode 100644
index 0000000000..4ccdd8fb7e
--- /dev/null
+++ b/source3/modules/vfs_aio_linux.c
@@ -0,0 +1,354 @@
+/*
+ * Simulate Posix AIO using Linux kernel AIO.
+ *
+ * Copyright (C) Jeremy Allison 2012
+ * Copyright (C) Volker Lendecke 2012
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "includes.h"
+#include "system/filesys.h"
+#include "smbd/smbd.h"
+#include "smbd/globals.h"
+#include "lib/util/tevent_unix.h"
+#include <sys/eventfd.h>
+#include <libaio.h>
+
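+/*
+ * Module-wide state: a single eventfd and kernel AIO context are shared by
+ * all in-flight requests in this smbd process; the housekeeping timer tears
+ * them down again after 30 seconds without activity.
+ */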
+static int event_fd = -1;
+static io_context_t io_ctx;
+static struct fd_event *aio_read_event;
+static bool used;
+static unsigned num_busy;
+
+static void aio_linux_done(struct event_context *event_ctx,
+ struct fd_event *event,
+ uint16 flags, void *private_data);
+
+/************************************************************************
+ Housekeeping. Cleanup if no activity for 30 seconds.
+***********************************************************************/
+
+static void aio_linux_housekeeping(struct tevent_context *event_ctx,
+ struct tevent_timer *te,
+ struct timeval now,
+ void *private_data)
+{
+ /* Remove this timed event handler. */
+ TALLOC_FREE(te);
+
+ if ((num_busy != 0) || used) {
+ used = false;
+
+ /* Still busy. Look again in 30 seconds. */
+ (void)tevent_add_timer(event_ctx,
+ NULL,
+ timeval_current_ofs(30, 0),
+ aio_linux_housekeeping,
+ NULL);
+ return;
+ }
+
+ /* No activity for 30 seconds. Close out kernel resources. */
+ io_queue_release(io_ctx);
+ memset(&io_ctx, '\0', sizeof(io_ctx));
+
+ if (event_fd != -1) {
+ close(event_fd);
+ event_fd = -1;
+ }
+
+ TALLOC_FREE(aio_read_event);
+}
+
+/************************************************************************
+ Ensure event fd and aio context are initialized.
+***********************************************************************/
+
+static bool init_aio_linux(struct vfs_handle_struct *handle)
+{
+ struct tevent_timer *te = NULL;
+
+ if (event_fd != -1) {
+ /* Already initialized. */
+ return true;
+ }
+
+ /* Schedule a shutdown event for 30 seconds from now. */
+ te = tevent_add_timer(handle->conn->sconn->ev_ctx,
+ NULL,
+ timeval_current_ofs(30, 0),
+ aio_linux_housekeeping,
+ NULL);
+
+ if (te == NULL) {
+ goto fail;
+ }
+
+ event_fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC);
+ if (event_fd == -1) {
+ goto fail;
+ }
+
+ aio_read_event = tevent_add_fd(server_event_context(),
+ NULL,
+ event_fd,
+ TEVENT_FD_READ,
+ aio_linux_done,
+ NULL);
+ if (aio_read_event == NULL) {
+ goto fail;
+ }
+
+ if (io_queue_init(aio_pending_size, &io_ctx)) {
+ goto fail;
+ }
+
+ DEBUG(10,("init_aio_linux: initialized with up to %d events\n",
+ aio_pending_size));
+
+ return true;
+
+ fail:
+
+ DEBUG(10,("init_aio_linux: initialization failed\n"));
+
+ TALLOC_FREE(te);
+ TALLOC_FREE(aio_read_event);
+ if (event_fd != -1) {
+ close(event_fd);
+ event_fd = -1;
+ }
+ memset(&io_ctx, '\0', sizeof(io_ctx));
+ return false;
+}
+
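+/* Per-request state: the iocb handed to the kernel, plus the result and
+ * error that aio_linux_done copies out of the completed io_event. */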
+struct aio_linux_state {
+ struct iocb event_iocb;
+ ssize_t ret;
+ int err;
+};
+
+static struct tevent_req *aio_linux_pread_send(
+ struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev, struct files_struct *fsp,
+ void *data, size_t n, off_t offset)
+{
+ struct tevent_req *req;
+ struct aio_linux_state *state;
+ struct iocb *piocb;
+ int ret;
+
+ req = tevent_req_create(mem_ctx, &state, struct aio_linux_state);
+ if (req == NULL) {
+ return NULL;
+ }
+ if (!init_aio_linux(handle)) {
+ tevent_req_error(req, EIO);
+ return tevent_req_post(req, ev);
+ }
+
+ io_prep_pread(&state->event_iocb, fsp->fh->fd, data, n, offset);
+ io_set_eventfd(&state->event_iocb, event_fd);
+ state->event_iocb.data = req;
+
+ piocb = &state->event_iocb;
+
+ ret = io_submit(io_ctx, 1, &piocb);
+ if (ret < 0) {
+ tevent_req_error(req, -ret);
+ return tevent_req_post(req, ev);
+ }
+ num_busy += 1;
+ used = true;
+ return req;
+}
+
+static struct tevent_req *aio_linux_pwrite_send(
+ struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev, struct files_struct *fsp,
+ const void *data, size_t n, off_t offset)
+{
+ struct tevent_req *req;
+ struct aio_linux_state *state;
+ struct iocb *piocb;
+ int ret;
+
+ req = tevent_req_create(mem_ctx, &state, struct aio_linux_state);
+ if (req == NULL) {
+ return NULL;
+ }
+ if (!init_aio_linux(handle)) {
+ tevent_req_error(req, EIO);
+ return tevent_req_post(req, ev);
+ }
+
+ io_prep_pwrite(&state->event_iocb, fsp->fh->fd, discard_const(data),
+ n, offset);
+ io_set_eventfd(&state->event_iocb, event_fd);
+ state->event_iocb.data = req;
+
+ piocb = &state->event_iocb;
+
+ ret = io_submit(io_ctx, 1, &piocb);
+ if (ret < 0) {
+ tevent_req_error(req, -ret);
+ return tevent_req_post(req, ev);
+ }
+ num_busy += 1;
+ used = true;
+ return req;
+}
+
+static struct tevent_req *aio_linux_fsync_send(
+ struct vfs_handle_struct *handle, TALLOC_CTX *mem_ctx,
+ struct tevent_context *ev, struct files_struct *fsp)
+{
+ struct tevent_req *req;
+ struct aio_linux_state *state;
+ struct iocb *piocb;
+ int ret;
+
+ req = tevent_req_create(mem_ctx, &state, struct aio_linux_state);
+ if (req == NULL) {
+ return NULL;
+ }
+ if (!init_aio_linux(handle)) {
+ tevent_req_error(req, EIO);
+ return tevent_req_post(req, ev);
+ }
+
+ io_prep_fsync(&state->event_iocb, fsp->fh->fd);
+ io_set_eventfd(&state->event_iocb, event_fd);
+ state->event_iocb.data = req;
+
+ piocb = &state->event_iocb;
+
+ ret = io_submit(io_ctx, 1, &piocb);
+ if (ret < 0) {
+ tevent_req_error(req, -ret);
+ return tevent_req_post(req, ev);
+ }
+ num_busy += 1;
+ used = true;
+ return req;
+}
+
+static void aio_linux_done(struct event_context *event_ctx,
+ struct fd_event *event,
+ uint16 flags, void *private_data)
+{
+ uint64_t num_events = 0;
+
+ DEBUG(10, ("aio_linux_done called with flags=%d\n",
+ (int)flags));
+
+ /* Read the number of events available. */
+ if (sys_read(event_fd, &num_events, sizeof(num_events)) !=
+ sizeof(num_events)) {
+ smb_panic("aio_linux_done: invalid read");
+ }
+
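+ /* The eventfd counter tells us how many iocbs have completed, so
+  * reap exactly that many, one at a time with a zero timeout. */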
+ while (num_events > 0) {
+ struct timespec ts = { 0, };
+ struct io_event finished;
+ struct tevent_req *req;
+ struct aio_linux_state *state;
+ int ret;
+
+ ret = io_getevents(io_ctx, 1, 1, &finished, &ts);
+ if (ret < 0) {
+ DEBUG(1, ("aio_linux_done: io_getevents returned %s\n",
+ strerror(-ret)));
+ return;
+ }
+ if (ret == 0) {
+ DEBUG(10, ("aio_linux_done: io_getevents returned "
+ "0\n"));
+ continue;
+ }
+
+ num_busy -= 1;
+
+ req = talloc_get_type_abort(finished.data,
+ struct tevent_req);
+ state = tevent_req_data(req, struct aio_linux_state);
+
+ if (finished.res < 0) {
+ state->ret = -1;
+ state->err = -finished.res;
+ } else {
+ state->ret = finished.res;
+ state->err = 0;
+ }
+ tevent_req_done(req);
+ num_events -= 1;
+ }
+}
+
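+/* Collect the result of a pread/pwrite request: the byte count reported in
+ * the io_event, or -1 with *err set to an errno-style error. */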
+static ssize_t aio_linux_recv(struct tevent_req *req, int *err)
+{
+ struct aio_linux_state *state = tevent_req_data(
+ req, struct aio_linux_state);
+
+ if (tevent_req_is_unix_error(req, err)) {
+ return -1;
+ }
+ if (state->ret == -1) {
+ *err = state->err;
+ }
+ return state->ret;
+}
+
+static int aio_linux_int_recv(struct tevent_req *req, int *err)
+{
+ /*
+ * Use implicit conversion ssize_t->int
+ */
+ return aio_linux_recv(req, err);
+}
+
+static int aio_linux_connect(vfs_handle_struct *handle, const char *service,
+ const char *user)
+{
+ /*********************************************************************
+ * How many io_events to initialize?
+ * 128 per process seems insane as a default until you realize that
+ * (a) Throttling is done in SMB2 via the crediting algorithm.
+ * (b) SMB1 clients are limited to max_mux (50) outstanding
+ *     requests, and Windows clients don't use this anyway.
+ * Essentially we want this to be unlimited unless smb.conf
+ * says differently.
+ *********************************************************************/
+ aio_pending_size = lp_parm_int(
+ SNUM(handle->conn), "aio_linux", "aio num events", 128);
+ return SMB_VFS_NEXT_CONNECT(handle, service, user);
+}
+
+static struct vfs_fn_pointers vfs_aio_linux_fns = {
+ .connect_fn = aio_linux_connect,
+ .pread_send_fn = aio_linux_pread_send,
+ .pread_recv_fn = aio_linux_recv,
+ .pwrite_send_fn = aio_linux_pwrite_send,
+ .pwrite_recv_fn = aio_linux_recv,
+ .fsync_send_fn = aio_linux_fsync_send,
+ .fsync_recv_fn = aio_linux_int_recv,
+};
+
+NTSTATUS vfs_aio_linux_init(void)
+{
+ return smb_register_vfs(SMB_VFS_INTERFACE_VERSION,
+ "aio_linux", &vfs_aio_linux_fns);
+}
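
For reference, a minimal smb.conf sketch that loads the module and sets the only tunable it reads, the "aio_linux:aio num events" parameter consumed in aio_linux_connect(); the share name [data] and the value 256 are placeholders:

    [data]
        vfs objects = aio_linux
        aio_linux:aio num events = 256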