diff options
Diffstat (limited to 'src/stat_cache.c')
-rw-r--r-- | src/stat_cache.c | 668 |
1 files changed, 668 insertions, 0 deletions
diff --git a/src/stat_cache.c b/src/stat_cache.c new file mode 100644 index 0000000..148f4c8 --- /dev/null +++ b/src/stat_cache.c @@ -0,0 +1,668 @@ +#define _GNU_SOURCE + +#include <sys/types.h> +#include <sys/stat.h> + +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <unistd.h> +#include <stdio.h> +#include <fcntl.h> +#include <assert.h> + +#include "log.h" +#include "stat_cache.h" +#include "fdevent.h" +#include "etag.h" + +#ifdef HAVE_ATTR_ATTRIBUTES_H +#include <attr/attributes.h> +#endif + +#ifdef HAVE_FAM_H +# include <fam.h> +#endif + +#include "sys-mmap.h" + +/* NetBSD 1.3.x needs it */ +#ifndef MAP_FAILED +# define MAP_FAILED -1 +#endif + +#ifndef O_LARGEFILE +# define O_LARGEFILE 0 +#endif + +#ifndef HAVE_LSTAT +#define lstat stat +#endif + +#if 0 +/* enables debug code for testing if all nodes in the stat-cache as accessable */ +#define DEBUG_STAT_CACHE +#endif + +/* + * stat-cache + * + * we cache the stat() calls in our own storage + * the directories are cached in FAM + * + * if we get a change-event from FAM, we increment the version in the FAM->dir mapping + * + * if the stat()-cache is queried we check if the version id for the directory is the + * same and return immediatly. + * + * + * What we need: + * + * - for each stat-cache entry we need a fast indirect lookup on the directory name + * - for each FAMRequest we have to find the version in the directory cache (index as userdata) + * + * stat <<-> directory <-> FAMRequest + * + * if file is deleted, directory is dirty, file is rechecked ... + * if directory is deleted, directory mapping is removed + * + * */ + +#ifdef HAVE_FAM_H +typedef struct { + FAMRequest *req; + FAMConnection *fc; + + buffer *name; + + int version; +} fam_dir_entry; +#endif + +/* the directory name is too long to always compare on it + * - we need a hash + * - the hash-key is used as sorting criteria for a tree + * - a splay-tree is used as we can use the caching effect of it + */ + +/* we want to cleanup the stat-cache every few seconds, let's say 10 + * + * - remove entries which are outdated since 30s + * - remove entries which are fresh but havn't been used since 60s + * - if we don't have a stat-cache entry for a directory, release it from the monitor + */ + +#ifdef DEBUG_STAT_CACHE +typedef struct { + int *ptr; + + size_t used; + size_t size; +} fake_keys; + +static fake_keys ctrl; +#endif + +stat_cache *stat_cache_init(void) { + stat_cache *fc = NULL; + + fc = calloc(1, sizeof(*fc)); + + fc->dir_name = buffer_init(); +#ifdef HAVE_FAM_H + fc->fam = calloc(1, sizeof(*fc->fam)); +#endif + +#ifdef DEBUG_STAT_CACHE + ctrl.size = 0; +#endif + + return fc; +} + +static stat_cache_entry * stat_cache_entry_init(void) { + stat_cache_entry *sce = NULL; + + sce = calloc(1, sizeof(*sce)); + + sce->name = buffer_init(); + sce->etag = buffer_init(); + sce->content_type = buffer_init(); + + return sce; +} + +static void stat_cache_entry_free(void *data) { + stat_cache_entry *sce = data; + if (!sce) return; + + buffer_free(sce->etag); + buffer_free(sce->name); + buffer_free(sce->content_type); + + free(sce); +} + +#ifdef HAVE_FAM_H +static fam_dir_entry * fam_dir_entry_init(void) { + fam_dir_entry *fam_dir = NULL; + + fam_dir = calloc(1, sizeof(*fam_dir)); + + fam_dir->name = buffer_init(); + + return fam_dir; +} + +static void fam_dir_entry_free(void *data) { + fam_dir_entry *fam_dir = data; + + if (!fam_dir) return; + + FAMCancelMonitor(fam_dir->fc, fam_dir->req); + + buffer_free(fam_dir->name); + free(fam_dir->req); + + free(fam_dir); +} +#endif + +void stat_cache_free(stat_cache *sc) { + while (sc->files) { + int osize; + splay_tree *node = sc->files; + + osize = sc->files->size; + + stat_cache_entry_free(node->data); + sc->files = splaytree_delete(sc->files, node->key); + + assert(osize - 1 == splaytree_size(sc->files)); + } + + buffer_free(sc->dir_name); + +#ifdef HAVE_FAM_H + while (sc->dirs) { + int osize; + splay_tree *node = sc->dirs; + + osize = sc->dirs->size; + + fam_dir_entry_free(node->data); + sc->dirs = splaytree_delete(sc->dirs, node->key); + + if (osize == 1) { + assert(NULL == sc->dirs); + } else { + assert(osize == (sc->dirs->size + 1)); + } + } + + if (sc->fam) { + FAMClose(sc->fam); + free(sc->fam); + } +#endif + free(sc); +} + +#ifdef HAVE_XATTR +static int stat_cache_attr_get(buffer *buf, char *name) { + int attrlen; + int ret; + + attrlen = 1024; + buffer_prepare_copy(buf, attrlen); + attrlen--; + if(0 == (ret = attr_get(name, "Content-Type", buf->ptr, &attrlen, 0))) { + buf->used = attrlen + 1; + buf->ptr[attrlen] = '\0'; + } + return ret; +} +#endif + +/* the famous DJB hash function for strings */ +static uint32_t hashme(buffer *str) { + uint32_t hash = 5381; + const char *s; + for (s = str->ptr; *s; s++) { + hash = ((hash << 5) + hash) + *s; + } + + hash &= ~(1 << 31); /* strip the highest bit */ + + return hash; +} + +#ifdef HAVE_FAM_H +handler_t stat_cache_handle_fdevent(void *_srv, void *_fce, int revent) { + size_t i; + server *srv = _srv; + stat_cache *sc = srv->stat_cache; + size_t events; + + UNUSED(_fce); + /* */ + + if ((revent & FDEVENT_IN) && + sc->fam) { + + events = FAMPending(sc->fam); + + for (i = 0; i < events; i++) { + FAMEvent fe; + fam_dir_entry *fam_dir; + splay_tree *node; + int ndx; + + FAMNextEvent(sc->fam, &fe); + + /* handle event */ + + switch(fe.code) { + case FAMChanged: + case FAMDeleted: + case FAMMoved: + /* if the filename is a directory remove the entry */ + + fam_dir = fe.userdata; + fam_dir->version++; + + /* file/dir is still here */ + if (fe.code == FAMChanged) break; + + buffer_copy_string(sc->dir_name, fe.filename); + + ndx = hashme(sc->dir_name); + + sc->dirs = splaytree_splay(sc->dirs, ndx); + node = sc->dirs; + + if (node && (node->key == ndx)) { + int osize = splaytree_size(sc->dirs); + + fam_dir_entry_free(node->data); + sc->dirs = splaytree_delete(sc->dirs, ndx); + + assert(osize - 1 == splaytree_size(sc->dirs)); + } + break; + default: + break; + } + } + } + + if (revent & FDEVENT_HUP) { + /* fam closed the connection */ + srv->stat_cache->fam_fcce_ndx = -1; + + fdevent_event_del(srv->ev, &(sc->fam_fcce_ndx), FAMCONNECTION_GETFD(sc->fam)); + fdevent_unregister(srv->ev, FAMCONNECTION_GETFD(sc->fam)); + + FAMClose(sc->fam); + free(sc->fam); + + sc->fam = NULL; + } + + return HANDLER_GO_ON; +} + +static int buffer_copy_dirname(buffer *dst, buffer *file) { + size_t i; + + if (buffer_is_empty(file)) return -1; + + for (i = file->used - 1; i+1 > 0; i--) { + if (file->ptr[i] == '/') { + buffer_copy_string_len(dst, file->ptr, i); + return 0; + } + } + + return -1; +} +#endif + +/*** + * + * + * + * returns: + * - HANDLER_FINISHED on cache-miss (don't forget to reopen the file) + * - HANDLER_ERROR on stat() failed -> see errno for problem + */ + +handler_t stat_cache_get_entry(server *srv, connection *con, buffer *name, stat_cache_entry **ret_sce) { +#ifdef HAVE_FAM_H + fam_dir_entry *fam_dir = NULL; + int dir_ndx = -1; + splay_tree *dir_node = NULL; +#endif + stat_cache_entry *sce = NULL; + stat_cache *sc; + struct stat st; + size_t k; + int fd; +#ifdef DEBUG_STAT_CACHE + size_t i; +#endif + + int file_ndx; + splay_tree *file_node = NULL; + + *ret_sce = NULL; + + /* + * check if the directory for this file has changed + */ + + sc = srv->stat_cache; + + file_ndx = hashme(name); + sc->files = splaytree_splay(sc->files, file_ndx); + +#ifdef DEBUG_STAT_CACHE + for (i = 0; i < ctrl.used; i++) { + if (ctrl.ptr[i] == file_ndx) break; + } +#endif + + if (sc->files && (sc->files->key == file_ndx)) { +#ifdef DEBUG_STAT_CACHE + /* it was in the cache */ + assert(i < ctrl.used); +#endif + + /* we have seen this file already and + * don't stat() it again in the same second */ + + file_node = sc->files; + + sce = file_node->data; + + /* check if the name is the same, we might have a collision */ + + if (buffer_is_equal(name, sce->name)) { + if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_SIMPLE) { + if (sce->stat_ts == srv->cur_ts) { + *ret_sce = sce; + return HANDLER_GO_ON; + } + } + } else { + /* oops, a collision, + * + * file_node is used by the FAM check below to see if we know this file + * and if we can save a stat(). + * + * BUT, the sce is not reset here as the entry into the cache is ok, we + * it is just not pointing to our requested file. + * + * */ + + file_node = NULL; + } + } else { +#ifdef DEBUG_STAT_CACHE + if (i != ctrl.used) { + fprintf(stderr, "%s.%d: %08x was already inserted but not found in cache, %s\n", __FILE__, __LINE__, file_ndx, name->ptr); + } + assert(i == ctrl.used); +#endif + } + +#ifdef HAVE_FAM_H + /* dir-check */ + if (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM) { + if (0 != buffer_copy_dirname(sc->dir_name, name)) { + SEGFAULT(); + } + + dir_ndx = hashme(sc->dir_name); + + sc->dirs = splaytree_splay(sc->dirs, dir_ndx); + + if (sc->dirs && (sc->dirs->key == dir_ndx)) { + dir_node = sc->dirs; + } + + if (dir_node && file_node) { + /* we found a file */ + + sce = file_node->data; + fam_dir = dir_node->data; + + if (fam_dir->version == sce->dir_version) { + /* the stat()-cache entry is still ok */ + + *ret_sce = sce; + return HANDLER_GO_ON; + } + } + } +#endif + + /* + * *lol* + * - open() + fstat() on a named-pipe results in a (intended) hang. + * - stat() if regualar file + open() to see if we can read from it is better + * + * */ + + if (-1 == stat(name->ptr, &st)) { + return HANDLER_ERROR; + } + + + if (S_ISREG(st.st_mode)) { + /* try to open the file to check if we can read it */ + if (-1 == (fd = open(name->ptr, O_RDONLY))) { + return HANDLER_ERROR; + } + close(fd); + } + + if (NULL == sce) { + int osize = 0; + + if (sc->files) { + osize = sc->files->size; + } + + sce = stat_cache_entry_init(); + buffer_copy_string_buffer(sce->name, name); + + sc->files = splaytree_insert(sc->files, file_ndx, sce); +#ifdef DEBUG_STAT_CACHE + if (ctrl.size == 0) { + ctrl.size = 16; + ctrl.used = 0; + ctrl.ptr = malloc(ctrl.size * sizeof(*ctrl.ptr)); + } else if (ctrl.size == ctrl.used) { + ctrl.size += 16; + ctrl.ptr = realloc(ctrl.ptr, ctrl.size * sizeof(*ctrl.ptr)); + } + + ctrl.ptr[ctrl.used++] = file_ndx; + + assert(sc->files); + assert(sc->files->data == sce); + assert(osize + 1 == splaytree_size(sc->files)); +#endif + } + + sce->st = st; + sce->stat_ts = srv->cur_ts; + + /* catch the obvious symlinks + * + * this is not a secure check as we still have a race-condition between + * the stat() and the open. We can only solve this by + * 1. open() the file + * 2. fstat() the fd + * + * and keeping the file open for the rest of the time. But this can + * only be done at network level. + * + * */ + if (S_ISLNK(st.st_mode) && !con->conf.follow_symlink) { + return HANDLER_ERROR; + } + + if (S_ISREG(st.st_mode)) { + /* determine mimetype */ + buffer_reset(sce->content_type); + + for (k = 0; k < con->conf.mimetypes->used; k++) { + data_string *ds = (data_string *)con->conf.mimetypes->data[k]; + buffer *type = ds->key; + + if (type->used == 0) continue; + + /* check if the right side is the same */ + if (type->used > name->used) continue; + + if (0 == strncasecmp(name->ptr + name->used - type->used, type->ptr, type->used - 1)) { + buffer_copy_string_buffer(sce->content_type, ds->value); + break; + } + } + etag_create(sce->etag, &(sce->st)); +#ifdef HAVE_XATTR + if (buffer_is_empty(sce->content_type)) { + stat_cache_attr_get(sce->content_type, name->ptr); + } +#endif + } + +#ifdef HAVE_FAM_H + if (sc->fam && + (srv->srvconf.stat_cache_engine == STAT_CACHE_ENGINE_FAM)) { + /* is this directory already registered ? */ + if (!dir_node) { + fam_dir = fam_dir_entry_init(); + fam_dir->fc = sc->fam; + + buffer_copy_string_buffer(fam_dir->name, sc->dir_name); + + fam_dir->version = 1; + + fam_dir->req = calloc(1, sizeof(FAMRequest)); + + if (0 != FAMMonitorDirectory(sc->fam, fam_dir->name->ptr, + fam_dir->req, fam_dir)) { + + log_error_write(srv, __FILE__, __LINE__, "sbs", + "monitoring dir failed:", + fam_dir->name, + FamErrlist[FAMErrno]); + + fam_dir_entry_free(fam_dir); + } else { + int osize = 0; + + if (sc->dirs) { + osize = sc->dirs->size; + } + + sc->dirs = splaytree_insert(sc->dirs, dir_ndx, fam_dir); + assert(sc->dirs); + assert(sc->dirs->data == fam_dir); + assert(osize == (sc->dirs->size - 1)); + } + } else { + fam_dir = dir_node->data; + } + + /* bind the fam_fc to the stat() cache entry */ + + if (fam_dir) { + sce->dir_version = fam_dir->version; + sce->dir_ndx = dir_ndx; + } + } +#endif + + *ret_sce = sce; + + return HANDLER_GO_ON; +} + +/** + * remove stat() from cache which havn't been stat()ed for + * more than 10 seconds + * + * + * walk though the stat-cache, collect the ids which are too old + * and remove them in a second loop + */ + +static int stat_cache_tag_old_entries(server *srv, splay_tree *t, int *keys, size_t *ndx) { + stat_cache_entry *sce; + + if (!t) return 0; + + stat_cache_tag_old_entries(srv, t->left, keys, ndx); + stat_cache_tag_old_entries(srv, t->right, keys, ndx); + + sce = t->data; + + if (srv->cur_ts - sce->stat_ts > 2) { + keys[(*ndx)++] = t->key; + } + + return 0; +} + +int stat_cache_trigger_cleanup(server *srv) { + stat_cache *sc; + size_t max_ndx = 0, i; + int *keys; + + sc = srv->stat_cache; + + if (!sc->files) return 0; + + keys = calloc(1, sizeof(size_t) * sc->files->size); + + stat_cache_tag_old_entries(srv, sc->files, keys, &max_ndx); + + for (i = 0; i < max_ndx; i++) { + int ndx = keys[i]; + splay_tree *node; + + sc->files = splaytree_splay(sc->files, ndx); + + node = sc->files; + + if (node && (node->key == ndx)) { +#ifdef DEBUG_STAT_CACHE + size_t j; + int osize = splaytree_size(sc->files); + stat_cache_entry *sce = node->data; +#endif + stat_cache_entry_free(node->data); + sc->files = splaytree_delete(sc->files, ndx); + +#ifdef DEBUG_STAT_CACHE + for (j = 0; j < ctrl.used; j++) { + if (ctrl.ptr[j] == ndx) { + ctrl.ptr[j] = ctrl.ptr[--ctrl.used]; + break; + } + } + + assert(osize - 1 == splaytree_size(sc->files)); +#endif + } + } + + free(keys); + + return 0; +} |