aboutsummaryrefslogtreecommitdiffstats
path: root/example/passthrough_hp.cc
diff options
context:
space:
mode:
Diffstat (limited to 'example/passthrough_hp.cc')
-rw-r--r--example/passthrough_hp.cc1280
1 files changed, 1280 insertions, 0 deletions
diff --git a/example/passthrough_hp.cc b/example/passthrough_hp.cc
new file mode 100644
index 0000000..dba8751
--- /dev/null
+++ b/example/passthrough_hp.cc
@@ -0,0 +1,1280 @@
+/*
+ FUSE: Filesystem in Userspace
+ Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu>
+ Copyright (C) 2017 Nikolaus Rath <Nikolaus@rath.org>
+ Copyright (C) 2018 Valve, Inc
+
+ This program can be distributed under the terms of the GNU GPL.
+ See the file COPYING.
+*/
+
+/** @file
+ *
+ * This is a "high-performance" version of passthrough_ll.c. While
+ * passthrough_ll.c is designed to be as simple as possible, this
+ * example is intended to be as efficient and correct as possible.
+ *
+ * passthrough_hp.cc mirrors a specified "source" directory under a
+ * specified mountpoint with as much fidelity and performance as
+ * possible.
+ *
+ * If --nocache is specified, the source directory may be changed
+ * directly even while mounted and the filesystem will continue
+ * to work correctly.
+ *
+ * Without --nocache, the source directory is assumed to be modified
+ * only through the passthrough filesystem. This enables much better
+ * performance, but if changes are made directly to the source, they
+ * may not be immediately visible under the mountpoint and further
+ * access to the mountpoint may result in incorrect behavior,
+ * including data-loss.
+ *
+ * On its own, this filesystem fulfills no practical purpose. It is
+ * intended as a template upon which additional functionality can be
+ * built.
+ *
+ * Unless --nocache is specified, it is only possible to write to files
+ * for which the mounting user has read permissions. This is because
+ * the writeback cache requires the kernel to be able to issue read
+ * requests for all files (which the passthrough filesystem cannot
+ * satisfy if it can't read the file in the underlying filesystem).
+ *
+ * ## Source code ##
+ * \include passthrough_hp.cc
+ */
+
+#define FUSE_USE_VERSION 35
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+
+// C includes
+#include <dirent.h>
+#include <err.h>
+#include <errno.h>
+#include <ftw.h>
+#include <fuse_lowlevel.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/file.h>
+#include <sys/resource.h>
+#include <sys/xattr.h>
+#include <time.h>
+#include <unistd.h>
+#include <pthread.h>
+
+// C++ includes
+#include <cstddef>
+#include <cstdio>
+#include <cstdlib>
+#include <list>
+#include <cxxopts.hpp>
+#include <mutex>
+#include <fstream>
+#include <thread>
+#include <iomanip>
+
+using namespace std;
+
+/* We are re-using pointers to our `struct Inode` and `struct
+ DirHandle` elements as inodes and file handles. This means that we
+ must be able to store a pointer in both a fuse_ino_t
+ variable and a uint64_t variable (used for file handles). */
+static_assert(sizeof(fuse_ino_t) >= sizeof(void*),
+ "void* must fit into fuse_ino_t");
+static_assert(sizeof(fuse_ino_t) >= sizeof(uint64_t),
+ "fuse_ino_t must be at least 64 bits");
+
+
+/* Forward declarations: Inode is named by the InodeMap typedef before
+ its definition; get_inode() and forget_one() are defined further down. */
+struct Inode;
+static Inode& get_inode(fuse_ino_t ino);
+static void forget_one(fuse_ino_t ino, uint64_t n);
+
+// Key that uniquely identifies a file in the source directory tree:
+// (inode number, device number). Since the source directory must not
+// contain any mountpoints, ino_t alone would suffice. The pair is kept
+// in case that constraint is ever relaxed -- although relaxing it
+// would mean inode numbers can no longer be re-used, so readdir()
+// would have to do a full lookup() to report the right inode number.
+using SrcId = std::pair<ino_t, dev_t>;
+
+// Hash support so that SrcId can be used as an unordered_map key.
+namespace std {
+    template<>
+    struct hash<SrcId> {
+        // Combines the hashes of both halves of the pair with XOR.
+        size_t operator()(const SrcId& id) const {
+            const size_t ino_hash = hash<ino_t>{}(id.first);
+            const size_t dev_hash = hash<dev_t>{}(id.second);
+            return ino_hash ^ dev_hash;
+        }
+    };
+}
+
+// Lookup table from a source-tree file (SrcId) to its Inode object.
+using InodeMap = std::unordered_map<SrcId, Inode>;
+
+/* Per-file state. An Inode with fd == -1 has not been set up yet;
+   the descriptor, once assigned, is closed by the destructor. */
+struct Inode {
+    int fd {-1};              // descriptor for the file, -1 while unset
+    bool is_symlink {false};  // set from S_ISLNK() when the inode is created
+    dev_t src_dev {0};        // st_dev of the file in the source tree
+    ino_t src_ino {0};        // st_ino of the file in the source tree
+    uint64_t nlookup {0};     // lookup count; the inode is erased at zero
+    std::mutex m;
+
+    // Delete copy constructor and assignments. We could implement
+    // move if we need it.
+    Inode() = default;
+    Inode(const Inode&) = delete;
+    Inode(Inode&& inode) = delete;
+    Inode& operator=(Inode&& inode) = delete;
+    Inode& operator=(const Inode&) = delete;
+
+    ~Inode() {
+        // -1 is the only "no descriptor" value; 0 is a valid descriptor
+        // and must be closed as well.
+        if (fd != -1)
+            close(fd);
+    }
+};
+
+// Process-wide filesystem state; the single instance is `fs` below.
+struct Fs {
+ // Must be acquired *after* any Inode.m locks.
+ std::mutex mutex;
+ InodeMap inodes; // protected by mutex
+ Inode root; // returned by get_inode() for FUSE_ROOT_ID
+ double timeout; // attr/entry timeout passed to the kernel; 0 with --nocache
+ bool debug; // print "DEBUG:" messages to stderr
+ std::string source; // resolved path of the source directory
+ size_t blocksize; // not referenced in the visible code
+ dev_t src_dev; // st_dev of the source directory
+ bool nosplice; // set by --nosplice
+ bool nocache; // not referenced in the visible code (timeout carries --nocache)
+};
+static Fs fs{};
+
+
+// Copy flags for fuse_buf_copy()/fuse_reply_data(): FUSE_BUF_NO_SPLICE
+// when fs.nosplice is set, otherwise no flags.
+#define FUSE_BUF_COPY_FLAGS \
+    (fs.nosplice ? FUSE_BUF_NO_SPLICE : static_cast<fuse_buf_copy_flags>(0))
+
+
+/* Map a FUSE inode number to its Inode object. FUSE_ROOT_ID maps to
+   fs.root; every other number is a pointer to an Inode. Aborts if the
+   Inode has no descriptor (fd == -1). */
+static Inode& get_inode(fuse_ino_t ino) {
+    if (ino != FUSE_ROOT_ID) {
+        auto candidate = reinterpret_cast<Inode*>(ino);
+        if (candidate->fd != -1)
+            return *candidate;
+
+        cerr << "INTERNAL ERROR: Unknown inode " << ino << endl;
+        abort();
+    }
+    return fs.root;
+}
+
+
+// Shorthand for the descriptor stored in the Inode behind `ino`.
+static int get_fs_fd(fuse_ino_t ino) {
+    return get_inode(ino).fd;
+}
+
+
+/* FUSE init handler: request every capability we can use, provided
+   the kernel offers it. */
+static void sfs_init(void *userdata, fuse_conn_info *conn) {
+    (void)userdata;
+
+    // Request `flag` only if the connection reports it as available.
+    auto want_if_capable = [conn](unsigned int flag) {
+        if (conn->capable & flag)
+            conn->want |= flag;
+    };
+
+    want_if_capable(FUSE_CAP_EXPORT_SUPPORT);
+
+    // Writeback caching is only requested when caching is enabled.
+    if (fs.timeout)
+        want_if_capable(FUSE_CAP_WRITEBACK_CACHE);
+
+    want_if_capable(FUSE_CAP_FLOCK_LOCKS);
+
+    // Use splicing if supported. With writeback caching and readahead,
+    // individual requests should be large enough for splicing between
+    // fd's to pay off.
+    if (!fs.nosplice) {
+        want_if_capable(FUSE_CAP_SPLICE_WRITE);
+        want_if_capable(FUSE_CAP_SPLICE_READ);
+    }
+}
+
+
+/* getattr handler: stat the inode's own descriptor (without following
+   symlinks) and reply with the result and fs.timeout. */
+static void sfs_getattr(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) {
+    (void)fi;
+    Inode& inode = get_inode(ino);
+
+    struct stat st;
+    if (fstatat(inode.fd, "", &st, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW) == -1) {
+        fuse_reply_err(req, errno);
+        return;
+    }
+    fuse_reply_attr(req, &st, fs.timeout);
+}
+
+
+#ifdef HAVE_UTIMENSAT
+/* Set timestamps on `inode`. Non-symlinks are addressed through
+   /proc/self/fd/<fd>; symlinks use AT_EMPTY_PATH on the descriptor.
+   Returns the utimensat() result. */
+static int utimensat_empty_nofollow(Inode& inode,
+                                    const struct timespec *tv) {
+    if (!inode.is_symlink) {
+        char proc_path[64];
+        sprintf(proc_path, "/proc/self/fd/%i", inode.fd);
+        return utimensat(AT_FDCWD, proc_path, tv, 0);
+    }
+
+    /* Does not work on current kernels, but may in the future:
+       https://marc.info/?l=linux-kernel&m=154158217810354&w=2 */
+    const int rc = utimensat(inode.fd, "", tv,
+                             AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+    if (rc == -1 && errno == EINVAL)
+        errno = EPERM;  // no race-free way to set times on a symlink
+    return rc;
+}
+#endif
+
+
+/* Apply the attribute changes selected by the FUSE_SET_ATTR_* bits in
+   `valid` (mode, owner, size, timestamps, in that order). Replies with
+   the errno of the first failing step, otherwise with the fresh
+   attributes via sfs_getattr(). When `fi` is given, the open file
+   handle is used where possible; otherwise the file is addressed
+   through /proc/self/fd/<fd>. */
+static void do_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
+                       int valid, struct fuse_file_info* fi) {
+    Inode& inode = get_inode(ino);
+    const int path_fd = inode.fd;
+    int rc = 0;
+
+    // Every failing step reports the errno of the call that just failed.
+    auto fail = [req]() { fuse_reply_err(req, errno); };
+
+    if (valid & FUSE_SET_ATTR_MODE) {
+        if (fi != nullptr) {
+            rc = fchmod(fi->fh, attr->st_mode);
+        } else {
+            char proc_path[64];
+            sprintf(proc_path, "/proc/self/fd/%i", path_fd);
+            rc = chmod(proc_path, attr->st_mode);
+        }
+        if (rc == -1)
+            return fail();
+    }
+
+    if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) {
+        // -1 leaves the respective id unchanged.
+        uid_t owner = static_cast<uid_t>(-1);
+        gid_t group = static_cast<gid_t>(-1);
+        if (valid & FUSE_SET_ATTR_UID)
+            owner = attr->st_uid;
+        if (valid & FUSE_SET_ATTR_GID)
+            group = attr->st_gid;
+
+        rc = fchownat(path_fd, "", owner, group,
+                      AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+        if (rc == -1)
+            return fail();
+    }
+
+    if (valid & FUSE_SET_ATTR_SIZE) {
+        if (fi != nullptr) {
+            rc = ftruncate(fi->fh, attr->st_size);
+        } else {
+            char proc_path[64];
+            sprintf(proc_path, "/proc/self/fd/%i", path_fd);
+            rc = truncate(proc_path, attr->st_size);
+        }
+        if (rc == -1)
+            return fail();
+    }
+
+    if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) {
+        // times[0] is atime, times[1] is mtime; both default to "omit".
+        struct timespec times[2];
+        for (auto& t : times) {
+            t.tv_sec = 0;
+            t.tv_nsec = UTIME_OMIT;
+        }
+
+        if (valid & FUSE_SET_ATTR_ATIME_NOW)
+            times[0].tv_nsec = UTIME_NOW;
+        else if (valid & FUSE_SET_ATTR_ATIME)
+            times[0] = attr->st_atim;
+
+        if (valid & FUSE_SET_ATTR_MTIME_NOW)
+            times[1].tv_nsec = UTIME_NOW;
+        else if (valid & FUSE_SET_ATTR_MTIME)
+            times[1] = attr->st_mtim;
+
+        if (fi != nullptr) {
+            rc = futimens(fi->fh, times);
+        } else {
+#ifdef HAVE_UTIMENSAT
+            rc = utimensat_empty_nofollow(inode, times);
+#else
+            rc = -1;
+            errno = EOPNOTSUPP;
+#endif
+        }
+        if (rc == -1)
+            return fail();
+    }
+
+    sfs_getattr(req, ino, fi);
+}
+
+
+// setattr handler: all of the work happens in do_setattr().
+static void sfs_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr,
+                        int valid, fuse_file_info *fi) {
+    do_setattr(req, ino, attr, valid, fi);
+}
+
+
+/* Look up `name` inside directory `parent` and fill in `e`.
+ *
+ * On success the entry's Inode is created (lookup count 1) or its
+ * lookup count is incremented, e->ino points at it, and 0 is returned.
+ * On failure an errno value is returned; the descriptor opened here is
+ * closed on every failure path so that failed lookups cannot exhaust
+ * the descriptor table.
+ */
+static int do_lookup(fuse_ino_t parent, const char *name,
+                     fuse_entry_param *e) {
+    if (fs.debug)
+        cerr << "DEBUG: lookup(): name=" << name
+             << ", parent=" << parent << endl;
+    memset(e, 0, sizeof(*e));
+    e->attr_timeout = fs.timeout;
+    e->entry_timeout = fs.timeout;
+
+    auto newfd = openat(get_fs_fd(parent), name, O_PATH | O_NOFOLLOW);
+    if (newfd == -1)
+        return errno;
+
+    auto res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW);
+    if (res == -1) {
+        auto saveerr = errno;
+        close(newfd);
+        if (fs.debug)
+            cerr << "DEBUG: lookup(): fstatat failed" << endl;
+        return saveerr;
+    }
+
+    if (e->attr.st_dev != fs.src_dev) {
+        cerr << "WARNING: Mountpoints in the source directory tree will be hidden." << endl;
+        close(newfd);  // not handed to any Inode
+        return ENOTSUP;
+    } else if (e->attr.st_ino == FUSE_ROOT_ID) {
+        cerr << "ERROR: Source directory tree must not include inode "
+             << FUSE_ROOT_ID << endl;
+        close(newfd);  // not handed to any Inode
+        return EIO;
+    }
+
+    SrcId id {e->attr.st_ino, e->attr.st_dev};
+    unique_lock<mutex> fs_lock {fs.mutex};
+    Inode* inode_p;
+    try {
+        // operator[] default-constructs the Inode (fd == -1) if the
+        // file is not known yet.
+        inode_p = &fs.inodes[id];
+    } catch (std::bad_alloc&) {
+        fs_lock.unlock();
+        close(newfd);  // not handed to any Inode
+        return ENOMEM;
+    }
+    e->ino = reinterpret_cast<fuse_ino_t>(inode_p);
+    Inode& inode {*inode_p};
+
+    if(inode.fd != -1) { // found existing inode
+        fs_lock.unlock();
+        if (fs.debug)
+            cerr << "DEBUG: lookup(): inode " << e->attr.st_ino
+                 << " (userspace) already known." << endl;
+        lock_guard<mutex> g {inode.m};
+        inode.nlookup++;
+        // The existing Inode already owns a descriptor for this file.
+        close(newfd);
+    } else { // no existing inode
+        /* This is just here to make Helgrind happy. It violates the
+           lock ordering requirement (inode.m must be acquired before
+           fs.mutex), but this is of no consequence because at this
+           point no other thread has access to the inode mutex */
+        lock_guard<mutex> g {inode.m};
+        inode.src_ino = e->attr.st_ino;
+        inode.src_dev = e->attr.st_dev;
+        inode.is_symlink = S_ISLNK(e->attr.st_mode);
+        inode.nlookup = 1;
+        inode.fd = newfd;  // ownership of newfd passes to the Inode
+        fs_lock.unlock();
+
+        if (fs.debug)
+            cerr << "DEBUG: lookup(): created userspace inode " << e->attr.st_ino
+                 << endl;
+    }
+
+    return 0;
+}
+
+
+/* lookup handler. A missing name (ENOENT) is answered with a negative
+   entry (ino 0) carrying fs.timeout; other errors are reported as-is. */
+static void sfs_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) {
+    fuse_entry_param e {};
+    const int err = do_lookup(parent, name, &e);
+
+    if (err != 0 && err != ENOENT) {
+        if (err == ENFILE || err == EMFILE)
+            cerr << "ERROR: Reached maximum number of file descriptors." << endl;
+        fuse_reply_err(req, err);
+        return;
+    }
+
+    if (err == ENOENT) {
+        e.attr_timeout = fs.timeout;
+        e.entry_timeout = fs.timeout;
+        e.ino = e.attr.st_ino = 0;
+    }
+    fuse_reply_entry(req, &e);
+}
+
+
+/* Shared implementation of mknod, mkdir and symlink. The file type in
+   `mode` selects the call: directories use mkdirat(), symlinks use
+   symlinkat() with target `link`, everything else mknodat(). On
+   success the new entry is looked up and returned to the kernel. */
+static void mknod_symlink(fuse_req_t req, fuse_ino_t parent,
+                          const char *name, mode_t mode, dev_t rdev,
+                          const char *link) {
+    Inode& dir = get_inode(parent);
+
+    int rc;
+    if (S_ISDIR(mode))
+        rc = mkdirat(dir.fd, name, mode);
+    else if (S_ISLNK(mode))
+        rc = symlinkat(link, dir.fd, name);
+    else
+        rc = mknodat(dir.fd, name, mode, rdev);
+    int saverr = errno;
+
+    if (rc != -1) {
+        fuse_entry_param e;
+        saverr = do_lookup(parent, name, &e);
+        if (!saverr) {
+            fuse_reply_entry(req, &e);
+            return;
+        }
+    }
+
+    if (saverr == ENFILE || saverr == EMFILE)
+        cerr << "ERROR: Reached maximum number of file descriptors." << endl;
+    fuse_reply_err(req, saverr);
+}
+
+
+// mknod handler: forwards to mknod_symlink() without a symlink target.
+static void sfs_mknod(fuse_req_t req, fuse_ino_t parent, const char *name,
+ mode_t mode, dev_t rdev) {
+ mknod_symlink(req, parent, name, mode, rdev, nullptr);
+}
+
+
+// mkdir handler: forwards to mknod_symlink() with S_IFDIR added to `mode`.
+static void sfs_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name,
+ mode_t mode) {
+ mknod_symlink(req, parent, name, S_IFDIR | mode, 0, nullptr);
+}
+
+
+// symlink handler: forwards to mknod_symlink() with mode S_IFLNK and
+// `link` as the symlink target.
+static void sfs_symlink(fuse_req_t req, const char *link, fuse_ino_t parent,
+ const char *name) {
+ mknod_symlink(req, parent, name, S_IFLNK, 0, link);
+}
+
+
+/* Create hard link `name` in directory `dfd` to `inode`. Non-symlinks
+   are addressed through /proc/self/fd/<fd>; symlinks use AT_EMPTY_PATH,
+   with ENOENT/EINVAL failures mapped to EOPNOTSUPP. Returns the
+   linkat() result. */
+static int linkat_empty_nofollow(Inode& inode, int dfd, const char *name) {
+    if (!inode.is_symlink) {
+        char proc_path[64];
+        sprintf(proc_path, "/proc/self/fd/%i", inode.fd);
+        return linkat(AT_FDCWD, proc_path, dfd, name, AT_SYMLINK_FOLLOW);
+    }
+
+    const int rc = linkat(inode.fd, "", dfd, name, AT_EMPTY_PATH);
+    const bool unsupported = (errno == ENOENT || errno == EINVAL);
+    if (rc == -1 && unsupported)
+        errno = EOPNOTSUPP;  // no race-free way to hard-link a symlink
+    return rc;
+}
+
+
+/* link handler: create hard link `name` in `parent` to inode `ino`,
+   then reply with the inode's refreshed attributes. The inode's lookup
+   count is incremented for the returned entry. */
+static void sfs_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent,
+                     const char *name) {
+    Inode& target = get_inode(ino);
+    Inode& dir = get_inode(parent);
+
+    if (linkat_empty_nofollow(target, dir.fd, name) == -1) {
+        fuse_reply_err(req, errno);
+        return;
+    }
+
+    fuse_entry_param e {};
+    e.attr_timeout = fs.timeout;
+    e.entry_timeout = fs.timeout;
+
+    if (fstatat(target.fd, "", &e.attr,
+                AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW) == -1) {
+        fuse_reply_err(req, errno);
+        return;
+    }
+
+    e.ino = reinterpret_cast<fuse_ino_t>(&target);
+    {
+        lock_guard<mutex> count_guard {target.m};
+        target.nlookup++;
+    }
+
+    fuse_reply_entry(req, &e);
+}
+
+
+// rmdir handler: removes directory `name` from `parent` while holding
+// the parent's inode mutex.
+static void sfs_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) {
+    Inode& dir = get_inode(parent);
+    lock_guard<mutex> dir_guard {dir.m};
+
+    int err = 0;
+    if (unlinkat(dir.fd, name, AT_REMOVEDIR) == -1)
+        err = errno;
+    fuse_reply_err(req, err);
+}
+
+
+// rename handler: plain renameat(); any non-zero `flags` is rejected
+// with EINVAL.
+static void sfs_rename(fuse_req_t req, fuse_ino_t parent, const char *name,
+                       fuse_ino_t newparent, const char *newname,
+                       unsigned int flags) {
+    Inode& src_dir = get_inode(parent);
+    Inode& dst_dir = get_inode(newparent);
+
+    if (flags != 0) {
+        fuse_reply_err(req, EINVAL);
+        return;
+    }
+
+    int err = 0;
+    if (renameat(src_dir.fd, name, dst_dir.fd, newname) == -1)
+        err = errno;
+    fuse_reply_err(req, err);
+}
+
+
+// unlink handler: removes the non-directory entry `name` from `parent`.
+static void sfs_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) {
+    Inode& dir = get_inode(parent);
+
+    int err = 0;
+    if (unlinkat(dir.fd, name, 0) == -1)
+        err = errno;
+    fuse_reply_err(req, err);
+}
+
+
+/* Drop `n` references from the lookup count of inode `ino`. Aborts if
+ that would make the count negative. When the count reaches zero the
+ inode is erased from fs.inodes. */
+static void forget_one(fuse_ino_t ino, uint64_t n) {
+ Inode& inode = get_inode(ino);
+ unique_lock<mutex> l {inode.m};
+
+ if(n > inode.nlookup) {
+ cerr << "INTERNAL ERROR: Negative lookup count for inode "
+ << inode.src_ino << endl;
+ abort();
+ }
+ inode.nlookup -= n;
+ if (!inode.nlookup) {
+ if (fs.debug)
+ cerr << "DEBUG: forget: cleaning up inode " << inode.src_ino << endl;
+ {
+ // fs.mutex is taken while inode.m is still held (the documented
+ // lock order); inode.m is released before erase() so that `l`
+ // is no longer locked when the inode is removed from the map.
+ // NOTE(review): do_lookup() releases fs.mutex before locking
+ // inode.m on its "existing inode" path -- confirm it cannot
+ // still be using this inode when it is erased here.
+ lock_guard<mutex> g_fs {fs.mutex};
+ l.unlock();
+ fs.inodes.erase({inode.src_ino, inode.src_dev});
+ }
+ } else if (fs.debug)
+ cerr << "DEBUG: forget: inode " << inode.src_ino
+ << " lookup count now " << inode.nlookup << endl;
+}
+
+// forget handler: drops `nlookup` references from `ino`; forget
+// requests are answered with fuse_reply_none().
+static void sfs_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) {
+ forget_one(ino, nlookup);
+ fuse_reply_none(req);
+}
+
+
+/* forget_multi handler: applies forget_one() to each of the `count`
+   entries in `forgets`, then answers with fuse_reply_none(). */
+static void sfs_forget_multi(fuse_req_t req, size_t count,
+                             fuse_forget_data *forgets) {
+    // The index has the same type as `count`, avoiding a signed/unsigned
+    // comparison and any overflow of an int index.
+    for (size_t i = 0; i < count; i++)
+        forget_one(forgets[i].ino, forgets[i].nlookup);
+    fuse_reply_none(req);
+}
+
+
+/* readlink handler. The buffer holds PATH_MAX + 1 bytes; a result that
+   fills it completely is reported as ENAMETOOLONG. */
+static void sfs_readlink(fuse_req_t req, fuse_ino_t ino) {
+    Inode& inode = get_inode(ino);
+    char target[PATH_MAX + 1];
+
+    const auto len = readlinkat(inode.fd, "", target, sizeof(target));
+    if (len == -1) {
+        fuse_reply_err(req, errno);
+        return;
+    }
+    if (len == sizeof(target)) {
+        fuse_reply_err(req, ENAMETOOLONG);
+        return;
+    }
+
+    // readlinkat() does not NUL-terminate its result.
+    target[len] = '\0';
+    fuse_reply_readlink(req, target);
+}
+
+
+/* State behind an open directory file handle. The directory stream is
+   closed by the destructor. */
+struct DirHandle {
+    DIR *dp {nullptr};  // directory stream, nullptr until opened
+    off_t offset {0};   // directory position tracked by do_readdir();
+                        // initialized so it never holds an indeterminate value
+
+    DirHandle() = default;
+    DirHandle(const DirHandle&) = delete;
+    DirHandle& operator=(const DirHandle&) = delete;
+
+    ~DirHandle() {
+        if(dp)
+            closedir(dp);
+    }
+};
+
+
+// Recover the DirHandle pointer that sfs_opendir() stored in fi->fh.
+static DirHandle *get_dir_handle(fuse_file_info *fi) {
+    auto handle = reinterpret_cast<DirHandle*>(fi->fh);
+    return handle;
+}
+
+
+/* opendir handler: open a readable descriptor for the directory, wrap
+   it in a DirHandle and return that handle's address as the file
+   handle. On failure the DirHandle and any descriptor opened here are
+   released before the error is reported. */
+static void sfs_opendir(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) {
+    Inode& inode = get_inode(ino);
+    auto d = new (nothrow) DirHandle;
+    if (d == nullptr) {
+        fuse_reply_err(req, ENOMEM);
+        return;
+    }
+
+    // Make Helgrind happy - it can't know that there's an implicit
+    // synchronization due to the fact that other threads cannot
+    // access d until we've called fuse_reply_*.
+    lock_guard<mutex> g {inode.m};
+
+    auto fd = openat(inode.fd, ".", O_RDONLY);
+    if (fd != -1) {
+        // On success, dir stream takes ownership of fd, so we
+        // do not have to close it.
+        d->dp = fdopendir(fd);
+        if (d->dp != nullptr) {
+            d->offset = 0;
+
+            fi->fh = reinterpret_cast<uint64_t>(d);
+            if(fs.timeout) {
+                fi->keep_cache = 1;
+                fi->cache_readdir = 1;
+            }
+            fuse_reply_open(req, fi);
+            return;
+        }
+    }
+
+    // Capture errno before any cleanup call can overwrite it.
+    auto error = errno;
+    // If fdopendir() failed, fd was never adopted by a stream and
+    // ~DirHandle will not close it, so release it here.
+    if (fd != -1)
+        close(fd);
+    delete d;
+    if (error == ENFILE || error == EMFILE)
+        cerr << "ERROR: Reached maximum number of file descriptors." << endl;
+    fuse_reply_err(req, error);
+}
+
+
+// True exactly when `name` is "." or "..".
+static bool is_dot_or_dotdot(const char *name) {
+    if (strcmp(name, ".") == 0)
+        return true;
+    return strcmp(name, "..") == 0;
+}
+
+
+/* Shared implementation of readdir (plus == 0) and readdirplus
+ (plus != 0). Fills a buffer of at most `size` bytes with directory
+ entries starting at `offset` and replies with it. "." and ".." are
+ skipped. In readdirplus mode every returned entry goes through
+ do_lookup(), which creates the inode or bumps its lookup count. */
+static void do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
+ off_t offset, fuse_file_info *fi, int plus) {
+ auto d = get_dir_handle(fi);
+ Inode& inode = get_inode(ino);
+ lock_guard<mutex> g {inode.m};
+ char *p;
+ auto rem = size;
+ int err = 0, count = 0;
+
+ if (fs.debug)
+ cerr << "DEBUG: readdir(): started with offset "
+ << offset << endl;
+
+ auto buf = new (nothrow) char[size];
+ if (!buf) {
+ fuse_reply_err(req, ENOMEM);
+ return;
+ }
+ p = buf;
+
+ // Reposition the stream only if the requested offset differs from
+ // the position recorded in the handle.
+ if (offset != d->offset) {
+ if (fs.debug)
+ cerr << "DEBUG: readdir(): seeking to " << offset << endl;
+ seekdir(d->dp, offset);
+ d->offset = offset;
+ }
+
+ while (1) {
+ struct dirent *entry;
+ // readdir() returns nullptr both at end of stream and on error;
+ // errno is cleared first so the two can be told apart.
+ errno = 0;
+ entry = readdir(d->dp);
+ if (!entry) {
+ if(errno) {
+ err = errno;
+ if (fs.debug)
+ warn("DEBUG: readdir(): readdir failed with");
+ goto error;
+ }
+ break; // End of stream
+ }
+ d->offset = entry->d_off;
+ if (is_dot_or_dotdot(entry->d_name))
+ continue;
+
+ fuse_entry_param e{};
+ size_t entsize;
+ if(plus) {
+ err = do_lookup(ino, entry->d_name, &e);
+ if (err)
+ goto error;
+ entsize = fuse_add_direntry_plus(req, p, rem, entry->d_name, &e, entry->d_off);
+
+ if (entsize > rem) {
+ if (fs.debug)
+ cerr << "DEBUG: readdir(): buffer full, returning data. " << endl;
+ // The entry is not returned, so undo the lookup count that
+ // do_lookup() just added.
+ forget_one(e.ino, 1);
+ break;
+ }
+ } else {
+ e.attr.st_ino = entry->d_ino;
+ e.attr.st_mode = entry->d_type << 12;
+ entsize = fuse_add_direntry(req, p, rem, entry->d_name, &e.attr, entry->d_off);
+
+ if (entsize > rem) {
+ if (fs.debug)
+ cerr << "DEBUG: readdir(): buffer full, returning data. " << endl;
+ break;
+ }
+ }
+
+ p += entsize;
+ rem -= entsize;
+ count++;
+ if (fs.debug) {
+ cerr << "DEBUG: readdir(): added to buffer: " << entry->d_name
+ << ", ino " << e.attr.st_ino << ", offset " << entry->d_off << endl;
+ }
+ }
+ err = 0;
+error:
+
+ // If there's an error, we can only signal it if we haven't stored
+ // any entries yet - otherwise we'd end up with wrong lookup
+ // counts for the entries that are already in the buffer. So we
+ // return what we've collected until that point.
+ if (err && rem == size) {
+ if (err == ENFILE || err == EMFILE)
+ cerr << "ERROR: Reached maximum number of file descriptors." << endl;
+ fuse_reply_err(req, err);
+ } else {
+ if (fs.debug)
+ cerr << "DEBUG: readdir(): returning " << count
+ << " entries, curr offset " << d->offset << endl;
+ fuse_reply_buf(req, buf, size - rem);
+ }
+ delete[] buf;
+ return;
+}
+
+
+// readdir handler: do_readdir() in plain mode (plus == 0).
+static void sfs_readdir(fuse_req_t req, fuse_ino_t ino, size_t size,
+ off_t offset, fuse_file_info *fi) {
+ // operation logging is done in readdir to reduce code duplication
+ do_readdir(req, ino, size, offset, fi, 0);
+}
+
+
+// readdirplus handler: do_readdir() in "plus" mode (plus == 1), which
+// performs a lookup for every returned entry.
+static void sfs_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size,
+ off_t offset, fuse_file_info *fi) {
+ // operation logging is done in readdir to reduce code duplication
+ do_readdir(req, ino, size, offset, fi, 1);
+}
+
+
+// releasedir handler: destroys the DirHandle (closing its stream).
+static void sfs_releasedir(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) {
+    (void) ino;
+    delete get_dir_handle(fi);
+    fuse_reply_err(req, 0);
+}
+
+
+/* create handler: create-and-open `name` in `parent`, then look the
+   new entry up and return it together with the open file handle. */
+static void sfs_create(fuse_req_t req, fuse_ino_t parent, const char *name,
+                       mode_t mode, fuse_file_info *fi) {
+    Inode& inode_p = get_inode(parent);
+
+    auto fd = openat(inode_p.fd, name,
+                     (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode);
+    if (fd == -1) {
+        auto err = errno;
+        if (err == ENFILE || err == EMFILE)
+            cerr << "ERROR: Reached maximum number of file descriptors." << endl;
+        fuse_reply_err(req, err);
+        return;
+    }
+
+    fi->fh = fd;
+    fuse_entry_param e;
+    auto err = do_lookup(parent, name, &e);
+    if (err) {
+        // The error reply carries no file handle, so sfs_release() is
+        // not reached for fd; close it here or it leaks.
+        close(fd);
+        if (err == ENFILE || err == EMFILE)
+            cerr << "ERROR: Reached maximum number of file descriptors." << endl;
+        fuse_reply_err(req, err);
+        return;
+    }
+    fuse_reply_create(req, &e, fi);
+}
+
+
+// fsyncdir handler: fdatasync() or fsync() on the directory stream's
+// descriptor, depending on `datasync`.
+static void sfs_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync,
+                         fuse_file_info *fi) {
+    (void) ino;
+    const int dir_fd = dirfd(get_dir_handle(fi)->dp);
+    const int rc = datasync ? fdatasync(dir_fd) : fsync(dir_fd);
+    fuse_reply_err(req, rc == -1 ? errno : 0);
+}
+
+
+/* open handler: re-open the inode through /proc/self/fd/<fd> with the
+   requested flags and hand the new descriptor back as the file handle. */
+static void sfs_open(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) {
+    Inode& inode = get_inode(ino);
+    const bool caching = (fs.timeout != 0);
+
+    if (caching) {
+        /* With writeback cache, kernel may send read requests even
+           when userspace opened write-only */
+        if ((fi->flags & O_ACCMODE) == O_WRONLY) {
+            fi->flags &= ~O_ACCMODE;
+            fi->flags |= O_RDWR;
+        }
+
+        /* With writeback cache, O_APPEND is handled by the kernel. This
+           breaks atomicity (since the file may change in the underlying
+           filesystem, so that the kernel's idea of the end of the file
+           isn't accurate anymore). However, no process should modify the
+           file in the underlying filesystem once it has been read, so
+           this is not a problem. */
+        fi->flags &= ~O_APPEND;
+    }
+
+    /* inode.fd itself cannot be used: it was opened with O_PATH and
+       therefore does not allow read/write access. */
+    char proc_path[64];
+    sprintf(proc_path, "/proc/self/fd/%i", inode.fd);
+    const int fd = open(proc_path, fi->flags & ~O_NOFOLLOW);
+    if (fd == -1) {
+        const int err = errno;
+        if (err == ENFILE || err == EMFILE)
+            cerr << "ERROR: Reached maximum number of file descriptors." << endl;
+        fuse_reply_err(req, err);
+        return;
+    }
+
+    fi->keep_cache = caching;
+    fi->fh = fd;
+    fuse_reply_open(req, fi);
+}
+
+
+// release handler: closes the descriptor stored in the file handle and
+// always replies with success.
+static void sfs_release(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) {
+    (void) ino;
+    const int fd = static_cast<int>(fi->fh);
+    close(fd);
+    fuse_reply_err(req, 0);
+}
+
+
+// flush handler: closes a duplicate of the file descriptor and reports
+// the result of that close().
+static void sfs_flush(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) {
+    (void) ino;
+    const int duplicate = dup(fi->fh);
+    const int rc = close(duplicate);
+    fuse_reply_err(req, rc == -1 ? errno : 0);
+}
+
+
+// fsync handler: fdatasync() or fsync() on the file handle, depending
+// on `datasync`.
+static void sfs_fsync(fuse_req_t req, fuse_ino_t ino, int datasync,
+                      fuse_file_info *fi) {
+    (void) ino;
+    const int rc = datasync ? fdatasync(fi->fh) : fsync(fi->fh);
+    fuse_reply_err(req, rc == -1 ? errno : 0);
+}
+
+
+/* Reply to a read request with a buffer that refers to the open file
+   descriptor at offset `off`, so libfuse transfers the data itself. */
+static void do_read(fuse_req_t req, size_t size, off_t off, fuse_file_info *fi) {
+    fuse_bufvec vec = FUSE_BUFVEC_INIT(size);
+
+    fuse_buf& src = vec.buf[0];
+    src.flags = static_cast<fuse_buf_flags>(FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK);
+    src.fd = fi->fh;
+    src.pos = off;
+
+    fuse_reply_data(req, &vec, FUSE_BUF_COPY_FLAGS);
+}
+
+// read handler: the inode number is not needed because the file handle
+// in `fi` already identifies the open file.
+static void sfs_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off,
+ fuse_file_info *fi) {
+ (void) ino;
+ do_read(req, size, off, fi);
+}
+
+
+/* Copy `in_buf` into the open file at offset `off` and reply with the
+   number of bytes written, or with the error from fuse_buf_copy(). */
+static void do_write_buf(fuse_req_t req, size_t size, off_t off,
+                         fuse_bufvec *in_buf, fuse_file_info *fi) {
+    fuse_bufvec dst_vec = FUSE_BUFVEC_INIT(size);
+
+    fuse_buf& dst = dst_vec.buf[0];
+    dst.flags = static_cast<fuse_buf_flags>(FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK);
+    dst.fd = fi->fh;
+    dst.pos = off;
+
+    const auto copied = fuse_buf_copy(&dst_vec, in_buf, FUSE_BUF_COPY_FLAGS);
+    if (copied < 0) {
+        // fuse_buf_copy() reports failure as a negated errno value.
+        fuse_reply_err(req, -copied);
+        return;
+    }
+    fuse_reply_write(req, (size_t)copied);
+}
+
+
+// write_buf handler: writes the whole incoming buffer vector.
+static void sfs_write_buf(fuse_req_t req, fuse_ino_t ino, fuse_bufvec *in_buf,
+                          off_t off, fuse_file_info *fi) {
+    (void) ino;
+    do_write_buf(req, fuse_buf_size(in_buf), off, in_buf, fi);
+}
+
+
+// statfs handler: reports fstatvfs() of the inode's descriptor.
+static void sfs_statfs(fuse_req_t req, fuse_ino_t ino) {
+    struct statvfs vfs_info;
+
+    if (fstatvfs(get_fs_fd(ino), &vfs_info) == -1) {
+        fuse_reply_err(req, errno);
+        return;
+    }
+    fuse_reply_statfs(req, &vfs_info);
+}
+
+
+#ifdef HAVE_POSIX_FALLOCATE
+/* fallocate handler: only mode 0 is supported. posix_fallocate()
+   returns the error number directly, so its result is passed straight
+   to fuse_reply_err(). */
+static void sfs_fallocate(fuse_req_t req, fuse_ino_t ino, int mode,
+                          off_t offset, off_t length, fuse_file_info *fi) {
+    (void) ino;
+    if (mode != 0) {
+        fuse_reply_err(req, EOPNOTSUPP);
+        return;
+    }
+
+    fuse_reply_err(req, posix_fallocate(fi->fh, offset, length));
+}
+#endif
+
+// flock handler: applies `op` to the open file with flock().
+static void sfs_flock(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi,
+                      int op) {
+    (void) ino;
+    int err = 0;
+    if (flock(fi->fh, op) == -1)
+        err = errno;
+    fuse_reply_err(req, err);
+}
+
+
+#ifdef HAVE_SETXATTR
+/* getxattr handler. With size == 0 the required buffer size is
+   reported; otherwise the attribute value is returned. Symlinks are
+   refused with ENOTSUP. */
+static void sfs_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
+                         size_t size) {
+    Inode& inode = get_inode(ino);
+
+    if (inode.is_symlink) {
+        /* Sorry, no race free way to getxattr on symlink. */
+        fuse_reply_err(req, ENOTSUP);
+        return;
+    }
+
+    char procname[64];
+    sprintf(procname, "/proc/self/fd/%i", inode.fd);
+
+    if (size == 0) {
+        // Size query only.
+        const ssize_t needed = getxattr(procname, name, nullptr, 0);
+        if (needed == -1)
+            fuse_reply_err(req, errno);
+        else
+            fuse_reply_xattr(req, needed);
+        return;
+    }
+
+    char *value = new (nothrow) char[size];
+    if (value == nullptr) {
+        fuse_reply_err(req, ENOMEM);
+        return;
+    }
+
+    const ssize_t len = getxattr(procname, name, value, size);
+    if (len == -1)
+        fuse_reply_err(req, errno);
+    else if (len == 0)
+        fuse_reply_err(req, 0);  // empty value is answered with error code 0
+    else
+        fuse_reply_buf(req, value, len);
+
+    delete[] value;
+}
+
+
+/* listxattr handler. With size == 0 the required buffer size is
+   reported; otherwise the attribute name list is returned. Symlinks
+   are refused with ENOTSUP. */
+static void sfs_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) {
+    Inode& inode = get_inode(ino);
+
+    if (inode.is_symlink) {
+        /* Sorry, no race free way to listxattr on symlink. */
+        fuse_reply_err(req, ENOTSUP);
+        return;
+    }
+
+    char procname[64];
+    sprintf(procname, "/proc/self/fd/%i", inode.fd);
+
+    if (size == 0) {
+        // Size query only.
+        const ssize_t needed = listxattr(procname, nullptr, 0);
+        if (needed == -1)
+            fuse_reply_err(req, errno);
+        else
+            fuse_reply_xattr(req, needed);
+        return;
+    }
+
+    char *names = new (nothrow) char[size];
+    if (names == nullptr) {
+        fuse_reply_err(req, ENOMEM);
+        return;
+    }
+
+    const ssize_t len = listxattr(procname, names, size);
+    if (len == -1)
+        fuse_reply_err(req, errno);
+    else if (len == 0)
+        fuse_reply_err(req, 0);  // empty list is answered with error code 0
+    else
+        fuse_reply_buf(req, names, len);
+
+    delete[] names;
+}
+
+
+// setxattr handler: sets the attribute through /proc/self/fd/<fd>.
+// Symlinks are refused with ENOTSUP.
+static void sfs_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name,
+                         const char *value, size_t size, int flags) {
+    Inode& inode = get_inode(ino);
+
+    if (inode.is_symlink) {
+        /* Sorry, no race free way to setxattr on symlink. */
+        fuse_reply_err(req, ENOTSUP);
+        return;
+    }
+
+    char procname[64];
+    sprintf(procname, "/proc/self/fd/%i", inode.fd);
+
+    const ssize_t rc = setxattr(procname, name, value, size, flags);
+    fuse_reply_err(req, rc == -1 ? errno : 0);
+}
+
+
+// removexattr handler: removes the attribute through
+// /proc/self/fd/<fd>. Symlinks are refused with ENOTSUP.
+static void sfs_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) {
+    Inode& inode = get_inode(ino);
+
+    if (inode.is_symlink) {
+        /* Sorry, no race free way to removexattr on symlink. */
+        fuse_reply_err(req, ENOTSUP);
+        return;
+    }
+
+    char procname[64];
+    sprintf(procname, "/proc/self/fd/%i", inode.fd);
+
+    const ssize_t rc = removexattr(procname, name);
+    fuse_reply_err(req, rc == -1 ? errno : 0);
+}
+#endif
+
+
+/* Fill in the low-level operation table with this file's handlers.
+   Handlers guarded by HAVE_* macros are only registered when the
+   corresponding function is compiled in. */
+static void assign_operations(fuse_lowlevel_ops &sfs_oper) {
+    // Session setup
+    sfs_oper.init = sfs_init;
+
+    // Inode lifetime and attributes
+    sfs_oper.lookup = sfs_lookup;
+    sfs_oper.forget = sfs_forget;
+    sfs_oper.forget_multi = sfs_forget_multi;
+    sfs_oper.getattr = sfs_getattr;
+    sfs_oper.setattr = sfs_setattr;
+    sfs_oper.statfs = sfs_statfs;
+
+    // Namespace operations
+    sfs_oper.mknod = sfs_mknod;
+    sfs_oper.mkdir = sfs_mkdir;
+    sfs_oper.symlink = sfs_symlink;
+    sfs_oper.readlink = sfs_readlink;
+    sfs_oper.link = sfs_link;
+    sfs_oper.unlink = sfs_unlink;
+    sfs_oper.rmdir = sfs_rmdir;
+    sfs_oper.rename = sfs_rename;
+
+    // Directories
+    sfs_oper.opendir = sfs_opendir;
+    sfs_oper.readdir = sfs_readdir;
+    sfs_oper.readdirplus = sfs_readdirplus;
+    sfs_oper.releasedir = sfs_releasedir;
+    sfs_oper.fsyncdir = sfs_fsyncdir;
+
+    // Regular files
+    sfs_oper.create = sfs_create;
+    sfs_oper.open = sfs_open;
+    sfs_oper.release = sfs_release;
+    sfs_oper.flush = sfs_flush;
+    sfs_oper.fsync = sfs_fsync;
+    sfs_oper.read = sfs_read;
+    sfs_oper.write_buf = sfs_write_buf;
+    sfs_oper.flock = sfs_flock;
+#ifdef HAVE_POSIX_FALLOCATE
+    sfs_oper.fallocate = sfs_fallocate;
+#endif
+
+    // Extended attributes
+#ifdef HAVE_SETXATTR
+    sfs_oper.setxattr = sfs_setxattr;
+    sfs_oper.getxattr = sfs_getxattr;
+    sfs_oper.listxattr = sfs_listxattr;
+    sfs_oper.removexattr = sfs_removexattr;
+#endif
+}
+
+// Prints the two-line usage summary to stdout.
+static void print_usage(char *prog_name) {
+    cout << "Usage: " << prog_name << " --help\n";
+    cout << " " << prog_name << " [options] <source> <mountpoint>\n";
+}
+
+/* Run the cxxopts parser. An unknown option is reported together with
+   the usage summary and terminates the program with exit status 2. */
+static cxxopts::ParseResult parse_wrapper(cxxopts::Options& parser, int& argc, char**& argv) {
+    try {
+        return parser.parse(argc, argv);
+    } catch (cxxopts::option_not_exists_exception& unknown_option) {
+        std::cout << argv[0] << ": " << unknown_option.what() << std::endl;
+        print_usage(argv[0]);
+        exit(2);
+    }
+}
+
+
+/* Parse the command line. Handles --help and argument-count errors by
+   printing to stdout and exiting; otherwise stores the debug and
+   nosplice flags and the resolved source path in `fs` and returns the
+   parse result. Exits with status 1 if the source path cannot be
+   resolved. */
+static cxxopts::ParseResult parse_options(int argc, char **argv) {
+    cxxopts::Options opt_parser(argv[0]);
+    opt_parser.add_options()
+        ("debug", "Enable filesystem debug messages")
+        ("debug-fuse", "Enable libfuse debug messages")
+        ("help", "Print help")
+        ("nocache", "Disable all caching")
+        ("nosplice", "Do not use splice(2) to transfer data")
+        ("single", "Run single-threaded");
+
+    // FIXME: Find a better way to limit the try clause to just
+    // opt_parser.parse() (cf. https://github.com/jarro2783/cxxopts/issues/146)
+    auto options = parse_wrapper(opt_parser, argc, argv);
+
+    if (options.count("help")) {
+        print_usage(argv[0]);
+        // Strip everything before the option list from the
+        // default help string.
+        auto help = opt_parser.help();
+        std::cout << std::endl << "options:"
+                  << help.substr(help.find("\n\n") + 1, string::npos);
+        exit(0);
+
+    } else if (argc != 3) {
+        std::cout << argv[0] << ": invalid number of arguments\n";
+        print_usage(argv[0]);
+        exit(2);
+    }
+
+    fs.debug = options.count("debug") != 0;
+    fs.nosplice = options.count("nosplice") != 0;
+
+    // realpath() returns NULL on failure; constructing a std::string from
+    // a null pointer is undefined behavior, so fail with a clear message.
+    char *resolved = realpath(argv[1], NULL);
+    if (resolved == NULL)
+        err(1, "ERROR: failed to resolve source (\"%s\")", argv[1]);
+    fs.source = resolved;
+    // With a NULL second argument realpath() allocates the result with
+    // malloc(); the caller must free it.
+    free(resolved);
+
+    return options;
+}
+
+
+/* Raise the soft RLIMIT_NOFILE limit to the hard limit. Failures are
+   reported as warnings only. */
+static void maximize_fd_limit() {
+    struct rlimit lim {};
+
+    if (getrlimit(RLIMIT_NOFILE, &lim) != 0) {
+        warn("WARNING: getrlimit() failed with");
+        return;
+    }
+
+    lim.rlim_cur = lim.rlim_max;
+    if (setrlimit(RLIMIT_NOFILE, &lim) != 0)
+        warn("WARNING: setrlimit() failed with");
+}
+
+
+/* Program entry point: parse options, open the source directory, set
+   up the FUSE session, mount it and run the request loop. Returns 0
+   only if the session loop ran and returned 0; any setup failure
+   yields exit status 1. */
+int main(int argc, char *argv[]) {
+
+    // Parse command line options
+    auto options {parse_options(argc, argv)};
+
+    // We need an fd for every dentry in the filesystem that the
+    // kernel knows about. This is way more than most processes need,
+    // so try to get rid of any resource softlimit.
+    maximize_fd_limit();
+
+    // Initialize filesystem root
+    fs.root.fd = -1;
+    fs.root.nlookup = 9999;
+    fs.root.is_symlink = false;
+    fs.timeout = options.count("nocache") ? 0 : 86400.0;
+
+    struct stat stat;
+    auto ret = lstat(fs.source.c_str(), &stat);
+    if (ret == -1)
+        err(1, "ERROR: failed to stat source (\"%s\")", fs.source.c_str());
+    if (!S_ISDIR(stat.st_mode))
+        errx(1, "ERROR: source is not a directory");
+    fs.src_dev = stat.st_dev;
+
+    fs.root.fd = open(fs.source.c_str(), O_PATH);
+    if (fs.root.fd == -1)
+        err(1, "ERROR: open(\"%s\", O_PATH)", fs.source.c_str());
+
+    // From here on `ret` means "failure" until the session loop has
+    // run. Without this the early `goto err_out*` paths below would
+    // exit with status 0, because `ret` still holds lstat()'s result.
+    ret = -1;
+
+    // Initialize fuse
+    fuse_args args = FUSE_ARGS_INIT(0, nullptr);
+    if (fuse_opt_add_arg(&args, argv[0]) ||
+        fuse_opt_add_arg(&args, "-o") ||
+        fuse_opt_add_arg(&args, "default_permissions,fsname=hpps") ||
+        (options.count("debug-fuse") && fuse_opt_add_arg(&args, "-odebug")))
+        errx(3, "ERROR: Out of memory");
+
+    fuse_lowlevel_ops sfs_oper {};
+    assign_operations(sfs_oper);
+    auto se = fuse_session_new(&args, &sfs_oper, sizeof(sfs_oper), &fs);
+    if (se == nullptr)
+        goto err_out1;
+
+    if (fuse_set_signal_handlers(se) != 0)
+        goto err_out2;
+
+    // Don't apply umask, use modes exactly as specified
+    umask(0);
+
+    // Mount and run main loop
+    struct fuse_loop_config loop_config;
+    loop_config.clone_fd = 0;
+    loop_config.max_idle_threads = 10;
+    if (fuse_session_mount(se, argv[2]) != 0)
+        goto err_out3;
+    if (options.count("single"))
+        ret = fuse_session_loop(se);
+    else
+        ret = fuse_session_loop_mt(se, &loop_config);
+
+    fuse_session_unmount(se);
+
+err_out3:
+    fuse_remove_signal_handlers(se);
+err_out2:
+    fuse_session_destroy(se);
+err_out1:
+    fuse_opt_free_args(&args);
+
+    return ret ? 1 : 0;
+}
+