diff options
Diffstat (limited to 'example/passthrough_hp.cc')
-rw-r--r-- | example/passthrough_hp.cc | 1280 |
1 files changed, 1280 insertions, 0 deletions
diff --git a/example/passthrough_hp.cc b/example/passthrough_hp.cc new file mode 100644 index 0000000..dba8751 --- /dev/null +++ b/example/passthrough_hp.cc @@ -0,0 +1,1280 @@ +/* + FUSE: Filesystem in Userspace + Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu> + Copyright (C) 2017 Nikolaus Rath <Nikolaus@rath.org> + Copyright (C) 2018 Valve, Inc + + This program can be distributed under the terms of the GNU GPL. + See the file COPYING. +*/ + +/** @file + * + * This is a "high-performance" version of passthrough_ll.c. While + * passthrough_ll.c is designed to be as simple as possible, this + * example intended to be as efficient and correct as possible. + * + * passthrough_hp.cc mirrors a specified "source" directory under a + * specified the mountpoint with as much fidelity and performance as + * possible. + * + * If --nocache is specified, the source directory may be changed + * directly even while mounted and the filesystem will continue + * to work correctly. + * + * Without --nocache, the source directory is assumed to be modified + * only through the passthrough filesystem. This enables much better + * performance, but if changes are made directly to the source, they + * may not be immediately visible under the mountpoint and further + * access to the mountpoint may result in incorrect behavior, + * including data-loss. + * + * On its own, this filesystem fulfills no practical purpose. It is + * intended as a template upon which additional functionality can be + * built. + * + * Unless --nocache is specified, is only possible to write to files + * for which the mounting user has read permissions. This is because + * the writeback cache requires the kernel to be able to issue read + * requests for all files (which the passthrough filesystem cannot + * satisfy if it can't read the file in the underlying filesystem). + * + * ## Source code ## + * \include passthrough_hp.cc + */ + +#define FUSE_USE_VERSION 35 + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +// C includes +#include <dirent.h> +#include <err.h> +#include <errno.h> +#include <ftw.h> +#include <fuse_lowlevel.h> +#include <inttypes.h> +#include <string.h> +#include <sys/file.h> +#include <sys/resource.h> +#include <sys/xattr.h> +#include <time.h> +#include <unistd.h> +#include <pthread.h> + +// C++ includes +#include <cstddef> +#include <cstdio> +#include <cstdlib> +#include <list> +#include <cxxopts.hpp> +#include <mutex> +#include <fstream> +#include <thread> +#include <iomanip> + +using namespace std; + +/* We are re-using pointers to our `struct sfs_inode` and `struct + sfs_dirp` elements as inodes and file handles. This means that we + must be able to store pointer a pointer in both a fuse_ino_t + variable and a uint64_t variable (used for file handles). */ +static_assert(sizeof(fuse_ino_t) >= sizeof(void*), + "void* must fit into fuse_ino_t"); +static_assert(sizeof(fuse_ino_t) >= sizeof(uint64_t), + "fuse_ino_t must be at least 64 bits"); + + +/* Forward declarations */ +struct Inode; +static Inode& get_inode(fuse_ino_t ino); +static void forget_one(fuse_ino_t ino, uint64_t n); + +// Uniquely identifies a file in the source directory tree. This could +// be simplified to just ino_t since we require the source directory +// not to contain any mountpoints. This hasn't been done yet in case +// we need to reconsider this constraint (but relaxing this would have +// the drawback that we can no longer re-use inode numbers, and thus +// readdir() would need to do a full lookup() in order to report the +// right inode number). +typedef std::pair<ino_t, dev_t> SrcId; + +// Define a hash function for SrcId +namespace std { + template<> + struct hash<SrcId> { + size_t operator()(const SrcId& id) const { + return hash<ino_t>{}(id.first) ^ hash<dev_t>{}(id.second); + } + }; +} + +// Maps files in the source directory tree to inodes +typedef std::unordered_map<SrcId, Inode> InodeMap; + +struct Inode { + int fd {-1}; + bool is_symlink {false}; + dev_t src_dev {0}; + ino_t src_ino {0}; + uint64_t nlookup {0}; + std::mutex m; + + // Delete copy constructor and assignments. We could implement + // move if we need it. + Inode() = default; + Inode(const Inode&) = delete; + Inode(Inode&& inode) = delete; + Inode& operator=(Inode&& inode) = delete; + Inode& operator=(const Inode&) = delete; + + ~Inode() { + if(fd > 0) + close(fd); + } +}; + +struct Fs { + // Must be acquired *after* any Inode.m locks. + std::mutex mutex; + InodeMap inodes; // protected by mutex + Inode root; + double timeout; + bool debug; + std::string source; + size_t blocksize; + dev_t src_dev; + bool nosplice; + bool nocache; +}; +static Fs fs{}; + + +#define FUSE_BUF_COPY_FLAGS \ + (fs.nosplice ? \ + FUSE_BUF_NO_SPLICE : \ + static_cast<fuse_buf_copy_flags>(0)) + + +static Inode& get_inode(fuse_ino_t ino) { + if (ino == FUSE_ROOT_ID) + return fs.root; + + Inode* inode = reinterpret_cast<Inode*>(ino); + if(inode->fd == -1) { + cerr << "INTERNAL ERROR: Unknown inode " << ino << endl; + abort(); + } + return *inode; +} + + +static int get_fs_fd(fuse_ino_t ino) { + int fd = get_inode(ino).fd; + return fd; +} + + +static void sfs_init(void *userdata, fuse_conn_info *conn) { + (void)userdata; + if (conn->capable & FUSE_CAP_EXPORT_SUPPORT) + conn->want |= FUSE_CAP_EXPORT_SUPPORT; + + if (fs.timeout && conn->capable & FUSE_CAP_WRITEBACK_CACHE) + conn->want |= FUSE_CAP_WRITEBACK_CACHE; + + if (conn->capable & FUSE_CAP_FLOCK_LOCKS) + conn->want |= FUSE_CAP_FLOCK_LOCKS; + + // Use splicing if supported. Since we are using writeback caching + // and readahead, individual requests should have a decent size so + // that splicing between fd's is well worth it. + if (conn->capable & FUSE_CAP_SPLICE_WRITE && !fs.nosplice) + conn->want |= FUSE_CAP_SPLICE_WRITE; + if (conn->capable & FUSE_CAP_SPLICE_READ && !fs.nosplice) + conn->want |= FUSE_CAP_SPLICE_READ; +} + + +static void sfs_getattr(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { + (void)fi; + Inode& inode = get_inode(ino); + struct stat attr; + auto res = fstatat(inode.fd, "", &attr, + AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + if (res == -1) { + fuse_reply_err(req, errno); + return; + } + fuse_reply_attr(req, &attr, fs.timeout); +} + + +#ifdef HAVE_UTIMENSAT +static int utimensat_empty_nofollow(Inode& inode, + const struct timespec *tv) { + if (inode.is_symlink) { + /* Does not work on current kernels, but may in the future: + https://marc.info/?l=linux-kernel&m=154158217810354&w=2 */ + auto res = utimensat(inode.fd, "", tv, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + if (res == -1 && errno == EINVAL) { + /* Sorry, no race free way to set times on symlink. */ + errno = EPERM; + } + return res; + } + + char procname[64]; + sprintf(procname, "/proc/self/fd/%i", inode.fd); + + return utimensat(AT_FDCWD, procname, tv, 0); +} +#endif + + +static void do_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + int valid, struct fuse_file_info* fi) { + Inode& inode = get_inode(ino); + int ifd = inode.fd; + int res; + + if (valid & FUSE_SET_ATTR_MODE) { + if (fi) { + res = fchmod(fi->fh, attr->st_mode); + } else { + char procname[64]; + sprintf(procname, "/proc/self/fd/%i", ifd); + res = chmod(procname, attr->st_mode); + } + if (res == -1) + goto out_err; + } + if (valid & (FUSE_SET_ATTR_UID | FUSE_SET_ATTR_GID)) { + uid_t uid = (valid & FUSE_SET_ATTR_UID) ? attr->st_uid : static_cast<uid_t>(-1); + gid_t gid = (valid & FUSE_SET_ATTR_GID) ? attr->st_gid : static_cast<gid_t>(-1); + + res = fchownat(ifd, "", uid, gid, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + if (res == -1) + goto out_err; + } + if (valid & FUSE_SET_ATTR_SIZE) { + if (fi) { + res = ftruncate(fi->fh, attr->st_size); + } else { + char procname[64]; + sprintf(procname, "/proc/self/fd/%i", ifd); + res = truncate(procname, attr->st_size); + } + if (res == -1) + goto out_err; + } + if (valid & (FUSE_SET_ATTR_ATIME | FUSE_SET_ATTR_MTIME)) { + struct timespec tv[2]; + + tv[0].tv_sec = 0; + tv[1].tv_sec = 0; + tv[0].tv_nsec = UTIME_OMIT; + tv[1].tv_nsec = UTIME_OMIT; + + if (valid & FUSE_SET_ATTR_ATIME_NOW) + tv[0].tv_nsec = UTIME_NOW; + else if (valid & FUSE_SET_ATTR_ATIME) + tv[0] = attr->st_atim; + + if (valid & FUSE_SET_ATTR_MTIME_NOW) + tv[1].tv_nsec = UTIME_NOW; + else if (valid & FUSE_SET_ATTR_MTIME) + tv[1] = attr->st_mtim; + + if (fi) + res = futimens(fi->fh, tv); + else { +#ifdef HAVE_UTIMENSAT + res = utimensat_empty_nofollow(inode, tv); +#else + res = -1; + errno = EOPNOTSUPP; +#endif + } + if (res == -1) + goto out_err; + } + return sfs_getattr(req, ino, fi); + +out_err: + fuse_reply_err(req, errno); +} + + +static void sfs_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + int valid, fuse_file_info *fi) { + (void) ino; + do_setattr(req, ino, attr, valid, fi); +} + + +static int do_lookup(fuse_ino_t parent, const char *name, + fuse_entry_param *e) { + if (fs.debug) + cerr << "DEBUG: lookup(): name=" << name + << ", parent=" << parent << endl; + memset(e, 0, sizeof(*e)); + e->attr_timeout = fs.timeout; + e->entry_timeout = fs.timeout; + + auto newfd = openat(get_fs_fd(parent), name, O_PATH | O_NOFOLLOW); + if (newfd == -1) + return errno; + + auto res = fstatat(newfd, "", &e->attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + if (res == -1) { + auto saveerr = errno; + close(newfd); + if (fs.debug) + cerr << "DEBUG: lookup(): fstatat failed" << endl; + return saveerr; + } + + if (e->attr.st_dev != fs.src_dev) { + cerr << "WARNING: Mountpoints in the source directory tree will be hidden." << endl; + return ENOTSUP; + } else if (e->attr.st_ino == FUSE_ROOT_ID) { + cerr << "ERROR: Source directory tree must not include inode " + << FUSE_ROOT_ID << endl; + return EIO; + } + + SrcId id {e->attr.st_ino, e->attr.st_dev}; + unique_lock<mutex> fs_lock {fs.mutex}; + Inode* inode_p; + try { + inode_p = &fs.inodes[id]; + } catch (std::bad_alloc&) { + return ENOMEM; + } + e->ino = reinterpret_cast<fuse_ino_t>(inode_p); + Inode& inode {*inode_p}; + + if(inode.fd != -1) { // found existing inode + fs_lock.unlock(); + if (fs.debug) + cerr << "DEBUG: lookup(): inode " << e->attr.st_ino + << " (userspace) already known." << endl; + lock_guard<mutex> g {inode.m}; + inode.nlookup++; + close(newfd); + } else { // no existing inode + /* This is just here to make Helgrind happy. It violates the + lock ordering requirement (inode.m must be acquired before + fs.mutex), but this is of no consequence because at this + point no other thread has access to the inode mutex */ + lock_guard<mutex> g {inode.m}; + inode.src_ino = e->attr.st_ino; + inode.src_dev = e->attr.st_dev; + inode.is_symlink = S_ISLNK(e->attr.st_mode); + inode.nlookup = 1; + inode.fd = newfd; + fs_lock.unlock(); + + if (fs.debug) + cerr << "DEBUG: lookup(): created userspace inode " << e->attr.st_ino + << endl; + } + + return 0; +} + + +static void sfs_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) { + fuse_entry_param e {}; + auto err = do_lookup(parent, name, &e); + if (err == ENOENT) { + e.attr_timeout = fs.timeout; + e.entry_timeout = fs.timeout; + e.ino = e.attr.st_ino = 0; + fuse_reply_entry(req, &e); + } else if (err) { + if (err == ENFILE || err == EMFILE) + cerr << "ERROR: Reached maximum number of file descriptors." << endl; + fuse_reply_err(req, err); + } else { + fuse_reply_entry(req, &e); + } +} + + +static void mknod_symlink(fuse_req_t req, fuse_ino_t parent, + const char *name, mode_t mode, dev_t rdev, + const char *link) { + int res; + Inode& inode_p = get_inode(parent); + auto saverr = ENOMEM; + + if (S_ISDIR(mode)) + res = mkdirat(inode_p.fd, name, mode); + else if (S_ISLNK(mode)) + res = symlinkat(link, inode_p.fd, name); + else + res = mknodat(inode_p.fd, name, mode, rdev); + saverr = errno; + if (res == -1) + goto out; + + fuse_entry_param e; + saverr = do_lookup(parent, name, &e); + if (saverr) + goto out; + + fuse_reply_entry(req, &e); + return; + +out: + if (saverr == ENFILE || saverr == EMFILE) + cerr << "ERROR: Reached maximum number of file descriptors." << endl; + fuse_reply_err(req, saverr); +} + + +static void sfs_mknod(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, dev_t rdev) { + mknod_symlink(req, parent, name, mode, rdev, nullptr); +} + + +static void sfs_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode) { + mknod_symlink(req, parent, name, S_IFDIR | mode, 0, nullptr); +} + + +static void sfs_symlink(fuse_req_t req, const char *link, fuse_ino_t parent, + const char *name) { + mknod_symlink(req, parent, name, S_IFLNK, 0, link); +} + + +static int linkat_empty_nofollow(Inode& inode, int dfd, const char *name) { + if (inode.is_symlink) { + auto res = linkat(inode.fd, "", dfd, name, AT_EMPTY_PATH); + if (res == -1 && (errno == ENOENT || errno == EINVAL)) { + /* Sorry, no race free way to hard-link a symlink. */ + errno = EOPNOTSUPP; + } + return res; + } + + char procname[64]; + sprintf(procname, "/proc/self/fd/%i", inode.fd); + return linkat(AT_FDCWD, procname, dfd, name, AT_SYMLINK_FOLLOW); +} + + +static void sfs_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t parent, + const char *name) { + Inode& inode = get_inode(ino); + Inode& inode_p = get_inode(parent); + fuse_entry_param e {}; + + e.attr_timeout = fs.timeout; + e.entry_timeout = fs.timeout; + + auto res = linkat_empty_nofollow(inode, inode_p.fd, name); + if (res == -1) { + fuse_reply_err(req, errno); + return; + } + + res = fstatat(inode.fd, "", &e.attr, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW); + if (res == -1) { + fuse_reply_err(req, errno); + return; + } + e.ino = reinterpret_cast<fuse_ino_t>(&inode); + { + lock_guard<mutex> g {inode.m}; + inode.nlookup++; + } + + fuse_reply_entry(req, &e); + return; +} + + +static void sfs_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) { + Inode& inode_p = get_inode(parent); + lock_guard<mutex> g {inode_p.m}; + auto res = unlinkat(inode_p.fd, name, AT_REMOVEDIR); + fuse_reply_err(req, res == -1 ? errno : 0); +} + + +static void sfs_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + fuse_ino_t newparent, const char *newname, + unsigned int flags) { + Inode& inode_p = get_inode(parent); + Inode& inode_np = get_inode(newparent); + if (flags) { + fuse_reply_err(req, EINVAL); + return; + } + + auto res = renameat(inode_p.fd, name, inode_np.fd, newname); + fuse_reply_err(req, res == -1 ? errno : 0); +} + + +static void sfs_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) { + Inode& inode_p = get_inode(parent); + auto res = unlinkat(inode_p.fd, name, 0); + fuse_reply_err(req, res == -1 ? errno : 0); +} + + +static void forget_one(fuse_ino_t ino, uint64_t n) { + Inode& inode = get_inode(ino); + unique_lock<mutex> l {inode.m}; + + if(n > inode.nlookup) { + cerr << "INTERNAL ERROR: Negative lookup count for inode " + << inode.src_ino << endl; + abort(); + } + inode.nlookup -= n; + if (!inode.nlookup) { + if (fs.debug) + cerr << "DEBUG: forget: cleaning up inode " << inode.src_ino << endl; + { + lock_guard<mutex> g_fs {fs.mutex}; + l.unlock(); + fs.inodes.erase({inode.src_ino, inode.src_dev}); + } + } else if (fs.debug) + cerr << "DEBUG: forget: inode " << inode.src_ino + << " lookup count now " << inode.nlookup << endl; +} + +static void sfs_forget(fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) { + forget_one(ino, nlookup); + fuse_reply_none(req); +} + + +static void sfs_forget_multi(fuse_req_t req, size_t count, + fuse_forget_data *forgets) { + for (int i = 0; i < count; i++) + forget_one(forgets[i].ino, forgets[i].nlookup); + fuse_reply_none(req); +} + + +static void sfs_readlink(fuse_req_t req, fuse_ino_t ino) { + Inode& inode = get_inode(ino); + char buf[PATH_MAX + 1]; + auto res = readlinkat(inode.fd, "", buf, sizeof(buf)); + if (res == -1) + fuse_reply_err(req, errno); + else if (res == sizeof(buf)) + fuse_reply_err(req, ENAMETOOLONG); + else { + buf[res] = '\0'; + fuse_reply_readlink(req, buf); + } +} + + +struct DirHandle { + DIR *dp {nullptr}; + off_t offset; + + DirHandle() = default; + DirHandle(const DirHandle&) = delete; + DirHandle& operator=(const DirHandle&) = delete; + + ~DirHandle() { + if(dp) + closedir(dp); + } +}; + + +static DirHandle *get_dir_handle(fuse_file_info *fi) { + return reinterpret_cast<DirHandle*>(fi->fh); +} + + +static void sfs_opendir(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { + Inode& inode = get_inode(ino); + auto d = new (nothrow) DirHandle; + if (d == nullptr) { + fuse_reply_err(req, ENOMEM); + return; + } + + // Make Helgrind happy - it can't know that there's an implicit + // synchronization due to the fact that other threads cannot + // access d until we've called fuse_reply_*. + lock_guard<mutex> g {inode.m}; + + auto fd = openat(inode.fd, ".", O_RDONLY); + if (fd == -1) + goto out_errno; + + // On success, dir stream takes ownership of fd, so we + // do not have to close it. + d->dp = fdopendir(fd); + if(d->dp == nullptr) + goto out_errno; + + d->offset = 0; + + fi->fh = reinterpret_cast<uint64_t>(d); + if(fs.timeout) { + fi->keep_cache = 1; + fi->cache_readdir = 1; + } + fuse_reply_open(req, fi); + return; + +out_errno: + auto error = errno; + delete d; + if (error == ENFILE || error == EMFILE) + cerr << "ERROR: Reached maximum number of file descriptors." << endl; + fuse_reply_err(req, error); +} + + +static bool is_dot_or_dotdot(const char *name) { + return name[0] == '.' && + (name[1] == '\0' || (name[1] == '.' && name[2] == '\0')); +} + + +static void do_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t offset, fuse_file_info *fi, int plus) { + auto d = get_dir_handle(fi); + Inode& inode = get_inode(ino); + lock_guard<mutex> g {inode.m}; + char *p; + auto rem = size; + int err = 0, count = 0; + + if (fs.debug) + cerr << "DEBUG: readdir(): started with offset " + << offset << endl; + + auto buf = new (nothrow) char[size]; + if (!buf) { + fuse_reply_err(req, ENOMEM); + return; + } + p = buf; + + if (offset != d->offset) { + if (fs.debug) + cerr << "DEBUG: readdir(): seeking to " << offset << endl; + seekdir(d->dp, offset); + d->offset = offset; + } + + while (1) { + struct dirent *entry; + errno = 0; + entry = readdir(d->dp); + if (!entry) { + if(errno) { + err = errno; + if (fs.debug) + warn("DEBUG: readdir(): readdir failed with"); + goto error; + } + break; // End of stream + } + d->offset = entry->d_off; + if (is_dot_or_dotdot(entry->d_name)) + continue; + + fuse_entry_param e{}; + size_t entsize; + if(plus) { + err = do_lookup(ino, entry->d_name, &e); + if (err) + goto error; + entsize = fuse_add_direntry_plus(req, p, rem, entry->d_name, &e, entry->d_off); + + if (entsize > rem) { + if (fs.debug) + cerr << "DEBUG: readdir(): buffer full, returning data. " << endl; + forget_one(e.ino, 1); + break; + } + } else { + e.attr.st_ino = entry->d_ino; + e.attr.st_mode = entry->d_type << 12; + entsize = fuse_add_direntry(req, p, rem, entry->d_name, &e.attr, entry->d_off); + + if (entsize > rem) { + if (fs.debug) + cerr << "DEBUG: readdir(): buffer full, returning data. " << endl; + break; + } + } + + p += entsize; + rem -= entsize; + count++; + if (fs.debug) { + cerr << "DEBUG: readdir(): added to buffer: " << entry->d_name + << ", ino " << e.attr.st_ino << ", offset " << entry->d_off << endl; + } + } + err = 0; +error: + + // If there's an error, we can only signal it if we haven't stored + // any entries yet - otherwise we'd end up with wrong lookup + // counts for the entries that are already in the buffer. So we + // return what we've collected until that point. + if (err && rem == size) { + if (err == ENFILE || err == EMFILE) + cerr << "ERROR: Reached maximum number of file descriptors." << endl; + fuse_reply_err(req, err); + } else { + if (fs.debug) + cerr << "DEBUG: readdir(): returning " << count + << " entries, curr offset " << d->offset << endl; + fuse_reply_buf(req, buf, size - rem); + } + delete[] buf; + return; +} + + +static void sfs_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t offset, fuse_file_info *fi) { + // operation logging is done in readdir to reduce code duplication + do_readdir(req, ino, size, offset, fi, 0); +} + + +static void sfs_readdirplus(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t offset, fuse_file_info *fi) { + // operation logging is done in readdir to reduce code duplication + do_readdir(req, ino, size, offset, fi, 1); +} + + +static void sfs_releasedir(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { + (void) ino; + auto d = get_dir_handle(fi); + delete d; + fuse_reply_err(req, 0); +} + + +static void sfs_create(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, fuse_file_info *fi) { + Inode& inode_p = get_inode(parent); + + auto fd = openat(inode_p.fd, name, + (fi->flags | O_CREAT) & ~O_NOFOLLOW, mode); + if (fd == -1) { + auto err = errno; + if (err == ENFILE || err == EMFILE) + cerr << "ERROR: Reached maximum number of file descriptors." << endl; + fuse_reply_err(req, err); + return; + } + + fi->fh = fd; + fuse_entry_param e; + auto err = do_lookup(parent, name, &e); + if (err) { + if (err == ENFILE || err == EMFILE) + cerr << "ERROR: Reached maximum number of file descriptors." << endl; + fuse_reply_err(req, err); + } else + fuse_reply_create(req, &e, fi); +} + + +static void sfs_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + fuse_file_info *fi) { + (void) ino; + int res; + int fd = dirfd(get_dir_handle(fi)->dp); + if (datasync) + res = fdatasync(fd); + else + res = fsync(fd); + fuse_reply_err(req, res == -1 ? errno : 0); +} + + +static void sfs_open(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { + Inode& inode = get_inode(ino); + + /* With writeback cache, kernel may send read requests even + when userspace opened write-only */ + if (fs.timeout && (fi->flags & O_ACCMODE) == O_WRONLY) { + fi->flags &= ~O_ACCMODE; + fi->flags |= O_RDWR; + } + + /* With writeback cache, O_APPEND is handled by the kernel. This + breaks atomicity (since the file may change in the underlying + filesystem, so that the kernel's idea of the end of the file + isn't accurate anymore). However, no process should modify the + file in the underlying filesystem once it has been read, so + this is not a problem. */ + if (fs.timeout && fi->flags & O_APPEND) + fi->flags &= ~O_APPEND; + + /* Unfortunately we cannot use inode.fd, because this was opened + with O_PATH (so it doesn't allow read/write access). */ + char buf[64]; + sprintf(buf, "/proc/self/fd/%i", inode.fd); + auto fd = open(buf, fi->flags & ~O_NOFOLLOW); + if (fd == -1) { + auto err = errno; + if (err == ENFILE || err == EMFILE) + cerr << "ERROR: Reached maximum number of file descriptors." << endl; + fuse_reply_err(req, err); + return; + } + + fi->keep_cache = (fs.timeout != 0); + fi->fh = fd; + fuse_reply_open(req, fi); +} + + +static void sfs_release(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { + (void) ino; + close(fi->fh); + fuse_reply_err(req, 0); +} + + +static void sfs_flush(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { + (void) ino; + auto res = close(dup(fi->fh)); + fuse_reply_err(req, res == -1 ? errno : 0); +} + + +static void sfs_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + fuse_file_info *fi) { + (void) ino; + int res; + if (datasync) + res = fdatasync(fi->fh); + else + res = fsync(fi->fh); + fuse_reply_err(req, res == -1 ? errno : 0); +} + + +static void do_read(fuse_req_t req, size_t size, off_t off, fuse_file_info *fi) { + + fuse_bufvec buf = FUSE_BUFVEC_INIT(size); + buf.buf[0].flags = static_cast<fuse_buf_flags>( + FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK); + buf.buf[0].fd = fi->fh; + buf.buf[0].pos = off; + + fuse_reply_data(req, &buf, FUSE_BUF_COPY_FLAGS); +} + +static void sfs_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, + fuse_file_info *fi) { + (void) ino; + do_read(req, size, off, fi); +} + + +static void do_write_buf(fuse_req_t req, size_t size, off_t off, + fuse_bufvec *in_buf, fuse_file_info *fi) { + fuse_bufvec out_buf = FUSE_BUFVEC_INIT(size); + out_buf.buf[0].flags = static_cast<fuse_buf_flags>( + FUSE_BUF_IS_FD | FUSE_BUF_FD_SEEK); + out_buf.buf[0].fd = fi->fh; + out_buf.buf[0].pos = off; + + auto res = fuse_buf_copy(&out_buf, in_buf, FUSE_BUF_COPY_FLAGS); + if (res < 0) + fuse_reply_err(req, -res); + else + fuse_reply_write(req, (size_t)res); +} + + +static void sfs_write_buf(fuse_req_t req, fuse_ino_t ino, fuse_bufvec *in_buf, + off_t off, fuse_file_info *fi) { + (void) ino; + auto size {fuse_buf_size(in_buf)}; + do_write_buf(req, size, off, in_buf, fi); +} + + +static void sfs_statfs(fuse_req_t req, fuse_ino_t ino) { + struct statvfs stbuf; + + auto res = fstatvfs(get_fs_fd(ino), &stbuf); + if (res == -1) + fuse_reply_err(req, errno); + else + fuse_reply_statfs(req, &stbuf); +} + + +#ifdef HAVE_POSIX_FALLOCATE +static void sfs_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, + off_t offset, off_t length, fuse_file_info *fi) { + (void) ino; + if (mode) { + fuse_reply_err(req, EOPNOTSUPP); + return; + } + + auto err = posix_fallocate(fi->fh, offset, length); + fuse_reply_err(req, err); +} +#endif + +static void sfs_flock(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi, + int op) { + (void) ino; + auto res = flock(fi->fh, op); + fuse_reply_err(req, res == -1 ? errno : 0); +} + + +#ifdef HAVE_SETXATTR +static void sfs_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + size_t size) { + char *value = nullptr; + Inode& inode = get_inode(ino); + ssize_t ret; + int saverr; + + if (inode.is_symlink) { + /* Sorry, no race free way to getxattr on symlink. */ + saverr = ENOTSUP; + goto out; + } + + char procname[64]; + sprintf(procname, "/proc/self/fd/%i", inode.fd); + + if (size) { + value = new (nothrow) char[size]; + if (value == nullptr) { + saverr = ENOMEM; + goto out; + } + + ret = getxattr(procname, name, value, size); + if (ret == -1) + goto out_err; + saverr = 0; + if (ret == 0) + goto out; + + fuse_reply_buf(req, value, ret); + } else { + ret = getxattr(procname, name, nullptr, 0); + if (ret == -1) + goto out_err; + + fuse_reply_xattr(req, ret); + } +out_free: + delete[] value; + return; + +out_err: + saverr = errno; +out: + fuse_reply_err(req, saverr); + goto out_free; +} + + +static void sfs_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) { + char *value = nullptr; + Inode& inode = get_inode(ino); + ssize_t ret; + int saverr; + + if (inode.is_symlink) { + /* Sorry, no race free way to listxattr on symlink. */ + saverr = ENOTSUP; + goto out; + } + + char procname[64]; + sprintf(procname, "/proc/self/fd/%i", inode.fd); + + if (size) { + value = new (nothrow) char[size]; + if (value == nullptr) { + saverr = ENOMEM; + goto out; + } + + ret = listxattr(procname, value, size); + if (ret == -1) + goto out_err; + saverr = 0; + if (ret == 0) + goto out; + + fuse_reply_buf(req, value, ret); + } else { + ret = listxattr(procname, nullptr, 0); + if (ret == -1) + goto out_err; + + fuse_reply_xattr(req, ret); + } +out_free: + delete[] value; + return; +out_err: + saverr = errno; +out: + fuse_reply_err(req, saverr); + goto out_free; +} + + +static void sfs_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + const char *value, size_t size, int flags) { + Inode& inode = get_inode(ino); + ssize_t ret; + int saverr; + + if (inode.is_symlink) { + /* Sorry, no race free way to setxattr on symlink. */ + saverr = ENOTSUP; + goto out; + } + + char procname[64]; + sprintf(procname, "/proc/self/fd/%i", inode.fd); + + ret = setxattr(procname, name, value, size, flags); + saverr = ret == -1 ? errno : 0; + +out: + fuse_reply_err(req, saverr); +} + + +static void sfs_removexattr(fuse_req_t req, fuse_ino_t ino, const char *name) { + char procname[64]; + Inode& inode = get_inode(ino); + ssize_t ret; + int saverr; + + if (inode.is_symlink) { + /* Sorry, no race free way to setxattr on symlink. */ + saverr = ENOTSUP; + goto out; + } + + sprintf(procname, "/proc/self/fd/%i", inode.fd); + ret = removexattr(procname, name); + saverr = ret == -1 ? errno : 0; + +out: + fuse_reply_err(req, saverr); +} +#endif + + +static void assign_operations(fuse_lowlevel_ops &sfs_oper) { + sfs_oper.init = sfs_init; + sfs_oper.lookup = sfs_lookup; + sfs_oper.mkdir = sfs_mkdir; + sfs_oper.mknod = sfs_mknod; + sfs_oper.symlink = sfs_symlink; + sfs_oper.link = sfs_link; + sfs_oper.unlink = sfs_unlink; + sfs_oper.rmdir = sfs_rmdir; + sfs_oper.rename = sfs_rename; + sfs_oper.forget = sfs_forget; + sfs_oper.forget_multi = sfs_forget_multi; + sfs_oper.getattr = sfs_getattr; + sfs_oper.setattr = sfs_setattr; + sfs_oper.readlink = sfs_readlink; + sfs_oper.opendir = sfs_opendir; + sfs_oper.readdir = sfs_readdir; + sfs_oper.readdirplus = sfs_readdirplus; + sfs_oper.releasedir = sfs_releasedir; + sfs_oper.fsyncdir = sfs_fsyncdir; + sfs_oper.create = sfs_create; + sfs_oper.open = sfs_open; + sfs_oper.release = sfs_release; + sfs_oper.flush = sfs_flush; + sfs_oper.fsync = sfs_fsync; + sfs_oper.read = sfs_read; + sfs_oper.write_buf = sfs_write_buf; + sfs_oper.statfs = sfs_statfs; +#ifdef HAVE_POSIX_FALLOCATE + sfs_oper.fallocate = sfs_fallocate; +#endif + sfs_oper.flock = sfs_flock; +#ifdef HAVE_SETXATTR + sfs_oper.setxattr = sfs_setxattr; + sfs_oper.getxattr = sfs_getxattr; + sfs_oper.listxattr = sfs_listxattr; + sfs_oper.removexattr = sfs_removexattr; +#endif +} + +static void print_usage(char *prog_name) { + cout << "Usage: " << prog_name << " --help\n" + << " " << prog_name << " [options] <source> <mountpoint>\n"; +} + +static cxxopts::ParseResult parse_wrapper(cxxopts::Options& parser, int& argc, char**& argv) { + try { + return parser.parse(argc, argv); + } catch (cxxopts::option_not_exists_exception& exc) { + std::cout << argv[0] << ": " << exc.what() << std::endl; + print_usage(argv[0]); + exit(2); + } +} + + +static cxxopts::ParseResult parse_options(int argc, char **argv) { + cxxopts::Options opt_parser(argv[0]); + opt_parser.add_options() + ("debug", "Enable filesystem debug messages") + ("debug-fuse", "Enable libfuse debug messages") + ("help", "Print help") + ("nocache", "Disable all caching") + ("nosplice", "Do not use splice(2) to transfer data") + ("single", "Run single-threaded"); + + // FIXME: Find a better way to limit the try clause to just + // opt_parser.parse() (cf. https://github.com/jarro2783/cxxopts/issues/146) + auto options = parse_wrapper(opt_parser, argc, argv); + + if (options.count("help")) { + print_usage(argv[0]); + // Strip everything before the option list from the + // default help string. + auto help = opt_parser.help(); + std::cout << std::endl << "options:" + << help.substr(help.find("\n\n") + 1, string::npos); + exit(0); + + } else if (argc != 3) { + std::cout << argv[0] << ": invalid number of arguments\n"; + print_usage(argv[0]); + exit(2); + } + + fs.debug = options.count("debug") != 0; + fs.nosplice = options.count("nosplice") != 0; + fs.source = std::string {realpath(argv[1], NULL)}; + + return options; +} + + +static void maximize_fd_limit() { + struct rlimit lim {}; + auto res = getrlimit(RLIMIT_NOFILE, &lim); + if (res != 0) { + warn("WARNING: getrlimit() failed with"); + return; + } + lim.rlim_cur = lim.rlim_max; + res = setrlimit(RLIMIT_NOFILE, &lim); + if (res != 0) + warn("WARNING: setrlimit() failed with"); +} + + +int main(int argc, char *argv[]) { + + // Parse command line options + auto options {parse_options(argc, argv)}; + + // We need an fd for every dentry in our the filesystem that the + // kernel knows about. This is way more than most processes need, + // so try to get rid of any resource softlimit. + maximize_fd_limit(); + + // Initialize filesystem root + fs.root.fd = -1; + fs.root.nlookup = 9999; + fs.root.is_symlink = false; + fs.timeout = options.count("nocache") ? 0 : 86400.0; + + struct stat stat; + auto ret = lstat(fs.source.c_str(), &stat); + if (ret == -1) + err(1, "ERROR: failed to stat source (\"%s\")", fs.source.c_str()); + if (!S_ISDIR(stat.st_mode)) + errx(1, "ERROR: source is not a directory"); + fs.src_dev = stat.st_dev; + + fs.root.fd = open(fs.source.c_str(), O_PATH); + if (fs.root.fd == -1) + err(1, "ERROR: open(\"%s\", O_PATH)", fs.source.c_str()); + + // Initialize fuse + fuse_args args = FUSE_ARGS_INIT(0, nullptr); + if (fuse_opt_add_arg(&args, argv[0]) || + fuse_opt_add_arg(&args, "-o") || + fuse_opt_add_arg(&args, "default_permissions,fsname=hpps") || + (options.count("debug-fuse") && fuse_opt_add_arg(&args, "-odebug"))) + errx(3, "ERROR: Out of memory"); + + fuse_lowlevel_ops sfs_oper {}; + assign_operations(sfs_oper); + auto se = fuse_session_new(&args, &sfs_oper, sizeof(sfs_oper), &fs); + if (se == nullptr) + goto err_out1; + + if (fuse_set_signal_handlers(se) != 0) + goto err_out2; + + // Don't apply umask, use modes exactly as specified + umask(0); + + // Mount and run main loop + struct fuse_loop_config loop_config; + loop_config.clone_fd = 0; + loop_config.max_idle_threads = 10; + if (fuse_session_mount(se, argv[2]) != 0) + goto err_out3; + if (options.count("single")) + ret = fuse_session_loop(se); + else + ret = fuse_session_loop_mt(se, &loop_config); + + fuse_session_unmount(se); + +err_out3: + fuse_remove_signal_handlers(se); +err_out2: + fuse_session_destroy(se); +err_out1: + fuse_opt_free_args(&args); + + return ret ? 1 : 0; +} + |