diff options
-rw-r--r-- | example/passthrough_hp.cc | 66 | ||||
-rw-r--r-- | include/fuse_common.h | 39 | ||||
-rw-r--r-- | include/fuse_kernel.h | 23 | ||||
-rw-r--r-- | include/fuse_lowlevel.h | 13 | ||||
-rw-r--r-- | lib/fuse_lowlevel.c | 43 | ||||
-rw-r--r-- | lib/fuse_versionscript | 2 |
6 files changed, 180 insertions, 6 deletions
diff --git a/example/passthrough_hp.cc b/example/passthrough_hp.cc index 7c1dadf..7b8febe 100644 --- a/example/passthrough_hp.cc +++ b/example/passthrough_hp.cc @@ -123,6 +123,7 @@ struct Inode { dev_t src_dev {0}; ino_t src_ino {0}; int generation {0}; + int backing_id {0}; uint64_t nopen {0}; uint64_t nlookup {0}; std::mutex m; @@ -159,6 +160,7 @@ struct Fs { bool clone_fd; std::string fuse_mount_options; bool direct_io; + bool passthrough; }; static Fs fs{}; @@ -190,7 +192,15 @@ static int get_fs_fd(fuse_ino_t ino) { static void sfs_init(void *userdata, fuse_conn_info *conn) { (void)userdata; - if (fs.timeout && conn->capable & FUSE_CAP_WRITEBACK_CACHE) + + if (fs.passthrough && conn->capable & FUSE_CAP_PASSTHROUGH) + conn->want |= FUSE_CAP_PASSTHROUGH; + else + fs.passthrough = false; + + /* Passthrough and writeback cache are conflicting modes */ + if (fs.timeout && !fs.passthrough && + conn->capable & FUSE_CAP_WRITEBACK_CACHE) conn->want |= FUSE_CAP_WRITEBACK_CACHE; if (conn->capable & FUSE_CAP_FLOCK_LOCKS) @@ -810,6 +820,30 @@ static void sfs_releasedir(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { } +static void do_passthrough_open(fuse_req_t req, fuse_ino_t ino, int fd, + fuse_file_info *fi) { + Inode& inode = get_inode(ino); + /* Setup a shared backing file on first open of an inode */ + if (inode.backing_id) { + if (fs.debug) + cerr << "DEBUG: reusing shared backing file " + << inode.backing_id << " for inode " << ino << endl; + fi->backing_id = inode.backing_id; + } else if (!(inode.backing_id = fuse_passthrough_open(req, fd))) { + cerr << "DEBUG: fuse_passthrough_open failed for inode " << ino + << ", disabling rw passthrough." 
<< endl; + fs.passthrough = false; + } else { + if (fs.debug) + cerr << "DEBUG: setup shared backing file " + << inode.backing_id << " for inode " << ino << endl; + fi->backing_id = inode.backing_id; + } + /* open in passthrough mode must drop old page cache */ + if (fi->backing_id) + fi->keep_cache = false; +} + static void sfs_create(fuse_req_t req, fuse_ino_t parent, const char *name, mode_t mode, fuse_file_info *fi) { Inode& inode_p = get_inode(parent); @@ -845,6 +879,8 @@ static void sfs_create(fuse_req_t req, fuse_ino_t parent, const char *name, Inode& inode = get_inode(e.ino); lock_guard<mutex> g {inode.m}; inode.nopen++; + if (fs.passthrough) + do_passthrough_open(req, e.ino, fd, fi); fuse_reply_create(req, &e, fi); } @@ -914,6 +950,8 @@ static void sfs_open(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { fi->parallel_direct_writes = 1; fi->fh = fd; + if (fs.passthrough) + do_passthrough_open(req, ino, fd, fi); fuse_reply_open(req, fi); } @@ -922,6 +960,19 @@ static void sfs_release(fuse_req_t req, fuse_ino_t ino, fuse_file_info *fi) { Inode& inode = get_inode(ino); lock_guard<mutex> g {inode.m}; inode.nopen--; + + /* Close the shared backing file on last file close of an inode */ + if (inode.backing_id && !inode.nopen) { + if (fuse_passthrough_close(req, inode.backing_id) < 0) { + cerr << "DEBUG: fuse_passthrough_close failed for inode " + << ino << " backing file " << inode.backing_id << endl; + } else if (fs.debug) { + cerr << "DEBUG: closed backing file " << inode.backing_id + << " for inode " << ino << endl; + } + inode.backing_id = 0; + } + close(fi->fh); fuse_reply_err(req, 0); } @@ -960,6 +1011,11 @@ static void do_read(fuse_req_t req, size_t size, off_t off, fuse_file_info *fi) static void sfs_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, fuse_file_info *fi) { (void) ino; + if (fs.passthrough && !fs.direct_io) { + cerr << "ERROR: fuse_passthrough read failed." 
<< endl; + fuse_reply_err(req, EIO); + return; + } do_read(req, size, off, fi); } @@ -983,6 +1039,11 @@ static void do_write_buf(fuse_req_t req, size_t size, off_t off, static void sfs_write_buf(fuse_req_t req, fuse_ino_t ino, fuse_bufvec *in_buf, off_t off, fuse_file_info *fi) { (void) ino; + if (fs.passthrough && !fs.direct_io) { + cerr << "ERROR: fuse_passthrough write failed." << endl; + fuse_reply_err(req, EIO); + return; + } auto size {fuse_buf_size(in_buf)}; do_write_buf(req, size, off, in_buf, fi); } @@ -1232,6 +1293,7 @@ static cxxopts::ParseResult parse_options(int argc, char **argv) { ("help", "Print help") ("nocache", "Disable attribute all caching") ("nosplice", "Do not use splice(2) to transfer data") + ("nopassthrough", "Do not use pass-through mode for read/write") ("single", "Run single-threaded") ("o", "Mount options (see mount.fuse(5) - only use if you know what " "you are doing)", cxxopts::value(mount_options)) @@ -1240,7 +1302,6 @@ static cxxopts::ParseResult parse_options(int argc, char **argv) { ("clone-fd", "use separate fuse device fd for each thread") ("direct-io", "enable fuse kernel internal direct-io"); - // FIXME: Find a better way to limit the try clause to just // opt_parser.parse() (cf. https://github.com/jarro2783/cxxopts/issues/146) auto options = parse_wrapper(opt_parser, argc, argv); @@ -1268,6 +1329,7 @@ static cxxopts::ParseResult parse_options(int argc, char **argv) { fs.foreground = true; fs.nosplice = options.count("nosplice") != 0; + fs.passthrough = options.count("nopassthrough") == 0; fs.num_threads = options["num-threads"].as<int>(); fs.clone_fd = options.count("clone-fd"); fs.direct_io = options.count("direct-io"); diff --git a/include/fuse_common.h b/include/fuse_common.h index a614fb0..ba4b127 100644 --- a/include/fuse_common.h +++ b/include/fuse_common.h @@ -105,6 +105,11 @@ struct fuse_file_info { /** Requested poll events. Available in ->poll. Only set on kernels which support it. 
If unsupported, this field is set to zero. */ uint32_t poll_events; + + /** Passthrough backing file id. May be filled in by filesystem in + * create and open. It is used to create a passthrough connection + * between FUSE file and backing file. */ + int32_t backing_id; }; @@ -469,6 +474,18 @@ struct fuse_loop_config_v1 { #define FUSE_CAP_DIRECT_IO_ALLOW_MMAP (1 << 28) /** + * Indicates support for passthrough mode access for read/write operations. + * + * If this flag is set in the `capable` field of the `fuse_conn_info` + * structure, then the FUSE kernel module supports redirecting read/write + * operations to the backing file instead of letting them be handled + * by the FUSE daemon. + * + * This feature is disabled by default. + */ +#define FUSE_CAP_PASSTHROUGH (1 << 29) + +/** * Ioctl flags * * FUSE_IOCTL_COMPAT: 32bit compat ioctl on 64bit machine @@ -598,9 +615,29 @@ struct fuse_conn_info { unsigned time_gran; /** + * When FUSE_CAP_PASSTHROUGH is enabled, this is the maximum allowed + * stacking depth of the backing files. In current kernel, the maximum + * allowed stack depth is FILESYSTEM_MAX_STACK_DEPTH (2), which includes + * the FUSE passthrough layer, so the maximum stacking depth for backing + * files is 1. + * + * The default is FUSE_BACKING_STACKED_UNDER (0), meaning that the + * backing files cannot be on a stacked filesystem, but another stacked + * filesystem can be stacked over this FUSE passthrough filesystem. + * + * Set this to FUSE_BACKING_STACKED_OVER (1) if backing files may be on + * a stacked filesystem, such as overlayfs or another FUSE passthrough. + * In this configuration, another stacked filesystem cannot be stacked + * over this FUSE passthrough filesystem. + */ +#define FUSE_BACKING_STACKED_UNDER (0) +#define FUSE_BACKING_STACKED_OVER (1) + unsigned max_backing_stack_depth; + + /** * For future use.
*/ - unsigned reserved[22]; + unsigned reserved[21]; }; struct fuse_session; diff --git a/include/fuse_kernel.h b/include/fuse_kernel.h index e7418d1..897f654 100644 --- a/include/fuse_kernel.h +++ b/include/fuse_kernel.h @@ -211,6 +211,10 @@ * 7.39 * - add FUSE_DIRECT_IO_ALLOW_MMAP * - add FUSE_STATX and related structures + * + * 7.40 + * - add max_stack_depth to fuse_init_out, add FUSE_PASSTHROUGH init flag + * - add backing_id to fuse_open_out, add FOPEN_PASSTHROUGH open flag */ #ifndef _LINUX_FUSE_H @@ -246,7 +250,7 @@ #define FUSE_KERNEL_VERSION 7 /** Minor version number of this interface */ -#define FUSE_KERNEL_MINOR_VERSION 39 +#define FUSE_KERNEL_MINOR_VERSION 40 /** The node ID of the root inode */ #define FUSE_ROOT_ID 1 @@ -353,6 +357,7 @@ struct fuse_file_lock { * FOPEN_STREAM: the file is stream-like (no file position at all) * FOPEN_NOFLUSH: don't flush data cache on close (unless FUSE_WRITEBACK_CACHE) * FOPEN_PARALLEL_DIRECT_WRITES: Allow concurrent direct writes on the same inode + * FOPEN_PASSTHROUGH: passthrough read/write operations for this open file */ #define FOPEN_DIRECT_IO (1 << 0) #define FOPEN_KEEP_CACHE (1 << 1) @@ -361,6 +366,7 @@ struct fuse_file_lock { #define FOPEN_STREAM (1 << 4) #define FOPEN_NOFLUSH (1 << 5) #define FOPEN_PARALLEL_DIRECT_WRITES (1 << 6) +#define FOPEN_PASSTHROUGH (1 << 7) /** * INIT request/reply flags @@ -449,6 +455,7 @@ struct fuse_file_lock { #define FUSE_CREATE_SUPP_GROUP (1ULL << 34) #define FUSE_HAS_EXPIRE_ONLY (1ULL << 35) #define FUSE_DIRECT_IO_ALLOW_MMAP (1ULL << 36) +#define FUSE_PASSTHROUGH (1ULL << 37) /* Obsolete alias for FUSE_DIRECT_IO_ALLOW_MMAP */ #define FUSE_DIRECT_IO_RELAX FUSE_DIRECT_IO_ALLOW_MMAP @@ -761,7 +768,7 @@ struct fuse_create_in { struct fuse_open_out { uint64_t fh; uint32_t open_flags; - uint32_t padding; + int32_t backing_id; }; struct fuse_release_in { @@ -877,7 +884,8 @@ struct fuse_init_out { uint16_t max_pages; uint16_t map_alignment; uint32_t flags2; - uint32_t unused[7]; + 
uint32_t max_stack_depth; + uint32_t unused[6]; }; #define CUSE_INIT_INFO_MAX 4096 @@ -1049,9 +1057,18 @@ struct fuse_notify_retrieve_in { uint64_t dummy4; }; +struct fuse_backing_map { + int32_t fd; + uint32_t flags; + uint64_t padding; +}; + /* Device ioctls: */ #define FUSE_DEV_IOC_MAGIC 229 #define FUSE_DEV_IOC_CLONE _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t) +#define FUSE_DEV_IOC_BACKING_OPEN _IOW(FUSE_DEV_IOC_MAGIC, 1, \ + struct fuse_backing_map) +#define FUSE_DEV_IOC_BACKING_CLOSE _IOW(FUSE_DEV_IOC_MAGIC, 2, uint32_t) struct fuse_lseek_in { uint64_t fh; diff --git a/include/fuse_lowlevel.h b/include/fuse_lowlevel.h index 2ada62b..cb38115 100644 --- a/include/fuse_lowlevel.h +++ b/include/fuse_lowlevel.h @@ -1397,6 +1397,19 @@ int fuse_reply_attr(fuse_req_t req, const struct stat *attr, int fuse_reply_readlink(fuse_req_t req, const char *link); /** + * Setup passthrough backing file for open reply + * + * Possible requests: + * open, opendir, create + * + * @param req request handle + * @param fd backing file descriptor + * @return positive backing id for success, 0 for failure + */ +int fuse_passthrough_open(fuse_req_t req, int fd); +int fuse_passthrough_close(fuse_req_t req, int backing_id); + +/** * Reply with open parameters * * currently the following members of 'fi' are used: diff --git a/lib/fuse_lowlevel.c b/lib/fuse_lowlevel.c index c08c99c..1f3a5fa 100644 --- a/lib/fuse_lowlevel.c +++ b/lib/fuse_lowlevel.c @@ -27,6 +27,7 @@ #include <errno.h> #include <assert.h> #include <sys/file.h> +#include <sys/ioctl.h> #ifndef F_LINUX_SPECIFIC_BASE #define F_LINUX_SPECIFIC_BASE 1024 @@ -400,6 +401,10 @@ static void fill_open(struct fuse_open_out *arg, const struct fuse_file_info *f) { arg->fh = f->fh; + if (f->backing_id > 0) { + arg->backing_id = f->backing_id; + arg->open_flags |= FOPEN_PASSTHROUGH; + } if (f->direct_io) arg->open_flags |= FOPEN_DIRECT_IO; if (f->keep_cache) @@ -466,6 +471,31 @@ int fuse_reply_readlink(fuse_req_t req, const char *linkname) return 
send_reply_ok(req, linkname, strlen(linkname)); } +int fuse_passthrough_open(fuse_req_t req, int fd) +{ + struct fuse_backing_map map = { .fd = fd }; + int ret; + + ret = ioctl(req->se->fd, FUSE_DEV_IOC_BACKING_OPEN, &map); + if (ret <= 0) { + fuse_log(FUSE_LOG_ERR, "fuse: passthrough_open: %s\n", strerror(errno)); + return 0; + } + + return ret; +} + +int fuse_passthrough_close(fuse_req_t req, int backing_id) +{ + int ret; + + ret = ioctl(req->se->fd, FUSE_DEV_IOC_BACKING_CLOSE, &backing_id); + if (ret < 0) + fuse_log(FUSE_LOG_ERR, "fuse: passthrough_close: %s\n", strerror(errno)); + + return ret; +} + int fuse_reply_open(fuse_req_t req, const struct fuse_file_info *f) { struct fuse_open_out arg; @@ -2027,6 +2057,8 @@ void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) se->conn.capable |= FUSE_CAP_DIRECT_IO_ALLOW_MMAP; if (arg->minor >= 38 || (inargflags & FUSE_HAS_EXPIRE_ONLY)) se->conn.capable |= FUSE_CAP_EXPIRE_ONLY; + if (inargflags & FUSE_PASSTHROUGH) + se->conn.capable |= FUSE_CAP_PASSTHROUGH; } else { se->conn.max_readahead = 0; } @@ -2161,6 +2193,14 @@ void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) outargflags |= FUSE_SETXATTR_EXT; if (se->conn.want & FUSE_CAP_DIRECT_IO_ALLOW_MMAP) outargflags |= FUSE_DIRECT_IO_ALLOW_MMAP; + if (se->conn.want & FUSE_CAP_PASSTHROUGH) { + outargflags |= FUSE_PASSTHROUGH; + /* + * outarg.max_stack_depth includes the fuse stack layer, + * so it is one more than max_backing_stack_depth. 
+ */ + outarg.max_stack_depth = se->conn.max_backing_stack_depth + 1; + } if (inargflags & FUSE_INIT_EXT) { outargflags |= FUSE_INIT_EXT; @@ -2199,6 +2239,9 @@ void do_init(fuse_req_t req, fuse_ino_t nodeid, const void *inarg) outarg.congestion_threshold); fuse_log(FUSE_LOG_DEBUG, " time_gran=%u\n", outarg.time_gran); + if (se->conn.want & FUSE_CAP_PASSTHROUGH) + fuse_log(FUSE_LOG_DEBUG, " max_stack_depth=%u\n", + outarg.max_stack_depth); } if (arg->minor < 5) outargsize = FUSE_COMPAT_INIT_OUT_SIZE; diff --git a/lib/fuse_versionscript b/lib/fuse_versionscript index 22782bc..d0b98f6 100644 --- a/lib/fuse_versionscript +++ b/lib/fuse_versionscript @@ -194,6 +194,8 @@ FUSE_3.17 { _fuse_new_30; _fuse_new_317; fuse_main_real_317; + fuse_passthrough_open; + fuse_passthrough_close; } FUSE_3.12; # Local Variables: |