diff options
author | Bernd Schubert <bschubert@ddn.com> | 2024-01-10 15:11:34 +0100 |
---|---|---|
committer | Bernd Schubert <bernd.schubert@fastmail.fm> | 2024-02-26 20:37:53 +0100 |
commit | 420a6c3c5da2cbcc2276be7fb767ecc5a884cac7 (patch) | |
tree | ee680ec67c24112ee1c1a381f62f6376e719c53a | |
parent | 31bf17c7448944a04d4c84df534013a4d6924acc (diff) | |
download | libfuse-420a6c3c5da2cbcc2276be7fb767ecc5a884cac7.tar.gz |
example/notify_store_retrieve: Fix races and handle errors
This test was racy, the lookup counter must only be increased once
kernel side has lookup completed and knows about the inode.
However, this is still racy as
app thread
(python script) kernel-side libfuse thread kernel
----------------------------------------------------------------------
open file
lookup req
wait
handle req
reply
wake app thread
return
new_inode()
<continue file open>
So actually on libfuse side even after returning from kernel side
it is not ensured that the kernel has created the inode. I.e.
using lookup_cnt in the test is still racy.
A new variabled 'open_cnt' is added that is only increased in open
Using open_cnt should be safe to use as kernel side (with atomic-open)
first does lookup, from that data creates the inode and only then sends
an open request. I.e. update_fs_loop() must only call
fuse_lowlevel_notify_store() once it is absolutely sure kernel side has
created the inode (open_cnt) and when it is sure the kernel inode still
exists (lookup_cnt).
Not really nice, but we actually need to accept some errors, as
these still come up at umount time. Typically it is hard to hit,
but tests in github actually frequently get it. Actually, it can be
easily reproduced by commenting out the sleep line in
update_fs_loop(). Underlying issue is that kernel side is
sending ->destroy() only when it already internally released all
inodes - too late for this test. The errors I run into are ENOENT
and EBADFD, but I added back in ENODEV for safety.
In order to avoid any other kind races mutex lock is also introduced.
-rw-r--r-- | example/notify_store_retrieve.c | 49 |
1 files changed, 38 insertions, 11 deletions
diff --git a/example/notify_store_retrieve.c b/example/notify_store_retrieve.c index c4d0087..d7e1c4b 100644 --- a/example/notify_store_retrieve.c +++ b/example/notify_store_retrieve.c @@ -81,7 +81,9 @@ #define FILE_NAME "current_time" static char file_contents[MAX_STR_LEN]; static int lookup_cnt = 0; +static int open_cnt = 0; static size_t file_size; +static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; /* Keep track if we ever stored data (==1), and received it back correctly (==2) */ @@ -139,7 +141,6 @@ static void tfs_lookup(fuse_req_t req, fuse_ino_t parent, goto err_out; else if (strcmp(name, FILE_NAME) == 0) { e.ino = FILE_INO; - lookup_cnt++; } else goto err_out; @@ -148,6 +149,18 @@ static void tfs_lookup(fuse_req_t req, fuse_ino_t parent, if (tfs_stat(e.ino, &e.attr) != 0) goto err_out; fuse_reply_entry(req, &e); + + /* + * must only be set when the kernel knows about the entry, + * otherwise update_fs_loop() might see a positive count, but kernel + * would not have the entry yet + */ + if (e.ino == FILE_INO) { + pthread_mutex_lock(&lock); + lookup_cnt++; + pthread_mutex_unlock(&lock); + } + return; err_out: @@ -157,9 +170,11 @@ err_out: static void tfs_forget (fuse_req_t req, fuse_ino_t ino, uint64_t nlookup) { (void) req; - if(ino == FILE_INO) + if(ino == FILE_INO) { + pthread_mutex_lock(&lock); lookup_cnt -= nlookup; - else + pthread_mutex_unlock(&lock); + } else assert(ino == FUSE_ROOT_ID); fuse_reply_none(req); } @@ -232,9 +247,12 @@ static void tfs_open(fuse_req_t req, fuse_ino_t ino, fuse_reply_err(req, EISDIR); else if ((fi->flags & O_ACCMODE) != O_RDONLY) fuse_reply_err(req, EACCES); - else if (ino == FILE_INO) + else if (ino == FILE_INO) { fuse_reply_open(req, fi); - else { + pthread_mutex_lock(&lock); + open_cnt++; + pthread_mutex_unlock(&lock); + } else { // This should not happen fprintf(stderr, "Got open for non-existing inode!\n"); fuse_reply_err(req, ENOENT); @@ -315,7 +333,8 @@ static void* update_fs_loop(void *data) { while(!is_umount) { update_fs(); - if (!options.no_notify && lookup_cnt) { + pthread_mutex_lock(&lock); + if (!options.no_notify && open_cnt && lookup_cnt) { /* Only send notification if the kernel is aware of the inode */ bufv.count = 1; @@ -325,11 +344,17 @@ static void* update_fs_loop(void *data) { bufv.buf[0].mem = file_contents; bufv.buf[0].flags = 0; - /* This shouldn't fail, but apparently it sometimes - does - see https://github.com/libfuse/libfuse/issues/105 */ + /* + * Some errors (ENOENT, EBADFD, ENODEV) have to be accepted as they + * might come up during umount, when kernel side already releases + * all inodes, but does not send FUSE_DESTROY yet. + */ + ret = fuse_lowlevel_notify_store(se, FILE_INO, 0, &bufv, 0); - if (ret != 0 && !is_umount) { - fprintf(stderr, "ERROR: fuse_lowlevel_notify_store() failed with %s (%d)\n", + if ((ret != 0 && !is_umount) && + ret != -ENOENT && ret != -EBADFD && ret != -ENODEV) { + fprintf(stderr, + "ERROR: fuse_lowlevel_notify_store() failed with %s (%d)\n", strerror(-ret), -ret); abort(); } @@ -338,10 +363,12 @@ static void* update_fs_loop(void *data) { kernel to send us back the stored data */ ret = fuse_lowlevel_notify_retrieve(se, FILE_INO, MAX_STR_LEN, 0, (void*) strdup(file_contents)); - assert(ret == 0 || is_umount); + assert((ret == 0 || is_umount) || ret == -ENOENT || ret == -EBADFD || + ret != -ENODEV); if(retrieve_status == 0) retrieve_status = 1; } + pthread_mutex_unlock(&lock); sleep(options.update_interval); } return NULL; |