From 2f9fb74a42a1f8d017081ab18f7bc73f46c229bd Mon Sep 17 00:00:00 2001
From: Takeshi Nakatani <ggtakec@gmail.com>
Date: Wed, 24 Jan 2024 22:10:14 +0900
Subject: [PATCH] Corrected list_bucket to search in stat cache during creating
 new file (#2376)

---
 src/cache.cpp | 198 ++++++++++++++++++++++++++++++++++++++------------
 src/cache.h   |  17 ++++-
 src/s3fs.cpp  |   8 +-
 3 files changed, 172 insertions(+), 51 deletions(-)

diff --git a/src/cache.cpp b/src/cache.cpp
index 3f4e86c4cb..76e3720637 100644
--- a/src/cache.cpp
+++ b/src/cache.cpp
@@ -338,21 +338,17 @@ bool StatCache::AddStat(const std::string& key, const headers_t& meta, bool forc
     }
     S3FS_PRN_INFO3("add stat cache entry[path=%s]", key.c_str());
 
-    bool found;
-    bool do_truncate;
-    {
-        AutoLock lock(&StatCache::stat_cache_lock);
-        found       = stat_cache.end() != stat_cache.find(key);
-        do_truncate = stat_cache.size() > CacheSize;
-    }
+    AutoLock lock(&StatCache::stat_cache_lock);
 
-    if(found){
-        DelStat(key.c_str());
+    if(stat_cache.end() != stat_cache.find(key)){
+        // found cache
+        DelStat(key.c_str(), AutoLock::ALREADY_LOCKED);
     }else{
-        if(do_truncate){
+        // check: need to truncate cache
+        if(stat_cache.size() > CacheSize){
             // cppcheck-suppress unmatchedSuppression
             // cppcheck-suppress knownConditionTrueFalse
-            if(!TruncateCache()){
+            if(!TruncateCache(AutoLock::ALREADY_LOCKED)){
                 return false;
             }
         }
@@ -386,9 +382,6 @@ bool StatCache::AddStat(const std::string& key, const headers_t& meta, bool forc
         }
     }
 
-    // add
-    AutoLock lock(&StatCache::stat_cache_lock);
-
     const auto& value = stat_cache[key] = std::move(ent);
 
     // check symbolic link cache
@@ -398,6 +391,13 @@ bool StatCache::AddStat(const std::string& key, const headers_t& meta, bool forc
             DelSymlink(key.c_str(), AutoLock::ALREADY_LOCKED);
         }
     }
+
+    // If no_truncate flag is set, set file name to notruncate_file_cache
+    //
+    if(no_truncate){
+        AddNotruncateCache(key);
+    }
+
     return true;
 }
 
@@ -458,21 +458,17 @@ bool StatCache::AddNoObjectCache(const std::string& key)
     }
     S3FS_PRN_INFO3("add no object cache entry[path=%s]", key.c_str());
 
-    bool found;
-    bool do_truncate;
-    {
-        AutoLock lock(&StatCache::stat_cache_lock);
-        found       = stat_cache.end() != stat_cache.find(key);
-        do_truncate = stat_cache.size() > CacheSize;
-    }
+    AutoLock lock(&StatCache::stat_cache_lock);
 
-    if(found){
-        DelStat(key.c_str());
+    if(stat_cache.end() != stat_cache.find(key)){
+		// found
+        DelStat(key.c_str(), AutoLock::ALREADY_LOCKED);
     }else{
-        if(do_truncate){
+        // check: need to truncate cache
+        if(stat_cache.size() > CacheSize){
             // cppcheck-suppress unmatchedSuppression
             // cppcheck-suppress knownConditionTrueFalse
-            if(!TruncateCache()){
+            if(!TruncateCache(AutoLock::ALREADY_LOCKED)){
                 return false;
             }
         }
@@ -488,9 +484,6 @@ bool StatCache::AddNoObjectCache(const std::string& key)
     ent.meta.clear();
     SetStatCacheTime(ent.cache_date);    // Set time.
 
-    // add
-    AutoLock lock(&StatCache::stat_cache_lock);
-
     stat_cache[key] = std::move(ent);
 
     // check symbolic link cache
@@ -509,18 +502,26 @@ void StatCache::ChangeNoTruncateFlag(const std::string& key, bool no_truncate)
     if(stat_cache.end() != iter){
         stat_cache_entry* ent = &iter->second;
         if(no_truncate){
+            if(0L == ent->notruncate){
+                // need to add no truncate cache.
+                AddNotruncateCache(key);
+            }
             ++(ent->notruncate);
         }else{
             if(0L < ent->notruncate){
                 --(ent->notruncate);
+                if(0L == ent->notruncate){
+                    // need to delete from no truncate cache.
+                    DelNotruncateCache(key);
+                }
             }
         }
     }
 }
 
-bool StatCache::TruncateCache()
+bool StatCache::TruncateCache(AutoLock::Type locktype)
 {
-    AutoLock lock(&StatCache::stat_cache_lock);
+    AutoLock lock(&StatCache::stat_cache_lock, locktype);
 
     if(stat_cache.empty()){
         return true;
@@ -588,6 +589,7 @@ bool StatCache::DelStat(const char* key, AutoLock::Type locktype)
     stat_cache_t::iterator iter;
     if(stat_cache.end() != (iter = stat_cache.find(key))){
         stat_cache.erase(iter);
+        DelNotruncateCache(key);
     }
     if(0 < strlen(key) && 0 != strcmp(key, "/")){
         std::string strpath = key;
@@ -600,6 +602,7 @@ bool StatCache::DelStat(const char* key, AutoLock::Type locktype)
         }
         if(stat_cache.end() != (iter = stat_cache.find(strpath))){
             stat_cache.erase(iter);
+            DelNotruncateCache(strpath);
         }
     }
     S3FS_MALLOCTRIM(0);
@@ -648,21 +651,17 @@ bool StatCache::AddSymlink(const std::string& key, const std::string& value)
     }
     S3FS_PRN_INFO3("add symbolic link cache entry[path=%s, value=%s]", key.c_str(), value.c_str());
 
-    bool found;
-    bool do_truncate;
-    {
-        AutoLock lock(&StatCache::stat_cache_lock);
-        found       = symlink_cache.end() != symlink_cache.find(key);
-        do_truncate = symlink_cache.size() > CacheSize;
-    }
+    AutoLock lock(&StatCache::stat_cache_lock);
 
-    if(found){
-        DelSymlink(key.c_str());
+    if(symlink_cache.end() != symlink_cache.find(key)){
+    	// found
+        DelSymlink(key.c_str(), AutoLock::ALREADY_LOCKED);
     }else{
-        if(do_truncate){
+        // check: need to truncate cache
+        if(symlink_cache.size() > CacheSize){
             // cppcheck-suppress unmatchedSuppression
             // cppcheck-suppress knownConditionTrueFalse
-            if(!TruncateSymlink()){
+            if(!TruncateSymlink(AutoLock::ALREADY_LOCKED)){
                 return false;
             }
         }
@@ -674,17 +673,14 @@ bool StatCache::AddSymlink(const std::string& key, const std::string& value)
     ent.hit_count  = 0;
     SetStatCacheTime(ent.cache_date);    // Set time(use the same as Stats).
 
-    // add
-    AutoLock lock(&StatCache::stat_cache_lock);
-
     symlink_cache[key] = std::move(ent);
 
     return true;
 }
 
-bool StatCache::TruncateSymlink()
+bool StatCache::TruncateSymlink(AutoLock::Type locktype)
 {
-    AutoLock lock(&StatCache::stat_cache_lock);
+    AutoLock lock(&StatCache::stat_cache_lock, locktype);
 
     if(symlink_cache.empty()){
         return true;
@@ -746,6 +742,116 @@ bool StatCache::DelSymlink(const char* key, AutoLock::Type locktype)
     return true;
 }
 
+// [NOTE]
+// Need to lock StatCache::stat_cache_lock before calling this method.
+//
+bool StatCache::AddNotruncateCache(const std::string& key)
+{
+    if(key.empty() || '/' == *key.rbegin()){
+        return false;
+    }
+
+    std::string parentdir = mydirname(key);
+    std::string filename  = mybasename(key);
+    if(parentdir.empty() || filename.empty()){
+        return false;
+    }
+    parentdir += '/';       // directory path must be '/' termination.
+
+    notruncate_dir_map_t::iterator iter = notruncate_file_cache.find(parentdir);
+    if(iter == notruncate_file_cache.end()){
+        // add new list
+        notruncate_filelist_t list;
+        list.push_back(filename);
+        notruncate_file_cache[parentdir] = list;
+    }else{
+        // add filename to existed list
+        notruncate_filelist_t& filelist = iter->second;
+        notruncate_filelist_t::const_iterator fiter = std::find(filelist.begin(), filelist.end(), filename);
+        if(fiter == filelist.end()){
+            filelist.push_back(filename);
+        }
+    }
+    return true;
+}
+
+// [NOTE]
+// Need to lock StatCache::stat_cache_lock before calling this method.
+//
+bool StatCache::DelNotruncateCache(const std::string& key)
+{
+    if(key.empty() || '/' == *key.rbegin()){
+        return false;
+    }
+
+    std::string parentdir = mydirname(key);
+    std::string filename  = mybasename(key);
+    if(parentdir.empty() || filename.empty()){
+        return false;
+    }
+    parentdir += '/';       // directory path must be '/' termination.
+
+    notruncate_dir_map_t::iterator iter = notruncate_file_cache.find(parentdir);
+    if(iter != notruncate_file_cache.end()){
+        // found directory in map
+        notruncate_filelist_t& filelist = iter->second;
+        notruncate_filelist_t::iterator fiter = std::find(filelist.begin(), filelist.end(), filename);
+        if(fiter != filelist.end()){
+            // found filename in directory file list
+            filelist.erase(fiter);
+            if(filelist.empty()){
+                notruncate_file_cache.erase(parentdir);
+            }
+        }
+    }
+    return true;
+}
+
+// [Background]
+// When s3fs creates a new file, the file does not exist until the file contents
+// are uploaded.(because it doesn't create a 0 byte file)
+// From the time this file is created(opened) until it is uploaded(flush), it
+// will have a Stat cache with the No truncate flag added.
+// This avoids file not existing errors in operations such as chmod and utimens
+// that occur in the short period before file upload.
+// Besides this, we also need to support readdir(list_bucket), this method is
+// called to maintain the cache for readdir and return its value.
+//
+// [NOTE]
+// Add the file names under parentdir to the list.
+// However, if the same file name exists in the list, it will not be added.
+// parentdir must be terminated with a '/'.
+//
+bool StatCache::GetNotruncateCache(const std::string& parentdir, notruncate_filelist_t& list)
+{
+    if(parentdir.empty()){
+        return false;
+    }
+
+    std::string dirpath = parentdir;
+    if('/' != *dirpath.rbegin()){
+        dirpath += '/';
+    }
+
+    AutoLock lock(&StatCache::stat_cache_lock);
+
+    notruncate_dir_map_t::iterator iter = notruncate_file_cache.find(dirpath);
+    if(iter == notruncate_file_cache.end()){
+        // not found directory map
+        return true;
+    }
+
+    // found directory in map
+    const notruncate_filelist_t& filelist = iter->second;
+    for(notruncate_filelist_t::const_iterator fiter = filelist.begin(); fiter != filelist.end(); ++fiter){
+        if(list.end() == std::find(list.begin(), list.end(), *fiter)){
+           // found notuncate file that does not exist in the list, so add it.
+           list.push_back(*fiter);
+        }
+    }
+    return true;
+}
+
 //-------------------------------------------------------------------
 // Functions
 //-------------------------------------------------------------------
diff --git a/src/cache.h b/src/cache.h
index 78fe733a4b..5157f7423d 100644
--- a/src/cache.h
+++ b/src/cache.h
@@ -69,6 +69,12 @@ struct symlink_cache_entry {
 
 typedef std::map<std::string, symlink_cache_entry> symlink_cache_t;
 
+//
+// Typedefs for No truncate file name cache
+//
+typedef std::vector<std::string> notruncate_filelist_t;                    // untruncated file name list in dir
+typedef std::map<std::string, notruncate_filelist_t> notruncate_dir_map_t; // key is parent dir path
+
 //-------------------------------------------------------------------
 // Class StatCache
 //-------------------------------------------------------------------
@@ -93,6 +99,7 @@ class StatCache
         unsigned long          CacheSize;
         bool                   IsCacheNoObject;
         symlink_cache_t        symlink_cache;
+        notruncate_dir_map_t   notruncate_file_cache;
 
     private:
         StatCache();
@@ -101,9 +108,12 @@ class StatCache
         void Clear();
         bool GetStat(const std::string& key, struct stat* pst, headers_t* meta, bool overcheck, const char* petag, bool* pisforce);
         // Truncate stat cache
-        bool TruncateCache();
+        bool TruncateCache(AutoLock::Type locktype = AutoLock::NONE);
         // Truncate symbolic link cache
-        bool TruncateSymlink();
+        bool TruncateSymlink(AutoLock::Type locktype = AutoLock::NONE);
+
+        bool AddNotruncateCache(const std::string& key);
+        bool DelNotruncateCache(const std::string& key);
 
     public:
         // Reference singleton
@@ -182,6 +192,9 @@ class StatCache
         bool GetSymlink(const std::string& key, std::string& value);
         bool AddSymlink(const std::string& key, const std::string& value);
         bool DelSymlink(const char* key, AutoLock::Type locktype = AutoLock::NONE);
+
+        // Cache for Notruncate file
+        bool GetNotruncateCache(const std::string& parentdir, notruncate_filelist_t& list);
 };
 
 //-------------------------------------------------------------------
diff --git a/src/s3fs.cpp b/src/s3fs.cpp
index f5bca60982..67fc8d3391 100644
--- a/src/s3fs.cpp
+++ b/src/s3fs.cpp
@@ -1742,8 +1742,9 @@ static int rename_directory(const char* from, const char* to)
         S3FS_PRN_ERR("list_bucket returns error.");
         return result; 
     }
-    head.GetNameList(headlist);                       // get name without "/".
-    S3ObjList::MakeHierarchizedList(headlist, false); // add hierarchized dir.
+    head.GetNameList(headlist);                                             // get name without "/".
+    StatCache::getStatCacheData()->GetNotruncateCache(basepath, headlist);  // Add notruncate file name from stat cache
+    S3ObjList::MakeHierarchizedList(headlist, false);                       // add hierarchized dir.
 
     s3obj_list_t::const_iterator liter;
     for(liter = headlist.begin(); headlist.end() != liter; ++liter){
@@ -3271,7 +3272,8 @@ static int readdir_multi_head(const char* path, const S3ObjList& head, void* buf
     S3FS_PRN_INFO1("[path=%s][list=%zu]", path, headlist.size());
 
     // Make base path list.
-    head.GetNameList(headlist, true, false);  // get name with "/".
+    head.GetNameList(headlist, true, false);                                        // get name with "/".
+    StatCache::getStatCacheData()->GetNotruncateCache(std::string(path), headlist); // Add notruncate file name from stat cache
 
     // Initialize S3fsMultiCurl
     curlmulti.SetSuccessCallback(multi_head_callback);