Skip to content

Commit

Permalink
Restore fsync behavior in FSDirectory via P/Invoke
Browse files Browse the repository at this point in the history
This restores the commented-out fsync behavior in FSDirectory to help
mitigate a performance regression in .NET 8.
  • Loading branch information
paulirwin committed May 11, 2024
1 parent 9b74275 commit b2b8cdc
Show file tree
Hide file tree
Showing 5 changed files with 300 additions and 50 deletions.
66 changes: 27 additions & 39 deletions src/Lucene.Net/Store/FSDirectory.cs
Original file line number Diff line number Diff line change
Expand Up @@ -67,11 +67,11 @@ namespace Lucene.Net.Store
/// the best <see cref="FSDirectory"/> implementation given your
/// environment, and the known limitations of each
/// implementation. For users who have no reason to prefer a
/// specific implementation, it's best to simply use
/// specific implementation, it's best to simply use
/// <see cref="Open(string)"/> (or one of its overloads). For all others, you should instantiate the
/// desired implementation directly.
///
/// <para/>The locking implementation is by default
/// <para/>The locking implementation is by default
/// <see cref="NativeFSLockFactory"/>, but can be changed by
/// passing in a custom <see cref="LockFactory"/> instance.
///
Expand All @@ -94,10 +94,8 @@ public abstract class FSDirectory : BaseDirectory
public const int DEFAULT_READ_CHUNK_SIZE = 8192;

protected readonly DirectoryInfo m_directory; // The underlying filesystem directory
protected readonly ISet<string> m_staleFiles = new ConcurrentHashSet<string>(); // Files written, but not yet sync'ed

// LUCENENET specific: No such thing as "stale files" in .NET, since Flush(true) writes everything to disk before
// our FileStream is disposed.
//protected readonly ISet<string> m_staleFiles = new ConcurrentHashSet<string>(); // Files written, but not yet sync'ed
#pragma warning disable 612, 618
private int chunkSize = DEFAULT_READ_CHUNK_SIZE;
#pragma warning restore 612, 618
Expand Down Expand Up @@ -321,9 +319,8 @@ public override void DeleteFile(string name)
{
throw new IOException("Cannot delete " + file, e);
}
// LUCENENET specific: No such thing as "stale files" in .NET, since Flush(true) writes everything to disk before
// our FileStream is disposed.
//m_staleFiles.Remove(name);

m_staleFiles.Remove(name);
}

/// <summary>
Expand Down Expand Up @@ -366,35 +363,29 @@ protected virtual void EnsureCanWrite(string name)

protected virtual void OnIndexOutputClosed(FSIndexOutput io)
{
// LUCENENET specific: No such thing as "stale files" in .NET, since Flush(true) writes everything to disk before
// our FileStream is disposed.
//m_staleFiles.Add(io.name);
m_staleFiles.Add(io.name);
}

public override void Sync(ICollection<string> names)
{
EnsureOpen();

// LUCENENET specific: No such thing as "stale files" in .NET, since Flush(true) writes everything to disk before
// our FileStream is disposed. Therefore, there is nothing else to do in this method.
//ISet<string> toSync = new HashSet<string>(names);
//toSync.IntersectWith(m_staleFiles);

//// LUCENENET specific: Fsync breaks concurrency here.
//// Part of a solution suggested by Vincent Van Den Berghe: http://apache.markmail.org/message/hafnuhq2ydhfjmi2
////foreach (var name in toSync)
////{
//// Fsync(name);
////}

//// fsync the directory itsself, but only if there was any file fsynced before
//// (otherwise it can happen that the directory does not yet exist)!
//if (toSync.Count > 0)
//{
// IOUtils.Fsync(m_directory.FullName, true);
//}
ISet<string> toSync = new HashSet<string>(names);
toSync.IntersectWith(m_staleFiles);

foreach (var name in toSync)
{
Fsync(name);
}

//m_staleFiles.ExceptWith(toSync);
// fsync the directory itsself, but only if there was any file fsynced before
// (otherwise it can happen that the directory does not yet exist)!
if (toSync.Count > 0)
{
IOUtils.Fsync(m_directory, true);
}

m_staleFiles.ExceptWith(toSync);
}

public override string GetLockID()
Expand Down Expand Up @@ -546,7 +537,7 @@ protected override void Dispose(bool disposing)
Exception priorE = null; // LUCENENET: No need to cast to IOExcpetion
try
{
file.Flush(flushToDisk: true);
file.Flush(flushToDisk: false);
}
catch (Exception ioe) when (ioe.IsIOException())
{
Expand Down Expand Up @@ -586,12 +577,9 @@ public override void Seek(long pos)
public override long Position => file.Position; // LUCENENET specific - need to override, since we are buffering locally, renamed from getFilePointer() to match FileStream
}

// LUCENENET specific: Fsync is pointless in .NET, since we are
// calling FileStream.Flush(true) before the stream is disposed
// which means we never need it at the point in Java where it is called.
//protected virtual void Fsync(string name)
//{
// IOUtils.Fsync(Path.Combine(m_directory.FullName, name), false);
//}
protected virtual void Fsync(string name)
{
IOUtils.Fsync(new FileInfo(Path.Combine(m_directory.FullName, name)), false);
}
}
}
}
25 changes: 17 additions & 8 deletions src/Lucene.Net/Support/ConcurrentHashSet.cs
Original file line number Diff line number Diff line change
Expand Up @@ -204,16 +204,16 @@ public ConcurrentHashSet(IEnumerable<T> collection, IEqualityComparer<T> compare


/// <summary>
/// Initializes a new instance of the <see cref="ConcurrentHashSet{T}"/>
/// class that contains elements copied from the specified <see cref="T:System.Collections.IEnumerable"/>,
/// has the specified concurrency level, has the specified initial capacity, and uses the specified
/// Initializes a new instance of the <see cref="ConcurrentHashSet{T}"/>
/// class that contains elements copied from the specified <see cref="T:System.Collections.IEnumerable"/>,
/// has the specified concurrency level, has the specified initial capacity, and uses the specified
/// <see cref="T:System.Collections.Generic.IEqualityComparer{T}"/>.
/// </summary>
/// <param name="concurrencyLevel">The estimated number of threads that will update the
/// <param name="concurrencyLevel">The estimated number of threads that will update the
/// <see cref="ConcurrentHashSet{T}"/> concurrently.</param>
/// <param name="collection">The <see cref="T:System.Collections.IEnumerable{T}"/> whose elements are copied to the new
/// <param name="collection">The <see cref="T:System.Collections.IEnumerable{T}"/> whose elements are copied to the new
/// <see cref="ConcurrentHashSet{T}"/>.</param>
/// <param name="comparer">The <see cref="T:System.Collections.Generic.IEqualityComparer{T}"/> implementation to use
/// <param name="comparer">The <see cref="T:System.Collections.Generic.IEqualityComparer{T}"/> implementation to use
/// when comparing items.</param>
/// <exception cref="ArgumentNullException">
/// <paramref name="collection"/> is a null reference.
Expand Down Expand Up @@ -638,7 +638,7 @@ private void GrowTable(Tables tables)
// We want to make sure that GrowTable will not be called again, since table is at the maximum size.
// To achieve that, we set the budget to int.MaxValue.
//
// (There is one special case that would allow GrowTable() to be called in the future:
// (There is one special case that would allow GrowTable() to be called in the future:
// calling Clear() on the ConcurrentHashSet will shrink the table and lower the budget.)
_budget = int.MaxValue;
}
Expand Down Expand Up @@ -753,7 +753,16 @@ private void CopyToItems(T[] array, int index)

public void ExceptWith(IEnumerable<T> other)
{
throw new NotImplementedException();
if (ReferenceEquals(this, other))
{
Clear();
return;
}

foreach (var item in other)
{
TryRemove(item);
}
}

public void IntersectWith(IEnumerable<T> other)
Expand Down
95 changes: 95 additions & 0 deletions src/Lucene.Net/Support/IO/PosixFsyncSupport.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
using Lucene.Net.Util;
using System.IO;
using System.Runtime.InteropServices;

namespace Lucene.Net.Support.IO
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

internal static class PosixFsyncSupport
{
// https://pubs.opengroup.org/onlinepubs/009695399/functions/fsync.html
[DllImport("libc", SetLastError = true)]
private static extern int fsync(int fd);

// https://pubs.opengroup.org/onlinepubs/007904875/functions/open.html
[DllImport("libc", SetLastError = true)]
private static extern int open([MarshalAs(UnmanagedType.LPStr)] string pathname, int flags);

// https://pubs.opengroup.org/onlinepubs/009604499/functions/close.html
[DllImport("libc", SetLastError = true)]
private static extern int close(int fd);

// https://pubs.opengroup.org/onlinepubs/007904975/functions/fcntl.html
// and https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/fcntl.2.html
[DllImport("libc", SetLastError = true)]
private static extern int fcntl(int fd, int cmd, int arg);

private const int O_RDONLY = 0;
private const int O_WRONLY = 1;

// https://opensource.apple.com/source/xnu/xnu-6153.81.5/bsd/sys/fcntl.h.auto.html
private const int F_FULLFSYNC = 51;

public static void Fsync(string path, bool isDir)
{
using DescriptorWrapper handle = new DescriptorWrapper(path, isDir);
handle.Flush();
}

private readonly ref struct DescriptorWrapper
{
private readonly int fd;

public DescriptorWrapper(string path, bool isDir)
{
fd = open(path, isDir ? O_RDONLY : O_WRONLY);

if (fd == -1)
{
int error = Marshal.GetLastWin32Error();
throw new IOException($"Unable to open path, error: 0x{error:x8}", error);
}
}

public void Flush()
{
// if macOS, use F_FULLFSYNC
if (Constants.MAC_OS_X)
{
if (fcntl(fd, F_FULLFSYNC, 0) == -1)
{
throw new IOException("fcntl failed", Marshal.GetLastWin32Error());
}
}
else if (fsync(fd) == -1)
{
throw new IOException("fsync failed", Marshal.GetLastWin32Error());
}
}

public void Dispose()
{
if (close(fd) == -1)
{
throw new IOException("close failed", Marshal.GetLastWin32Error());
}
}
}
}
}
110 changes: 110 additions & 0 deletions src/Lucene.Net/Support/IO/WindowsFsyncSupport.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
using System;
using System.IO;
using System.Runtime.InteropServices;

namespace Lucene.Net.Support.IO
{
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

public static class WindowsFsyncSupport
{
// https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-createfilew
[DllImport("kernel32.dll", CharSet = CharSet.Unicode, SetLastError = true)]
private static extern IntPtr CreateFileW(
string lpFileName,
uint dwDesiredAccess,
uint dwShareMode,
IntPtr lpSecurityAttributes,
uint dwCreationDisposition,
uint dwFlagsAndAttributes,
IntPtr hTemplateFile
);

// https://learn.microsoft.com/en-us/windows/win32/api/fileapi/nf-fileapi-flushfilebuffers
[DllImport("kernel32.dll", SetLastError = true)]
private static extern bool FlushFileBuffers(IntPtr hFile);

// https://learn.microsoft.com/en-us/windows/win32/api/handleapi/nf-handleapi-closehandle
[DllImport("kernel32.dll", SetLastError = true)]
private static extern bool CloseHandle(IntPtr hObject);

private static readonly IntPtr INVALID_HANDLE_VALUE = new IntPtr(-1);

private const int ERROR_ACCESS_DENIED = 5;

// https://learn.microsoft.com/en-us/windows/win32/secauthz/generic-access-rights
private const int GENERIC_WRITE = 0x40000000;

private const int FILE_SHARE_READ = 0x00000001;
private const int FILE_SHARE_WRITE = 0x00000002;
private const int FILE_SHARE_DELETE = 0x00000004;
private const int FILE_FLAG_BACKUP_SEMANTICS = 0x02000000;
private const int OPEN_EXISTING = 3;

public static void Fsync(string path, bool isDir)
{
using HandleWrapper handle = new HandleWrapper(path, isDir);
handle.Flush();
}

private readonly ref struct HandleWrapper
{
private readonly IntPtr handle;

public HandleWrapper(string path, bool isDir)
{
handle = CreateFileW(path,
GENERIC_WRITE,
FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
IntPtr.Zero,
OPEN_EXISTING,
(uint)(isDir ? FILE_FLAG_BACKUP_SEMANTICS : 0), // FILE_FLAG_BACKUP_SEMANTICS required to open a directory
IntPtr.Zero);

if (handle == INVALID_HANDLE_VALUE)
{
int error = Marshal.GetLastWin32Error();
throw new IOException($"Unable to open directory, error: 0x{error:x8}", error);
}
}

public void Flush()
{
if (!FlushFileBuffers(handle))
{
int error = Marshal.GetLastWin32Error();

if (error != ERROR_ACCESS_DENIED)
{
// swallow ERROR_ACCESS_DENIED like in OpenJDK
throw new IOException($"FlushFileBuffers failed, error: 0x{error:x8}", error);
}
}
}

public void Dispose()
{
if (!CloseHandle(handle))
{
int error = Marshal.GetLastWin32Error();
throw new IOException($"CloseHandle failed, error: 0x{error:x8}", error);
}
}
}
}
}
Loading

0 comments on commit b2b8cdc

Please sign in to comment.