Skip to content

Commit

Permalink
Avoid unnecessary FileInfo/DirectoryInfo allocations, #832
Browse files Browse the repository at this point in the history
  • Loading branch information
paulirwin committed Jan 18, 2025
1 parent dcfa0e2 commit 81d36cb
Show file tree
Hide file tree
Showing 37 changed files with 477 additions and 242 deletions.
2 changes: 2 additions & 0 deletions Lucene.Net.sln.DotSettings
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@
<s:Boolean x:Key="/Default/UserDictionary/Words/=Coord/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=csharpsquid/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=LUCENENET/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=stopword/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=stopwords/@EntryIndexedValue">True</s:Boolean>
<s:Boolean x:Key="/Default/UserDictionary/Words/=testsettings/@EntryIndexedValue">True</s:Boolean></wpf:ResourceDictionary>
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,7 @@ protected virtual string UnpackValues(int k)
/// <param name="filename"> the filename </param>
/// <exception cref="IOException"> In case the parsing fails </exception>
public virtual void LoadPatterns(string filename)
{
LoadPatterns(filename, Encoding.UTF8);
}
=> LoadPatterns(filename, Encoding.UTF8);

/// <summary>
/// Read hyphenation patterns from an XML file.
Expand All @@ -149,9 +147,7 @@ public virtual void LoadPatterns(string filename, Encoding encoding)
/// <param name="f"> a <see cref="FileInfo"/> object representing the file </param>
/// <exception cref="IOException"> In case the parsing fails </exception>
public virtual void LoadPatterns(FileInfo f)
{
LoadPatterns(f, Encoding.UTF8);
}
=> LoadPatterns(f.FullName, Encoding.UTF8);

/// <summary>
/// Read hyphenation patterns from an XML file.
Expand All @@ -160,20 +156,15 @@ public virtual void LoadPatterns(FileInfo f)
/// <param name="encoding">The character encoding to use</param>
/// <exception cref="IOException"> In case the parsing fails </exception>
public virtual void LoadPatterns(FileInfo f, Encoding encoding)
{
var src = new FileStream(f.FullName, FileMode.Open, FileAccess.Read);
LoadPatterns(src, encoding);
}
=> LoadPatterns(f.FullName, encoding);

/// <summary>
/// Read hyphenation patterns from an XML file.
/// </summary>
/// <param name="source"> <see cref="Stream"/> input source for the file </param>
/// <exception cref="IOException"> In case the parsing fails </exception>
public virtual void LoadPatterns(Stream source)
{
LoadPatterns(source, Encoding.UTF8);
}
=> LoadPatterns(source, Encoding.UTF8);

/// <summary>
/// Read hyphenation patterns from an XML file.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,7 @@ public virtual IPatternConsumer Consumer
/// <param name="path">The complete file path to be read.</param>
/// <exception cref="IOException"> In case of an exception while parsing </exception>
public virtual void Parse(string path)
{
Parse(path, Encoding.UTF8);
}
=> Parse(path, Encoding.UTF8);

/// <summary>
/// Parses a hyphenation pattern file.
Expand All @@ -103,9 +101,7 @@ public virtual void Parse(string path, Encoding encoding)
/// <param name="file"> a <see cref="FileInfo"/> object representing the file </param>
/// <exception cref="IOException"> In case of an exception while parsing </exception>
public virtual void Parse(FileInfo file)
{
Parse(file, Encoding.UTF8);
}
=> Parse(file.FullName, Encoding.UTF8);

/// <summary>
/// Parses a hyphenation pattern file.
Expand All @@ -114,12 +110,7 @@ public virtual void Parse(FileInfo file)
/// <param name="encoding">The character encoding to use</param>
/// <exception cref="IOException"> In case of an exception while parsing </exception>
public virtual void Parse(FileInfo file, Encoding encoding)
{
var xmlReaderSettings = GetXmlReaderSettings();

using var src = XmlReader.Create(new StreamReader(file.OpenRead(), encoding), xmlReaderSettings);
Parse(src);
}
=> Parse(file.FullName, encoding);

/// <summary>
/// Parses a hyphenation pattern file.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public class HyphenationCompoundWordTokenFilter : CompoundWordTokenFilterBase
private readonly HyphenationTree hyphenator;

/// <summary>
/// Creates a new <see cref="HyphenationCompoundWordTokenFilter"/> instance.
/// Creates a new <see cref="HyphenationCompoundWordTokenFilter"/> instance.
/// </summary>
/// <param name="matchVersion">
/// Lucene version to enable correct Unicode 4.0 behavior in the
Expand All @@ -59,9 +59,9 @@ public class HyphenationCompoundWordTokenFilter : CompoundWordTokenFilterBase
/// the hyphenation pattern tree to use for hyphenation </param>
/// <param name="dictionary">
/// the word dictionary to match against. </param>
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
HyphenationTree hyphenator, CharArraySet dictionary)
: this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
: this(matchVersion, input, hyphenator, dictionary, DEFAULT_MIN_WORD_SIZE,
DEFAULT_MIN_SUBWORD_SIZE, DEFAULT_MAX_SUBWORD_SIZE, false)
{
}
Expand All @@ -88,10 +88,10 @@ public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStrea
/// only subwords shorter than this get to the output stream </param>
/// <param name="onlyLongestMatch">
/// Add only the longest matching subword to the stream </param>
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, int minSubwordSize,
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
HyphenationTree hyphenator, CharArraySet dictionary, int minWordSize, int minSubwordSize,
int maxSubwordSize, bool onlyLongestMatch)
: base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
: base(matchVersion, input, dictionary, minWordSize, minSubwordSize, maxSubwordSize,
onlyLongestMatch)
{
this.hyphenator = hyphenator;
Expand All @@ -103,10 +103,10 @@ public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStrea
/// Calls <see cref="HyphenationCompoundWordTokenFilter.HyphenationCompoundWordTokenFilter(LuceneVersion, TokenStream, HyphenationTree, CharArraySet, int, int, int, bool)"/>
/// </para>
/// </summary>
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
HyphenationTree hyphenator, int minWordSize, int minSubwordSize,
int maxSubwordSize)
: this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize,
: this(matchVersion, input, hyphenator, null, minWordSize, minSubwordSize,
maxSubwordSize, false)
{
}
Expand All @@ -117,9 +117,9 @@ public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStrea
/// Calls <see cref="HyphenationCompoundWordTokenFilter.HyphenationCompoundWordTokenFilter(LuceneVersion, TokenStream, HyphenationTree, int, int, int)"/>
/// </para>
/// </summary>
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStream input,
HyphenationTree hyphenator)
: this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE,
: this(matchVersion, input, hyphenator, DEFAULT_MIN_WORD_SIZE, DEFAULT_MIN_SUBWORD_SIZE,
DEFAULT_MAX_SUBWORD_SIZE)
{
}
Expand All @@ -131,9 +131,7 @@ public HyphenationCompoundWordTokenFilter(LuceneVersion matchVersion, TokenStrea
/// <returns> An object representing the hyphenation patterns </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public static HyphenationTree GetHyphenationTree(string hyphenationFilename)
{
return GetHyphenationTree(hyphenationFilename, Encoding.UTF8);
}
=> GetHyphenationTree(hyphenationFilename, Encoding.UTF8);

/// <summary>
/// Create a hyphenator tree
Expand All @@ -143,9 +141,7 @@ public static HyphenationTree GetHyphenationTree(string hyphenationFilename)
/// <returns> An object representing the hyphenation patterns </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public static HyphenationTree GetHyphenationTree(string hyphenationFilename, Encoding encoding)
{
return GetHyphenationTree(new FileStream(hyphenationFilename, FileMode.Open, FileAccess.Read), encoding);
}
=> GetHyphenationTree(new FileStream(hyphenationFilename, FileMode.Open, FileAccess.Read), encoding);

/// <summary>
/// Create a hyphenator tree
Expand All @@ -154,9 +150,7 @@ public static HyphenationTree GetHyphenationTree(string hyphenationFilename, Enc
/// <returns> An object representing the hyphenation patterns </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile)
{
return GetHyphenationTree(hyphenationFile, Encoding.UTF8);
}
=> GetHyphenationTree(hyphenationFile.FullName, Encoding.UTF8);

/// <summary>
/// Create a hyphenator tree
Expand All @@ -166,9 +160,7 @@ public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile)
/// <returns> An object representing the hyphenation patterns </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile, Encoding encoding)
{
return GetHyphenationTree(new FileStream(hyphenationFile.FullName, FileMode.Open, FileAccess.Read), encoding);
}
=> GetHyphenationTree(hyphenationFile.FullName, encoding);

/// <summary>
/// Create a hyphenator tree
Expand All @@ -177,9 +169,7 @@ public static HyphenationTree GetHyphenationTree(FileInfo hyphenationFile, Encod
/// <returns> An object representing the hyphenation patterns </returns>
/// <exception cref="IOException"> If there is a low-level I/O error. </exception>
public static HyphenationTree GetHyphenationTree(Stream hyphenationSource)
{
return GetHyphenationTree(hyphenationSource, Encoding.UTF8);
}
=> GetHyphenationTree(hyphenationSource, Encoding.UTF8);

/// <summary>
/// Create a hyphenator tree
Expand Down Expand Up @@ -227,7 +217,7 @@ protected override void Decompose()
// that are longer than minPartSize
if (partLength < this.m_minSubwordSize)
{
// BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
// BOGUS/BROKEN/FUNKY/WACKO: somehow we have negative 'parts' according to the
// calculation above, and we rely upon minSubwordSize being >=0 to filter them out...
continue;
}
Expand Down Expand Up @@ -287,4 +277,4 @@ protected override void Decompose()
}
}
}
}
}
15 changes: 14 additions & 1 deletion src/Lucene.Net.Analysis.Common/Analysis/Core/StopAnalyzer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,19 @@ public StopAnalyzer(LuceneVersion matchVersion, CharArraySet stopWords)
{
}

/// <summary>
/// Builds an analyzer with the stop words loaded from the file with the given name.
/// </summary>
/// <seealso cref="WordlistLoader.GetWordSet(TextReader, LuceneVersion)"/>
/// <param name="matchVersion"> See <see cref="LuceneVersion"/> </param>
/// <param name="stopwordsFileName"> Name of the file to load stop words from </param>
/// <remarks>
/// The stop word set is obtained by calling <c>LoadStopwordSet(stopwordsFileName, matchVersion)</c>
/// and is then forwarded, together with <paramref name="matchVersion"/>, to another constructor of this class.
/// <para/>
/// LUCENENET: This overload takes a string file name to avoid allocating a <see cref="FileInfo"/> object.
/// </remarks>
public StopAnalyzer(LuceneVersion matchVersion, string stopwordsFileName)
: this(matchVersion, LoadStopwordSet(stopwordsFileName, matchVersion))
{
}

/// <summary>
/// Builds an analyzer with the stop words from the given file. </summary>
/// <seealso cref="WordlistLoader.GetWordSet(TextReader, LuceneVersion)"/>
Expand Down Expand Up @@ -111,4 +124,4 @@ protected internal override TokenStreamComponents CreateComponents(string fieldN
return new TokenStreamComponents(source, new StopFilter(m_matchVersion, source, m_stopwords));
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Lucene version compatibility level 4.8.1
using System;
using System.IO;
#nullable enable

namespace Lucene.Net.Analysis.Util
{
Expand All @@ -25,20 +26,20 @@ namespace Lucene.Net.Analysis.Util
/// Simple <see cref="IResourceLoader"/> that opens resource files
/// from the local file system, optionally resolving against
/// a base directory.
///
///
/// <para>This loader wraps a delegate <see cref="IResourceLoader"/>
/// that is used to resolve all files that the current base directory
/// does not contain. <see cref="NewInstance"/> is always resolved
/// against the delegate, as an <see cref="T:System.Reflection.Assembly"/> is needed.
///
///
/// </para>
/// <para>You can chain several <see cref="FilesystemResourceLoader"/>s
/// to allow lookup of files in more than one base directory.
/// </para>
/// </summary>
public sealed class FilesystemResourceLoader : IResourceLoader
{
private readonly DirectoryInfo baseDirectory;
private readonly string? baseDirectory; // LUCENENET specific: changed to use string directory name instead of allocating a DirectoryInfo (#832)
private readonly IResourceLoader @delegate;

/// <summary>
Expand All @@ -47,7 +48,7 @@ public sealed class FilesystemResourceLoader : IResourceLoader
/// are delegated to context classloader.
/// </summary>
public FilesystemResourceLoader()
: this((DirectoryInfo)null)
: this((string?)null)
{
}

Expand All @@ -57,22 +58,44 @@ public FilesystemResourceLoader()
/// Files not found in file system and class lookups are delegated to context
/// classloader.
/// </summary>
public FilesystemResourceLoader(DirectoryInfo baseDirectory)
public FilesystemResourceLoader(string? baseDirectory)
: this(baseDirectory, new ClasspathResourceLoader(typeof(FilesystemResourceLoader)))
{
}

/// <summary>
/// Creates a resource loader that resolves resources against the given
/// base directory (may be <c>null</c> to refer to CWD).
/// Files not found in the file system and class lookups are delegated to a
/// <see cref="ClasspathResourceLoader"/> created for the <see cref="FilesystemResourceLoader"/> type.
/// </summary>
/// <param name="baseDirectory">
/// The base directory, or <c>null</c>. Only its <see cref="FileSystemInfo.FullName"/> is
/// forwarded to the string-based constructor overload.
/// </param>
public FilesystemResourceLoader(DirectoryInfo? baseDirectory)
: this(baseDirectory?.FullName, new ClasspathResourceLoader(typeof(FilesystemResourceLoader)))
{
}

/// <summary>
/// Creates a resource loader that resolves resources against the given
/// base directory (may be <c>null</c> to refer to CWD).
/// Files not found in the file system and class lookups are delegated
/// to the given delegate <see cref="IResourceLoader"/>.
/// </summary>
/// <param name="baseDirectory">
/// The base directory, or <c>null</c>. Only its <see cref="FileSystemInfo.FullName"/> is
/// forwarded to the string-based constructor overload.
/// </param>
/// <param name="delegate">
/// The <see cref="IResourceLoader"/> passed through unchanged to the string-based constructor overload.
/// </param>
public FilesystemResourceLoader(DirectoryInfo? baseDirectory, IResourceLoader @delegate)
: this(baseDirectory?.FullName, @delegate)
{
}

/// <summary>
/// Creates a resource loader that resolves resources against the given
/// base directory (may be <c>null</c> to refer to CWD).
/// Files not found in file system and class lookups are delegated
/// to the given delegate <see cref="IResourceLoader"/>.
/// </summary>
public FilesystemResourceLoader(DirectoryInfo baseDirectory, IResourceLoader @delegate)
public FilesystemResourceLoader(string? baseDirectory, IResourceLoader @delegate)
{
// LUCENENET NOTE: If you call DirectoryInfo.Create() it doesn't set the DirectoryInfo.Exists
// flag to true, so we use the Directory object to check the path explicitly.
if (baseDirectory is not null && !Directory.Exists(baseDirectory.FullName))
if (baseDirectory is not null && !Directory.Exists(baseDirectory))
{
throw new ArgumentException("baseDirectory is not a directory or is null");
}
Expand All @@ -89,35 +112,35 @@ public Stream OpenResource(string resource)
{
try
{
FileInfo file = null;
string? file = null; // LUCENENET specific: changed to use string file name instead of allocating a FileInfo (#832)

// First try absolute.
if (File.Exists(resource))
{
file = new FileInfo(resource);
file = resource;
}
else
{
// Try as a relative path
var fullPath = System.IO.Path.GetFullPath(resource);
if (File.Exists(fullPath))
{
file = new FileInfo(fullPath);
file = fullPath;
}
else if (baseDirectory != null)
{
// Try to combine with the base directory
string based = System.IO.Path.Combine(baseDirectory.FullName, resource);
string based = System.IO.Path.Combine(baseDirectory, resource);
if (File.Exists(based))
{
file = new FileInfo(based);
file = based;
}
}
}

if (file != null)
{
return file.OpenRead();
return new FileStream(file, FileMode.Open, FileAccess.Read, FileShare.Read);
}

// Fallback on the inner resource loader (this could fail)
Expand All @@ -139,4 +162,4 @@ public Type FindType(string cname)
return @delegate.FindType(cname);
}
}
}
}
Loading

0 comments on commit 81d36cb

Please sign in to comment.