Skip to content

Commit

Permalink
Add back Dispose, make TokenStreamComponents disposable via Analyzer …
Browse files Browse the repository at this point in the history
…stored value
  • Loading branch information
paulirwin committed Dec 5, 2024
1 parent c95a56a commit d3e98c7
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 14 deletions.
65 changes: 56 additions & 9 deletions src/Lucene.Net/Analysis/Analyzer.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Lucene.Net.Util;
using Lucene.Net.Index;
using Lucene.Net.Util;
using System;
using System.Collections.Generic;
using System.IO;
Expand Down Expand Up @@ -314,7 +315,7 @@ public TokenStream GetTokenStream(string fieldName, string text)
/// The default implementation returns <paramref name="reader"/>
/// unchanged.
/// </summary>
/// <param name="fieldName"> <see cref="Index.IIndexableField"/> name being indexed </param>
/// <param name="fieldName"> <see cref="IIndexableField"/> name being indexed </param>
/// <param name="reader"> original <see cref="TextReader"/> </param>
/// <returns> reader, optionally decorated with <see cref="CharFilter"/>(s) </returns>
protected internal virtual TextReader InitReader(string fieldName, TextReader reader)
Expand All @@ -323,16 +324,16 @@ protected internal virtual TextReader InitReader(string fieldName, TextReader re
}

/// <summary>
/// Invoked before indexing a <see cref="Index.IIndexableField"/> instance if
/// Invoked before indexing a <see cref="IIndexableField"/> instance if
/// terms have already been added to that field. This allows custom
/// analyzers to place an automatic position increment gap between
/// <see cref="Index.IIndexableField"/> instances using the same field name. The default value
/// <see cref="IIndexableField"/> instances using the same field name. The default
/// position increment gap is 0. With a 0 position increment gap and
/// the typical default token position increment of 1, all terms in a field,
/// including across <see cref="Index.IIndexableField"/> instances, are in successive positions, allowing
/// exact <see cref="Search.PhraseQuery"/> matches, for instance, across <see cref="Index.IIndexableField"/> instance boundaries.
/// including across <see cref="IIndexableField"/> instances, are in successive positions, allowing
/// exact <see cref="Search.PhraseQuery"/> matches, for instance, across <see cref="IIndexableField"/> instance boundaries.
/// </summary>
/// <param name="fieldName"> <see cref="Index.IIndexableField"/> name being indexed. </param>
/// <param name="fieldName"> <see cref="IIndexableField"/> name being indexed. </param>
/// <returns> position increment gap, added to the next token emitted from <see cref="GetTokenStream(string, TextReader)"/>.
/// This value must be <c>&gt;= 0</c>.</returns>
public virtual int GetPositionIncrementGap(string fieldName)
Expand Down Expand Up @@ -459,11 +460,28 @@ public override void SetReusableComponents(Analyzer analyzer, string fieldName,
if (componentsPerField is null)
{
// LUCENENET-615: This needs to support nullable keys
componentsPerField = new JCG.Dictionary<string, TokenStreamComponents>();
componentsPerField = new TokenStreamComponentsDictionary();
SetStoredValue(analyzer, componentsPerField);
}
componentsPerField[fieldName] = components;
}

/// <summary>
/// A dictionary that supports disposing of the values when the dictionary is disposed.
/// </summary>
/// <remarks>
/// Disposal visits every entry: each non-null value is disposed even if an earlier
/// value's <see cref="IDisposable.Dispose()"/> throws, the dictionary is always cleared
/// afterwards, and the first exception encountered (if any) is rethrown once all
/// entries have been processed.
/// </remarks>
/// <seealso cref="TokenStreamComponents"/>
private class TokenStreamComponentsDictionary
    : JCG.Dictionary<string, TokenStreamComponents>, IDisposable
{
    /// <summary>
    /// Disposes every non-null <see cref="TokenStreamComponents"/> value and then
    /// removes all entries from the dictionary.
    /// </summary>
    public void Dispose()
    {
        Exception firstFailure = null;

        foreach (var kvp in this)
        {
            try
            {
                kvp.Value?.Dispose();
            }
            catch (Exception ex)
            {
                // Keep iterating so that one faulty component cannot prevent the
                // remaining ones from being disposed; only the first failure is kept.
                if (firstFailure is null)
                {
                    firstFailure = ex;
                }
            }
        }

        // Always drop the (now disposed) components, even when a dispose call failed.
        Clear();

        if (!(firstFailure is null))
        {
            // Rethrow without resetting the original stack trace.
            System.Runtime.ExceptionServices.ExceptionDispatchInfo.Capture(firstFailure).Throw();
        }
    }
}
}

/// <summary>
Expand Down Expand Up @@ -508,7 +526,17 @@ protected internal override TextReader InitReader(string fieldName, TextReader r
/// <see cref="Analysis.TokenStream"/> returned by
/// <see cref="Analyzer.GetTokenStream(string, TextReader)"/>.
/// </summary>
public class TokenStreamComponents
/// <remarks>
/// LUCENENET: This class implements IDisposable so that any TokenStream implementations
/// that need to be disposed are disposed when the Analyzer that stores this in its
/// stored value is disposed.
/// <para />
/// Because it's impossible to know if the <see cref="TokenStream"/> would dispose of the <see cref="Tokenizer"/>,
/// this class calls <see cref="IDisposable.Dispose()"/> on both if they are not reference equal.
/// Implementations of <see cref="TokenStream.Dispose(bool)"/> should be careful to make their
/// code idempotent so that calling <see cref="TokenStream.Dispose()"/> multiple times has no effect.
/// </remarks>
public class TokenStreamComponents : IDisposable
{
/// <summary>
/// Original source of the tokens.
Expand Down Expand Up @@ -573,6 +601,25 @@ protected internal virtual void SetReader(TextReader reader)
/// </summary>
/// <returns> Component's <see cref="Analysis.Tokenizer"/> </returns>
public virtual Tokenizer Tokenizer
{
    // Exposes the tokenizer held in m_source.
    get { return m_source; }
}

/// <summary>
/// Disposes of the <see cref="Tokenizer"/> and <see cref="TokenStream"/>.
/// </summary>
/// <remarks>
/// LUCENENET specific: see remarks on the <see cref="TokenStreamComponents"/> class.
/// <para/>
/// Each step runs even if a previous one throws, so a failing tokenizer cannot
/// prevent the sink or the <c>reusableStringReader</c> field from being disposed.
/// If more than one step throws, the exception from the last failing step propagates.
/// </remarks>
public void Dispose()
{
    try
    {
        m_source?.Dispose();
    }
    finally
    {
        try
        {
            // When source and sink are the same instance it has already been
            // disposed above; otherwise the sink is disposed separately.
            if (!ReferenceEquals(m_source, m_sink))
            {
                m_sink?.Dispose();
            }
        }
        finally
        {
            reusableStringReader?.Dispose();
            GC.SuppressFinalize(this);
        }
    }
}
}

/// <summary>
Expand Down
25 changes: 22 additions & 3 deletions src/Lucene.Net/Analysis/TokenStream.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
using Lucene.Net.Analysis.TokenAttributes;
using Lucene.Net.Index;
using Lucene.Net.Util;
using System;
using System.IO;
Expand Down Expand Up @@ -77,7 +78,7 @@ namespace Lucene.Net.Analysis
/// Therefore all non-abstract subclasses must be sealed or have at least a sealed
/// implementation of <see cref="IncrementToken()"/>! This is checked when assertions are enabled.
/// </summary>
public abstract class TokenStream : AttributeSource, ICloseable
public abstract class TokenStream : AttributeSource, ICloseable, IDisposable
{
/// <summary>
/// A <see cref="TokenStream"/> using the default attribute factory.
Expand Down Expand Up @@ -110,7 +111,7 @@ protected TokenStream(AttributeFactory factory)
}

/// <summary>
/// Consumers (i.e., <see cref="Index.IndexWriter"/>) use this method to advance the stream to
/// Consumers (i.e., <see cref="IndexWriter"/>) use this method to advance the stream to
/// the next token. Implementing classes must implement this method and update
/// the appropriate <see cref="Lucene.Net.Util.IAttribute"/>s with the attributes of the next
/// token.
Expand Down Expand Up @@ -187,10 +188,28 @@ public virtual void Reset()
/// </summary>
/// <remarks>
/// LUCENENET notes - this is intended to release resources in a way that allows the
/// object to be reused, so it is not the same as <see cref="IDisposable"/>.
/// object to be reused, so it is not the same as <see cref="Dispose()"/>.
/// </remarks>
public virtual void Close()
{
    // The base implementation does nothing; the method is virtual so that
    // subclasses can override it to release their own resources.
}

// LUCENENET specific - implementing proper dispose pattern
/// <summary>
/// Disposes this stream by calling <see cref="Dispose(bool)"/> with <c>true</c>
/// and then requesting that the finalizer for this instance be suppressed.
/// </summary>
public void Dispose()
{
    Dispose(true);
    GC.SuppressFinalize(this);
}

/// <summary>
/// Releases resources associated with this stream, in a way such that the stream is not reusable.
/// <para/>
/// If you override this method, always call <c>base.Dispose(disposing)</c>.
/// Also, ensure that your implementation is idempotent as it may be called multiple times.
/// <para/>
/// The base implementation is empty.
/// </summary>
/// <param name="disposing">
/// <c>true</c> when invoked from <see cref="Dispose()"/>. NOTE(review): no caller passing
/// <c>false</c> is visible here; presumably reserved for finalizer use per the usual
/// .NET dispose pattern - confirm.
/// </param>
/// <seealso cref="TokenStreamComponents.Dispose()"/>
protected virtual void Dispose(bool disposing)
{
}
}
}
19 changes: 17 additions & 2 deletions src/Lucene.Net/Util/CloseableThreadLocal.cs
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ namespace Lucene.Net.Util
/// <para/>
/// This class works around the issue by using an alternative approach than using <see cref="ThreadLocal{T}"/>.
/// It keeps track of each thread's local and global state in order to later optimize garbage collection.
/// A complete explanation can be found at
/// A complete explanation can be found at
/// <a href="https://ayende.com/blog/189793-A/the-design-and-implementation-of-a-better-threadlocal-t">
/// https://ayende.com/blog/189793-A/the-design-and-implementation-of-a-better-threadlocal-t</a>.
/// <para/>
Expand Down Expand Up @@ -169,6 +169,21 @@ public void Dispose()
if (copy is null)
return;

foreach (var value in copy.Values)
{
if (value is IDisposable disposable)
{
try
{
disposable.Dispose();
}
catch
{
// ignored
}
}
}

Interlocked.Increment(ref globalVersion);
_disposed = true;
_values = null;
Expand Down Expand Up @@ -298,4 +313,4 @@ private sealed class LocalState
public int localVersion;
}
}
}
}

0 comments on commit d3e98c7

Please sign in to comment.