Merge pull request #5539 from danieldresser-ie/resampleOpt2
Resample : Optimize inseparable case with no scaling
johnhaddon authored Nov 20, 2023
2 parents ef1b7fd + 217f1cc commit d4c707a
Showing 3 changed files with 133 additions and 8 deletions.
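As context for the diff below, here is a minimal sketch of the situation this commit optimizes, assuming the same plugs that the new test exercises ( the image path is a stand-in ) : a non-separable ( radial ) filter applied by Resample with no scaling, which is exactly where the new fast path applies.

import imath
import GafferImage

reader = GafferImage.ImageReader()
reader["fileName"].setValue( "/path/to/image.exr" )  # stand-in path

resample = GafferImage.Resample()
resample["in"].setInput( reader["out"] )
resample["filter"].setValue( "radial-lanczos3" )    # non-separable filter
resample["filterScale"].setValue( imath.V2f( 4 ) )  # blur, but no scaling
# With a unit ratio, the optimized single-pass code path is selected automatically.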
5 changes: 5 additions & 0 deletions Changes.md
@@ -6,6 +6,11 @@ Features

- Viewer : Added "Snapshot To Catalogue" command to the right-click menu of the 3D view.

Improvements
------------

- ImageTransform, Resample : Improved performance for non-separable filters without scaling, with 2-6x speedups in some benchmark cases.

Fixes
-----

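The changelog entry above also names ImageTransform, which applies its filtering via Resample; a translate-only transform involves no scaling, so a non-separable filter can take the same fast path. A hedged sketch follows; the "transform" and "filter" plug names are assumed from ImageTransform's usual interface and are not shown in this diff.

import imath
import GafferImage

reader = GafferImage.ImageReader()
reader["fileName"].setValue( "/path/to/image.exr" )  # stand-in path

transform = GafferImage.ImageTransform()
transform["in"].setInput( reader["out"] )
transform["filter"].setValue( "radial-lanczos3" )  # non-separable filter
# Translation only : no scaling, so the optimized Resample path can apply.
transform["transform"]["translate"].setValue( imath.V2f( 10.5, -3.25 ) )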
21 changes: 21 additions & 0 deletions python/GafferImageTest/ResampleTest.py
@@ -154,6 +154,27 @@ def __test( fileName, size, filter ) :
with self.subTest( fileName = args[0], size = args[1], filter = args[2] ):
__test( *args )

def testInseparableFastPath( self ) :

reader = GafferImage.ImageReader()
reader["fileName"].setValue( self.imagesPath() / "resamplePatterns.exr" )

# When applying an inseparable filter with no scaling, we can use a much faster code path.
# This code path should not have any effect on the result.
resampleFastPath = GafferImage.Resample()
resampleFastPath["in"].setInput( reader["out"] )
resampleFastPath["filterScale"].setValue( imath.V2f( 4 ) )
resampleFastPath["filter"].setValue( "radial-lanczos3" )

# Force the slow code path using the "debug" parameter
resampleReference = GafferImage.Resample()
resampleReference["in"].setInput( reader["out"] )
resampleReference["filterScale"].setValue( imath.V2f( 4 ) )
resampleReference["filter"].setValue( "radial-lanczos3" )
resampleReference["debug"].setValue( GafferImage.Resample.Debug.SinglePass )

self.assertImagesEqual( resampleFastPath["out"], resampleReference["out"] )

def testSincUpsize( self ) :

c = GafferImage.Constant()
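To get a feel for the "2-6x" figure quoted in Changes.md, one could time the two nodes constructed in testInseparableFastPath above against each other. This is only a rough sketch : it reuses resampleFastPath and resampleReference exactly as the test builds them, and it assumes GafferImage.ImageAlgo.image() as a convenient way to pull every tile, which is not part of this diff. Results will vary with image size and hardware.

import time
import GafferImage

def pullTime( plug ) :

	start = time.perf_counter()
	GafferImage.ImageAlgo.image( plug )  # forces every tile to be computed
	return time.perf_counter() - start

slow = pullTime( resampleReference["out"] )
fast = pullTime( resampleFastPath["out"] )
print( "speedup : %.1fx" % ( slow / fast ) )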
115 changes: 107 additions & 8 deletions src/GafferImage/Resample.cpp
@@ -64,10 +64,14 @@ enum Passes
{
Horizontal = 1,
Vertical = 2,
Both = Horizontal | Vertical
Both = Horizontal | Vertical,

// Special pass label when we must compute both passes in one, but there is no scaling.
// This allows a special code path which is up to 6X faster.
BothOptimized = Both | 4
};

unsigned requiredPasses( const Resample *resample, const ImagePlug *image, const OIIO::Filter2D *filter )
unsigned requiredPasses( const Resample *resample, const ImagePlug *image, const OIIO::Filter2D *filter, V2f &ratio )
{
int debug = resample->debugPlug()->getValue();
if( debug == Resample::HorizontalPass )
@@ -76,12 +80,24 @@ unsigned requiredPasses( const Resample *resample, const ImagePlug *image, const
}
else if( debug == Resample::SinglePass )
{
// For a SinglePass debug mode, we always use Both.
// Note that we don't use the optimized pass here, even if the ratio is 1 - we want debug to always
// use the same path.
return Horizontal | Vertical;
}

if( image == image->parent<ImageNode>()->outPlug() )
{
return filter->separable() ? Vertical : Both;
if( filter->separable() )
{
return Vertical;
}
else
{
// The filter isn't separable, so we must process everything at once. If the ratio has no
// scaling though, we can use the optimized path.
return ( ratio == V2f( 1.0 ) ) ? BothOptimized : Both;
}
}
return Horizontal;
}
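As a reading aid, the pass selection above can be paraphrased in Python as below. This is purely illustrative and not Gaffer API; the body of the HorizontalPass branch is collapsed in this diff, so its return value here is an assumption.

HORIZONTAL = 1
VERTICAL = 2
BOTH = HORIZONTAL | VERTICAL
BOTH_OPTIMIZED = BOTH | 4  # extra bit marks the no-scaling fast path

def requiredPasses( debug, isOutputPlug, separable, ratio ) :

	if debug == "HorizontalPass" :
		return HORIZONTAL  # assumed : the branch body is collapsed above
	elif debug == "SinglePass" :
		# Debug always forces the unoptimized combined pass, even with a unit ratio.
		return BOTH

	if isOutputPlug :
		if separable :
			return VERTICAL
		# Inseparable : everything in one pass; with no scaling, take the fast path.
		return BOTH_OPTIMIZED if ratio == ( 1.0, 1.0 ) else BOTH

	return HORIZONTAL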
@@ -193,7 +209,7 @@ const OIIO::Filter2D *filterAndScale( const std::string &name, V2f ratio, V2f &i
/// only computed once and then reused. At the time of writing, profiles indicate that
/// accessing pixels via the Sampler is the main bottleneck, but once that is optimised
/// perhaps cached filter weights could have a benefit.
void filterWeights( const OIIO::Filter2D *filter, const float inputFilterScale, const float filterRadius, const int x, const float ratio, const float offset, Passes pass, std::vector<int> &supportRanges, std::vector<float> &weights )
void filterWeights1D( const OIIO::Filter2D *filter, const float inputFilterScale, const float filterRadius, const int x, const float ratio, const float offset, Passes pass, std::vector<int> &supportRanges, std::vector<float> &weights )
{
weights.reserve( ( 2 * ceilf( filterRadius ) + 1 ) * ImagePlug::tileSize() );
supportRanges.reserve( 2 * ImagePlug::tileSize() );
@@ -221,6 +237,42 @@ void filterWeights( const OIIO::Filter2D *filter, const float inputFilterScale,
}
}

// For the inseparable case, we can't always reuse the weights for an adjacent row or column.
// For many scaling factors the ratio can be expressed as a fraction, so the required weights
// would repeat after a certain number of pixels, and we could compute weights for a limited
// section of pixels and reuse them in a tiling way. That's a bit complicated though, so we
// currently handle only the simplest ( and common ) case : if there is no scaling, we only
// need the weights for one pixel, and can reuse them for every pixel. This means we don't
// loop over output pixels at all here - we compute the weights for a single output pixel and
// return a single 2D support for it, which is simply shifted for each adjacent pixel ( a
// standalone sketch of this appears after this diff ).
void filterWeights2D( const OIIO::Filter2D *filter, const V2f inputFilterScale, const V2f filterRadius, const V2i p, const V2f offset, Box2i &support, std::vector<float> &weights )
{
weights.reserve( ( 2 * ceilf( filterRadius.x ) + 1 ) * ( 2 * ceilf( filterRadius.y ) + 1 ) );

const V2f filterCoordinateMult( 1.0f / inputFilterScale.x, 1.0f / inputFilterScale.y );

// input pixel position (floating point)
V2f i = V2f( p ) + V2f( 0.5 ) + offset;

support = Box2i(
V2i( ceilf( i.x - 0.5f - filterRadius.x ), ceilf( i.y - 0.5f - filterRadius.y ) ),
V2i( floorf( i.x + 0.5f + filterRadius.x ), floorf( i.y + 0.5f + filterRadius.y ) )
);

for( int fY = support.min.y; fY < support.max.y; ++fY )
{
const float fy = filterCoordinateMult.y * ( float( fY ) + 0.5f - i.y );
for( int fX = support.min.x; fX < support.max.x; ++fX )
{
const float fx = filterCoordinateMult.x * ( float( fX ) + 0.5f - i.x );
const float w = (*filter)( fx, fy );
weights.push_back( w );
}
}
}

Box2f transform( const Box2f &b, const M33f &m )
{
if( b.isEmpty() )
@@ -477,7 +529,7 @@ void Resample::hashChannelData( const GafferImage::ImagePlug *parent, const Gaff

filterPlug()->hash( h );

const unsigned passes = requiredPasses( this, parent, filter );
const unsigned passes = requiredPasses( this, parent, filter, ratio );
if( passes & Horizontal )
{
h.append( inputFilterScale.x );
@@ -491,6 +543,12 @@ void Resample::hashChannelData( const GafferImage::ImagePlug *parent, const Gaff
h.append( offset.y );
}

if( passes == BothOptimized )
{
// Append an extra flag so our hash reflects that we are going to take the optimized path
h.append( true );
}

const V2i tileOrigin = context->get<V2i>( ImagePlug::tileOriginContextName );
Sampler sampler(
passes == Vertical ? horizontalPassPlug() : inPlug(),
@@ -518,7 +576,7 @@ IECore::ConstFloatVectorDataPtr Resample::computeChannelData( const std::string
const OIIO::Filter2D *filter = filterAndScale( filterPlug()->getValue(), ratio, inputFilterScale );
inputFilterScale *= filterScalePlug()->getValue();

const unsigned passes = requiredPasses( this, parent, filter );
const unsigned passes = requiredPasses( this, parent, filter, ratio );

Sampler sampler(
passes == Vertical ? horizontalPassPlug() : inPlug(),
@@ -588,6 +646,47 @@ IECore::ConstFloatVectorDataPtr Resample::computeChannelData( const std::string
}
}
}
else if( passes == BothOptimized )
{
Box2i support;
std::vector<float> weights;
filterWeights2D( filter, inputFilterScale, filterRadius, tileBound.min, offset, support, weights );

V2i oP; // output pixel position
V2i supportOffset;
for( oP.y = tileBound.min.y; oP.y < tileBound.max.y; ++oP.y )
{
supportOffset.y = oP.y - tileBound.min.y;

for( oP.x = tileBound.min.x; oP.x < tileBound.max.x; ++oP.x )
{
Canceller::check( context->canceller() );

supportOffset.x = oP.x - tileBound.min.x;
std::vector<float>::const_iterator wIt = weights.begin();

float v = 0.0f;
float totalW = 0.0f;
sampler.visitPixels(
Imath::Box2i( support.min + supportOffset, support.max + supportOffset ),
[&wIt, &v, &totalW]( float cur, int x, int y )
{
const float w = *wIt++;
v += w * cur;
totalW += w;
}
);

if( totalW != 0.0f )
{
*pIt = v / totalW;
}

++pIt;
}
			}
		}
else if( passes == Horizontal )
{
// When the filter is separable we can perform filtering in two
Expand All @@ -600,7 +699,7 @@ IECore::ConstFloatVectorDataPtr Resample::computeChannelData( const std::string
// we precompute the weights now to avoid repeating work later.
std::vector<int> supportRanges;
std::vector<float> weights;
filterWeights( filter, inputFilterScale.x, filterRadius.x, tileBound.min.x, ratio.x, offset.x, Horizontal, supportRanges, weights );
filterWeights1D( filter, inputFilterScale.x, filterRadius.x, tileBound.min.x, ratio.x, offset.x, Horizontal, supportRanges, weights );

V2i oP; // output pixel position

@@ -646,7 +745,7 @@ IECore::ConstFloatVectorDataPtr Resample::computeChannelData( const std::string
// we precompute the weights now to avoid repeating work later.
std::vector<int> supportRanges;
std::vector<float> weights;
filterWeights( filter, inputFilterScale.y, filterRadius.y, tileBound.min.y, ratio.y, offset.y, Vertical, supportRanges, weights );
filterWeights1D( filter, inputFilterScale.y, filterRadius.y, tileBound.min.y, ratio.y, offset.y, Vertical, supportRanges, weights );

std::vector<int>::const_iterator supportIt = supportRanges.begin();
std::vector<float>::const_iterator rowWeightsIt = weights.begin();
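The comment on filterWeights2D above promises that, with no scaling, a single 2D weight table can serve every output pixel once it is shifted into place. The standalone Python sketch below models that idea together with the accumulation and normalisation performed in the BothOptimized branch of computeChannelData. It is illustrative only : the filter, sampler and tile bounds are stand-ins, offset and inputFilterScale are omitted, and none of it is Gaffer API.

import math

def filterWeights2D( filt, filterRadius, px, py ) :

	# Support box and weights for a single output pixel, mirroring the C++ above.
	ix, iy = px + 0.5, py + 0.5
	supportMin = ( math.ceil( ix - 0.5 - filterRadius ), math.ceil( iy - 0.5 - filterRadius ) )
	supportMax = ( math.floor( ix + 0.5 + filterRadius ), math.floor( iy + 0.5 + filterRadius ) )
	weights = [
		filt( fx + 0.5 - ix, fy + 0.5 - iy )
		for fy in range( supportMin[1], supportMax[1] )
		for fx in range( supportMin[0], supportMax[0] )
	]
	return supportMin, supportMax, weights

def resampleTileNoScaling( sample, filt, filterRadius, tileMin, tileSize ) :

	# One weight table for the whole tile; only the support box moves per pixel.
	supportMin, supportMax, weights = filterWeights2D( filt, filterRadius, tileMin[0], tileMin[1] )
	out = []
	for y in range( tileMin[1], tileMin[1] + tileSize ) :
		for x in range( tileMin[0], tileMin[0] + tileSize ) :
			dx, dy = x - tileMin[0], y - tileMin[1]
			v = 0.0
			totalW = 0.0
			wIt = iter( weights )
			for sy in range( supportMin[1] + dy, supportMax[1] + dy ) :
				for sx in range( supportMin[0] + dx, supportMax[0] + dx ) :
					w = next( wIt )
					v += w * sample( sx, sy )
					totalW += w
			# Normalise by the total weight, as the visitPixels lambda above does.
			out.append( v / totalW if totalW != 0.0 else 0.0 )
	return out

# Example : a box filter of radius 1.5 over a constant image leaves the values unchanged.
tile = resampleTileNoScaling(
	sample = lambda x, y : 1.0,
	filt = lambda fx, fy : 1.0 if max( abs( fx ), abs( fy ) ) <= 1.5 else 0.0,
	filterRadius = 1.5,
	tileMin = ( 0, 0 ),
	tileSize = 4,
)
assert all( abs( v - 1.0 ) < 1e-6 for v in tile )

As in the C++, the weights are evaluated once for the tile's minimum pixel and only the support window is offset per pixel, which is what removes the per-pixel filter evaluations of the generic Both path.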
