Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Add two feature sampling methods for graphs #337

Open
wants to merge 17 commits into
base: staging
Choose a base branch
from
13 changes: 10 additions & 3 deletions Python/rerf/rerfClassifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ class rerfClassifier(BaseEstimator, ClassifierMixin):
The random combination of features to use: either "RerF", "Base", or
"S-RerF". "RerF" randomly combines features for each `mtry`. Base
is our implementation of Random Forest. "S-RerF" is structured RerF,
combining multiple features together in random patches.
combining multiple features together in random patches. "Graph-Node-MORF"
and "Graph-Edge-MORF" are MORF variants for graph-valued data.
See Tomita et al. (2016) [#Tomita]_ for further details.
n_estimators : int, optional (default: 500)
Number of trees in forest.
Expand Down Expand Up @@ -230,7 +231,7 @@ def fit(self, X, y):
else:
forestType = "binnedBaseTern"
self.method_to_use_ = 1
elif self.projection_matrix == "S-RerF":
elif self.projection_matrix in ["S-RerF", "Graph-Node-MORF", "Graph-Edge-MORF"]:
if self.oob_score:
warn(
"OOB is not currently implemented for the S-RerF"
Expand All @@ -241,7 +242,13 @@ def fit(self, X, y):
self.oob_score = False

forestType = "binnedBaseTern" # this should change
self.method_to_use_ = 2
if self.projection_matrix == "S-RerF":
self.method_to_use_ = 2
elif self.projection_matrix == "Graph-Node-MORF":
self.method_to_use_ = 3
elif self.projection_matrix == "Graph-Edge-MORF":
self.method_to_use_ = 4

# Check that image_height and image_width are divisors of
# the num_features. This is the most we can do to
# prevent an invalid value being passed in.
Expand Down
145 changes: 144 additions & 1 deletion packedForest/src/forestTypes/binnedTree/processingNodeBin.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <algorithm>
#include <limits>
#include <random>
#include <numeric>

namespace fp{

Expand Down Expand Up @@ -81,7 +82,7 @@ namespace fp{
inline void calcMtryForNode(std::vector<weightedFeature>& featuresToTry){
featuresToTry.resize(fpSingleton::getSingleton().returnMtry());
int methodToUse = fpSingleton::getSingleton().returnMethodToUse();
assert(methodToUse == 1 || methodToUse == 2);
assert(methodToUse == 1 || methodToUse == 2 || methodToUse == 3 || methodToUse == 4);

switch(methodToUse){
case 1:{
Expand All @@ -92,6 +93,14 @@ namespace fp{
randMatImagePatch(featuresToTry, paramsRandMatImagePatch());
break;
}
case 3:{
randMatGraphNodePatch(featuresToTry, paramsRandMatGraphNodePatch());
break;
}
case 4:{
randMatGraphEdgePatch(featuresToTry, paramsRandMatGraphEdgePatch());
break;
}
}
}

Expand Down Expand Up @@ -175,6 +184,140 @@ namespace fp{
}
} // END randMatStructured

/**
 * Draws, for each of the mtry candidate projections, how many graph nodes
 * the node-patch sampler should pick.
 *
 * The patch-height bounds stored in the singleton are reused as the lower
 * and upper limits on the node count.
 *
 * @return A vector of length mtry whose k-th entry is
 *         randNum->gen(patchHeightMax - patchHeightMin + 1) + patchHeightMin.
 */
inline std::vector<int> paramsRandMatGraphNodePatch()
{
    const int mtry = fpSingleton::getSingleton().returnMtry();

    // Use height as placeholder for number of nodes to sample
    const int patchHeightMax = fpSingleton::getSingleton().returnPatchHeightMax();
    const int patchHeightMin = fpSingleton::getSingleton().returnPatchHeightMin();

    // An inverted range would hand gen() a non-positive bound.
    assert(patchHeightMax >= patchHeightMin);
    const int rangeSize = patchHeightMax - patchHeightMin + 1;

    // One node count per candidate projection.
    // The weight is currently hard-coded to 1 by the consumer of this vector.
    std::vector<int> numNodes(mtry);
    for (int &count : numNodes)
    {
        // Intended to sample from [patchHeightMin, patchHeightMax]
        // (presumably gen(n) yields a value in [0, n) -- TODO confirm).
        // NOTE: a count of 1 is possible when patchHeightMin <= 1; a single
        // node has no node pairs, so that candidate ends up with no features
        // in randMatGraphNodePatch.
        count = randNum->gen(rangeSize) + patchHeightMin;
    }

    return numNodes;
} // END paramsRandMatGraphNodePatch

/**
 * Fills every candidate projection with the pairwise features among a
 * random subset of graph nodes.
 *
 * For candidate k, up to numNodes[k] distinct node indices are drawn from
 * 0..imageWidth-1 by a partial in-place shuffle. Every pair (a, b) of drawn
 * nodes, with a drawn before b, contributes the feature index
 * a * imageWidth + b with weight 1.
 *
 * @param featuresToTry Output; featuresToTry[k] is appended to, for k in
 *                      [0, mtry). Must already hold at least mtry entries.
 * @param numNodes      Number of nodes to draw per candidate; size must be
 *                      mtry. Values above imageWidth are capped at
 *                      imageWidth, since only that many distinct nodes exist.
 */
inline void randMatGraphNodePatch(std::vector<weightedFeature> &featuresToTry, std::vector<int> numNodes)
{
    const int mtry = fpSingleton::getSingleton().returnMtry();
    assert(static_cast<int>(numNodes.size()) == mtry);

    // Preset parameters
    const int imageWidth = fpSingleton::getSingleton().returnImageWidth();

    // Allocated once and re-filled with 0..imageWidth-1 for each candidate.
    std::vector<int> subsample(imageWidth);

    for (int k = 0; k < mtry; k++)
    {
        std::iota(subsample.begin(), subsample.end(), 0);

        // Without this cap the shuffle below would index past the end of
        // subsample and pass a non-positive bound to gen().
        const int nodesToDraw = std::min(numNodes[k], imageWidth);

        // Sample w/o replacement: position i is swapped with a random
        // position in [i, imageWidth), so the first nodesToDraw slots end
        // up holding distinct node indices.
        for (int locationToMove = 0; locationToMove < nodesToDraw; locationToMove++)
        {
            const int randomPosition = randNum->gen(imageWidth - locationToMove) + locationToMove;
            std::swap(subsample[locationToMove], subsample[randomPosition]);
        }

        // Emit one feature per unordered pair of sampled nodes, addressed
        // as row * imageWidth + column.
        for (int i = 0; i < nodesToDraw; i++)
        {
            for (int j = i + 1; j < nodesToDraw; j++)
            {
                const int featureIndex = subsample[i] * imageWidth + subsample[j];
                featuresToTry[k].returnFeatures().push_back(featureIndex);
                featuresToTry[k].returnWeights().push_back(1);
            }
        }
    }
} // END randMatGraphNodePatch

/**
 * Draws, for each of the mtry candidate projections, which node to anchor
 * on and how many of that node's edges to sample.
 *
 * The patch-height bounds stored in the singleton are reused as the lower
 * and upper limits on the edge count.
 *
 * @return Two vectors of length mtry: element [0][k] is the anchor node,
 *         randNum->gen(imageHeight); element [1][k] is the edge count,
 *         randNum->gen(patchHeightMax - patchHeightMin + 1) + patchHeightMin.
 */
inline std::vector<std::vector<int>> paramsRandMatGraphEdgePatch()
{
    const int mtry = fpSingleton::getSingleton().returnMtry();

    // Preset parameters
    const int imageHeight = fpSingleton::getSingleton().returnImageHeight();

    // Use height as placeholder for number of edges to sample
    const int patchHeightMax = fpSingleton::getSingleton().returnPatchHeightMax();
    const int patchHeightMin = fpSingleton::getSingleton().returnPatchHeightMin();

    // An inverted range would hand gen() a non-positive bound.
    assert(patchHeightMax >= patchHeightMin);
    const int rangeSize = patchHeightMax - patchHeightMin + 1;

    // for each patch: < <whichNode>, <numEdges> >
    // The weight is currently hard-coded to 1 by the consumer of this table.
    std::vector<std::vector<int>> nodeNumEdges(2, std::vector<int>(mtry));

    // The draw order (node first, then edge count, per candidate) is kept so
    // the random stream is consumed exactly as before.
    for (int k = 0; k < mtry; k++)
    {
        nodeNumEdges[0][k] = randNum->gen(imageHeight);
        // Intended to sample from [patchHeightMin, patchHeightMax]
        // (presumably gen(n) yields a value in [0, n) -- TODO confirm).
        nodeNumEdges[1][k] = randNum->gen(rangeSize) + patchHeightMin;
    }

    return nodeNumEdges;
} // END paramsRandMatGraphEdgePatch

/**
 * Fills every candidate projection with a random subset of the edges
 * incident to one anchor node.
 *
 * For candidate k, up to nodeNumEdges[1][k] distinct column indices are
 * drawn from 0..imageWidth-1 by a partial in-place shuffle. Each drawn
 * column c contributes the feature index
 * nodeNumEdges[0][k] * imageWidth + c with weight 1.
 *
 * @param featuresToTry Output; featuresToTry[k] is appended to, for k in
 *                      [0, mtry). Must already hold at least mtry entries.
 * @param nodeNumEdges  Two rows of length mtry: row 0 holds the anchor node
 *                      per candidate, row 1 the number of edges to draw.
 *                      Edge counts above imageWidth are capped at
 *                      imageWidth, since only that many distinct columns
 *                      exist.
 */
inline void randMatGraphEdgePatch(std::vector<weightedFeature> &featuresToTry, std::vector<std::vector<int>> nodeNumEdges)
{
    const int mtry = fpSingleton::getSingleton().returnMtry();

    // The table is laid out as 2 x mtry, so the outer size is 2 and each
    // row has mtry entries. (Comparing the outer size with mtry, as was
    // done before, fails whenever mtry != 2.)
    assert(nodeNumEdges.size() == 2);
    assert(static_cast<int>(nodeNumEdges[0].size()) == mtry);
    assert(static_cast<int>(nodeNumEdges[1].size()) == mtry);

    // Preset parameters
    const int imageWidth = fpSingleton::getSingleton().returnImageWidth();

    // Allocated once and re-filled with 0..imageWidth-1 for each candidate.
    std::vector<int> subsample(imageWidth);

    for (int k = 0; k < mtry; k++)
    {
        std::iota(subsample.begin(), subsample.end(), 0);

        const int anchorNode = nodeNumEdges[0][k];
        // Without this cap the shuffle below would index past the end of
        // subsample and pass a non-positive bound to gen().
        const int edgesToDraw = std::min(nodeNumEdges[1][k], imageWidth);

        // Sample w/o replacement: position i is swapped with a random
        // position in [i, imageWidth), so the first edgesToDraw slots end
        // up holding distinct column indices.
        for (int locationToMove = 0; locationToMove < edgesToDraw; locationToMove++)
        {
            const int randomPosition = randNum->gen(imageWidth - locationToMove) + locationToMove;
            std::swap(subsample[locationToMove], subsample[randomPosition]);
        }

        // Address each sampled edge as row * imageWidth + column.
        for (int i = 0; i < edgesToDraw; i++)
        {
            const int featureIndex = anchorNode * imageWidth + subsample[i];
            featuresToTry[k].returnFeatures().push_back(featureIndex);
            featuresToTry[k].returnWeights().push_back(1);
        }
    }
} // END randMatGraphEdgePatch

inline void resetLeftNode(){
propertiesOfLeftNode.resetClassTotals();
Expand Down
4 changes: 2 additions & 2 deletions packedForest/src/fpSingleton/fpInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -324,8 +324,8 @@ namespace fp {
useRowMajor = (bool)parameterValue;
}else if(parameterName == "methodToUse"){
methodToUse = parameterValue;
if(!(methodToUse == 1 || methodToUse == 2)){
throw std::runtime_error("methodToUse outside allowable parameters {1,2}.");
if(!(methodToUse == 1 || methodToUse == 2 || methodToUse == 3 || methodToUse == 4)){
throw std::runtime_error("methodToUse outside allowable parameters {1,2,3,4}.");
}
}else if(parameterName == "imageHeight"){
imageHeight = parameterValue;
Expand Down