Skip to content

Commit

Permalink
MAHOUT-597: Applied patch to support kernels in MeanShift. All tests …
Browse files Browse the repository at this point in the history
…run.

git-svn-id: https://svn.apache.org/repos/asf/mahout/trunk@1131504 13f79535-47bb-0310-9956-ffa450edef68
  • Loading branch information
Jeff Eastman committed Jun 4, 2011
1 parent 6f0b33c commit 821aff8
Show file tree
Hide file tree
Showing 14 changed files with 1,368 additions and 906 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -29,47 +29,57 @@
import org.apache.mahout.math.list.IntArrayList;

/**
* This class models a canopy as a center point, the number of points that are contained within it according
* to the application of some distance metric, and a point total which is the sum of all the points and is
* used to compute the centroid when needed.
* This class models a canopy as a center point, the number of points that are
* contained within it according to the application of some distance metric, and
* a point total which is the sum of all the points and is used to compute the
* centroid when needed.
*/
public class MeanShiftCanopy extends Cluster {

// TODO: this is still problematic from a scalability perspective, but how else to encode membership?

// TODO: this is still problematic from a scalability perspective, but how
// else to encode membership?
private IntArrayList boundPoints = new IntArrayList();

/**
* Used for Writable
*/
public MeanShiftCanopy() {
}

public MeanShiftCanopy() {}

/**
* Create a new Canopy containing the given point
*
* @param point a Vector
* @param id an int canopy id
* @param measure a DistanceMeasure
* @param point
* a Vector
* @param id
* an int canopy id
* @param measure
* a DistanceMeasure
*/
public MeanShiftCanopy(Vector point, int id, DistanceMeasure measure) {
super(point, id, measure);
// Seed the bound points list with the id this canopy was created with.
boundPoints.add(id);
}

/**
* Create an initial Canopy, retaining the original type of the given point (e.g. NamedVector)
* @param point a Vector
* @param id an int
* @param measure a DistanceMeasure
* Create an initial Canopy, retaining the original type of the given point
* (e.g. NamedVector)
*
* @param point
* a Vector
* @param id
* an int
* @param measure
* a DistanceMeasure
* @return a MeanShiftCanopy
*/
public static MeanShiftCanopy initialCanopy(Vector point, int id, DistanceMeasure measure) {
public static MeanShiftCanopy initialCanopy(Vector point, int id,
    DistanceMeasure measure) {
  MeanShiftCanopy canopy = new MeanShiftCanopy(point, id, measure);
  // Re-assign the center to the caller's own vector instance so that its
  // concrete type (e.g. NamedVector) is the one the canopy keeps.
  canopy.setCenter(point);
  return canopy;
}

/**
* Create a new Canopy containing the given point, id and bound points
*
Expand All @@ -82,19 +92,20 @@ public static MeanShiftCanopy initialCanopy(Vector point, int id, DistanceMeasur
* @param converged
* true if the canopy has converged
*/
MeanShiftCanopy(Vector point, int id, IntArrayList boundPoints, boolean converged) {
MeanShiftCanopy(Vector point, int id, IntArrayList boundPoints,
boolean converged) {
this.setId(id);
this.setCenter(point);
// The radius is initialised from point.like() -- presumably an empty vector
// shaped like point; confirm against Vector.like().
this.setRadius(point.like());
// The point count is set to 1 regardless of how many entries boundPoints
// holds.
this.setNumPoints(1);
// The supplied list is stored by reference, not copied.
this.boundPoints = boundPoints;
setConverged(converged);
}

/**
* Returns the bound points of this canopy.
*
* @return the canopy's own IntArrayList instance, not a copy
*/
public IntArrayList getBoundPoints() {
return boundPoints;
}

/**
* The receiver overlaps the given canopy. Add its bound points to mine.
*
Expand All @@ -104,18 +115,21 @@ public IntArrayList getBoundPoints() {
void merge(MeanShiftCanopy canopy) {
// Direction of the transfer: the argument's bound points are added to the
// receiver's list.
boundPoints.addAllOf(canopy.boundPoints);
}

/**
* The receiver touches the given canopy. Add respective centers.
* The receiver touches the given canopy. Add respective centers with the
* given weights.
*
* @param canopy
* an existing MeanShiftCanopy
* @param weight
* the double weight of the touching
*/
void touch(MeanShiftCanopy canopy) {
canopy.observe(getCenter(), boundPoints.size());
observe(canopy.getCenter(), canopy.boundPoints.size());
void touch(MeanShiftCanopy canopy, double weight) {
  // First hand my center to the other canopy, scaled by the weight times
  // the number of points bound to me.
  double myContribution = weight * boundPoints.size();
  canopy.observe(getCenter(), myContribution);

  // Then take its center into me, scaled by the weight times the number of
  // points bound to it.
  double itsContribution = weight * canopy.boundPoints.size();
  observe(canopy.getCenter(), itsContribution);
}

@Override
public void readFields(DataInput in) throws IOException {
super.readFields(in);
Expand All @@ -125,7 +139,7 @@ public void readFields(DataInput in) throws IOException {
this.boundPoints.add(in.readInt());
}
}

@Override
public void write(DataOutput out) throws IOException {
super.write(out);
Expand All @@ -134,7 +148,7 @@ public void write(DataOutput out) throws IOException {
out.writeInt(v);
}
}

public MeanShiftCanopy shallowCopy() {
MeanShiftCanopy result = new MeanShiftCanopy();
result.setMeasure(this.getMeasure());
Expand All @@ -145,25 +159,26 @@ public MeanShiftCanopy shallowCopy() {
result.setBoundPoints(boundPoints);
return result;
}

/**
* Returns the formatted representation of this canopy, which is simply the
* result of toString().
*
* @return the String produced by toString()
*/
@Override
public String asFormatString() {
return toString();
}

/**
* Replaces this canopy's bound points list.
*
* @param boundPoints
* the IntArrayList to use; it is stored by reference, not copied
*/
public void setBoundPoints(IntArrayList boundPoints) {
this.boundPoints = boundPoints;
}

/**
 * Builds the identifier of this canopy: the prefix "MSV-" when the canopy
 * has converged, "MSC-" otherwise, followed by the canopy id.
 *
 * @return the identifier String
 */
@Override
public String getIdentifier() {
  String prefix;
  if (isConverged()) {
    prefix = "MSV-";
  } else {
    prefix = "MSC-";
  }
  return prefix + getId();
}

/**
* Not supported for mean shift canopies: this method always throws
* NotImplementedException.
*
* @param vw
* a VectorWritable (not used)
* @return never returns normally
*/
@Override
public double pdf(VectorWritable vw) {
// MSCanopy membership is explicit via membership in boundPoints. Can't compute pdf for Arbitrary point
// MSCanopy membership is explicit via membership in boundPoints. Can't
// compute pdf for Arbitrary point
throw new NotImplementedException();
}

}
Loading

0 comments on commit 821aff8

Please sign in to comment.