From 0196c3ff0e1e22b290c00a966c9e13416baabadc Mon Sep 17 00:00:00 2001 From: Michael ZBYSZYNSKI Date: Tue, 14 Nov 2023 10:14:32 +0000 Subject: [PATCH] Revert "silencing warnings" This reverts commit f5878b50404ba7a020eea2dbfea2733de5b70799. --- dependencies/jsoncpp.cpp | 2 +- dependencies/libsvm/libsvm.cpp | 2 +- src/classification.cpp | 188 +++++++------- src/fastDTW.cpp | 86 +++---- src/fastDTW.h | 76 +++--- src/knnClassification.cpp | 249 +++++++++---------- src/modelSet.cpp | 434 ++++++++++++++++----------------- 7 files changed, 506 insertions(+), 531 deletions(-) diff --git a/dependencies/jsoncpp.cpp b/dependencies/jsoncpp.cpp index 4c1b04c..85abbab 100644 --- a/dependencies/jsoncpp.cpp +++ b/dependencies/jsoncpp.cpp @@ -4206,7 +4206,7 @@ JSONCPP_STRING valueToString(double value, bool useSpecialFloats, unsigned int p int len = -1; char formatString[6]; - snprintf(formatString, 6, "%%.%dg", precision); + sprintf(formatString, "%%.%dg", precision); // Print into the buffer. We need not request the alternative representation // that always has a decimal point because JSON doesn't distingish the diff --git a/dependencies/libsvm/libsvm.cpp b/dependencies/libsvm/libsvm.cpp index 44b949d..7186344 100644 --- a/dependencies/libsvm/libsvm.cpp +++ b/dependencies/libsvm/libsvm.cpp @@ -52,7 +52,7 @@ namespace LIBSVM { char buf[BUFSIZ]; va_list ap; va_start(ap,fmt); - vsnprintf(buf,BUFSIZ,fmt,ap); + vsprintf(buf,fmt,ap); va_end(ap); (*svm_print_string)(buf); } diff --git a/src/classification.cpp b/src/classification.cpp index 780434c..0bc392d 100644 --- a/src/classification.cpp +++ b/src/classification.cpp @@ -14,127 +14,125 @@ #endif template -classificationTemplate::classificationTemplate() : -classificationType(knn) //this is the default algorithm +classificationTemplate::classificationTemplate() { - modelSet::numInputs = -1; - modelSet::numOutputs = -1; - modelSet::isTraining = false; -} + modelSet::numInputs = -1; + modelSet::numOutputs = -1; + modelSet::isTraining = false; + classificationType = knn; //this is the default algorithm +}; template -classificationTemplate::classificationTemplate(classificationTypes classification_type) : -classificationType(classification_type) +classificationTemplate::classificationTemplate(classificationTypes classification_type) { - modelSet::numInputs = -1; - modelSet::numOutputs = -1; - modelSet::isTraining = false; + modelSet::numInputs = -1; + modelSet::numOutputs = -1; + modelSet::isTraining = false; + classificationType = classification_type; }; template classificationTemplate::classificationTemplate(const int &num_inputs, const int &num_outputs) //TODO: this feature isn't really useful -{ - modelSet::numInputs = num_inputs; - modelSet::numOutputs = num_outputs; - modelSet::isTraining = false; - std::vector whichInputs; - - for (size_t i {}; i < modelSet::numInputs; ++i) - { - whichInputs.push_back(i); - } - - std::vector > trainingSet; - - for (size_t i {}; i < modelSet::numOutputs; ++i) - { - modelSet::myModelSet.push_back(new knnClassification(modelSet::numInputs, whichInputs, trainingSet, 1)); - } +{ + modelSet::numInputs = num_inputs; + modelSet::numOutputs = num_outputs; + modelSet::isTraining = false; + std::vector whichInputs; + + for (size_t i = 0; i < modelSet::numInputs; ++i) + { + whichInputs.push_back(i); + } + std::vector > trainingSet; + + for (size_t i = 0; i < modelSet::numOutputs; ++i) + { + modelSet::myModelSet.push_back(new knnClassification(modelSet::numInputs, whichInputs, trainingSet, 1)); + } }; template -classificationTemplate::classificationTemplate(const std::vector > &trainingSet) +classificationTemplate::classificationTemplate(const std::vector > &trainingSet) { - modelSet::numInputs = -1; - modelSet::numOutputs = -1; - modelSet::isTraining = false; - train(trainingSet); + modelSet::numInputs = -1; + modelSet::numOutputs = -1; + modelSet::isTraining = false; + train(trainingSet); }; template -bool classificationTemplate::train(const std::vector > &training_set) +bool classificationTemplate::train(const std::vector > &training_set) { - //TODO: time this process? - modelSet::reset(); - - if (training_set.size() > 0) - { - //create model(s) here - modelSet::numInputs = static_cast(training_set[0].input.size()); - modelSet::numOutputs = static_cast(training_set[0].output.size()); - - for (int i {}; i < modelSet::numInputs; ++i) - { - modelSet::inputNames.push_back("inputs-" + std::to_string(i + 1)); - } - modelSet::numOutputs = int(training_set[0].output.size()); - - for (auto example : training_set) - { - if (example.input.size() != modelSet::numInputs) - { - throw std::length_error("unequal feature vectors in input."); - return false; - } - - if (example.output.size() != modelSet::numOutputs) - { - throw std::length_error("unequal output vectors."); - return false; - } - } - std::vector whichInputs; - - for (int inputNum {}; inputNum < modelSet::numInputs; ++inputNum) - { - whichInputs.push_back(inputNum); - } - - for (int i {}; i < modelSet::numOutputs; ++i) + //TODO: time this process? + modelSet::reset(); + + if (training_set.size() > 0) { - if (classificationType == svm) - { - modelSet::myModelSet.push_back(new svmClassification(modelSet::numInputs)); - } - else - { - modelSet::myModelSet.push_back(new knnClassification(modelSet::numInputs, whichInputs, training_set, 1)); - } + //create model(s) here + modelSet::numInputs = int(training_set[0].input.size()); + modelSet::numOutputs = int(training_set[0].output.size()); + + for (int i = 0; i < modelSet::numInputs; ++i) + { + modelSet::inputNames.push_back("inputs-" + std::to_string(i + 1)); + } + modelSet::numOutputs = int(training_set[0].output.size()); + + for ( auto example : training_set) + { + if (example.input.size() != modelSet::numInputs) + { + throw std::length_error("unequal feature vectors in input."); + return false; + } + if (example.output.size() != modelSet::numOutputs) + { + throw std::length_error("unequal output vectors."); + return false; + } + } + std::vector whichInputs; + + for (int j = 0; j < modelSet::numInputs; ++j) + { + whichInputs.push_back(j); + } + + for (int i = 0; i < modelSet::numOutputs; ++i) + { + if (classificationType == svm) + { + modelSet::myModelSet.push_back(new svmClassification(modelSet::numInputs)); + } + else + { + modelSet::myModelSet.push_back(new knnClassification(modelSet::numInputs, whichInputs, training_set, 1)); + } + } + + return modelSet::train(training_set); } - - return modelSet::train(training_set); - } - - return false; + return false; } template -std::vector classificationTemplate::getK() +std::vector classificationTemplate::getK() { - std::vector kVector; - - for (const baseModel* model : modelSet::myModelSet) - { - kVector.push_back(dynamic_cast*>(model)->getK()); //FIXME: I really dislike this design - } - - return kVector; + std::vector kVector; + + for (baseModel* model : modelSet::myModelSet) + { + knnClassification* kNNModel = dynamic_cast*>(model); //FIXME: I really dislike this design + kVector.push_back(kNNModel->getK()); + } + return kVector; } template -void classificationTemplate::setK(const int whichModel, const int newK) +void classificationTemplate::setK(const int whichModel, const int newK) { - dynamic_cast*>(modelSet::myModelSet[whichModel])->setK(newK); //FIXME: I really dislike this design + knnClassification* kNNModel = dynamic_cast*>(modelSet::myModelSet[whichModel]); //FIXME: I really dislike this design + kNNModel->setK(newK); } //explicit instantiation diff --git a/src/fastDTW.cpp b/src/fastDTW.cpp index 7694f13..49d92a0 100644 --- a/src/fastDTW.cpp +++ b/src/fastDTW.cpp @@ -19,70 +19,62 @@ fastDTW::~fastDTW() {}; template warpInfo fastDTW::fullFastDTW(const std::vector> &seriesX, const std::vector > &seriesY, int searchRadius) { - + #ifndef EMSCRIPTEN - if (seriesY.size() > seriesX.size()) - { - return fullFastDTW(seriesY, seriesX, searchRadius); //TODO: I'm not sure why I need this. Also, not sure why it fails with Emscripten. - } + if (seriesY.size() > seriesX.size()) + { + return fullFastDTW(seriesY, seriesX, searchRadius); //TODO: I'm not sure why I need this. Also, not sure why it fails with Emscripten. + } #endif - - dtw dtw; - searchRadius = std::max(0, searchRadius); - const int minSeries { searchRadius + 2 }; - - if (seriesX.size() <= minSeries || seriesY.size() <= minSeries) - { - return dtw.dynamicTimeWarp(seriesX, seriesY); - } - - const T resolution = 2.0; //TODO: Just hardcode this? - - const std::vector> shrunkenX { downsample(seriesX, resolution) }; - const std::vector> shrunkenY { downsample(seriesY, resolution) }; - - //some nice recursion here - const searchWindow window(static_cast(seriesX.size()), static_cast(seriesY.size()), getWarpPath(shrunkenX, shrunkenY, searchRadius), searchRadius); - return dtw.constrainedDTW(seriesX, seriesY, window); + + dtw dtw; + searchRadius = (searchRadius < 0) ? 0 : searchRadius; + int minSeries = searchRadius + 2; + if (seriesX.size() <= minSeries || seriesY.size() <= minSeries) + { + return dtw.dynamicTimeWarp(seriesX, seriesY); + } + + T resolution = 2.0;//TODO: Just hardcode this? + std::vector> shrunkenX = downsample(seriesX, resolution); + std::vector> shrunkenY = downsample(seriesY, resolution); + + //some nice recursion here + searchWindow window(int(seriesX.size()), int(seriesY.size()), getWarpPath(shrunkenX, shrunkenY, searchRadius), searchRadius); + return dtw.constrainedDTW(seriesX, seriesY, window); }; template T fastDTW::getCost(const std::vector> &seriesX, const std::vector > &seriesY, int searchRadius) { - const warpInfo info { fullFastDTW(seriesX, seriesY, searchRadius) }; - return info.cost; + warpInfo info = fullFastDTW(seriesX, seriesY, searchRadius); + return info.cost; }; template warpPath fastDTW::getWarpPath(const std::vector> &seriesX, const std::vector > &seriesY, int searchRadius) { - const warpInfo info { fullFastDTW(seriesX, seriesY, searchRadius) }; - return info.path; + warpInfo info = fullFastDTW(seriesX, seriesY, searchRadius); + return info.path; }; template -inline std::vector > fastDTW::downsample(const std::vector> &series, T resolution) +inline std::vector > fastDTW::downsample(const std::vector> &series, T resolution) { - std::vector > shrunkenSeries; - - for (std::size_t i {}; i < series.size(); ++i) - { - if (i % 2 == 0) - { - shrunkenSeries.push_back(series[i]); - } - else - { - const int shrunkIndex { static_cast(i * 0.5) }; - for (std::size_t j {}; j < series[i].size(); ++j) - { - shrunkenSeries[shrunkIndex][j] = (shrunkenSeries[shrunkIndex][j] + series[i][j]) * (T)0.5; - } + std::vector > shrunkenSeries; + + for (std::size_t i = 0; i < series.size(); ++i) { + if (i % 2 == 0) { + shrunkenSeries.push_back(series[i]); + } else { + int shrunkIndex = int(i * 0.5); + for (std::size_t j = 0; j < series[i].size(); ++j) { + shrunkenSeries[shrunkIndex][j] = (shrunkenSeries[shrunkIndex][j] + series[i][j]) * (T)0.5; + } + } } - } - - //TODO: implement downsampling by resolution - return shrunkenSeries; + //TODO: implement downsampling by resolution + return shrunkenSeries; } //explicit instantiation diff --git a/src/fastDTW.h b/src/fastDTW.h index 1404332..4998296 100644 --- a/src/fastDTW.h +++ b/src/fastDTW.h @@ -17,45 +17,45 @@ template class fastDTW { public: - fastDTW(); - ~fastDTW(); - - /** - * Returns just the cost of warping one series into a second. - * @param seriesX time series X - * @param seriesY time series Y - * @param searchRadius search radius (usually 1) - * @return cost to warp between series - */ - static T getCost(const std::vector> &seriesX, const std::vector > &seriesY, int searchRadius); - + fastDTW(); + ~fastDTW(); + + /** + * Returns just the cost of warping one series into a second. + * @param seriesX time series X + * @param seriesY time series Y + * @param searchRadius search radius (usually 1) + * @return cost to warp between series + */ + static T getCost(const std::vector> &seriesX, const std::vector > &seriesY, int searchRadius); + private: - /** - * Returns the cost and the warp path. - * @param seriesX time series X - * @param seriesY time series Y - * @param searchRadius search radius (usually 1) - * @return information about optimal time warp - */ - static warpInfo fullFastDTW(const std::vector> &seriesX, const std::vector > &seriesY, int searchRadius); - - /** - * Returns just lowest cost path to warping one series into a second. - * @param seriesX time series X - * @param seriesY time series Y - * @param searchRadius search radius (usually 1) - * @return The warp path - */ - static warpPath getWarpPath(const std::vector> &seriesX, const std::vector > &seriesY, int searchRadius); - - /** - * Downsamples a time series by two. Resolution isn't implemented yet - * @param series - * @param resolution (not used) - * @return downsampled series - `*/ - inline static std::vector > downsample(const std::vector> &series, T resolution); - + /** + * Returns the cost and the warp path. + * @param seriesX time series X + * @param seriesY time series Y + * @param searchRadius search radius (usually 1) + * @return information about optimal time warp + */ + static warpInfo fullFastDTW(const std::vector> &seriesX, const std::vector > &seriesY, int searchRadius); + + /** + * Returns just lowest cost path to warping one series into a second. + * @param seriesX time series X + * @param seriesY time series Y + * @param searchRadius search radius (usually 1) + * @return The warp path + */ + static warpPath getWarpPath(const std::vector> &seriesX, const std::vector > &seriesY, int searchRadius); + + /** + * Downsamples a time series by two. Resolution isn't implemented yet + * @param series + * @param resolution (not used) + * @return downsampled series + `*/ + inline static std::vector > downsample(const std::vector> &series, T resolution); + }; #endif diff --git a/src/knnClassification.cpp b/src/knnClassification.cpp index c5febf9..b9180d3 100644 --- a/src/knnClassification.cpp +++ b/src/knnClassification.cpp @@ -17,16 +17,16 @@ #endif template -knnClassification::knnClassification(const int &num_inputs, - const std::vector &which_inputs, - const std::vector > &_neighbours, - const int k) : -numInputs(num_inputs), -whichInputs(which_inputs), -whichOutput(0), -neighbours(_neighbours), -desiredK(k), -currentK(k) +knnClassification::knnClassification(const int &num_inputs, + const std::vector &which_inputs, + const std::vector > &_neighbours, + const int k) : + numInputs(num_inputs), + whichInputs(which_inputs), + whichOutput(0), + neighbours(_neighbours), + desiredK(k), + currentK(k) { } @@ -34,180 +34,171 @@ template knnClassification::~knnClassification() {} template -void knnClassification::reset() +void knnClassification::reset() { - //TODO: implement this + //TODO: implement this } template size_t knnClassification::getNumInputs() const { - return numInputs; + return numInputs; } template -std::vector knnClassification::getWhichInputs() const +std::vector knnClassification::getWhichInputs() const { - return whichInputs; + return whichInputs; } template -int knnClassification::getK() const +int knnClassification::getK() const { - return currentK; + return currentK; } template -inline void knnClassification::updateK() +inline void knnClassification::updateK() { - if (currentK != desiredK) currentK = std::min(desiredK, (int) neighbours.size()); + if (currentK != desiredK) currentK = std::min(desiredK, (int) neighbours.size()); } template -void knnClassification::setK(int newK) +void knnClassification::setK(int newK) { - desiredK = newK; - updateK(); + desiredK = newK; + updateK(); } template -void knnClassification::addNeighbour(const int &classNum, const std::vector &features) +void knnClassification::addNeighbour(const int &classNum, const std::vector &features) { - std::vector classVec {}; - classVec.push_back(T(classNum)); - trainingExampleTemplate newNeighbour = {features, classVec}; - neighbours.push_back(newNeighbour); - updateK(); + std::vector classVec; + classVec.push_back(T(classNum)); + trainingExampleTemplate newNeighbour = {features, classVec}; + neighbours.push_back(newNeighbour); + updateK(); }; template -void knnClassification::train(const std::vector >& trainingSet) +void knnClassification::train(const std::vector >& trainingSet) { - train(trainingSet, 0); + train(trainingSet, 0); } -// FIXME: Not paying attention to whichOutput. +// FIXME: Not paying attention to whichOutput. template void knnClassification::train(const std::vector > &trainingSet, const std::size_t which_output) //FIXME: Does numInputs need to be reset here? -MZ -{ - neighbours.clear(); - neighbours = trainingSet; - updateK(); - whichOutput = which_output; +{ + neighbours.clear(); + neighbours = trainingSet; + updateK(); + whichOutput = which_output; }; template -T knnClassification::run(const std::vector &inputVector) +T knnClassification::run(const std::vector &inputVector) { - std::vector> nearestNeighbours {}; //These are our k nearest neighbours - - for (size_t i {}; i < currentK; ++i) - { - nearestNeighbours.push_back( std::make_pair(0, 0.) ); - }; - - std::pair farthestNN {0, 0.}; //This one will be replaced if there's a closer one - std::vector pattern; //This is what we're trying to match - - for (size_t h {}; h < numInputs; ++h) - { - pattern.push_back(inputVector[whichInputs[h]]); - } - - //Find k nearest neighbours - size_t index {}; - - for (auto it = neighbours.cbegin(); it != neighbours.cend(); ++it) - { - //find Euclidian distance for this neighbor - T euclidianDistance {}; - - for(size_t j {}; j < numInputs ; ++j) + std::vector> nearestNeighbours; //These are our k nearest neighbours + + for (size_t i = 0; i < currentK; ++i) { - euclidianDistance += (T)pow((pattern[j] - it->input[j]), 2); - } - - euclidianDistance = sqrt(euclidianDistance); + nearestNeighbours.push_back( std::make_pair(0, 0.) ); + }; + std::pair farthestNN {0, 0.}; //This one will be replaced if there's a closer one - if (index < currentK) + std::vector pattern; //This is what we're trying to match + for (size_t h = 0; h < numInputs; ++h) { - //save the first k neighbours - nearestNeighbours[index] = {index, euclidianDistance}; - if (euclidianDistance > farthestNN.second) farthestNN = {index, euclidianDistance}; + pattern.push_back(inputVector[whichInputs[h]]); } - else if (euclidianDistance < farthestNN.second) + + //Find k nearest neighbours + size_t index = 0; + for (auto it = neighbours.cbegin(); it != neighbours.cend(); ++it) { - //replace farthest, if new neighbour is closer - nearestNeighbours[farthestNN.first] = {index, euclidianDistance}; - size_t currentFarthest {}; - T currentFarthestDistance = 0.; - - for (size_t n {}; n < currentK; ++n) - { - if (nearestNeighbours[n].second > currentFarthestDistance) + //find Euclidian distance for this neighbor + T euclidianDistance = 0; + for(size_t j = 0; j < numInputs ; ++j) { - currentFarthest = n; - currentFarthestDistance = nearestNeighbours[n].second; + euclidianDistance += (T)pow((pattern[j] - it->input[j]), 2); } - } - - farthestNN = { currentFarthest, currentFarthestDistance} ; - } - ++index; - } - - //majority vote on nearest neighbours - std::map classVoteMap; - using classVotePair = std::pair; - - for (size_t i {}; i < currentK; ++i) - { - T classNum = (T)round(neighbours[nearestNeighbours[i].first].output[whichOutput]); - if ( classVoteMap.find(classNum) == classVoteMap.end() ) - { - classVoteMap.insert(classVotePair(classNum, 1)); + euclidianDistance = sqrt(euclidianDistance); + + if (index < currentK) + { + //save the first k neighbours + nearestNeighbours[index] = {index, euclidianDistance}; + if (euclidianDistance > farthestNN.second) farthestNN = {index, euclidianDistance}; + } + else if (euclidianDistance < farthestNN.second) + { + //replace farthest, if new neighbour is closer + nearestNeighbours[farthestNN.first] = {index, euclidianDistance}; + size_t currentFarthest = 0; + T currentFarthestDistance = 0.; + + for (size_t n = 0; n < currentK; ++n) + { + if (nearestNeighbours[n].second > currentFarthestDistance) + { + currentFarthest = n; + currentFarthestDistance = nearestNeighbours[n].second; + } + } + farthestNN = { currentFarthest, currentFarthestDistance} ; + } + ++index; } - else + + //majority vote on nearest neighbours + std::map classVoteMap; + using classVotePair = std::pair; + for (size_t i = 0; i < currentK; ++i) { - ++classVoteMap[classNum]; + T classNum = (T)round(neighbours[nearestNeighbours[i].first].output[whichOutput]); + if ( classVoteMap.find(classNum) == classVoteMap.end() ) + { + classVoteMap.insert(classVotePair(classNum, 1)); + } + else + { + ++classVoteMap[classNum]; + } } - } - - T foundClass {}; - int mostVotes {}; - - for (auto const& [whichClass, votes] : classVoteMap) - { - if (votes > mostVotes) + + T foundClass = 0; + int mostVotes = 0; + for (auto p = classVoteMap.cbegin(); p != classVoteMap.cend(); ++p) { - mostVotes = votes; - foundClass = whichClass; + if (p->second > mostVotes) + { + mostVotes = p->second; + foundClass = p->first; + } } - } - - return foundClass; + return foundClass; } #ifndef EMSCRIPTEN template -void knnClassification::getJSONDescription(Json::Value &jsonModelDescription) +void knnClassification::getJSONDescription(Json::Value &jsonModelDescription) { - jsonModelDescription["modelType"] = "kNN Classificiation"; - jsonModelDescription["numInputs"] = numInputs; - jsonModelDescription["whichInputs"] = this->vector2json(whichInputs); - jsonModelDescription["k"] = desiredK; - Json::Value examples; - - for (auto const& neighbour : neighbours) - //for (auto it = neighbours.cbegin(); it != neighbours.cend(); ++it) - { - Json::Value oneExample; - oneExample["class"] = neighbour.output[whichOutput]; - oneExample["features"] = this->vector2json(neighbour.input); - examples.append(oneExample); - } - - jsonModelDescription["examples"] = examples; + jsonModelDescription["modelType"] = "kNN Classificiation"; + jsonModelDescription["numInputs"] = numInputs; + jsonModelDescription["whichInputs"] = this->vector2json(whichInputs); + jsonModelDescription["k"] = desiredK; + Json::Value examples; + + for (auto it = neighbours.cbegin(); it != neighbours.cend(); ++it) + { + Json::Value oneExample; + oneExample["class"] = it->output[whichOutput]; + oneExample["features"] = this->vector2json(it->input); + examples.append(oneExample); + } + + jsonModelDescription["examples"] = examples; } #endif diff --git a/src/modelSet.cpp b/src/modelSet.cpp index caef72e..64968a8 100644 --- a/src/modelSet.cpp +++ b/src/modelSet.cpp @@ -22,112 +22,109 @@ #endif /** No arguments, don't create any models yet */ -template +template modelSet::modelSet() : -numInputs(-1), -numOutputs(-1), -isTraining(false), -isTrained(false) + numInputs(-1), + numOutputs(-1), + isTraining(false), + isTrained(false) { }; template -modelSet::~modelSet() +modelSet::~modelSet() { - for (auto& model : myModelSet) - { - delete model; - } + for (auto& model : myModelSet) + { + delete model; + } }; template -bool modelSet::train(const std::vector > &training_set) +bool modelSet::train(const std::vector > &training_set) { - bool success { false }; - - if (isTraining) - { - throw std::runtime_error("model is already training"); - } - else - { - for (const trainingExampleTemplate example : training_set) - { - if (example.input.size() != numInputs) - { - throw std::length_error("unequal feature vectors in input."); - return false; - } - - if (example.output.size() != numOutputs) - { - throw std::length_error("unequal output vectors."); - return false; - } - } - - // Multithreaded training - std::vector trainingThreads {}; - for (std::size_t i {}; i < myModelSet.size(); ++i) + bool success = false; + if (isTraining) { - trainingThreads.push_back(std::thread(&modelSet::threadTrain, this, i, training_set)); + throw std::runtime_error("model is already training"); } - - for (std::size_t i {}; i < myModelSet.size(); ++i) + else { - trainingThreads.at(i).join(); + for (trainingExampleTemplate example : training_set) + { + if (example.input.size() != numInputs) + { + throw std::length_error("unequal feature vectors in input."); + return false; + } + if (example.output.size() != numOutputs) + { + throw std::length_error("unequal output vectors."); + return false; + } + } + + // Multithreaded training + std::vector trainingThreads; + for (std::size_t i = 0; i < myModelSet.size(); ++i) + { + trainingThreads.push_back(std::thread(&modelSet::threadTrain, this, i, training_set)); + } + + for (std::size_t i = 0; i < myModelSet.size(); ++i) + { + trainingThreads.at(i).join(); + } + isTraining = false; + success = isTrained = true; } - - isTraining = false; - success = isTrained = true; - } - - return success; + return success; } template -void modelSet::threadTrain(std::size_t i, const std::vector > &training_set) +void modelSet::threadTrain(std::size_t i, const std::vector > &training_set) { - myModelSet[i]->train(training_set, i); + myModelSet[i]->train(training_set, i); } template -bool modelSet::reset() +bool modelSet::reset() { - for (auto& model : myModelSet) delete model; - - myModelSet.clear(); - numInputs = -1; - numOutputs = -1; - isTraining = false; - - return true; + for (auto& model : myModelSet) + { + delete model; + } + myModelSet.clear(); + numInputs = -1; + numOutputs = -1; + isTraining = false; + return true; } template -std::vector modelSet::run(const std::vector &inputVector) +std::vector modelSet::run(const std::vector &inputVector) { - std::vector returnVector; - - if (isTraining) - { - throw std::runtime_error("can't run a model during training"); - returnVector.push_back(0); - } - else if (inputVector.size() != numInputs) - { - throw std::length_error("bad input size: " + std::to_string(inputVector.size())); - returnVector.push_back(0); - } - else - { - for (const auto& model : myModelSet) + std::vector returnVector; + + if (isTraining) { - returnVector.push_back(model->run(inputVector)); + throw std::runtime_error("can't run a model during training"); + returnVector.push_back(0); + } + else if (inputVector.size() != numInputs) + { + std::string badSize = std::to_string(inputVector.size()); + throw std::length_error("bad input size: " + badSize); + returnVector.push_back(0); + } + else + { + for (auto model : myModelSet) + { + returnVector.push_back(model->run(inputVector)); + } } - } - - return returnVector; + return returnVector; } @@ -135,184 +132,181 @@ std::vector modelSet::run(const std::vector &inputVector) #ifndef EMSCRIPTEN //In emscripten, we do the JSON parsing with native JavaScript template -std::vector json2vector(Json::Value json) +std::vector json2vector(Json::Value json) { - std::vector returnVec; - for (auto jsonValue : json) - { - returnVec.push_back((T)jsonValue.asDouble()); - } - - return returnVec; + std::vector returnVec; + for (auto jsonValue : json) + { + returnVec.push_back((T)jsonValue.asDouble()); + } + return returnVec; } template -Json::Value modelSet::parse2json() +Json::Value modelSet::parse2json() { - Json::Value root; - Json::Value metadata; - Json::Value modelSet; - - metadata["creator"] = "Rapid API C++"; - metadata["version"] = "v0.1.1"; //TODO: This should be a macro someplace - metadata["numInputs"] = numInputs; - Json::Value inputNamesJSON; - - for (const auto name : inputNames) - { - inputNamesJSON.append(name); - } - - metadata["inputNames"] = inputNamesJSON; - metadata["numOutputs"] = numOutputs; - root["metadata"] = metadata; - - for (auto model : myModelSet) - { - Json::Value currentModel; - currentModel["inputNames"] = inputNamesJSON; //TODO: implment this feature - model->getJSONDescription(currentModel); - modelSet.append(currentModel); - } - root["modelSet"] = modelSet; - return root; + Json::Value root; + Json::Value metadata; + Json::Value modelSet; + + metadata["creator"] = "Rapid API C++"; + metadata["version"] = "v0.1.1"; //TODO: This should be a macro someplace + metadata["numInputs"] = numInputs; + Json::Value inputNamesJSON; + + for (size_t i = 0; i < inputNames.size(); ++i) + { + inputNamesJSON.append(inputNames[i]); + } + metadata["inputNames"] = inputNamesJSON; + metadata["numOutputs"] = numOutputs; + root["metadata"] = metadata; + for (auto model : myModelSet) + { + Json::Value currentModel; + currentModel["inputNames"] = inputNamesJSON; //TODO: implment this feature + model->getJSONDescription(currentModel); + modelSet.append(currentModel); + } + root["modelSet"] = modelSet; + return root; } template -std::string modelSet::getJSON() +std::string modelSet::getJSON() { - Json::Value root { parse2json() }; - return root.toStyledString(); + Json::Value root = parse2json(); + return root.toStyledString(); } template -void modelSet::writeJSON(const std::string &filepath) +void modelSet::writeJSON(const std::string &filepath) { - Json::Value root { parse2json() }; - std::ofstream jsonOut; - jsonOut.open (filepath); - Json::StyledStreamWriter writer; - writer.write(jsonOut, root); - jsonOut.close(); + Json::Value root = parse2json(); + std::ofstream jsonOut; + jsonOut.open (filepath); + Json::StyledStreamWriter writer; + writer.write(jsonOut, root); + jsonOut.close(); + } template -bool modelSet::putJSON(const std::string &jsonMessage) +bool modelSet::putJSON(const std::string &jsonMessage) { - Json::Value parsedFromString; - Json::Reader reader; - bool parsingSuccessful { reader.parse(jsonMessage, parsedFromString) }; - if (parsingSuccessful) json2modelSet(parsedFromString); - return parsingSuccessful; + Json::Value parsedFromString; + Json::Reader reader; + bool parsingSuccessful = reader.parse(jsonMessage, parsedFromString); + if (parsingSuccessful) json2modelSet(parsedFromString); + return parsingSuccessful; } template -void modelSet::json2modelSet(const Json::Value &root) +void modelSet::json2modelSet(const Json::Value &root) { - numInputs = root["metadata"]["numInputs"].asInt(); - - for (unsigned int i = 0; i < root["metadata"]["inputNames"].size(); ++i) - { - inputNames.push_back(root["metadata"]["inputNames"][i].asString()); - } - - numOutputs = root["metadata"]["numOutputs"].asInt(); - - for (const Json::Value& model : root["modelSet"]) - { - int modelNumInputs = model["numInputs"].asInt(); - std::vector whichInputs; - std::vector modelInputNames; - - for (unsigned int i = 0; i < model["inputNames"].size(); ++i) - { - modelInputNames.push_back(model["inputNames"][i].asString()); - } - - for (size_t i = 0; i < inputNames.size(); ++i) + numInputs = root["metadata"]["numInputs"].asInt(); + + for (unsigned int i = 0; i < root["metadata"]["inputNames"].size(); ++i) { - if (std::find(modelInputNames.begin(), modelInputNames.end(), inputNames[i]) != modelInputNames.end()) - { - whichInputs.push_back(i); - } + inputNames.push_back(root["metadata"]["inputNames"][i].asString()); } + numOutputs = root["metadata"]["numOutputs"].asInt(); - if (model["modelType"].asString() == "Neural Network") + for (const Json::Value& model : root["modelSet"]) { - int numHiddenLayers = model["numHiddenLayers"].asInt(); - int numHiddenNodes = model["numHiddenNodes"].asInt(); - std::vector weights; - std::vector wHiddenOutput; - int nodeIndex = 0; - - for (const Json::Value& node : model["nodes"]) - { - if (node["name"].asString() == "Linear Node 0") + int modelNumInputs = model["numInputs"].asInt(); + std::vector whichInputs; + std::vector modelInputNames; + + for (unsigned int i = 0; i < model["inputNames"].size(); ++i) { - for (int i = 1; i <= numHiddenNodes; ++i) - { - std::string whichNode = "Node " + std::to_string(i + (numHiddenNodes * (numHiddenLayers - 1))); - wHiddenOutput.push_back((T)node[whichNode].asDouble()); - } - wHiddenOutput.push_back(node["Threshold"].asDouble()); + modelInputNames.push_back(model["inputNames"][i].asString()); } - else - { //FIXME: this will break if nodes are out of order - int currentLayer = (int) floor((nodeIndex - 1.0)/ (double)numHiddenNodes); - if (currentLayer < 1) //Nodes connected to input - { - for (int i = 0; i < numInputs; ++i) + + for (size_t i = 0; i < inputNames.size(); ++i) + { + if (std::find(modelInputNames.begin(), modelInputNames.end(), inputNames[i]) != modelInputNames.end()) { - std::string whichNode = "Attrib " + model["inputNames"][i].asString(); - weights.push_back(node[whichNode].asDouble()); + whichInputs.push_back(i); } - } - else //Hidden Layers - { - for (int i = 0; i < numHiddenNodes; ++i) + } + + if (model["modelType"].asString() == "Neural Network") + { + int numHiddenLayers = model["numHiddenLayers"].asInt(); + int numHiddenNodes = model["numHiddenNodes"].asInt(); + std::vector weights; + std::vector wHiddenOutput; + int nodeIndex = 0; + + for (const Json::Value& node : model["nodes"]) { - std::string whichNode = "Node " + std::to_string(i + (numHiddenNodes * (currentLayer - 1))); - weights.push_back(node[whichNode].asDouble()); - } } - weights.push_back(node["Threshold"].asDouble()); + if (node["name"].asString() == "Linear Node 0") + { + for (int i = 1; i <= numHiddenNodes; ++i) + { + std::string whichNode = "Node " + std::to_string(i + (numHiddenNodes * (numHiddenLayers - 1))); + wHiddenOutput.push_back((T)node[whichNode].asDouble()); + } + wHiddenOutput.push_back(node["Threshold"].asDouble()); + } + else + { //FIXME: this will break if nodes are out of order + int currentLayer = (int) floor((nodeIndex - 1.0)/ (double)numHiddenNodes); + if (currentLayer < 1) //Nodes connected to input + { + for (int i = 0; i < numInputs; ++i) + { + std::string whichNode = "Attrib " + model["inputNames"][i].asString(); + weights.push_back(node[whichNode].asDouble()); + } + } + else //Hidden Layers + { + for (int i = 0; i < numHiddenNodes; ++i) + { + std::string whichNode = "Node " + std::to_string(i + (numHiddenNodes * (currentLayer - 1))); + weights.push_back(node[whichNode].asDouble()); + } } + weights.push_back(node["Threshold"].asDouble()); + } + nodeIndex++; + } + std::vector inBases = json2vector(model["inBases"]); + std::vector inRanges = json2vector(model["inRanges"]); + T outRange = (T)model["outRange"].asDouble(); + T outBase = (T)model["outBase"].asDouble(); + + //TODO: many of these arguments could be size_t + myModelSet.push_back(new neuralNetwork(modelNumInputs, whichInputs, numHiddenLayers, numHiddenNodes, weights, wHiddenOutput, inRanges, inBases, outRange, outBase)); + } + else if (model["modelType"].asString() == "kNN Classificiation") + { + std::vector > trainingSet; + const Json::Value examples = model["examples"]; + + for (unsigned int i = 0; i < examples.size(); ++i) + { + trainingExampleTemplate tempExample; + tempExample.input = json2vector(examples[i]["features"]); + tempExample.output.push_back((T)examples[i]["class"].asDouble()); + trainingSet.push_back(tempExample); + } + int k = model["k"].asInt(); + + myModelSet.push_back(new knnClassification(modelNumInputs, whichInputs, trainingSet, k)); } - nodeIndex++; - } - std::vector inBases = json2vector(model["inBases"]); - std::vector inRanges = json2vector(model["inRanges"]); - T outRange = (T)model["outRange"].asDouble(); - T outBase = (T)model["outBase"].asDouble(); - - //TODO: many of these arguments could be size_t - myModelSet.push_back(new neuralNetwork(modelNumInputs, whichInputs, numHiddenLayers, numHiddenNodes, weights, wHiddenOutput, inRanges, inBases, outRange, outBase)); - } - else if (model["modelType"].asString() == "kNN Classificiation") - { - std::vector > trainingSet; - const Json::Value examples = model["examples"]; - - for (unsigned int i = 0; i < examples.size(); ++i) - { - trainingExampleTemplate tempExample; - tempExample.input = json2vector(examples[i]["features"]); - tempExample.output.push_back((T)examples[i]["class"].asDouble()); - trainingSet.push_back(tempExample); - } - int k = model["k"].asInt(); - - myModelSet.push_back(new knnClassification(modelNumInputs, whichInputs, trainingSet, k)); } - } } template -bool modelSet::readJSON(const std::string &filepath) +bool modelSet::readJSON(const std::string &filepath) { - Json::Value root; - std::ifstream file(filepath); - file >> root; - json2modelSet(root); - return true; //TODO: check something first + Json::Value root; + std::ifstream file(filepath); + file >> root; + json2modelSet(root); + return true; //TODO: check something first } #endif