Skip to content

Commit

Permalink
cleans up some printouts. adds set_labels to empty predictions. adds …
Browse files Browse the repository at this point in the history
…second op_weights routine to add weight to functions operating on outputs of other functions.

Former-commit-id: e5120f1
  • Loading branch information
lacava committed Mar 22, 2019
1 parent 02d9c54 commit 9f85cc2
Show file tree
Hide file tree
Showing 4 changed files with 142 additions and 106 deletions.
108 changes: 19 additions & 89 deletions src/dat/data.cc
Original file line number Diff line number Diff line change
Expand Up @@ -166,48 +166,38 @@ namespace FT{
Eigen::PermutationMatrix<Dynamic,Dynamic> perm(o->X.cols());
perm.setIdentity();
r.shuffle(perm.indices().data(), perm.indices().data()+perm.indices().size());
cout << "X before shuffle: \n";
cout << o->X.transpose() << "\n";
/* cout << "X before shuffle: \n"; */
/* cout << o->X.transpose() << "\n"; */
o->X = o->X * perm; // shuffles columns of X

cout << "X after shuffle: \n";
cout << o->X.transpose() << "\n";
/* cout << "X after shuffle: \n"; */
/* cout << o->X.transpose() << "\n"; */
o->y = (o->y.transpose() * perm).transpose() ; // shuffle y too

if(o->Z.size() > 0)
{
std::vector<int> zidx(o->y.size());
/* std::iota(zidx.begin(), zidx.end(), 0); */
/* VectorXi zw = Map<VectorXi>(zidx.data(), zidx.size()); */
// shuffle z indices
/* zw = (zw.transpose()*perm).transpose(); */
/* cout << "zw : \n" << zw; */
cout << "perm indices: " << perm.indices() << "\n";
// assign shuffled zw to zidx
/* zidx.assign(zw.data(), zw.data() + zw.size()); */
// zidx maps the perm_indices values to their indices, i.e. the inverse transform
for (unsigned i = 0; i < perm.indices().size(); ++i)
zidx.at(perm.indices()(i)) = i;
cout << "zidx :\n";
for (const auto& zi : zidx)
cout << zi << "," ;
cout << "\n";
/* cout << "zidx :\n"; */
/* for (const auto& zi : zidx) */
/* cout << zi << "," ; */
/* cout << "\n"; */
for(auto &val : o->Z)
{
cout << "unshuffled " << val.first << ": \n";
for (unsigned i = 0; i < val.second.first.size(); ++i)
{
cout << val.second.first.at(i).transpose() << "\n";
/* val.second.first.at(i) = (val.second.first.at(i) * perm).transpose(); */
/* val.second.second.at(i) = (val.second.second.at(i) * perm).transpose(); */
}

/* cout << "unshuffled " << val.first << ": \n"; */
/* for (unsigned i = 0; i < val.second.first.size(); ++i) */
/* { */
/* cout << val.second.first.at(i).transpose() << "\n"; */
/* } */
reorder_longitudinal(val.second.first, zidx);
reorder_longitudinal(val.second.second, zidx);
cout << "shuffled " << val.first << ": \n";
for (unsigned i = 0; i < val.second.first.size(); ++i)
{
cout << val.second.first.at(i).transpose() << "\n";
}
/* cout << "shuffled " << val.first << ": \n"; */
/* for (unsigned i = 0; i < val.second.first.size(); ++i) */
/* { */
/* cout << val.second.first.at(i).transpose() << "\n"; */
/* } */
}
}
}
Expand Down Expand Up @@ -356,66 +346,6 @@ namespace FT{
}
}

/* void DataRef::reorder_longitudinal(vector<ArrayXf> &vec1, vector<ArrayXf> &vec2, */
/* vector<long> const &order) */
/* { */

/* for( int s = 1, d; s < order.size(); ++ s ) */
/* { */
/* cout << "s: " << s << "\n"; */
/* for ( d = order[s]; d < s; d = order[d] ); */

/* cout << "d: " << s << "\n"; */
/* if ( d == s ) */
/* { */
/* while ( d = order[d], d != s ) */
/* { */
/* swap(vec1[s], vec1[d]); */
/* swap(vec2[s], vec2[d]); */
/* } */
/* } */
/* } */
/* } */

/* void DataRef::reorder_longitudinal(vector<ArrayXf> &vec1, const vector<int>& order) */
/* { */
/* vector<int> index = order; */
/* cout << "order: " ; */
/* for (const auto& o : order) */
/* cout << o << ", "; */
/* cout << "\n"; */
/* // Fix all elements one by one */
/* for (int i=0; i<index.size()-1; ++i) */
/* { */
/* // While index[i] and vec1[i] are not fixed */
/* while (index.at(i) != i) */
/* { */
/* int alt = index.at(i); */
/* swap(vec1.at(i), vec1.at(alt)); */
/* swap(index.at(i), index.at(alt)); */
/* /1* // Store values of the target (or correct) *1/ */
/* /1* // position before placing vec1[i] there *1/ */
/* /1* int oldTargetI = index.at(index.at(i)); *1/ */
/* /1* auto oldTargetE = vec1.at(index.at(i)); *1/ */

/* /1* // Place vec1[i] at its target (or correct) *1/ */
/* /1* // position. Also copy corrected index for *1/ */
/* /1* // new position *1/ */
/* /1* vec1.at(index.at(i)) = vec1.at(i); *1/ */
/* /1* index.at(index.at(i)) = index.at(i); *1/ */

/* /1* // Copy old target values to vec1[i] and *1/ */
/* /1* // index[i] *1/ */
/* /1* index.at(i) = oldTargetI; *1/ */
/* /1* vec1.at(i) = oldTargetE; *1/ */
/* } */
/* } */
/* cout << "ordered order: " ; */
/* for (const auto& o : index) */
/* cout << o << ", "; */
/* cout << "\n"; */
/* } */
/* template< class T > */
void DataRef::reorder_longitudinal(vector<ArrayXf> &v, vector<int> const &order ) {
for ( int s = 1, d; s < order.size(); ++ s ) {
for ( d = order[s]; d < s; d = order[d] ) ;
Expand Down
14 changes: 7 additions & 7 deletions src/feat.cc
Original file line number Diff line number Diff line change
Expand Up @@ -744,13 +744,13 @@ void Feat::initial_model(DataRef &d)
//
bool pass = true;
shared_ptr<CLabels> yhat = best_ind.fit(*d.t,params,pass);
SGVector<double> _Tmp = dynamic_pointer_cast<sh::CBinaryLabels>(yhat)->get_labels();
SGVector<float> Tmp(_Tmp.begin(), _Tmp.end());
Map<VectorXf> yhatV(Tmp.data(),Tmp.size());
cout << "yhat: " << yhatV.transpose() << "\n";
cout << "y: " << d.t->y.transpose() << "\n";
ArrayXf diff = yhatV.array() - d.t->y.array() ;
cout << "diff: " << diff.transpose() << "\n";
/* SGVector<double> _Tmp = dynamic_pointer_cast<sh::CBinaryLabels>(yhat)->get_labels(); */
/* SGVector<float> Tmp(_Tmp.begin(), _Tmp.end()); */
/* Map<VectorXf> yhatV(Tmp.data(),Tmp.size()); */
/* cout << "yhat: " << yhatV.transpose() << "\n"; */
/* cout << "y: " << d.t->y.transpose() << "\n"; */
/* ArrayXf diff = yhatV.array() - d.t->y.array() ; */
/* cout << "diff: " << diff.transpose() << "\n"; */
// set terminal weights based on model
vector<float> w;
if (n_feats + n_long_feats == d.t->X.rows() + d.t->Z.size())
Expand Down
56 changes: 47 additions & 9 deletions src/model/ml.cc
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ namespace FT{
p_est = make_shared<sh::CMyLibLinear>(sh::L2R_LR);
// setting parameters to match sklearn defaults
dynamic_pointer_cast<sh::CMyLibLinear>(p_est)->set_compute_bias(true);
/* dynamic_pointer_cast<sh::CMyLibLinear>(p_est)->set_epsilon(0.0001); */
dynamic_pointer_cast<sh::CMyLibLinear>(p_est)->set_epsilon(0.0001);
/* dynamic_pointer_cast<sh::CMyLibLinear>(p_est)->set_C(1.0,1.0); */
dynamic_pointer_cast<sh::CMyLibLinear>(p_est)->set_max_iterations(10000);
//cout << "set ml type to CMyLibLinear\n";
Expand Down Expand Up @@ -398,15 +398,51 @@ namespace FT{
if (get_weights().empty())
{
cout << "weight empty; returning zeros\n";
CRegressionLabels dlabels(X.cols());
for (unsigned i = 0; i < X.cols() ; ++i)
dlabels.set_value(0,i);
cout << "setting labels\n";
labels =shared_ptr<CLabels>(&dlabels);
cout << "returning\n";
return labels;
if (this->prob_type==PT_BINARY)
{
CBinaryLabels dlabels(X.cols());
for (unsigned i = 0; i < X.cols() ; ++i)
{
dlabels.set_value(0,i);
dlabels.set_label(0,i);
}
cout << "setting labels\n";
labels =shared_ptr<CLabels>(&dlabels);
cout << "returning\n";
return labels;
}
else if (this->prob_type == PT_MULTICLASS)
{
CMulticlassLabels dlabels(X.cols());
for (unsigned i = 0; i < X.cols() ; ++i)
{
dlabels.set_value(0,i);
dlabels.set_label(0,i);
}
cout << "setting labels\n";
labels =shared_ptr<CLabels>(&dlabels);
cout << "returning\n";
return labels;
}
else
{
CRegressionLabels dlabels(X.cols());
for (unsigned i = 0; i < X.cols() ; ++i)
{
dlabels.set_value(0,i);
dlabels.set_label(0,i);
}
cout << "setting labels\n";
labels =shared_ptr<CLabels>(&dlabels);
cout << "returning\n";
return labels;
}
}

/* cout << "weights: \n"; */
/* for (const auto& w : get_weights()) */
/* cout << w << ", " ; */
/* cout << "\n"; */
/* cout << "normalize\n"; */
if (normalize)
N.normalize(X);

Expand All @@ -417,8 +453,10 @@ namespace FT{
if (this->prob_type==PT_BINARY &&
(ml_type == SVM || ml_type == LR || ml_type == CART || ml_type == RF))
{
/* cout << "apply binary\n"; */
labels = std::shared_ptr<CLabels>(p_est->apply_binary(features));

/* cout << "set probability\n"; */
if (ml_type == CART)
dynamic_pointer_cast<sh::CMyCARTree>(p_est)->set_probabilities(labels.get(),
features);
Expand Down
70 changes: 69 additions & 1 deletion src/params.cc
Original file line number Diff line number Diff line change
Expand Up @@ -558,12 +558,80 @@ namespace FT{
op_weights.at(i) /= float(total_args);
++i;
}
// Now, we need to account for the output types of the operators that have non-zero
// weights, in addition to the terminals.
// So we now upweight the operators according to the output types of the operators that
// have non-zero weights.

int total_ops_terms = total_terms;
b_count = 0;
c_count = 0;
f_count = 0;
z_count = 0;
for (unsigned i = 0; i < functions.size(); ++i)
{
if (op_weights.at(i) > 0)
{
switch (functions.at(i)->otype)
{
case 'b':
++b_count;
break;
case 'c':
++c_count;
break;
case 'f':
++f_count;
break;
case 'z':
++z_count;
break;
}
}
++total_ops_terms;
}
cout << "b_count: " << b_count << "\n"
<< "f_count: " << f_count << "\n"
<< "c_count: " << c_count << "\n"
<< "z_count: " << z_count << "\n"
<< "total_ops_terms: " << total_ops_terms << "\n";

i = 0; // op_weights counter
for (const auto& op : functions)
{
int total_args = 0;
for (auto& kv : op->arity)
{
switch (kv.first) // kv.first is the arity type (character)
{
case 'b':
for (unsigned j = 0; j < kv.second; ++j)
op_weights.at(i) += float(b_count)/float(total_ops_terms);
break;
case 'c':
for (unsigned j = 0; j < kv.second; ++j)
op_weights.at(i) += float(c_count)/float(total_ops_terms);
break;
case 'f':
for (unsigned j = 0; j < kv.second; ++j)
op_weights.at(i) += float(f_count)/float(total_ops_terms);
break;
case 'z':
for (unsigned j = 0; j < kv.second; ++j)
op_weights.at(i) += float(z_count)/float(total_ops_terms);
break;
}
total_args += kv.second;
}
op_weights.at(i) /= float(total_args);
++i;
}

string ow = "op_weights: ";
for (unsigned i = 0; i< functions.size(); ++i)
ow += "(" + functions.at(i)->name + ", " + std::to_string(op_weights.at(i)) + "), ";
ow += "\n";
logger.log(ow,2);

}
void Parameters::set_terminals(int nf,
std::map<string, std::pair<vector<ArrayXf>, vector<ArrayXf> > > Z)
Expand Down

0 comments on commit 9f85cc2

Please sign in to comment.