vpredictor

#!/usr/bin/python2

"""
This file is part of VDISCOVER.

VDISCOVER is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

VDISCOVER is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with VDISCOVER. If not, see <http://www.gnu.org/licenses/>.

Copyright 2014 by G.Grieco
"""

import os
import argparse
import sys
import csv

# Allow Python-level recursion up to 2**30 frames.
# NOTE(review): presumably needed for deeply nested objects handled by the
# vdiscover modules -- confirm; a limit this high no longer protects against
# runaway recursion.
sys.setrecursionlimit(1 << 30)
# Raise the csv module's per-field size limit to sys.maxsize.
# NOTE(review): presumably the input csv files carry very long fields -- confirm.
csv.field_size_limit(sys.maxsize)

from vdiscover.Pipeline import *
from vdiscover.Recall import Recall
from vdiscover.Train import Train

def _build_parser():
    """Return the argparse parser describing the vpredictor command line."""
    parser = argparse.ArgumentParser(
        description='A trainer and predictor of vulnerabilities')

    # A positional argument is always required, so it takes no default.
    parser.add_argument(
        "infile", type=str,
        help="A csv with the features to train or predict")

    parser.add_argument(
        "--model", type=str, default=None,
        help="Use a pretrained model (recall only)")

    parser.add_argument(
        "--prob", action="store_true", default=False,
        help="Output the probability of each prediction (recall only)")

    parser.add_argument(
        "--test", action="store_true", default=False,
        help="Test a model using infile (recall only)")

    parser.add_argument(
        "--test-aggr", action="store_true", default=False,
        help="Test a model using infile in aggregated mode (recall only)")

    parser.add_argument(
        "--static", action="store_true", default=False,
        help="Use static features")

    parser.add_argument(
        "--dynamic", action="store_true", default=False,
        help="Use dynamic features")

    # NOTE: the commented-out --valid, --cluster-*, --train-lstm and
    # --train-cnn options that used to live here were removed as dead code;
    # recover them from version control if they are ever revived.

    parser.add_argument(
        "--train", action="store_true", default=False,
        help="Train a model using a random forest")

    parser.add_argument(
        "--vect", type=str, default="bow",
        help="Which technique use to vectorize traces")

    parser.add_argument(
        "--n-samples", type=int, default=None,
        help="Select a number of samples from infile (train only)")

    parser.add_argument(
        "--out-file", type=str, default="/dev/stdout",
        help="File to output the results/model")

    return parser


def _fail(message):
    """Write *message* to stderr and terminate via ``sys.exit(-1)``."""
    # Errors go to stderr so they never mix with results, which default to
    # /dev/stdout.
    sys.stderr.write(message + "\n")
    sys.exit(-1)


def main(argv=None):
    """Run the vpredictor command line.

    Parses *argv* (``sys.argv[1:]`` when it is None), then either trains a
    random-forest model with ``Train`` (``--train``) or runs ``Recall`` with
    the model given by ``--model``.

    Exactly one of ``--static`` / ``--dynamic`` must be given. In recall mode
    ``--test`` takes precedence over ``--test-aggr`` when both are present.
    In training mode ``--model``, ``--prob``, ``--test`` and ``--test-aggr``
    are not used.

    Raises:
        SystemExit: with code -1 when the feature type selection is invalid
            or when recall is requested without ``--model``; argparse itself
            exits on malformed command lines.
    """
    options = _build_parser().parse_args(argv)

    # Both flags are booleans, so "none or both" is simply "they are equal".
    if options.static == options.dynamic:
        _fail("VDiscover requires to select either static or dynamic "
              "features exclusively")
    features_type = "static" if options.static else "dynamic"

    if options.train:
        model_type = "rf"
        # The third argument was always passed as None by this script.
        Train(options.out_file, options.infile, None, model_type,
              options.vect, features_type, options.n_samples)
        return

    if options.model is None:
        _fail("VDiscover requires a pre-trained model to predict")

    if options.test:
        test_mode = "simple"
    elif options.test_aggr:
        test_mode = "aggregated"
    else:
        test_mode = None

    Recall(options.model, options.infile, features_type, options.out_file,
           test_mode, probability=options.prob)


if __name__ == "__main__":
    main()