vaibhavbichave · tim-githaiga · Sep 20, 2022 · Sep 20, 2022 · Sep 22, 2022 · Jul 11, 2024
diff --git a/.anima/.gitignore b/.anima/.gitignore
@@ -0,0 +1 @@
+cache
diff --git a/__pycache__/app.cpython-310.pyc b/__pycache__/app.cpython-310.pyc
diff --git a/__pycache__/feature.cpython-310.pyc b/__pycache__/feature.cpython-310.pyc
diff --git a/app.py b/app.py
@@ -1,5 +1,3 @@
-#importing required libraries
-
 from flask import Flask, request, render_template
 import numpy as np
 import pandas as pd
@@ -9,31 +7,36 @@
 warnings.filterwarnings('ignore')
 from feature import FeatureExtraction
 
-file = open("pickle/model.pkl","rb")
-gbc = pickle.load(file)
-file.close()
-
+# Load the trained classifier once at import time; abort startup if it cannot be read.
+try:
+    with open("pickle/model.pkl", "rb") as model_file:
+        gbc = pickle.load(model_file)
+except Exception as e:
+    raise RuntimeError(f"Error loading model: {e}") from e
 
 app = Flask(__name__)
 
 @app.route("/", methods=["GET", "POST"])
 def index():
     if request.method == "POST":
-
         url = request.form["url"]
-        obj = FeatureExtraction(url)
-        x = np.array(obj.getFeaturesList()).reshape(1,30) 
-
-        y_pred =gbc.predict(x)[0]
-        #1 is safe       
-        #-1 is unsafe
-        y_pro_phishing = gbc.predict_proba(x)[0,0]
-        y_pro_non_phishing = gbc.predict_proba(x)[0,1]
-        # if(y_pred ==1 ):
-        pred = "It is {0:.2f} % safe to go ".format(y_pro_phishing*100)
-        return render_template('index.html',xx =round(y_pro_non_phishing,2),url=url )
-    return render_template("index.html", xx =-1)
-
+        try:
+            obj = FeatureExtraction(url)
+            features = obj.getFeaturesList()
+            # One row per submitted URL; -1 lets NumPy infer the feature count.
+            x = np.array(features).reshape(1, -1)
+
+            # Query the model once; column 1 is the probability the original code named non-phishing.
+            proba = gbc.predict_proba(x)[0]
+            y_pro_non_phishing = proba[1]
+
+            return render_template('index.html', xx=round(y_pro_non_phishing, 2), url=url)
+        except Exception:
+            # Keep details in the server log: echoing str(e) would send raw error text as HTML.
+            app.logger.exception("Prediction failed for submitted URL")
+            return "Unable to analyse the submitted URL.", 500
+    # No form submission: render the page with the xx=-1 placeholder.
+    return render_template("index.html", xx=-1)
 
 if __name__ == "__main__":
-    app.run(debug=True)
+    app.run(debug=True)  # runs Flask's built-in development server with debug mode enabled
diff --git a/pickle/model.pkl b/pickle/model.pkl
diff --git a/templates/index.html b/templates/index.html
@@ -45,7 +45,7 @@ <h3 id="prediction"></h3>
     </div> 
 </div>
 <br>
-<p>©2021 VAIBHAV BICHAVE</p>
+<p>©2024 TIM</p>
 </div>
 
     <!-- JavaScript -->

diff --git a/train.py b/train.py
@@ -0,0 +1,55 @@
+# train.py
+
+import pandas as pd
+from sklearn.model_selection import train_test_split
+from sklearn.ensemble import RandomForestClassifier
+from sklearn.metrics import accuracy_score
+import joblib  # To save the trained model
+
+def load_data(csv_file):
+    """Return the dataset stored at ``csv_file`` as a pandas DataFrame."""
+    # Parsing is delegated entirely to pandas; no cleaning happens here.
+    return pd.read_csv(csv_file)
+
+def preprocess_data(df):
+    """Split ``df`` into features (every column but 'class') and the 'class' target."""
+    target_column = 'class'
+    features = df.drop(columns=[target_column])
+    return features, df[target_column]
+
+def train_model(X_train, y_train):
+    """Fit a 100-tree RandomForestClassifier (random_state=42) and return it."""
+    # fit() returns the estimator itself, so construction and training can chain.
+    forest = RandomForestClassifier(n_estimators=100, random_state=42)
+    return forest.fit(X_train, y_train)
+
+def evaluate_model(model, X_test, y_test):
+    """Print the model's accuracy score on the given test split."""
+    # accuracy_score takes the true labels first, then the predictions.
+    score = accuracy_score(y_test, model.predict(X_test))
+    print(f"Accuracy: {score}")
+
+def save_model(model, model_file='pickle/model.pkl'):
+    """Pickle the trained model to ``model_file``.
+
+    app.py reads this path with ``pickle.load``, so the file is written with
+    the standard ``pickle`` module rather than ``joblib.dump``, keeping the
+    writer and the reader on the same serialization format.
+    """
+    import pickle  # local import: this file's header only imports joblib
+
+    with open(model_file, "wb") as model_fh:
+        pickle.dump(model, model_fh)
+    print(f"Model saved as {model_file}")
+
+if __name__ == "__main__":
+    # Step 1: read the labelled dataset.
+    csv_file = 'phishing.csv'
+    dataset = load_data(csv_file)
+
+    # Step 2: separate the feature columns from the 'class' label.
+    features, labels = preprocess_data(dataset)
+
+    # Step 3: hold out 20% of the rows for evaluation (seeded split).
+    X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
+
+    # Step 4: fit the classifier on the training split.
+    classifier = train_model(X_train, y_train)
+
+    # Step 5: report accuracy on the held-out split.
+    evaluate_model(classifier, X_test, y_test)
+
+    # Step 6: persist the fitted model to save_model's default path.
+    save_model(classifier)