From f20bc799cc28af8fc71bfc48d4f0522eed31277b Mon Sep 17 00:00:00 2001 From: "Pt. Prashant tripathi" <26687933+PtPrashantTripathi@users.noreply.github.com> Date: Thu, 1 Aug 2024 14:15:32 +0530 Subject: [PATCH] Update new (#11) * updated * formatted * cool * pnl * updated * update * update * updated * p * 6 * updatye * update * new * u[pdate * up * update * n * u * update * v * update * p * u * profit and loss * h * holdings * holding * temp * pro * pnl_amount_percentage * expire * silver broncze fix * up * up * ppt * update --------- Co-authored-by: Prashant Tripathi This Notebooks reads the RAW files, performs data harmonization\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Importing necessary files and packages\n" + "> This Notebook reads the RAW files and performs data harmonization." ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 20, "metadata": { "lines_to_next_cell": 2 }, "outputs": [], "source": [ + "# Import necessary libraries and utility functions\n", "import pandas as pd\n", + "import pathlib\n", "from common_utilities import (\n", " check_files_availability,\n", " find_correct_headers,\n", @@ -32,83 +27,210 @@ " fix_duplicate_column_names,\n", " global_path,\n", " replace_punctuation_from_columns,\n", + " logger\n", ")" ] }, - { - "cell_type": "markdown", - "metadata": { - "lines_to_next_cell": 2 - }, - "source": [ - "### Reading & Validate the data from the files\n" - ] - }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ - "def read_file(file_path):\n", - " print(f\"\\nProccessing => {file_path}\")\n", - " df = pd.read_excel(\n", - " file_path, engine=\"openpyxl\", sheet_name=None, header=None, skipfooter=1\n", - " )\n", - " df = find_correct_sheetname(df, sheet_name_regex=\"trade\")\n", - " df = find_correct_headers(df, global_header_regex=\"date\")\n", - " df = replace_punctuation_from_columns(df)\n", - " df = fix_duplicate_column_names(df)\n", - " df = df.dropna(how=\"all\")\n", + "# Define a function to read and process an Excel file\n", + "def read_file(file_path: pathlib.Path) -> None:\n", + " \"\"\"\n", + " Reads and processes an Excel file from the specified file path.\n", + " It performs data harmonization and saves the processed data as a CSV file.\n", "\n", - " # Save the result as a csv file\n", - " output_file = global_path.tradehistory_bronze_layer_path.joinpath(\n", - " file_path.name.replace(\"xlsx\", \"csv\")\n", - " )\n", - " df.to_csv(output_file, index=None)\n", - " print(f\"Proccessed => {output_file}\")" + " Args:\n", + " file_path (pathlib.Path): The path to the Excel file to be processed.\n", + " \"\"\"\n", + " # Log the start of processing for the file\n", + " logger.info(f\"Processing => {file_path}\")\n", + "\n", + " try:\n", + " # Read the Excel file into a DataFrame\n", + " df = pd.read_excel(\n", + " file_path, engine=\"openpyxl\", sheet_name=None, header=None, skipfooter=1\n", + " )\n", + "\n", + " # Find and select the correct sheetname containing \"trade\"\n", + " df = find_correct_sheetname(df, sheet_name_regex=\"trade\")\n", + "\n", + " # Find and set the correct headers matching \"date\"\n", + " df = find_correct_headers(df, global_header_regex=\"date\")\n", + "\n", + " # Replace punctuation from column names for consistency\n", + " df = replace_punctuation_from_columns(df)\n", + "\n", + " # Fix duplicate column names by appending numerical suffixes\n", + " df = fix_duplicate_column_names(df)\n", + "\n", + " # Drop rows where all elements are NaN\n", + " df = df.dropna(how=\"all\")\n", + "\n", + " # Save the result as a CSV file in the bronze layer path\n", + " output_file = global_path.tradehistory_bronze_layer_path.joinpath(\n", + " file_path.name.replace(\"xlsx\", \"csv\")\n", + " )\n", + " df.to_csv(output_file, index=None)\n", + "\n", + " # Log successful processing of the file\n", + " logger.info(f\"Processed => {output_file}\")\n", + "\n", + " except Exception as e:\n", + " # Log any exceptions that occur during processing\n", + " logger.error(f\"Failed to process {file_path} due to error: {e}\")" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 22, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-08-01T11:38:42Z - INFO - Number of Files Detected: 5\n", + "2024-08-01T11:38:42Z - INFO - Processing => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SOURCE\\TradeHistory\\trade_2021.xlsx\n", + "2024-08-01T11:38:42Z - INFO - Sheet name => TRADE\n", + "2024-08-01T11:38:42Z - INFO - Processed => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2021.csv\n", + "2024-08-01T11:38:42Z - INFO - Processing => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SOURCE\\TradeHistory\\trade_2122.xlsx\n", + "2024-08-01T11:38:42Z - INFO - Sheet name => TRADE\n", + "2024-08-01T11:38:42Z - INFO - Processed => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2122.csv\n", + "2024-08-01T11:38:42Z - INFO - Processing => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SOURCE\\TradeHistory\\trade_2223.xlsx\n", + "2024-08-01T11:38:42Z - INFO - Sheet name => TRADE\n", + "2024-08-01T11:38:42Z - INFO - Processed => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2223.csv\n", + "2024-08-01T11:38:42Z - INFO - Processing => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SOURCE\\TradeHistory\\trade_2324.xlsx\n", + "2024-08-01T11:38:42Z - INFO - Sheet name => TRADE\n", + "2024-08-01T11:38:42Z - INFO - Processed => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2324.csv\n", + "2024-08-01T11:38:42Z - INFO - Processing => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SOURCE\\TradeHistory\\trade_2425.xlsx\n", + "2024-08-01T11:38:42Z - INFO - Sheet name => TRADE\n", + "2024-08-01T11:38:42Z - INFO - Processed => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2425.csv\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ - "Number of Files Detected: 5\n", - "\n", - "Proccessing => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SOURCE\\TradeHistory\\trade_2021.xlsx\n", - "Sheet name => TRADE\n", - "Proccessed => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2021.csv\n", - "\n", - "Proccessing => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SOURCE\\TradeHistory\\trade_2122.xlsx\n", - "Sheet name => TRADE\n", - "Proccessed => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2122.csv\n", - "\n", - "Proccessing => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SOURCE\\TradeHistory\\trade_2223.xlsx\n", - "Sheet name => TRADE\n", - "Proccessed => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2223.csv\n", - "\n", - "Proccessing => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SOURCE\\TradeHistory\\trade_2324.xlsx\n", - "Sheet name => TRADE\n", - "Proccessed => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2324.csv\n", - "\n", - "Proccessing => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SOURCE\\TradeHistory\\trade_2425.xlsx\n", - "Sheet name => TRADE\n", - "Proccessed => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2425.csv\n" + "\n", + "Index: 18 entries, 9 to 26\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 date 18 non-null object\n", + " 1 company 18 non-null object\n", + " 2 amount 18 non-null object\n", + " 3 exchange 18 non-null object\n", + " 4 segment 18 non-null object\n", + " 5 scrip_code 18 non-null object\n", + " 6 instrument_type 18 non-null object\n", + " 7 strike_price 18 non-null object\n", + " 8 expiry 0 non-null object\n", + " 9 trade_num 17 non-null object\n", + " 10 trade_time 17 non-null object\n", + " 11 side 18 non-null object\n", + " 12 quantity 18 non-null object\n", + " 13 price 18 non-null object\n", + "dtypes: object(14)\n", + "memory usage: 2.1+ KB\n", + "\n", + "Index: 14 entries, 9 to 22\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 date 14 non-null object\n", + " 1 company 14 non-null object\n", + " 2 amount 14 non-null object\n", + " 3 exchange 14 non-null object\n", + " 4 segment 14 non-null object\n", + " 5 scrip_code 14 non-null object\n", + " 6 instrument_type 14 non-null object\n", + " 7 strike_price 14 non-null object\n", + " 8 expiry 0 non-null object\n", + " 9 trade_num 14 non-null object\n", + " 10 trade_time 14 non-null object\n", + " 11 side 14 non-null object\n", + " 12 quantity 14 non-null object\n", + " 13 price 14 non-null object\n", + "dtypes: object(14)\n", + "memory usage: 1.6+ KB\n", + "\n", + "Index: 8 entries, 9 to 16\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 date 8 non-null object\n", + " 1 company 8 non-null object\n", + " 2 amount 8 non-null object\n", + " 3 exchange 8 non-null object\n", + " 4 segment 8 non-null object\n", + " 5 scrip_code 8 non-null object\n", + " 6 instrument_type 8 non-null object\n", + " 7 strike_price 8 non-null object\n", + " 8 expiry 0 non-null object\n", + " 9 trade_num 7 non-null object\n", + " 10 trade_time 7 non-null object\n", + " 11 side 8 non-null object\n", + " 12 quantity 8 non-null object\n", + " 13 price 8 non-null object\n", + "dtypes: object(14)\n", + "memory usage: 960.0+ bytes\n", + "\n", + "Index: 0 entries\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 date 0 non-null object\n", + " 1 company 0 non-null object\n", + " 2 amount 0 non-null object\n", + " 3 exchange 0 non-null object\n", + " 4 segment 0 non-null object\n", + " 5 scrip_code 0 non-null object\n", + " 6 instrument_type 0 non-null object\n", + " 7 strike_price 0 non-null object\n", + " 8 expiry 0 non-null object\n", + " 9 trade_num 0 non-null object\n", + " 10 trade_time 0 non-null object\n", + " 11 side 0 non-null object\n", + " 12 quantity 0 non-null object\n", + " 13 price 0 non-null object\n", + "dtypes: object(14)\n", + "memory usage: 0.0+ bytes\n", + "\n", + "Index: 135 entries, 9 to 143\n", + "Data columns (total 14 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 date 135 non-null object\n", + " 1 company 135 non-null object\n", + " 2 amount 135 non-null object\n", + " 3 exchange 135 non-null object\n", + " 4 segment 135 non-null object\n", + " 5 scrip_code 135 non-null object\n", + " 6 instrument_type 135 non-null object\n", + " 7 strike_price 135 non-null object\n", + " 8 expiry 129 non-null object\n", + " 9 trade_num 131 non-null object\n", + " 10 trade_time 131 non-null object\n", + " 11 side 135 non-null object\n", + " 12 quantity 135 non-null object\n", + " 13 price 135 non-null object\n", + "dtypes: object(14)\n", + "memory usage: 15.8+ KB\n" ] } ], "source": [ - "# Generate file_paths\n", + "# Generate file paths for available Excel files in the source layer\n", "file_paths = check_files_availability(\n", " global_path.tradehistory_source_layer_path,\n", " file_pattern=\"trade_*.xlsx\",\n", ")\n", "\n", + "# Process each file path\n", "for file_path in file_paths:\n", " read_file(file_path)" ] diff --git a/NOTEBOOKS/01_ETL_Bronze2Silver_Layer_Symbol.ipynb b/NOTEBOOKS/01_ETL_Bronze2Silver_Layer_Symbol.ipynb index a28655d4..d2c57d5b 100644 --- a/NOTEBOOKS/01_ETL_Bronze2Silver_Layer_Symbol.ipynb +++ b/NOTEBOOKS/01_ETL_Bronze2Silver_Layer_Symbol.ipynb @@ -4,31 +4,69 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## BRONZE TO SILVER LAYER\n" + "## BRONZE TO SILVER LAYER\n", + "\n", + "### Bronze Layer - ScripCode\n" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ + "# Import necessary libraries and utility functions\n", "import pandas as pd\n", - "from common_utilities import global_path, replace_punctuation_from_columns" + "import pathlib\n", + "from common_utilities import (\n", + " global_path,\n", + " replace_punctuation_from_columns,\n", + " logger,\n", + " check_files_availability,\n", + ")" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 8, "metadata": {}, + "outputs": [], "source": [ - "### Bronze Layer - ScripCode\n" + "# Define a function to read and process an csv file\n", + "def read_file(file_path: pathlib.Path) -> None:\n", + " \"\"\"\n", + " Processes CSV files from the Bronze layer and consolidates them into a single DataFrame.\n", + " The data is then harmonized and saved as a CSV file in the Silver layer.\n", + " \"\"\"\n", + " # Log the reading of the file\n", + " logger.info(f\"Processing file: {file_path}\")\n", + " \n", + " # Read each CSV file into a DataFrame\n", + " df = pd.read_csv(file_path)\n", + "\n", + " # Harmonize the DataFrame by replacing punctuation from column names\n", + " df = replace_punctuation_from_columns(df)\n", + " \n", + " # Drop columns where all elements are NaN\n", + " df.dropna(how=\"all\", axis=1, inplace=True)\n", + " return df" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 9, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-08-01T11:31:30Z - INFO - Number of Files Detected: 1\n", + "2024-08-01T11:31:30Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\Symbol\\Symbol_data.csv\n", + "2024-08-01T11:31:31Z - INFO - Successfully created SILVER Layer CSV file for Symbol at:\n", + "2024-08-01T11:31:31Z - INFO - C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SILVER\\Symbol\\Symbol_data.csv\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -44,38 +82,47 @@ " 3 isin_no 4328 non-null object\n", " 4 instrument_type 4328 non-null object\n", "dtypes: object(5)\n", - "memory usage: 202.9+ KB\n", - "SILVER Layer csv file for Symbol successfully created at:\n", - "C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SILVER\\Symbol\\Symbol_data.csv\n" + "memory usage: 202.9+ KB\n" ] } ], "source": [ - "# Initialize an empty list to store DataFrames\n", - "df_Symbol = []\n", - "\n", - "# Loop through List of all CSV files in the folder\n", - "for file_path in global_path.symbol_bronze_layer_path.glob(\"*.csv\"):\n", - " # Read the CSV file\n", - " df = pd.read_csv(file_path)\n", - " # Append the DataFrame to the list\n", - " df_Symbol.append(df)\n", - "\n", - "# Concatenate all DataFrames into one\n", - "df_Symbol = pd.concat(df_Symbol, ignore_index=True)\n", + "# Process the Bronze layer CSV files to create a consolidated DataFrame\n", + "try:\n", + " # Initialize an empty list to store DataFrames\n", + " df_symbol_list = []\n", "\n", - "df_Symbol = replace_punctuation_from_columns(df_Symbol)\n", - "df_Symbol.dropna(how=\"all\", axis=1, inplace=True)\n", + " # Generate file paths for available Excel files in the source layer\n", + " file_paths = check_files_availability(\n", + " global_path.symbol_bronze_layer_path,\n", + " file_pattern=\"*.csv\"\n", + " )\n", "\n", - "# sort the dataframe by date\n", - "df_Symbol = df_Symbol.sort_values(by=[\"scrip_code\"])\n", + " # Loop through all CSV files in the bronze layer folder\n", + " for file_path in file_paths:\n", + " try:\n", + " df = read_file(file_path) \n", + " # Append the DataFrame to the list\n", + " df_symbol_list.append(df)\n", + " except Exception as e:\n", + " # Log any exceptions during file reading\n", + " logger.error(f\"Failed to read {file_path} due to error: {e}\")\n", "\n", - "# Save the result as a csv file\n", - "df_Symbol.to_csv(global_path.symbol_silver_file_path, index=None)\n", - "df_Symbol.info()\n", + " # Concatenate all DataFrames into one\n", + " df = pd.concat(df_symbol_list, ignore_index=True)\n", "\n", - "print(\"SILVER Layer csv file for Symbol successfully created at:\")\n", - "print(global_path.symbol_silver_file_path.resolve())" + " # Sort the DataFrame by 'scrip_code'\n", + " df = df.sort_values(by=[\"scrip_code\"])\n", + " \n", + " # Save the result as a CSV file in the silver layer\n", + " df.to_csv(global_path.symbol_silver_file_path, index=None)\n", + " logger.info(\"Successfully created SILVER Layer CSV file for Symbol at:\")\n", + " logger.info(global_path.symbol_silver_file_path.resolve())\n", + " # Log the DataFrame information\n", + " df.info()\n", + "except Exception as e:\n", + " # Log any exceptions during file writing\n", + " logger.error(f\"Failed to save SILVER Layer CSV file due to error: {e}\")" ] } ], diff --git a/NOTEBOOKS/02_ETL_Bronze2Silver_Layer_TradeHistory.ipynb b/NOTEBOOKS/02_ETL_Bronze2Silver_Layer_TradeHistory.ipynb index a7b71c07..a8febb21 100644 --- a/NOTEBOOKS/02_ETL_Bronze2Silver_Layer_TradeHistory.ipynb +++ b/NOTEBOOKS/02_ETL_Bronze2Silver_Layer_TradeHistory.ipynb @@ -4,42 +4,53 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## BRONZE TO SILVER LAYER\n" + "## BRONZE TO SILVER LAYER\n", + "### Reading & Validate the Data from the Files" ] }, { "cell_type": "code", - "execution_count": 8, - "metadata": { - "lines_to_next_cell": 2 - }, + "execution_count": 1, + "metadata": {}, "outputs": [], "source": [ + "## Import necessary libraries and utility functions\n", "import pandas as pd\n", "from common_utilities import (\n", " check_files_availability,\n", " global_path,\n", " replace_punctuation_from_columns,\n", + " logger\n", ")" ] }, { "cell_type": "markdown", - "metadata": { - "lines_to_next_cell": 2 - }, + "metadata": {}, "source": [ - "### Reading & Validate the data from the files\n" + "### Function Definitions\n", + "\n", + "- **concat_stock_name**: Concatenates stock names based on instrument type.\n", + "- **read_file**: Reads and processes a CSV file from the Bronze layer." ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Function to apply the conditional concatenation\n", - "def concat_stock_name(row):\n", + "def concat_stock_name(row:pd.Series)->str:\n", + " \"\"\"\n", + " Concatenate stock names based on the instrument type.\n", + " \n", + " Parameters:\n", + " row (pd.Series): A row of DataFrame containing instrument data.\n", + "\n", + " Returns:\n", + " str: The concatenated stock name.\n", + " \"\"\"\n", " if row[\"instrument_type\"] == \"European Call\":\n", " return (\n", " str(row[\"company\"])\n", @@ -62,23 +73,34 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ + "# Function to read and process a CSV file\n", "def read_file(file_path):\n", - " print(f\"\\nProccessing => {file_path}\")\n", + " \"\"\"\n", + " Reads and processes a CSV file from the Bronze layer.\n", + "\n", + " Parameters:\n", + " file_path (str): The path to the CSV file.\n", + "\n", + " Returns:\n", + " pd.DataFrame: The processed DataFrame.\n", + " \"\"\"\n", + " logger.info(f\"Processing file: {file_path}\")\n", "\n", " # Read the CSV file\n", " df = pd.read_csv(file_path)\n", " df = replace_punctuation_from_columns(df)\n", - " \n", + "\n", " # Convert 'trade_num' to int\n", " df[\"trade_num\"] = df[\"trade_num\"].fillna(0).astype(int)\n", "\n", " # Remove the currency symbol and commas, then convert to float\n", " df[\"amount\"] = df[\"amount\"].astype(float)\n", " df[\"price\"] = df[\"price\"].astype(float)\n", + " df[\"quantity\"] = df[\"quantity\"].astype(float)\n", "\n", " # Add Datetime Col\n", " df[\"datetime\"] = pd.to_datetime(\n", @@ -89,8 +111,8 @@ " )\n", "\n", " # Convert 'expiry' to desired string format\n", - " df[\"expiry\"] = pd.to_datetime(df[\"expiry\"], format=\"%d-%m-%Y\")\n", - " df[\"expiry\"] = df[\"expiry\"].dt.strftime(\"%d%b%Y\")\n", + " df[\"expiry_date\"] = pd.to_datetime(df[\"expiry\"], format=\"%d-%m-%Y\")\n", + " df[\"expiry\"] = df[\"expiry_date\"].dt.strftime(\"%d%b%Y\")\n", "\n", " # Convert the 'side' column in df to uppercase\n", " df[\"side\"] = df[\"side\"].astype(str).str.strip().str.upper()\n", @@ -101,29 +123,40 @@ " # Remove all-NA columns from each DataFrame\n", " df = df.dropna(axis=1, how='all')\n", "\n", + " logger.info(f\"Completed processing file: {file_path}\")\n", " return df" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data Processing\n", + "\n", + "- Generate file paths for available CSV files in the Bronze layer.\n", + "- Read and concatenate data from multiple files." + ] + }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 4, "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "Number of Files Detected: 5\n", - "\n", - "Proccessing => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2021.csv\n", - "\n", - "Proccessing => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2122.csv\n", - "\n", - "Proccessing => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2223.csv\n", - "\n", - "Proccessing => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2324.csv\n", - "\n", - "Proccessing => C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2425.csv\n" + "2024-08-01T11:48:03Z - INFO - Number of Files Detected: 5\n", + "2024-08-01T11:48:03Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2021.csv\n", + "2024-08-01T11:48:03Z - INFO - Completed processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2021.csv\n", + "2024-08-01T11:48:03Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2122.csv\n", + "2024-08-01T11:48:03Z - INFO - Completed processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2122.csv\n", + "2024-08-01T11:48:03Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2223.csv\n", + "2024-08-01T11:48:03Z - INFO - Completed processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2223.csv\n", + "2024-08-01T11:48:03Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2324.csv\n", + "2024-08-01T11:48:03Z - INFO - Completed processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2324.csv\n", + "2024-08-01T11:48:03Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2425.csv\n", + "2024-08-01T11:48:03Z - INFO - Completed processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\TradeHistory\\trade_2425.csv\n" ] } ], @@ -139,26 +172,38 @@ "\n", "# Loop through List of all CSV files in the folder\n", "for file_path in file_paths:\n", - " # Read the CSV file\n", - " df = read_file(file_path)\n", - " # Append the DataFrame to the list\n", - " if not df.empty:\n", - " dfs.append(df)\n", + " try:\n", + " # Read the CSV file\n", + " df = read_file(file_path)\n", + " # Append the DataFrame to the list\n", + " if not df.empty:\n", + " dfs.append(df)\n", + " except Exception as e:\n", + " logger.error(f\"Failed to read {file_path} due to error: {e}\")\n", "\n", "# Concatenate all DataFrames into one\n", "df_TradeHistory = pd.concat(dfs, ignore_index=True)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Data Harmonization\n", + "\n", + "- Replace scrip codes with company names using the SILVER layer symbol data." + ] + }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ - "# replace scrip code to compnay name\n", + "# Replace scrip code with company name\n", "df_Symbol = pd.read_csv(global_path.symbol_silver_file_path)\n", "\n", - "# string and strip\n", + "# String and strip\n", "df_Symbol[\"scrip_code\"] = df_Symbol[\"scrip_code\"].astype(str).str.strip()\n", "df_TradeHistory[\"scrip_code\"] = df_TradeHistory[\"scrip_code\"].astype(str).str.strip()\n", "\n", @@ -169,44 +214,67 @@ " right_on=\"scrip_code\",\n", " how=\"left\",\n", ")\n", + "\n", "# Assign the new column 'stock_name' in df_TradeHistory to the values from 'symbol'\n", "df_TradeHistory[\"stock_name\"] = df_TradeHistory[\"symbol\"].combine_first(\n", " df_TradeHistory[\"stock_name\"]\n", - ")\n" + ")" ] }, { - "cell_type": "code", - "execution_count": 13, + "cell_type": "markdown", "metadata": {}, + "source": [ + "### Final Processing and Export\n", + "\n", + "- Sort the DataFrame by date and stock name.\n", + "- Save the processed data as a CSV file in the Silver layer.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": { + "lines_to_next_cell": 2 + }, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-08-01T11:48:03Z - INFO - SILVER Layer CSV file for trade history successfully created at:\n", + "2024-08-01T11:48:03Z - INFO - C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SILVER\\TradeHistory\\TradeHistory_data.csv\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ "\n", "Index: 175 entries, 17 to 40\n", - "Data columns (total 9 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 datetime 175 non-null datetime64[ns]\n", - " 1 exchange 175 non-null object \n", - " 2 segment 175 non-null object \n", - " 3 stock_name 175 non-null object \n", - " 4 scrip_code 175 non-null object \n", - " 5 side 175 non-null object \n", - " 6 amount 175 non-null float64 \n", - " 7 quantity 175 non-null float64 \n", - " 8 price 175 non-null float64 \n", - "dtypes: datetime64[ns](1), float64(3), object(5)\n", - "memory usage: 13.7+ KB\n" + "Data columns (total 10 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 datetime 175 non-null datetime64[ns]\n", + " 1 exchange 175 non-null object \n", + " 2 segment 175 non-null object \n", + " 3 stock_name 175 non-null object \n", + " 4 scrip_code 175 non-null object \n", + " 5 side 175 non-null object \n", + " 6 amount 175 non-null float64 \n", + " 7 quantity 175 non-null float64 \n", + " 8 price 175 non-null float64 \n", + " 9 expiry_date 129 non-null datetime64[ns]\n", + "dtypes: datetime64[ns](2), float64(3), object(5)\n", + "memory usage: 15.0+ KB\n" ] } ], "source": [ - "# sort the dataframe by date\n", + "# Sort the DataFrame by date and stock name\n", "df_TradeHistory = df_TradeHistory.sort_values(by=[\"datetime\", \"stock_name\"])\n", "\n", + "# Select relevant columns\n", "df_TradeHistory = df_TradeHistory[\n", " [\n", " \"datetime\",\n", @@ -218,30 +286,19 @@ " \"amount\",\n", " \"quantity\",\n", " \"price\",\n", + " \"expiry_date\"\n", " ]\n", "]\n", - "df_TradeHistory.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "SILVER Layer csv file for trade history successfully created at:\n", - "C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SILVER\\TradeHistory\\TradeHistory_data.csv\n" - ] - } - ], - "source": [ - "# Save the result as a csv file\n", - "df_TradeHistory.to_csv(global_path.tradehistory_silver_file_path, index=None)\n", - "print(\"SILVER Layer csv file for trade history successfully created at:\")\n", - "print(global_path.tradehistory_silver_file_path.resolve())" + "\n", + "# Save the result as a CSV file\n", + "try:\n", + " df_TradeHistory.to_csv(global_path.tradehistory_silver_file_path, index=None)\n", + " logger.info(\"SILVER Layer CSV file for trade history successfully created at:\")\n", + " logger.info(global_path.tradehistory_silver_file_path.resolve())\n", + " # Log the DataFrame info\n", + " df_TradeHistory.info()\n", + "except Exception as e:\n", + " logger.error(f\"Failed to save SILVER Layer CSV file due to error: {e}\")\n" ] } ], diff --git a/NOTEBOOKS/03_ETL_Bronze2Silver_Layer_StockPrice.ipynb b/NOTEBOOKS/03_ETL_Bronze2Silver_Layer_StockPrice.ipynb index 08c21bcb..f5563a05 100644 --- a/NOTEBOOKS/03_ETL_Bronze2Silver_Layer_StockPrice.ipynb +++ b/NOTEBOOKS/03_ETL_Bronze2Silver_Layer_StockPrice.ipynb @@ -4,31 +4,121 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## BRONZE TO SILVER LAYER\n" + "## BRONZE TO SILVER LAYER\n", + "\n", + "### Silver Layer - Stock Price History\n" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ + "# Import necessary libraries and utility functions\n", "import pandas as pd\n", - "from common_utilities import global_path, replace_punctuation_from_columns" + "from common_utilities import (\n", + " global_path,\n", + " replace_punctuation_from_columns,\n", + " logger,\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Silver Layer - Stock Price History\n" + "### Data Processing\n", + "\n", + "- Initialize an empty list to store DataFrames.\n", + "- Read and concatenate data from multiple CSV files.\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\BHAGERIA.NS.csv\n", + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\BPCL.NS.csv\n", + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\GOLDBEES.NS.csv\n", + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\HERANBA.NS.csv\n", + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\IDEA.NS.csv\n", + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\INFY.NS.csv\n", + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\IRCTC.NS.csv\n", + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\KPITTECH.NS.csv\n", + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\LICI.NS.csv\n", + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\NIFTYBEES.NS.csv\n", + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\PNB.NS.csv\n", + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\SBIN.NS.csv\n", + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\TATACHEM.NS.csv\n", + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\TATAMOTORS.NS.csv\n", + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\TATAPOWER.NS.csv\n", + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\VOLTAS.NS.csv\n", + "2024-08-01T14:07:12Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\StockPrice\\YESBANK.NS.csv\n" + ] + } + ], + "source": [ + "# Initialize an empty list to store DataFrames\n", + "df_stock_price_list = []\n", + "\n", + "# Generate file paths for available CSV files in the Bronze layer\n", + "file_paths = global_path.stockprice_bronze_layer_path.glob(\"*.NS.csv\")\n", + "\n", + "# Loop through List of all CSV files in the folder\n", + "for file_path in file_paths:\n", + " try:\n", + " logger.info(f\"Processing file: {file_path}\")\n", + " # Read the CSV file\n", + " df = pd.read_csv(file_path)\n", + "\n", + " # Extract stock name from file path\n", + " df[\"stock_name\"] = file_path.name.split(\".\")[0].upper().strip()\n", + "\n", + " # Append the DataFrame to the list\n", + " df_stock_price_list.append(df)\n", + " except Exception as e:\n", + " logger.error(f\"Failed to read {file_path} due to error: {e}\")\n", + "\n", + "# Concatenate all DataFrames into one\n", + "df = pd.concat(df_stock_price_list, ignore_index=True)\n", + "\n", + "# Harmonize column names\n", + "df = replace_punctuation_from_columns(df)\n", + "\n", + "# Remove all-NA columns from each DataFrame\n", + "df.dropna(how=\"all\", axis=1, inplace=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Final Processing and Export\n", + "\n", + "- Round numerical values to 2 decimal places.\n", + "- Sort the DataFrame by stock name and date.\n", + "- Save the processed data as a CSV file in the Silver layer.\n" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-08-01T14:07:13Z - INFO - SILVER Layer CSV file for Stock Price history successfully created at:\n", + "2024-08-01T14:07:13Z - INFO - C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SILVER\\StockPrice\\StockPrice_data.csv\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -45,45 +135,33 @@ " 4 low 13688 non-null float64\n", " 5 close 13688 non-null float64\n", "dtypes: float64(4), object(2)\n", - "memory usage: 641.8+ KB\n", - "SILVER Layer csv file for Stock Price history successfully created at:\n", - "C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SILVER\\StockPrice\\StockPrice_data.csv\n" + "memory usage: 641.8+ KB\n" ] } ], "source": [ - "# Initialize an empty list to store DataFrames\n", - "df_StockPrice = []\n", - "\n", - "# Loop through List of all CSV files in the folder\n", - "for file_path in global_path.stockprice_bronze_layer_path.glob(\"*.NS.csv\"):\n", - " # Read the CSV file\n", - " df = pd.read_csv(file_path)\n", - "\n", - " # stock name from file path\n", - " df[\"stock_name\"] = file_path.name.split(\".\")[0].upper().strip()\n", - "\n", - " # Append the DataFrame to the list\n", - " df_StockPrice.append(df)\n", - "\n", - "# Concatenate all DataFrames into one\n", - "df_StockPrice = pd.concat(df_StockPrice, ignore_index=True)\n", - "df_StockPrice = replace_punctuation_from_columns(df_StockPrice)\n", - "df_StockPrice.dropna(how=\"all\", axis=1, inplace=True)\n", - "\n", - "# round the numbers\n", - "df_StockPrice = df_StockPrice.round(2)\n", + "try:\n", + " # Round numerical values to 2 decimal places\n", + " df = df.round(2)\n", "\n", - "# sort the dataframe by date\n", - "df_StockPrice = df_StockPrice.sort_values(by=[\"stock_name\", \"date\"])\n", + " # Sort the DataFrame by stock name and date\n", + " df = df.sort_values(by=[\"stock_name\", \"date\"])\n", "\n", - "# Save the result as a csv file\n", - "df_StockPrice = df_StockPrice[[\"date\", \"stock_name\", \"open\", \"high\", \"low\", \"close\"]]\n", - "df_StockPrice.to_csv(global_path.stockprice_silver_file_path, index=None)\n", - "df_StockPrice.info()\n", + " # Select relevant columns\n", + " df = df[\n", + " [\"date\", \"stock_name\", \"open\", \"high\", \"low\", \"close\"]\n", + " ]\n", "\n", - "print(\"SILVER Layer csv file for Stock Price history successfully created at:\")\n", - "print(global_path.stockprice_silver_file_path.resolve())" + " # Save the result as a CSV file\n", + " df.to_csv(global_path.stockprice_silver_file_path, index=None)\n", + " logger.info(\n", + " \"SILVER Layer CSV file for Stock Price history successfully created at:\"\n", + " )\n", + " logger.info(global_path.stockprice_silver_file_path.resolve())\n", + " # Log the DataFrame info\n", + " df.info()\n", + "except Exception as e:\n", + " logger.error(f\"Failed to save SILVER Layer CSV file due to error: {e}\")" ] } ], diff --git a/NOTEBOOKS/04_ETL_Bronze2Silver_Layer_Ledger.ipynb b/NOTEBOOKS/04_ETL_Bronze2Silver_Layer_Ledger.ipynb index 84df3c38..0a59458f 100644 --- a/NOTEBOOKS/04_ETL_Bronze2Silver_Layer_Ledger.ipynb +++ b/NOTEBOOKS/04_ETL_Bronze2Silver_Layer_Ledger.ipynb @@ -4,19 +4,23 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## BRONZE TO SILVER LAYER\n" + "## BRONZE TO SILVER LAYER\n", + "\n", + "### Bronze Layer - Trade History" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ + "# Import necessary libraries and utility functions\n", "import pandas as pd\n", "from common_utilities import (\n", " global_path,\n", " replace_punctuation_from_columns,\n", + " logger\n", ")" ] }, @@ -24,14 +28,86 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Bronze Layer - Trade History\n" + "### Data Processing\n", + "\n", + "- Initialize an empty list to store DataFrames.\n", + "- Read and concatenate data from multiple CSV files.\n", + "- Harmonize column names and clean the data.\n", + "- Convert data types and add additional datetime columns." ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-08-01T13:03:41Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\Ledger\\ledger_2021.csv\n", + "2024-08-01T13:03:41Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\Ledger\\ledger_2122.csv\n", + "2024-08-01T13:03:41Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\Ledger\\ledger_2223.csv\n", + "2024-08-01T13:03:41Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\Ledger\\ledger_2324.csv\n", + "2024-08-01T13:03:41Z - INFO - Processing file: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\BRONZE\\Ledger\\ledger_2425.csv\n" + ] + } + ], + "source": [ + "# Initialize an empty list to store DataFrames\n", + "df_ledger_list = []\n", + "\n", + "# Generate file paths for available CSV files in the Bronze layer\n", + "file_paths = global_path.ledger_bronze_layer_path.glob(\"*.csv\")\n", + "\n", + "# Loop through List of all CSV files in the folder\n", + "for file_path in file_paths:\n", + " try:\n", + " logger.info(f\"Processing file: {file_path}\")\n", + " # Read the CSV file\n", + " df = pd.read_csv(file_path)\n", + " # Append the DataFrame to the list\n", + " df_ledger_list.append(df)\n", + " except Exception as e:\n", + " logger.error(f\"Failed to read {file_path} due to error: {e}\")\n", + "\n", + "# Concatenate all DataFrames into one\n", + "df_ledger = pd.concat(df_ledger_list, ignore_index=True)\n", + "\n", + "# Harmonize column names\n", + "df_ledger = replace_punctuation_from_columns(df_ledger)\n", + "\n", + "# Remove all-NA columns from each DataFrame\n", + "df_ledger.dropna(how=\"all\", axis=1, inplace=True)\n", + "\n", + "# Add Datetime Columns\n", + "df_ledger[\"trade_date\"] = pd.to_datetime(df_ledger[\"trade_date\"], format=\"%Y-%m-%d\").dt.date\n", + "df_ledger[\"settlement_date\"] = pd.to_datetime(df_ledger[\"settlement_date\"], format=\"%Y-%m-%d\").dt.date" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Final Processing and Export\n", + "\n", + "- Sort the DataFrame by relevant columns.\n", + "- Save the processed data as a CSV file in the Silver layer." + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-08-01T13:03:41Z - INFO - SILVER Layer CSV file for Bill Summary successfully created at:\n", + "2024-08-01T13:03:41Z - INFO - C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SILVER\\Ledger\\Ledger_data.csv\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -52,73 +128,39 @@ " 8 credit 39 non-null float64\n", " 9 closing_balance 100 non-null float64\n", "dtypes: float64(3), object(7)\n", - "memory usage: 8.6+ KB\n", - "SILVER Layer csv file for Bill Summary successfully created at:\n", - "C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SILVER\\Ledger\\Ledger_data.csv\n" + "memory usage: 8.6+ KB\n" ] } ], "source": [ - "# Initialize an empty list to store DataFrames\n", - "df_Ledger = []\n", - "\n", - "# Loop through List of all CSV files in the folder\n", - "for file_path in global_path.ledger_bronze_layer_path.glob(\"*.csv\"):\n", - " # Read the CSV file\n", - " df = pd.read_csv(file_path)\n", - " # Append the DataFrame to the list\n", - " df_Ledger.append(df)\n", - "\n", - "# Concatenate all DataFrames into one\n", - "df_Ledger = pd.concat(df_Ledger, ignore_index=True)\n", - "df_Ledger = replace_punctuation_from_columns(df_Ledger)\n", - "df_Ledger.dropna(how=\"all\", axis=1, inplace=True)\n", - "\n", - "# # Convert 'bill_number' to int\n", - "# df_Ledger[\"bill_number\"] = df_Ledger[\"bill_number\"].fillna(0).astype(int)\n", - "\n", - "# Add Datetime Col\n", - "df_Ledger[\"trade_date\"] = pd.to_datetime(df_Ledger[\"trade_date\"], format=\"%Y-%m-%d\").dt.date\n", - "df_Ledger[\"settlement_date\"] = pd.to_datetime(df_Ledger[\"settlement_date\"], format=\"%Y-%m-%d\").dt.date\n", - "\n", - "\n", - "# sort the dataframe by date\n", - "df_Ledger = df_Ledger.sort_values(by=[\"trade_date\", \"settlement_date\", \"exchange\", \"segment\"])\n", - "\n", - "\n", - "df_Ledger = df_Ledger[\n", - " [\n", - " \"wallet\",\n", - " \"trade_date\",\n", - " \"settlement_date\",\n", - " \"exchange\",\n", - " \"segment\",\n", - " \"type\",\n", - " \"narration\",\n", - " \"debit\",\n", - " \"credit\",\n", - " \"closing_balance\",\n", + "try:\n", + " # Sort the DataFrame by date and other relevant columns\n", + " df_ledger = df_ledger.sort_values(by=[\"trade_date\", \"settlement_date\", \"exchange\", \"segment\"])\n", + " \n", + " # Select relevant columns\n", + " df_ledger = df_ledger[\n", + " [\n", + " \"wallet\",\n", + " \"trade_date\",\n", + " \"settlement_date\",\n", + " \"exchange\",\n", + " \"segment\",\n", + " \"type\",\n", + " \"narration\",\n", + " \"debit\",\n", + " \"credit\",\n", + " \"closing_balance\",\n", + " ]\n", " ]\n", - "]\n", - "\n", - "# Save the result as a csv file\n", - "df_Ledger.to_csv(global_path.ledger_silver_file_path, index=None)\n", - "df_Ledger.info()\n", - "\n", - "print(\"SILVER Layer csv file for Bill Summary successfully created at:\")\n", - "print(global_path.ledger_silver_file_path.resolve())" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# for file_path in global_path.ledger_bronze_layer_path.glob(\"*.xlsx\"):\n", - "# # Read the xlsx file with header at line 8 (index 7 in pandas)\n", - "# df = pd.read_excel(file_path, header=7)\n", - "# df.to_csv(str(file_path).replace(\"Securities_846833.xlsx\",\".csv\"), index=False)" + " # Log the DataFrame info\n", + " df_ledger.info()\n", + " \n", + " # Save the result as a CSV file\n", + " df_ledger.to_csv(global_path.ledger_silver_file_path, index=None)\n", + " logger.info(\"SILVER Layer CSV file for Bill Summary successfully created at:\")\n", + " logger.info(global_path.ledger_silver_file_path.resolve())\n", + "except Exception as e:\n", + " logger.error(f\"Failed to save SILVER Layer CSV file due to error: {e}\")\n" ] } ], diff --git a/NOTEBOOKS/05_ETL_Silver2Gold_Layer_TradeHistory.ipynb b/NOTEBOOKS/05_ETL_Silver2Gold_Layer_TradeHistory.ipynb index ae0c12a6..55e54609 100644 --- a/NOTEBOOKS/05_ETL_Silver2Gold_Layer_TradeHistory.ipynb +++ b/NOTEBOOKS/05_ETL_Silver2Gold_Layer_TradeHistory.ipynb @@ -4,137 +4,202 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## SILVER TO GOLD LAYER\n" + "## SILVER TO GOLD LAYER\n", + "\n", + "### Gold Layer - Trade History\n" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ + "# Import necessary libraries and utility functions\n", "import pandas as pd\n", - "from common_utilities import Portfolio, global_path" + "from common_utilities import Portfolio, global_path, logger" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Gold Layer - Trade History\n" + "### Data Processing\n", + "\n", + "- Read and sort trade history data.\n", + "- Apply portfolio trade logic.\n" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-08-01T14:05:16Z - INFO - Read SILVER Layer trade history data from: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SILVER\\TradeHistory\\TradeHistory_data.csv\n" + ] + } + ], "source": [ - "# read the csv file\n", - "df_TradeHistory = pd.read_csv(global_path.tradehistory_silver_file_path)\n", + "# Read the CSV file\n", + "try:\n", + " df_trade_history = pd.read_csv(global_path.tradehistory_silver_file_path)\n", "\n", - "# Convert 'datetime' to datetime type\n", - "df_TradeHistory[\"datetime\"] = pd.to_datetime(df_TradeHistory[\"datetime\"])\n", + " # Convert 'datetime' to datetime type\n", + " df_trade_history[\"datetime\"] = pd.to_datetime(df_trade_history[\"datetime\"])\n", "\n", - "# sort the dataframe by date\n", - "df_TradeHistory = df_TradeHistory.sort_values(by=\"datetime\")" + " # Sort the DataFrame by 'datetime'\n", + " df_trade_history = df_trade_history.sort_values(by=\"datetime\")\n", + "\n", + " logger.info(\n", + " f\"Read SILVER Layer trade history data from: {global_path.tradehistory_silver_file_path}\"\n", + " )\n", + "except Exception as e:\n", + " logger.error(\n", + " f\"Failed to read SILVER Layer trade history data due to error: {e}\"\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Portfolio Logic Application\n", + "\n", + "- Instantiate Portfolio and apply trade logic.\n", + "- Handle expired stocks and round necessary columns.\n" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-08-01T14:05:17Z - ERROR - Failed to process trade history data due to error: Must specify a fill 'value' or 'method'.\n" + ] + } + ], "source": [ - "portfolio = Portfolio()\n", + "# Apply the trade logic to each row of the DataFrame\n", + "try:\n", + " # Instantiate the Portfolio object\n", + " portfolio = Portfolio()\n", + "\n", + " data = [\n", + " portfolio.trade(row.to_dict()) for _, row in df_trade_history.iterrows()\n", + " ]\n", + " data += portfolio.check_expired_stocks()\n", "\n", - "# Apply the function of trade logic to each row of the DataFrame\n", - "data = [portfolio.trade(row.to_dict()) for _, row in df_TradeHistory.iterrows()]\n", + " # Create a DataFrame from the processed data\n", + " df_trade_history = pd.DataFrame(data)\n", "\n", - "df_TradeHistory = pd.DataFrame(data)\n", + " # Round the values in the columns to two decimal places\n", + " df_trade_history = df_trade_history.round(2)\n", + " \n", "\n", - "# Create a list of column names that contain the substrings 'price', 'amount', or 'pnl'\n", - "columns_to_round = [\n", - " col \n", - " for col in df_TradeHistory.columns\n", - " if \"price\" in col or \"amount\" in col or \"pnl\" in col \n", - "]\n", + " df_trade_history = df_trade_history.fillna(None)\n", "\n", - "# Round the values in the selected columns to two decimal places\n", - "df_TradeHistory[columns_to_round] = df_TradeHistory[columns_to_round].round(2)" + "except Exception as e:\n", + " logger.error(f\"Failed to process trade history data due to error: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Final Processing and Export\n", + "\n", + "- Select and sort relevant columns.\n", + "- Save the processed data as a CSV file in the Gold layer.\n" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 4, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-08-01T14:05:17Z - INFO - GOLD Layer CSV file for trade history successfully created at:\n", + "2024-08-01T14:05:17Z - INFO - C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\GOLD\\TradeHistory\\TradeHistory_data.csv\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ "\n", - "Index: 175 entries, 2 to 93\n", - "Data columns (total 13 columns):\n", + "Index: 176 entries, 2 to 93\n", + "Data columns (total 15 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", - " 0 datetime 175 non-null datetime64[ns]\n", - " 1 exchange 175 non-null object \n", - " 2 segment 175 non-null object \n", - " 3 stock_name 175 non-null object \n", - " 4 scrip_code 175 non-null object \n", - " 5 side 175 non-null object \n", - " 6 quantity 175 non-null float64 \n", - " 7 price 175 non-null float64 \n", - " 8 amount 175 non-null float64 \n", - " 9 holding_quantity 175 non-null int64 \n", - " 10 avg_price 175 non-null float64 \n", - " 11 holding_amount 175 non-null float64 \n", - " 12 pnl_amount 175 non-null float64 \n", - "dtypes: datetime64[ns](1), float64(6), int64(1), object(5)\n", - "memory usage: 19.1+ KB\n", - "GOLD Layer csv file for trade history successfully created at:\n", - "C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\GOLD\\TradeHistory\\TradeHistory_data.csv\n" + " 0 datetime 176 non-null datetime64[ns]\n", + " 1 exchange 176 non-null object \n", + " 2 segment 176 non-null object \n", + " 3 stock_name 176 non-null object \n", + " 4 scrip_code 176 non-null object \n", + " 5 expiry_date 176 non-null object \n", + " 6 side 176 non-null object \n", + " 7 quantity 176 non-null float64 \n", + " 8 price 176 non-null float64 \n", + " 9 amount 176 non-null float64 \n", + " 10 holding_quantity 176 non-null float64 \n", + " 11 avg_price 176 non-null float64 \n", + " 12 holding_amount 176 non-null float64 \n", + " 13 pnl_amount 176 non-null float64 \n", + " 14 pnl_percentage 176 non-null float64 \n", + "dtypes: datetime64[ns](1), float64(8), object(6)\n", + "memory usage: 22.0+ KB\n" ] } ], "source": [ - "df_TradeHistory = df_TradeHistory[\n", - " [\n", - " \"datetime\",\n", - " \"exchange\",\n", - " \"segment\",\n", - " \"stock_name\",\n", - " \"scrip_code\",\n", - " \"side\",\n", - " \"quantity\",\n", - " \"price\",\n", - " \"amount\",\n", - " \"holding_quantity\",\n", - " \"avg_price\",\n", - " \"holding_amount\",\n", - " \"pnl_amount\",\n", + "# Save the result as a CSV file\n", + "try:\n", + " # Sort the DataFrame by 'segment', 'stock_name', and 'datetime'\n", + " df_trade_history = df_trade_history.sort_values(\n", + " by=[\"segment\", \"stock_name\", \"datetime\"]\n", + " )\n", + " # Select relevant columns\n", + " df_trade_history = df_trade_history[\n", + " [\n", + " \"datetime\",\n", + " \"exchange\",\n", + " \"segment\",\n", + " \"stock_name\",\n", + " \"scrip_code\",\n", + " \"expiry_date\",\n", + " \"side\",\n", + " \"quantity\",\n", + " \"price\",\n", + " \"amount\",\n", + " \"holding_quantity\",\n", + " \"avg_price\",\n", + " \"holding_amount\",\n", + " \"pnl_amount\",\n", + " \"pnl_percentage\"\n", + " ]\n", " ]\n", - "]\n", - "\n", - "# sort the dataframe by date\n", - "df_TradeHistory = df_TradeHistory.sort_values(\n", - " by=[\"segment\", \"stock_name\", \"datetime\"]\n", - ")\n", "\n", - "# Save the result as a csv file\n", - "df_TradeHistory.to_csv(global_path.tradehistory_gold_file_path, index=None)\n", - "df_TradeHistory.info()\n", - "print(\"GOLD Layer csv file for trade history successfully created at:\")\n", - "print(global_path.tradehistory_gold_file_path.resolve())" + " df_trade_history.to_csv(global_path.tradehistory_gold_file_path, index=None)\n", + " logger.info(\n", + " \"GOLD Layer CSV file for trade history successfully created at:\"\n", + " )\n", + " logger.info(global_path.tradehistory_gold_file_path.resolve())\n", + " # Log the DataFrame info\n", + " df_trade_history.info()\n", + "except Exception as e:\n", + " logger.error(f\"Failed to save GOLD Layer CSV file due to error: {e}\")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/NOTEBOOKS/06_ETL_Silver2Gold_Layer_ProfitLoss.ipynb b/NOTEBOOKS/06_ETL_Silver2Gold_Layer_ProfitLoss.ipynb index 507d562d..b2cbe210 100644 --- a/NOTEBOOKS/06_ETL_Silver2Gold_Layer_ProfitLoss.ipynb +++ b/NOTEBOOKS/06_ETL_Silver2Gold_Layer_ProfitLoss.ipynb @@ -4,63 +4,65 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## BRONZE TO SILVER LAYER\n" + "## BRONZE TO SILVER LAYER\n", + "\n", + "### SILVER Layer - Process ProfitLoss History\n" ] }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ + "# Import necessary libraries and utility functions\n", "import pandas as pd\n", - "from common_utilities import global_path" + "from common_utilities import global_path, logger" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### SILVER Layer - Process ProfitLoss History\n" + "### Data Processing\n", + "\n", + "- Read the trade history data from the Gold layer.\n", + "- Filter, transform, and calculate new columns.\n", + "- Save the processed data to the Silver layer.\n" ] }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 6, "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "\n", - "RangeIndex: 83 entries, 0 to 82\n", - "Data columns (total 12 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 datetime 83 non-null datetime64[ns]\n", - " 1 exchange 83 non-null object \n", - " 2 segment 83 non-null object \n", - " 3 stock_name 83 non-null object \n", - " 4 side 83 non-null object \n", - " 5 quantity 83 non-null float64 \n", - " 6 open_side_price 83 non-null float64 \n", - " 7 open_side_amount 83 non-null float64 \n", - " 8 close_side_price 83 non-null float64 \n", - " 9 close_side_amount 83 non-null float64 \n", - " 10 pnl_amount 83 non-null float64 \n", - " 11 pnl_percentage 83 non-null float64 \n", - "dtypes: datetime64[ns](1), float64(7), object(4)\n", - "memory usage: 7.9+ KB\n", - "GOLD Layer csv file for ProfitLoss successfully created at:\n", - "C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\GOLD\\ProfitLoss\\ProfitLoss_data.csv\n" + "2024-08-01T13:56:37Z - INFO - Read GOLD Layer trade history data from: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\GOLD\\TradeHistory\\TradeHistory_data.csv\n" ] } ], "source": [ - "# Read the CSV file into a DataFrame\n", - "df = pd.read_csv(global_path.tradehistory_gold_file_path)\n", - "\n", + "try:\n", + " # Read the CSV file into a DataFrame\n", + " df = pd.read_csv(global_path.tradehistory_gold_file_path)\n", + " logger.info(\n", + " f\"Read GOLD Layer trade history data from: {global_path.tradehistory_gold_file_path}\"\n", + " )\n", + "except Exception as e:\n", + " logger.error(\n", + " f\"Failed to read GOLD Layer trade history data due to error: {e}\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ "# Convert 'datetime' column to datetime objects\n", "df[\"datetime\"] = pd.to_datetime(df[\"datetime\"])\n", "\n", @@ -68,66 +70,107 @@ "df = df[df[\"pnl_amount\"] != 0]\n", "\n", "# Update 'side' column: 'SELL' becomes 'LONG', 'BUY' becomes 'SHORT'\n", - "df[\"side\"] = df[\"side\"].apply(lambda x: \"LONG\" if x == \"SELL\" else \"SHORT\" if x == \"BUY\" else x)\n", + "df[\"side\"] = df[\"side\"].apply(\n", + " lambda x: \"LONG\" if x == \"SELL\" else \"SHORT\" if x == \"BUY\" else x\n", + ")\n", "\n", "# Rename columns for clarity\n", "df = df.rename(\n", " columns={\n", - " \"price\": \"close_side_price\",\n", - " \"avg_price\": \"open_side_price\",\n", + " \"price\": \"close_price\",\n", + " \"avg_price\": \"open_price\",\n", " }\n", ")\n", "\n", "# Calculate open and close side amounts\n", - "df[\"open_side_amount\"] = df[\"open_side_price\"] * df[\"quantity\"]\n", - "df[\"close_side_amount\"] = df[\"close_side_price\"] * df[\"quantity\"]\n", - "\n", - "# Calculate PnL percentage\n", - "df[\"pnl_percentage\"] = ((df[\"close_side_amount\"] - df[\"open_side_amount\"]) / df[\"open_side_amount\"]) * 100\n", + "df[\"open_amount\"] = df[\"open_price\"] * df[\"quantity\"]\n", + "df[\"close_amount\"] = df[\"close_price\"] * df[\"quantity\"]\n", "\n", "# Sort the DataFrame by 'segment', 'stock_name', and 'datetime'\n", "df = df.sort_values(by=[\"segment\", \"stock_name\", \"datetime\"])\n", "\n", - "# Create a list of column names that contain the substrings 'price', 'amount', or 'pnl'\n", - "columns_to_round = [\n", - " col \n", - " for col in df.columns\n", - " if \"price\" in col or \"amount\" in col or \"pnl\" in col \n", - "]\n", - "\n", - "# Round the values in the selected columns to two decimal places\n", - "df[columns_to_round] = df[columns_to_round].round(2)\n", + "# Round the values in to two decimal places\n", + "df = df.round(2)\n", "\n", "# Reset index to ensure it starts from 0\n", - "df = df.reset_index(drop=True)\n", - "\n", - "# Select and reorder the columns for the final DataFrame\n", - "df = df[\n", - " [\n", - " \"datetime\",\n", - " \"exchange\",\n", - " \"segment\",\n", - " \"stock_name\",\n", - " \"side\",\n", - " \"quantity\",\n", - " \"open_side_price\",\n", - " \"open_side_amount\",\n", - " \"close_side_price\",\n", - " \"close_side_amount\",\n", - " \"pnl_amount\",\n", - " \"pnl_percentage\"\n", + "df = df.reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-08-01T13:56:37Z - INFO - GOLD Layer CSV file for ProfitLoss successfully created at:\n", + "2024-08-01T13:56:37Z - INFO - C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\GOLD\\ProfitLoss\\ProfitLoss_data.csv\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 84 entries, 0 to 83\n", + "Data columns (total 12 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 datetime 84 non-null datetime64[ns]\n", + " 1 exchange 84 non-null object \n", + " 2 segment 84 non-null object \n", + " 3 stock_name 84 non-null object \n", + " 4 side 84 non-null object \n", + " 5 quantity 84 non-null float64 \n", + " 6 open_price 84 non-null float64 \n", + " 7 open_amount 84 non-null float64 \n", + " 8 close_price 84 non-null float64 \n", + " 9 close_amount 84 non-null float64 \n", + " 10 pnl_amount 84 non-null float64 \n", + " 11 pnl_percentage 84 non-null float64 \n", + "dtypes: datetime64[ns](1), float64(7), object(4)\n", + "memory usage: 8.0+ KB\n" + ] + } + ], + "source": [ + "try:\n", + " # Select and reorder the columns for the final DataFrame\n", + " df = df[\n", + " [\n", + " \"datetime\",\n", + " \"exchange\",\n", + " \"segment\",\n", + " \"stock_name\",\n", + " \"side\",\n", + " \"quantity\",\n", + " \"open_price\",\n", + " \"open_amount\",\n", + " \"close_price\",\n", + " \"close_amount\",\n", + " \"pnl_amount\",\n", + " \"pnl_percentage\",\n", + " ]\n", " ]\n", - "]\n", - "\n", - "# Save the final DataFrame to a CSV file\n", - "df.to_csv(global_path.profitloss_gold_file_path, index=None)\n", - "\n", - "# Display the DataFrame information\n", - "df.info()\n", - "\n", - "# Print the success message with the path of the saved file\n", - "print(\"GOLD Layer csv file for ProfitLoss successfully created at:\")\n", - "print(global_path.profitloss_gold_file_path.resolve())\n" + " # Save the final DataFrame to a CSV file\n", + " df.to_csv(global_path.profitloss_gold_file_path, index=None)\n", + " logger.info(\"GOLD Layer CSV file for ProfitLoss successfully created at:\")\n", + " logger.info(global_path.profitloss_gold_file_path.resolve())\n", + " # Display the DataFrame information\n", + " df.info()\n", + "except Exception as e:\n", + " logger.error(f\"Failed to save GOLD Layer CSV file due to error: {e}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "# print(amount)" ] } ], diff --git a/NOTEBOOKS/07_ETL_Silver2Gold_Layer_Holdings.ipynb b/NOTEBOOKS/07_ETL_Silver2Gold_Layer_Holdings.ipynb index 95e3cf5c..0e1837d6 100644 --- a/NOTEBOOKS/07_ETL_Silver2Gold_Layer_Holdings.ipynb +++ b/NOTEBOOKS/07_ETL_Silver2Gold_Layer_Holdings.ipynb @@ -4,17 +4,20 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## BRONZE TO SILVER LAYER\n" + "## BRONZE TO SILVER LAYER\n", + "\n", + "### GOLD LAYER - PROCESS HOLDING RECORDS HISTORY\n" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ + "# Import necessary libraries and utility functions\n", "import pandas as pd\n", - "from common_utilities import global_path\n", + "from common_utilities import global_path, logger\n", "import datetime" ] }, @@ -22,63 +25,94 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### GOLD LAYER - PROCESS HOLDING RECORDS HISTORY\n" + "### Data Processing\n", + "\n", + "- Load and Filter trade history data from the Gold layer.\n" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-08-01T13:23:36Z - INFO - Loaded GOLD Layer trade history data from: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\GOLD\\TradeHistory\\TradeHistory_data.csv\n" + ] + } + ], "source": [ - "# Load trade history from CSV into DataFrame\n", - "df = pd.read_csv(global_path.tradehistory_gold_file_path)\n", - "\n", - "# Filter for specific segments\n", - "df = df[df[\"segment\"].isin([\"EQ\", \"MF\"])]\n", - "\n", - "df = df[\n", - " [\n", - " \"datetime\",\n", - " \"segment\",\n", - " \"stock_name\",\n", - " \"avg_price\",\n", - " \"holding_quantity\",\n", - " \"holding_amount\",\n", + "try:\n", + " # Load trade history from CSV into DataFrame\n", + " df = pd.read_csv(global_path.tradehistory_gold_file_path)\n", + " logger.info(\n", + " f\"Loaded GOLD Layer trade history data from: {global_path.tradehistory_gold_file_path}\"\n", + " )\n", + "\n", + " # Filter for specific segments\n", + " df = df[df[\"segment\"].isin([\"EQ\", \"MF\"])]\n", + "\n", + " # Select relevant columns\n", + " df = df[\n", + " [\n", + " \"datetime\",\n", + " \"segment\",\n", + " \"stock_name\",\n", + " \"avg_price\",\n", + " \"holding_quantity\",\n", + " \"holding_amount\",\n", + " ]\n", " ]\n", - "]" + "\n", + " # Convert 'datetime' column to datetime objects\n", + " df[\"datetime\"] = pd.to_datetime(df[\"datetime\"])\n", + "\n", + " # Add a 'date' column by extracting the date part from 'datetime'\n", + " df[\"date\"] = df[\"datetime\"].dt.date\n", + "\n", + " # Sort DataFrame by 'segment', 'stock_name', and 'datetime'\n", + " df = df.sort_values(by=[\"segment\", \"stock_name\", \"datetime\"])\n", + "except Exception as e:\n", + " logger.error(\n", + " f\"Failed to read GOLD Layer trade history data due to error: {e}\"\n", + " )" ] }, { - "cell_type": "code", - "execution_count": 19, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# Convert 'datetime' column to datetime objects\n", - "df[\"datetime\"] = pd.to_datetime(df[\"datetime\"])\n", - "\n", - "# Add a 'date' column by extracting the date part from 'datetime'\n", - "df[\"date\"] = df[\"datetime\"].dt.date\n", - "\n", - "# Sort DataFrame by 'segment', 'stock_name', and 'datetime'\n", - "df = df.sort_values(by=[\"segment\", \"stock_name\", \"datetime\"])\n", + "### Data Processing\n", "\n", - "# Get maximum 'datetime' for each 'date' and 'stock_name' combination\n", - "max_datetime_df = (\n", - " df.groupby([\"date\", \"stock_name\"])[\"datetime\"].max().reset_index()\n", - ")\n", - "\n", - "# Retain only rows with maximum datetime for each 'date' and 'stock_name'\n", - "df = df.merge(max_datetime_df, on=[\"date\", \"stock_name\", \"datetime\"])" + "- Process data to include all dates up to today.\n", + "- Merge with stock price data and calculate current values.\n", + "- Save the processed data to the Gold layer.\n" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-08-01T13:23:37Z - INFO - Loaded SILVER Layer stock price data from: C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\SILVER\\StockPrice\\StockPrice_data.csv\n" + ] + } + ], "source": [ + "# Get maximum 'datetime' for each 'date' and 'stock_name' combination\n", + "max_datetime_df = (\n", + " df.groupby([\"date\", \"stock_name\"])[\"datetime\"].max().reset_index()\n", + ")\n", + "\n", + "# Retain only rows with maximum datetime for each 'date' and 'stock_name'\n", + "df = df.merge(max_datetime_df, on=[\"date\", \"stock_name\", \"datetime\"])\n", + "\n", "# Sort the DataFrame by 'segment', 'stock_name', and 'date'\n", "df = df.sort_values(by=[\"segment\", \"stock_name\", \"date\"]).reset_index(drop=True)\n", "\n", @@ -103,19 +137,20 @@ " result.append(stock_data)\n", "\n", "# Combine processed DataFrames into one\n", - "df = pd.concat(result, ignore_index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ + "df = pd.concat(result, ignore_index=True)\n", + "\n", "# Load stock prices and merge with the main DataFrame\n", - "df_StockPrice = pd.read_csv(global_path.stockprice_silver_file_path)\n", - "df_StockPrice = df_StockPrice[[\"date\", \"stock_name\", \"close\"]]\n", - "df_StockPrice[\"date\"] = pd.to_datetime(df_StockPrice[\"date\"])\n", + "try:\n", + " df_StockPrice = pd.read_csv(global_path.stockprice_silver_file_path)\n", + " df_StockPrice = df_StockPrice[[\"date\", \"stock_name\", \"close\"]]\n", + " df_StockPrice[\"date\"] = pd.to_datetime(df_StockPrice[\"date\"])\n", + " logger.info(\n", + " f\"Loaded SILVER Layer stock price data from: {global_path.stockprice_silver_file_path}\"\n", + " )\n", + "except Exception as e:\n", + " logger.error(\n", + " f\"Failed to read SILVER Layer stock price data due to error: {e}\"\n", + " )\n", "\n", "# Merge stock price data\n", "df = pd.merge(df, df_StockPrice, on=[\"date\", \"stock_name\"], how=\"left\")\n", @@ -125,67 +160,87 @@ " columns={\n", " \"holding_amount\": \"investment\",\n", " \"holding_quantity\": \"quantity\",\n", - " \"close\": \"current_price\",\n", + " \"close\": \"ltp\",\n", " }\n", ")\n", - "df[\"current_value\"] = df[\"current_price\"] * df[\"quantity\"]\n", + "df[\"current_value\"] = df[\"ltp\"] * df[\"quantity\"]\n", + "\n", + "# Calculate PnL and percentage\n", + "df[\"pnl_amount\"] = df[\"current_value\"] - df[\"investment\"]\n", + "df[\"pnl_percentage\"] = (df[\"pnl_amount\"] / df[\"investment\"]) * 100\n", + "\n", "\n", "# Filter out rows with zero 'holding_quantity'\n", - "df = df[(df[\"investment\"] != 0) & (df[\"current_value\"]!= 0)]" + "df = df[(df[\"investment\"] != 0) & (df[\"current_value\"] != 0)]\n", + "\n", + "# Round the values in to two decimal places\n", + "df = df.round(2)\n", + "\n", + "# Final sorting and column selection\n", + "df = df.sort_values(by=[\"segment\", \"stock_name\", \"date\"]).reset_index(drop=True)" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 4, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2024-08-01T13:23:37Z - INFO - GOLD Layer CSV file for Holdings successfully created at:\n", + "2024-08-01T13:23:37Z - INFO - C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\GOLD\\Holdings\\Holdings_data.csv\n" + ] + }, { "name": "stdout", "output_type": "stream", "text": [ "\n", - "RangeIndex: 5857 entries, 0 to 5856\n", - "Data columns (total 8 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 date 5857 non-null datetime64[ns]\n", - " 1 segment 5857 non-null object \n", - " 2 stock_name 5857 non-null object \n", - " 3 quantity 5857 non-null int64 \n", - " 4 avg_price 5857 non-null float64 \n", - " 5 investment 5857 non-null float64 \n", - " 6 current_price 3759 non-null float64 \n", - " 7 current_value 3759 non-null float64 \n", - "dtypes: datetime64[ns](1), float64(4), int64(1), object(2)\n", - "memory usage: 366.2+ KB\n", - "GOLD Layer CSV file for Holdings successfully created at:\n", - "C:\\Users\\prashant.tripathi\\Code\\Upstox\\DATA\\GOLD\\Holdings\\Holdings_data.csv\n" + "RangeIndex: 5869 entries, 0 to 5868\n", + "Data columns (total 10 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 date 5869 non-null datetime64[ns]\n", + " 1 segment 5869 non-null object \n", + " 2 stock_name 5869 non-null object \n", + " 3 quantity 5869 non-null float64 \n", + " 4 avg_price 5869 non-null float64 \n", + " 5 investment 5869 non-null float64 \n", + " 6 ltp 3759 non-null float64 \n", + " 7 current_value 3759 non-null float64 \n", + " 8 pnl_amount 3759 non-null float64 \n", + " 9 pnl_percentage 3759 non-null float64 \n", + "dtypes: datetime64[ns](1), float64(7), object(2)\n", + "memory usage: 458.6+ KB\n" ] } ], "source": [ - "# Final sorting and column selection\n", - "df = df.sort_values(by=[\"segment\", \"stock_name\", \"date\"]).reset_index(drop=True)\n", - "df = df[\n", - " [\n", - " \"date\",\n", - " \"segment\",\n", - " \"stock_name\",\n", - " \"quantity\",\n", - " \"avg_price\",\n", - " \"investment\",\n", - " \"current_price\",\n", - " \"current_value\",\n", - " ]\n", - "]\n", - "\n", "# Save the final DataFrame to a CSV file\n", - "df.to_csv(global_path.holdings_gold_file_path, index=None)\n", - "\n", - "# Display DataFrame information and print success message\n", - "df.info()\n", - "print(\"GOLD Layer CSV file for Holdings successfully created at:\")\n", - "print(global_path.holdings_gold_file_path.resolve())" + "try:\n", + " df = df[\n", + " [\n", + " \"date\",\n", + " \"segment\",\n", + " \"stock_name\",\n", + " \"quantity\",\n", + " \"avg_price\",\n", + " \"investment\",\n", + " \"ltp\",\n", + " \"current_value\",\n", + " \"pnl_amount\",\n", + " \"pnl_percentage\"\n", + " ]\n", + " ]\n", + " df.to_csv(global_path.holdings_gold_file_path, index=None)\n", + " logger.info(\"GOLD Layer CSV file for Holdings successfully created at:\")\n", + " logger.info(global_path.holdings_gold_file_path.resolve())\n", + " # Display DataFrame information and print success message\n", + " df.info()\n", + "except Exception as e:\n", + " logger.error(f\"Failed to save GOLD Layer CSV file due to error: {e}\")" ] } ], diff --git a/NOTEBOOKS/common_utilities.py b/NOTEBOOKS/common_utilities.py index a991ac5d..799068c0 100644 --- a/NOTEBOOKS/common_utilities.py +++ b/NOTEBOOKS/common_utilities.py @@ -1,81 +1,211 @@ -import os - # Importing necessary files and packages +import os import re +import copy import json +import logging import pathlib import datetime +from typing import Any, Dict, List + +# Set up the logger +logging.basicConfig( + level=logging.INFO, + format="{asctime} - {levelname} - {message}", + style="{", + datefmt="%Y-%m-%dT%H:%M:%SZ", +) -import dateutil +logger = logging.getLogger() +logger.setLevel(logging.DEBUG) class Portfolio: - def __init__(self): - self.stocks = dict() + """ + A class representing a portfolio of stocks. + It manages trades and checks for expired stocks. + """ + + def __init__(self) -> None: + """ + Initializes a new Portfolio instance with an empty dictionary of stocks. + """ + self.stocks: Dict[str, Stock] = {} + + def trade(self, record: Dict[str, Any]) -> Dict[str, Any]: + """ + Executes a trade for a stock in the portfolio based on the provided record. - def trade(self, record: dict = None): + Args: + record (dict): A dictionary containing details of the trade such as + stock_name, side, price, and quantity. + + Returns: + dict: The updated trade record with additional information. + """ stock_name = str(record.get("stock_name")) if stock_name not in self.stocks: - self.stocks[stock_name] = Stock() + self.stocks[stock_name] = Stock(record) - record.update( - self.stocks[stock_name].trade( - side=str(record.get("side")).upper(), - traded_price=float(record.get("price")), - traded_quantity=int(record.get("quantity")), - ) + trade_result = self.stocks[stock_name].trade( + side=str(record.get("side")).upper(), + traded_price=float(record.get("price")), + traded_quantity=float(record.get("quantity")), ) + + record.update(trade_result) return record + def check_expired_stocks(self) -> List[Dict[str, Any]]: + """ + Checks for expired stocks in the portfolio and performs necessary trades. + + Returns: + list: A list of expired trade records with detailed information. + """ + expired_trades = [] + + for stock in self.stocks.values(): + if stock.holding_quantity != 0: + # logger.info("%s => %s", stock.stock_name, stock.holding_quantity) + if self.is_expired(stock.expiry_date): + trade_result = stock.trade( + side="SELL" if stock.holding_quantity > 0 else "BUY", + traded_price=0, + traded_quantity=abs(stock.holding_quantity), + ) + trade_result.update( + { + "datetime": datetime.datetime.combine( + datetime.datetime.strptime( + stock.expiry_date, "%Y-%m-%d" + ), + datetime.time(15, 30), + ), + "side": "EXPIRED", + } + ) + expired_trades.append(trade_result) + + return expired_trades + + def is_expired(self, date_str: str) -> bool: + """ + Checks if a given date is in the past. + + Args: + date_str (str): The date string to check. + + Returns: + bool: True if the date is in the past, False otherwise. + """ + try: + return datetime.datetime.today() > datetime.datetime.strptime( + date_str, "%Y-%m-%d" + ) + except ValueError: + # logger.warning(e) + return False + class Stock: - def __init__(self): - self.holding_quantity = 0 - self.avg_price = 0 + """ + A class representing a single stock. + It manages trades and calculates the average price and profit/loss. + """ + + def __init__(self, record: Dict[str, Any]) -> None: + """ + Initializes a new Stock instance with the given record details. + + Args: + record (dict): A dictionary containing details of the stock such as + stock_name, exchange, segment, scrip_code, and expiry_date. + """ + self.stock_name = str(record.get("stock_name")) + self.exchange = str(record.get("exchange")) + self.segment = str(record.get("segment")) + self.scrip_code = str(record.get("scrip_code")) + self.expiry_date = str(record.get("expiry_date")).replace("nan", "") + self.holding_quantity = 0.0 + self.avg_price = 0.0 + + def trade( + self, side: str, traded_price: float, traded_quantity: float + ) -> Dict[str, Any]: + """ + Executes a trade for the stock and updates its state. - def trade(self, side: str, traded_price, traded_quantity): - # buy: positive position, sell: negative position + Args: + side (str): The side of the trade, either 'BUY' or 'SELL'. + traded_price (float): The price at which the stock was traded. + traded_quantity (float): The quantity of the stock traded. + + Returns: + dict: A dictionary containing details of the trade and updated stock state. + """ + # BUY: positive position, SELL: negative position traded_quantity = ( traded_quantity if side == "BUY" else (-1) * traded_quantity ) if (self.holding_quantity * traded_quantity) >= 0: - # realized pnl + # Realized PnL pnl_amount = 0 - # avg open price + pnl_percentage = 0 + # Avg open price self.avg_price = ( (self.avg_price * self.holding_quantity) + (traded_price * traded_quantity) ) / (self.holding_quantity + traded_quantity) else: + # Calculate PnL and percentage pnl_amount = ( (traded_price - self.avg_price) * min(abs(traded_quantity), abs(self.holding_quantity)) * (abs(self.holding_quantity) / self.holding_quantity) ) + pnl_percentage = ( + pnl_amount + / ( + self.avg_price + * min(abs(traded_quantity), abs(self.holding_quantity)) + ) + ) * 100 + # Check if it is close-and-open if abs(traded_quantity) > abs(self.holding_quantity): self.avg_price = traded_price - # net position + # Net position self.holding_quantity += traded_quantity - return { - "avg_price": self.avg_price, - "holding_quantity": self.holding_quantity, - "holding_amount": self.holding_quantity * self.avg_price, - "pnl_amount": pnl_amount, - } + trade_result = copy.deepcopy(self.__dict__) + trade_result.update( + { + "side": side, + "amount": abs(traded_price * traded_quantity), + "quantity": abs(traded_quantity), + "price": traded_price, + "holding_amount": self.holding_quantity * self.avg_price, + "pnl_amount": pnl_amount, + "pnl_percentage": pnl_percentage, + } + ) + return trade_result class GlobalPath: """ - Global Paths Class + A Global Paths Class for managing global paths for various data layers and files. """ def __init__(self) -> None: - # Base Location (Current Working Dirctory Path) + """ + Initializes a new GlobalPath instance and sets up directory paths. + """ + # Base Location (Current Working Directory Path) self.base_path = pathlib.Path(os.getcwd()) if self.base_path.name != "Upstox": self.base_path = self.base_path.parent @@ -132,9 +262,15 @@ def __init__(self) -> None: "GOLD/Holdings/Holdings_data.csv" ) - def make_path(self, source_path): + def make_path(self, source_path: str) -> pathlib.Path: """ - funcation to generate file path + Generates and creates a directory path. + + Args: + source_path (str): The source path to append to the base path. + + Returns: + pathlib.Path: The full resolved path. """ data_path = self.base_path.joinpath(source_path).resolve() data_path.parent.mkdir(parents=True, exist_ok=True) @@ -147,32 +283,20 @@ def make_path(self, source_path): # def get_stock_price_data(name, from_date, to_date): # """ # Fetches stock price data from Yahoo Finance for a given stock within the specified date range. - - # Parameters: # name (str): Stock ticker name (e.g., 'SBIN.NS' for SBI). # from_date (str): Start date in 'YYYY-MM-DD' format. # to_date (str): End date in 'YYYY-MM-DD' format. - - # Returns: # str: CSV data as text. # """ - - # # Convert date strings to Unix timestamps # from_date_unix_ts = int(time.mktime(datetime.strptime(from_date, "%Y-%m-%d").timetuple())) # to_date_unix_ts = int(time.mktime(datetime.strptime(to_date, "%Y-%m-%d").timetuple())) - - # # Construct the URL for the API call # url = f"https://query1.finance.yahoo.com/v7/finance/download/{name}?period1={from_date_unix_ts}&period2={to_date_unix_ts}&interval=1d&events=history&includeAdjustedClose=true" - - # # Make the API call # response = requests.get(url) - - # # Check if the request was successful # if response.status_code == 200: # # Return the CSV data as text @@ -183,19 +307,41 @@ def make_path(self, source_path): # Check for newly added or modified files -def check_files_availability(directory, file_pattern="*"): +def check_files_availability( + directory: str, + file_pattern: str = "*", + timestamp: datetime.datetime = datetime.datetime.strptime( + "2000-01-01", "%Y-%m-%d" + ), +) -> List[str]: + """ + Checks for newly added or modified files in a directory after a specific timestamp. + + Args: + directory (str): The directory to check for files. + file_pattern (str) : + timestamp (datetime.datetime): The timestamp to compare file modification times against. + + Returns: + list: A list of paths to files that were added or modified after the given timestamp. + """ # List to store paths of matched files file_paths = [] # Iterate over all files in the directory and subdirectories for path in pathlib.Path(directory).rglob(file_pattern): if path.is_file(): - file_paths.append(path) + file_modified_time = datetime.datetime.fromtimestamp( + os.path.getmtime(path) + ) + # Check if file was modified after the given timestamp + if file_modified_time > timestamp: + file_paths.append(path) # Log the number of detected files num_files = len(file_paths) if num_files > 0: - print(f"Number of Files Detected: {num_files}") + logger.info(f"Number of Files Detected: {num_files}") return file_paths else: raise FileNotFoundError("No processable data available") @@ -229,7 +375,6 @@ def replace_punctuation_from_columns(df_pandas): new_col_name = replace_punctuation_from_string(col_name) new_col_names.append(new_col_name) df_pandas.columns = new_col_names - # print("display from column_rename") return df_pandas @@ -275,17 +420,6 @@ def get_schema_from_data_contract(json_path): return schema -# UDF function to parse a datetime string -def parse_datetime(datetime_str): - """ - Attempt to parse the datetime string using dateutil.parser - """ - try: - return dateutil.parser.parse(datetime_str) - except ValueError: - return None - - # Auxiliary functions to gather info of given pandas dataframe def find_correct_sheetname(df_pandas, sheet_name_regex): """ @@ -305,7 +439,7 @@ def find_correct_sheetname(df_pandas, sheet_name_regex): for sheet_name in df_pandas.keys(): # Check if the sheet name matches the regex pattern if pattern.match(sheet_name): - print("Sheet name =>", sheet_name) + logger.info("Sheet name => %s", sheet_name) return df_pandas[sheet_name] # Raise an error if no matching sheet name is found @@ -439,7 +573,7 @@ def get_correct_datatype(input_datatype): for datatype_name, datatype_values in datatypes_list.items(): if input_datatype in datatype_values: return datatype_name - print(f"undefined data type => {input_datatype}") + logger.warning(f"undefined data type => {input_datatype}") return input_datatype diff --git a/cleanup.sh b/cleanup.sh deleted file mode 100644 index 02935f7c..00000000 --- a/cleanup.sh +++ /dev/null @@ -1,16 +0,0 @@ -rm -rf NOTEBOOKS/00_ETL_Source2Bronze_Layer_TradeHistory.py -rm -rf NOTEBOOKS/01_ETL_Bronze2Silver_Layer_Symbol.py -rm -rf NOTEBOOKS/02_ETL_Bronze2Silver_Layer_TradeHistory.py -rm -rf NOTEBOOKS/03_ETL_Bronze2Silver_Layer_StockPrice.py -rm -rf NOTEBOOKS/04_ETL_Bronze2Silver_Layer_Ledger.py -rm -rf NOTEBOOKS/05_ETL_Silver2Gold_Layer_TradeHistory.py -rm -rf NOTEBOOKS/06_ETL_Silver2Gold_Layer_ProfitLoss.py -rm -rf NOTEBOOKS/07_ETL_Silver2Gold_Layer_Holdings.py -rm -rf NOTEBOOKS/08_ETL_Silver2Gold_Layer_Investment.py -rm -rf NOTEBOOKS/09_PRESENTATION_Layer.py -rm -rf NOTEBOOKS/common_utilities.ipynb -rm -rf NOTEBOOKS/runner.ipynb -rm -rf NOTEBOOKS/t.ipynb -rm -rf NOTEBOOKS/test.py -rm -rf companyAction/companyAction.py -rm -rf test/trading_pnl.ipynb diff --git a/pyproject.toml b/pyproject.toml index 247e07c9..0fe08420 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "upstox" version = "0.1.0" -description = "Completed ETL from upstox exchanges and calculate PNL_AMOUNT and other stuff" +description = "Completed ETL from upstox exchanges and calculate PNL AMOUNT and other stuff" license = "MIT" authors = ["ptptrashanttripathi"] readme = "README.md" diff --git a/test/pnl_new.ipynb b/test/pnl_new.ipynb new file mode 100644 index 00000000..b9cda3d3 --- /dev/null +++ b/test/pnl_new.ipynb @@ -0,0 +1,557 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datetimeexchangesegmentstock_namescrip_codesideamountquantitypriceexpiry_date
1492024-07-15 13:23:50FONFONIFTY-PE-24650-18JUL2024NIFTYBUY5762.5050.0115.252024-07-18
1502024-07-15 14:18:31FONFONIFTY-PE-24650-18JUL2024NIFTYSELL5787.5050.0115.752024-07-18
1532024-07-16 13:57:37FONFONIFTY-PE-24650-18JUL2024NIFTYBUY1941.2525.077.652024-07-18
1542024-07-16 13:57:37FONFONIFTY-PE-24650-18JUL2024NIFTYBUY1942.5025.077.702024-07-18
1552024-07-18 09:47:40FONFONIFTY-PE-24650-18JUL2024NIFTYSELL3512.5050.070.252024-07-18
1632024-07-18 14:25:45FONFONIFTY-PE-24650-18JUL2024NIFTYBUY300.00100.03.002024-07-18
\n", + "
" + ], + "text/plain": [ + " datetime exchange segment stock_name \\\n", + "149 2024-07-15 13:23:50 FON FO NIFTY-PE-24650-18JUL2024 \n", + "150 2024-07-15 14:18:31 FON FO NIFTY-PE-24650-18JUL2024 \n", + "153 2024-07-16 13:57:37 FON FO NIFTY-PE-24650-18JUL2024 \n", + "154 2024-07-16 13:57:37 FON FO NIFTY-PE-24650-18JUL2024 \n", + "155 2024-07-18 09:47:40 FON FO NIFTY-PE-24650-18JUL2024 \n", + "163 2024-07-18 14:25:45 FON FO NIFTY-PE-24650-18JUL2024 \n", + "\n", + " scrip_code side amount quantity price expiry_date \n", + "149 NIFTY BUY 5762.50 50.0 115.25 2024-07-18 \n", + "150 NIFTY SELL 5787.50 50.0 115.75 2024-07-18 \n", + "153 NIFTY BUY 1941.25 25.0 77.65 2024-07-18 \n", + "154 NIFTY BUY 1942.50 25.0 77.70 2024-07-18 \n", + "155 NIFTY SELL 3512.50 50.0 70.25 2024-07-18 \n", + "163 NIFTY BUY 300.00 100.0 3.00 2024-07-18 " + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# filepath of the CSV file\n", + "filepath = \"../DATA/SILVER/TradeHistory/TradeHistory_data.csv\"\n", + "# Read the CSV file into a DataFrame\n", + "df = pd.read_csv(filepath)\n", + "# Filter for specific segments\n", + "df = df[df[\"stock_name\"].isin([\"NIFTY-PE-24650-18JUL2024\"])]\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
segmentstock_namebuy_quantitybuy_pricebuy_amountsell_quantitysell_pricesell_amountpnl_amountholding_quantityinvestment_amountavg_price
0FONIFTY-PE-24650-18JUL2024200.049.739946.25100.093.09300.04326.88100.04973.1249.73
\n", + "
" + ], + "text/plain": [ + " segment stock_name buy_quantity buy_price buy_amount \\\n", + "0 FO NIFTY-PE-24650-18JUL2024 200.0 49.73 9946.25 \n", + "\n", + " sell_quantity sell_price sell_amount pnl_amount holding_quantity \\\n", + "0 100.0 93.0 9300.0 4326.88 100.0 \n", + "\n", + " investment_amount avg_price \n", + "0 4973.12 49.73 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "\n", + "# Calculate the total buy and sell quantities and prices\n", + "df = df.groupby([\"segment\", \"stock_name\", \"side\"]).agg(\n", + " {\"quantity\": \"sum\", \"amount\": \"sum\", \"price\": \"sum\"}\n", + ")\n", + "\n", + "# Reset the index to get stock names as a column\n", + "df.reset_index(inplace=True)\n", + "\n", + "# Pivot the data for a better structure\n", + "df = df.pivot(\n", + " index=[\"segment\", \"stock_name\"],\n", + " columns=\"side\",\n", + " values=[\"quantity\", \"amount\", \"price\"],\n", + ").fillna(0)\n", + "\n", + "df.columns = [\n", + " \"_\".join(list(reversed(col))).strip().lower() for col in df.columns.values\n", + "]\n", + "\n", + "df[\"buy_price\"] = df[\"buy_amount\"] / df[\"buy_quantity\"]\n", + "df[\"sell_price\"] = df[\"sell_amount\"] / df[\"sell_quantity\"]\n", + "df[\"pnl_amount\"] = (df[\"sell_price\"] - df[\"buy_price\"]) * df[\"sell_quantity\"]\n", + "df[\"holding_quantity\"] = df[\"buy_quantity\"] - df[\"sell_quantity\"]\n", + "df[\"investment_amount\"] = df[\"buy_price\"] * df[\"holding_quantity\"]\n", + "df[\"avg_price\"] = df[\"investment_amount\"] / df[\"holding_quantity\"]\n", + "# Order by holding_quantity and stock_name\n", + "df = df.sort_values(by=[\"segment\", \"stock_name\"])\n", + "# Reset the index to get stock names as a column\n", + "df = df.reset_index()\n", + "df = df.fillna(0)\n", + "\n", + "# round the numbers\n", + "df = df.round(2)\n", + "df[\n", + " [\n", + " \"segment\",\n", + " \"stock_name\",\n", + " \"buy_quantity\",\n", + " \"buy_price\",\n", + " \"buy_amount\",\n", + " \"sell_quantity\",\n", + " \"sell_price\",\n", + " \"sell_amount\",\n", + " \"pnl_amount\",\n", + " \"holding_quantity\",\n", + " \"investment_amount\",\n", + " \"avg_price\",\n", + " ]\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
datetimeexchangesegmentstock_namesidequantityopen_priceopen_amountclose_priceclose_amountpnl_amountpnl_percentage
812024-07-15 14:18:31FONFONIFTY-PE-24650-18JUL2024LONG50.0115.255762.5115.755787.525.000.43
822024-07-18 09:47:40FONFONIFTY-PE-24650-18JUL2024LONG50.077.683884.070.253512.5-371.25-9.56
\n", + "
" + ], + "text/plain": [ + " datetime exchange segment stock_name side \\\n", + "81 2024-07-15 14:18:31 FON FO NIFTY-PE-24650-18JUL2024 LONG \n", + "82 2024-07-18 09:47:40 FON FO NIFTY-PE-24650-18JUL2024 LONG \n", + "\n", + " quantity open_price open_amount close_price \\\n", + "81 50.0 115.25 5762.5 115.75 \n", + "82 50.0 77.68 3884.0 70.25 \n", + "\n", + " close_amount pnl_amount pnl_percentage \n", + "81 5787.5 25.00 0.43 \n", + "82 3512.5 -371.25 -9.56 " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# filepath of the CSV file\n", + "filepath = \"../DATA/GOLD/ProfitLoss/ProfitLoss_data.csv\"\n", + "\n", + "# Read the CSV file into a DataFrame\n", + "df_pnl = pd.read_csv(filepath)\n", + "df_pnl = df_pnl[df_pnl[\"stock_name\"].isin([\"NIFTY-PE-24650-18JUL2024\"])]\n", + "df_pnl" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
segmentstock_namepnl_amount
0FONIFTY-PE-24650-18JUL2024-346.25
\n", + "
" + ], + "text/plain": [ + " segment stock_name pnl_amount\n", + "0 FO NIFTY-PE-24650-18JUL2024 -346.25" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "# Calculate the total buy and sell quantities and prices\n", + "df_pnl = df_pnl.groupby([\"segment\", \"stock_name\"]).agg(\n", + " {\"pnl_amount\": \"sum\"}\n", + ")\n", + "\n", + "df_pnl = df_pnl.sort_values(by=[\"segment\", \"stock_name\"])\n", + "# Reset the index to get stock names as a column\n", + "df_pnl.reset_index(inplace=True)\n", + "df_pnl" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
stock_namecode_pnlmy_pnl
0NIFTY-PE-24650-18JUL20244326.88-346.25
\n", + "
" + ], + "text/plain": [ + " stock_name code_pnl my_pnl\n", + "0 NIFTY-PE-24650-18JUL2024 4326.88 -346.25" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Rename the pnl_amount columns to distinguish them\n", + "df1 = df[[\"stock_name\",\"pnl_amount\"]].rename(columns={'pnl_amount': 'code_pnl'})\n", + "df2 = df_pnl[[\"stock_name\",\"pnl_amount\"]].rename(columns={'pnl_amount': 'my_pnl'})\n", + "\n", + "# Perform an outer merge on the stock_name column\n", + "merged_df = pd.merge(df1, df2, on='stock_name', how='outer').fillna(0)\n", + "merged_df = merged_df[merged_df[\"code_pnl\"] != merged_df[\"my_pnl\"]]\n", + "merged_df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/test/trading_pnl.py b/test/trading_pnl.py index a754cf8c..1ea276fd 100644 --- a/test/trading_pnl.py +++ b/test/trading_pnl.py @@ -27,7 +27,7 @@ def __init__( close_time, symbol, quantity, - pnl, + pnl_amount, open_side, close_side, open_price, @@ -37,7 +37,7 @@ def __init__( self.close_time = close_time self.symbol = symbol self.quantity = quantity - self.pnl = pnl + self.pnl_amount = pnl_amount self.open_side = open_side self.close_side = close_side self.open_price = open_price @@ -50,7 +50,7 @@ def __str__(self): self.close_time, self.symbol, self.quantity, - self.pnl, + self.pnl_amount, self.open_side, self.close_side, self.open_price, @@ -58,12 +58,12 @@ def __str__(self): ) -def trade_pnl(input_file): +def trade_pnl_amount(input_file): with open(input_file, "rb") as csv_file: next(csv_file) csv_reader = csv.reader(csv_file, delimiter=",") line_count = 0 - total_pnl = 0 + total_pnl_amount = 0 # Array of Trade objects trade_list = [] @@ -86,7 +86,7 @@ def trade_pnl(input_file): if trade_list[0].symbol == symbol and trade_list[0].side != side: # CLOSING ORDER IS CLOSED AND MATCHED if trade_list[0].quantity >= quantity: - total_pnl += abs( + total_pnl_amount += abs( price * quantity - trade_list[0].price * quantity ) trade_list[0].quantity -= quantity @@ -114,7 +114,7 @@ def trade_pnl(input_file): # CLOSING ORDER IS BIGGER THAN OPENING ORDER elif trade_list[0].quantity < quantity: - total_pnl += abs( + total_pnl_amount += abs( trade_list[0].price * trade_list[0].quantity - price * trade_list[0].quantity ) @@ -142,7 +142,7 @@ def trade_pnl(input_file): if open_trade.symbol == symbol: # CLOSING TRADE IS FINALLY MATCHED if quantity <= open_trade.quantity: - total_pnl += abs( + total_pnl_amount += abs( price * quantity - open_trade.price * quantity ) @@ -170,7 +170,7 @@ def trade_pnl(input_file): break # CLOSING TRADE REMAINS UNMATCHED else: - total_pnl += abs( + total_pnl_amount += abs( open_trade.price * open_trade.quantity - price * open_trade.quantity ) @@ -202,11 +202,11 @@ def trade_pnl(input_file): # Printing out closed trades for x in closed_trades: print(x) - return total_pnl + return total_pnl_amount if __name__ == "__main__": print( - "OPEN_TIME,CLOSE_TIME,SYMBOL,QUANTITY,PNL,OPEN_SIDE,CLOSE_SIDE,OPEN_PRICE,CLOSE_PRICE" + "OPEN_TIME,CLOSE_TIME,SYMBOL,QUANTITY,PNL_AMOUNT,OPEN_SIDE,CLOSE_SIDE,OPEN_PRICE,CLOSE_PRICE" ) - print(trade_pnl(input_csv)) + print(trade_pnl_amount(input_csv))