Merge pull request #1 from SINTEF-Power-system-asset-management/dev

Version 2.0
SINTEF-Power-system-asset-management · Apr 7, 2022 · 9f64667 · 9f64667
2 parents 8af5446 + 903ef05
commit 9f64667
Show file tree

Hide file tree

Showing 54 changed files with 55,533 additions and 1,263 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,3 +1,2 @@
 __pycache__
-load_data
-temperature_data
+.vscode
diff --git a/LICENSE b/LICENSE
diff --git a/README.md b/README.md
@@ -1,12 +1,9 @@
-# Stochastic Load Analysis
-
-Stochastic Load Analysis is a Python script for performing load modelling,
-with a focus on the method described in 
-[Erling Tønne's Doctoral Thesis](http://hdl.handle.net/11250/2476389). 
+# Flexible Load Analysis
+Flexible Load Analysis is a Python script for modelling and analysis of load-timeseries and net-data, with a focus on flexibility.  
+The program was originally made to test load-modelling methods described in [Erling Tønne's Doctoral Thesis](http://hdl.handle.net/11250/2476389).  
 The script is created by SINTEF Energi AS in conjunction with CINELDI WP1.
 
 ## Installation
-
 The script is installed by cloning this repository to your own local machine.
 Running the script requires the following dependencies:
 
@@ -16,25 +13,46 @@ Running the script requires the following dependencies:
 * [pandas](https://pandas.pydata.org/pandas-docs/stable/index.html#)
 * [openpyxl](https://openpyxl.readthedocs.io/en/stable/)
 * [toml](https://toml.io/en/)
+* [networkx](https://networkx.org/)
+* [pandapower](https://www.pandapower.org/)
+
+Install all dependencies by running  
+```Powershell
+python3 -m pip install -r requirements.txt
+```
 
 ## Usage
+To use this script, change config.toml to reflect placement and structure of your load-timeseries dataset, as well as changing any other relevant fields.
+Remember to update the path of the config in main.py as well.
+
+Required data-files and supported formats are described in in_data/example_data/TUTORIAL.md
 
-To use this script, change config.toml to reflect placement and structure of
-dataset, as well as changing any other fields.
 After this, the program may be run with
 ```Bash
-python main.py
+python src/main.py
 ```
 
-See the source-code for how to implement custom preprocessing-steps as well
-as other models.
+See the source-code for how to implement custom preprocessing-steps as well as other models.
 
 ## Development
-Development follows issues reported at the project's 
-[Jira](https://jira.code.sintef.no/projects/CINELDI/summary).
-
-The project follows PEP8-styling and numpy 
+The project follows PEP8-styling and the numpydoc-standard 
 [docstring-styling](https://numpydoc.readthedocs.io/en/latest/format.html).
 
 ## License
-Todo
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+# Authors
+Contributors: Eirik Haugen, Daniel Bjerkehagen, Iver Bakken Sperstad, Susanne Sandell
+
+Copyright &copy; 2021 SINTEF Energi AS
diff --git a/example_data/Temperatur_Strømtangen_fyr_2000-2019.xlsx b/example_data/Temperatur_Strømtangen_fyr_2000-2019.xlsx
diff --git a/example_data/Temperatur_Strømtangen_fyr_2018-2020.xlsx b/example_data/Temperatur_Strømtangen_fyr_2018-2020.xlsx
diff --git a/example_data/sample_load_data.xlsx b/example_data/sample_load_data.xlsx
diff --git a/in_data/.gitignore b/in_data/.gitignore
@@ -0,0 +1,4 @@
+*
+*/
+!example_data/
+!.gitignore
diff --git a/in_data/example_data/TUTORIAL.md b/in_data/example_data/TUTORIAL.md
@@ -0,0 +1,52 @@
+# How to use this directory
+This folder contains examples of all the necessary files needed to run this program, and an explanation of how to use them.
+
+This directory may be used to test that the script works, and to gain insight into the allowed data-formats.
+You may also use the directory to test the data_formatting.py script. In that case: Delete the sub-directories
+"example_network_encoded" and "example_load_data_split" before running data_formatting.
+
+# Files
+## config.toml
+File containing configuration of the program runtime, such as which parts of the program to run. Important parameters such as path to your datafiles must be put in such a config-file, and referred to in main.py.
+See ../config.toml
+
+## load_data
+The program requires measurements of the loads at each customer/load-point as separate files stored in their own directory.
+
+If the load-measurements are stored in a single file, typically when exported from a database, data_formatting.py may be used together with an encoding to split the data-file into a directory based on customer IDs. More on encodings later.
+
+Currently the program supports the following file-formats:
+- Text (.txt)
+- Excel (.xlsx, .xls, etc)
+
+**Format-requirements:**
+
+- Must contain one column/row of timestamps and one column/row of measured value.
+- Filename must be the name you want the node in the network to take.
+- Must be in supported file-format.
+
+## temp_data
+To perform temperature-correction of the loads, temperature-measurements are also required. These follow the same rules for formatting as load_data.
+
+Since this requires computing the daily average temperature, a longer timespan is better.
+
+**Format-requirements:**
+
+- Must contain data for (at least) the same timespan as the load-data.
+
+## network
+
+Directory of non-encoded network-data on MATPOWER-format.
+
+Running data_formatting.py will create a new directory of these files, with IDs encoded based on encoding.
+
+## encoding
+
+To protect the privacy of the customer-data, both from load_data and network, the IDs are encoded to an arbitrary format.
+
+encoding.xlsx is used by the functions in data_formatting.py to encode the IDs from load_data during splitting and to encode IDs from the network-directory.
+
+**Format-requirements:**
+
+- Must be .xlsx-file
+- Must contain old_ID in first column, and the appropriate new_ID in the second column.
diff --git a/in_data/example_data/example_config.toml b/in_data/example_data/example_config.toml
@@ -0,0 +1,119 @@
+# Configuration-file for flexible load analysis.
+# Change values to achieve the desired functionality.
+# Non-empty strings equate to boolean True, empty strings equate to False.
+
+name = "Flexible Load Modelling"
+
+[data]
+
+    # The following fields relate to input path of data, how the timestamps are
+    # formatted and the date of the first datapoint.
+
+    # The script allows for loading of whole directories of similarly structured
+    # data if the path is a directory.
+    # The path must then end in "\\".
+    # The path must be relative to main.py or absolute.
+
+
+    # Timestamp-format is described by the C standard (1989 version). 
+    # See e.g. https://docs.python.org/3.6/library/datetime.html#strftime-and-strptime-behavior
+    # Example usage: data has timestamp "September 2. 2020", then the 
+    # C representation would be "%B %d. %Y"
+    # The date-format may be given as a list if multiple formats are used.
+
+    # The first date of the data must be in iso format.
+
+    # Additional fields are required based on the filetype the loaded data is on:
+    # .xlsx / .xls
+    # sheet = int               --      Zero-indexed sheet the data is stored on
+    # time_column = int         --      Zero-indexed column the timestamps occupy
+    # data_column = int         --      Zero-indexed column the data occupies
+    # vertical_data = string    --      Insert empty string if data is horizontal
+    #                                   (it occupies rows instead of columns), insert "True" if data is vertical
+    #
+    # .txt
+    # separator                 --      ASCII-character which separates each column of data
+    # time_column = int         --      Zero-indexed column the timestamps occupy
+    # data_column = int         --      Zero-indexed column the data occupies
+
+    [data.load_measurements]
+    path = "in_data/example_data/example_load_data_split/"
+    date_format = "%H"
+    first_date_iso = "2020-01-01"
+    last_date_iso = "2020-12-31"
+
+    # For txt-files:
+    separator = ";"
+    time_column = 1
+    data_column = 2
+    vertical_data = "True"
+
+
+    [data.temperature_measurements]
+    path = "in_data/example_data/example_temperature_data/" # example_temp_data.xlsx
+    date_format = "%d.%m.%Y"
+    first_date_iso = "2000-01-01"
+
+    # For excel-files:
+    sheet = 0
+    time_column = 2
+    data_column = 3
+    vertical_data = "True"
+
+
+[preprocessing]
+
+# Any non-empty string will indicate the chosen preprocessing step is performed
+remove_NaN_and_None = "True"
+correct_for_temperature = "True"
+k_temperature_coefficient = 0.1
+x_temperature_sensitivity = 0.05
+
+
+[modelling]
+
+perform_modelling = "True"
+chosen_model = "toenne"
+
+    [modelling.toenne]
+    max_or_average_variation_calculation = "average"
+    # As presented in Tønne, A: monthly and by workday/weekend, multiplicative, B: workday/weekend monthly
+    variation_values_alternative = "B"
+    # As presented in WP1_pilot_Stokastisk_lastmodellering
+    histogram_periods = 1
+    # Chosen way of generating the stochastic model, either from "error_histogram" or "distribution_fitting"
+    stochastic_source = "error_histogram"
+
+    # For additional models, add additional fields for parameters and choice-variables here.
+    #[modelling.example]
+    #parameter1 = 3.14
+    #parameter2 = "Hello World"
+
+
+[network]
+path = "in_data/example_data/example_network_encoded/"
+separator = ";"
+
+
+[analysis]
+result_storage_path = "out_data/example_results/"
+
+
+[plotting]
+
+# Global plot-parameters
+font_size = 12
+
+    # Figures to plot, leave as empty string to not plot
+    [plotting.plots_to_be_made]
+    load_measurements = "True"
+    load_measurements_histogram = "True"
+    temperature_measurements = "True"
+    load_measurements_before_and_after_temperature_correction = "True"
+    # Specific for Tønne-algorithm
+    variation_curves = "True"
+    deterministic_model = ""
+    load_measurements_and_deterministic_model = "True"
+    relative_error = "True"
+    relative_error_histogram = "True"
+    load_measurements_and_stochastic_model = "True"
diff --git a/in_data/example_data/example_encoding.xlsx b/in_data/example_data/example_encoding.xlsx