From 0d4d87df4a9886380b0dd72f4573333ca3fe70b1 Mon Sep 17 00:00:00 2001 From: Mike Taves Date: Fri, 13 Dec 2024 07:36:14 +1300 Subject: [PATCH] Fix typos (#562) --- autotest/la_tests.py | 2 +- autotest/pst_from_tests.py | 26 ++--- autotest/pst_tests.py | 2 +- .../smoother/10par_xsec/_master_stdout.dat | 2 +- autotest/utils_tests.py | 10 +- examples/MatrixCovariance_demo.ipynb | 2 +- examples/Schurexample_freyberg.ipynb | 6 +- examples/Schurexample_henry.ipynb | 4 +- examples/errvarexample_freyberg.ipynb | 16 ++-- examples/errvarexample_henry.ipynb | 14 +-- examples/gpr_emulation_hosaki.ipynb | 12 +-- examples/pst_demo.ipynb | 6 +- examples/pstfrom_mf6.ipynb | 6 +- examples/pstfrom_mf6_ppu.ipynb | 4 +- examples/pyemu_ensemble_basics.ipynb | 6 +- .../understanding_array_thresholding.ipynb | 2 +- misc/kriging/Kriging_testing.py | 2 +- pyemu/eds.py | 10 +- pyemu/en.py | 6 +- pyemu/ev.py | 6 +- pyemu/la.py | 4 +- pyemu/legacy/__init__.py | 2 +- pyemu/legacy/pstfromflopy.py | 24 ++--- pyemu/mat/mat_handler.py | 24 ++--- pyemu/mc.py | 2 +- pyemu/plot/plot_utils.py | 28 +++--- pyemu/pst/pst_handler.py | 58 ++++++------ pyemu/pst/pst_utils.py | 22 ++--- pyemu/sc.py | 4 +- pyemu/utils/geostats.py | 18 ++-- pyemu/utils/gw_utils.py | 34 +++---- pyemu/utils/helpers.py | 46 ++++----- pyemu/utils/metrics.py | 2 +- pyemu/utils/optimization.py | 6 +- pyemu/utils/os_utils.py | 6 +- pyemu/utils/pp_utils.py | 6 +- pyemu/utils/pst_from.py | 94 +++++++++---------- pyemu/utils/smp_utils.py | 2 +- pyproject.toml | 22 +++++ .../Freyberg/verify_unc_results.ipynb | 2 +- verification/henry/verify_unc_results.ipynb | 4 +- verification/henry/verify_unc_results.tex | 4 +- 42 files changed, 290 insertions(+), 268 deletions(-) diff --git a/autotest/la_tests.py b/autotest/la_tests.py index 20899642a..5ec2f8640 100644 --- a/autotest/la_tests.py +++ b/autotest/la_tests.py @@ -477,7 +477,7 @@ def ends_freyberg_dev(): ax.set_xlabel("new obs group") ax.set_title("ensemble variance analysis for three Freyberg predictions",loc="left") plt.tight_layout() - plt.savefig("precent.pdf") + plt.savefig("percent.pdf") plt.close("all") diff --git a/autotest/pst_from_tests.py b/autotest/pst_from_tests.py index b1cc8d671..9f584a85e 100644 --- a/autotest/pst_from_tests.py +++ b/autotest/pst_from_tests.py @@ -299,7 +299,7 @@ def freyberg_test(tmp_path): sfodf_c.columns = sfodf_c.columns.str.lower() assert (sfrobs_p == sfodf_c.loc[sfrobs_p.index, sfrobs_p.columns]).all().all(), ( - "Mis-match between expected and processed obs values\n", + "Mismatch between expected and processed obs values\n", sfrobs_p.head(), sfodf_c.loc[sfrobs_p.index, sfrobs_p.columns].head()) @@ -338,7 +338,7 @@ def freyberg_test(tmp_path): sfodf_c.columns = sfodf_c.columns.str.lower() assert (sfrobs_p == sfodf_c.loc[sfrobs_p.index, sfrobs_p.columns]).all().all(), ( - "Mis-match between expected and processed obs values") + "Mismatch between expected and processed obs values") obsnmes = pd.concat([df.obgnme for df in pf.obs_dfs]).unique() assert all([gp in obsnmes for gp in ['qaquifer', 'qout']]) pf.post_py_cmds.append( @@ -758,7 +758,7 @@ def mf6_freyberg_test(tmp_path): # add the function call to make generic to the forward run script pf.add_py_function(__file__, "generic_function()", is_pre_cmd=False) - # add a function that isnt going to be called directly + # add a function that isn't going to be called directly pf.add_py_function(__file__, "another_generic_function(some_arg)", is_pre_cmd=None) @@ -2913,7 +2913,7 @@ def test_add_array_parameters(self): 
os.chdir(self.dest_ws) # first delete the model file in the template ws model_file.unlink() - # manually apply a multipler + # manually apply a multiplier mult = 4 mult_values = np.loadtxt(mult_file) mult_values[:] = mult @@ -2991,7 +2991,7 @@ def test_add_list_parameters(self): os.chdir(self.dest_ws) # first delete the model file in the template ws model_file.unlink() - # manually apply a multipler + # manually apply a multiplier mult = 4 mult_df = pd.read_csv(mult_file) # no idea why '3' is the column with multipliers and 'parval1_3' isn't @@ -3079,7 +3079,7 @@ def test_add_array_parameters_pps_grid(self): # first delete the model file in the template ws model_file = df['model_file'].values[mult2model_row] os.remove(model_file) - # manually apply a multipler + # manually apply a multiplier mult = 4 if par_type != "pilotpoints": mult_values = np.loadtxt(mult_file) @@ -3194,7 +3194,7 @@ def test_add_array_parameters_to_file_list(self): for file in array_file_input: shutil.copy(self.array_file, Path(self.dest_ws, file)) - # single 2D zone array applied to each file in filesnames + # single 2D zone array applied to each file in filenames self.pf.add_parameters(filenames=array_file_input, par_type='zone', zone_array=self.zone_array, par_name_base=tag, # basename for parameters that are set up @@ -3211,7 +3211,7 @@ def test_add_array_parameters_to_file_list(self): # first delete the model file in the template ws for model_file in df['model_file']: os.remove(model_file) - # manually apply a multipler + # manually apply a multiplier mult = 4 mult_values = np.loadtxt(mult_file) mult_values[:] = mult @@ -3761,7 +3761,7 @@ def usg_freyberg_test(tmp_path): for layer in layers: df_lay = df.loc[df.layer==layer,:].copy() df_lay.sort_values(by="node") - #substract off the min node number so that each layers node dict starts at zero + #subtract off the min node number so that each layers node dict starts at zero df_lay.loc[:,"node"] = df_lay.node - df_lay.node.min() print(df_lay) srd = {n:xy for n,xy in zip(df_lay.node.values,df_lay.xy.values)} @@ -4078,7 +4078,7 @@ def mf6_subdir_test(tmp_path): # add the function call to make generic to the forward run script pf.add_py_function(__file__, f"generic_function('{sd}')",is_pre_cmd=False) - # add a function that isnt going to be called directly + # add a function that isn't going to be called directly pf.add_py_function(__file__, "another_generic_function(some_arg)",is_pre_cmd=None) # pf.post_py_cmds.append("generic_function()") @@ -5218,7 +5218,7 @@ def plot_thresh(m_d): def test_array_fmt(tmp_path): from pyemu.utils.pst_from import _load_array_get_fmt - # psuedo ff option + # pseudo ff option with open(Path(tmp_path, "test.dat"), 'w') as fp: fp.write(" 3.000 3.0000 03.000\n" " 3.0 3.0000 03.000") @@ -5239,14 +5239,14 @@ def test_array_fmt(tmp_path): arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat")) assert fmt == ''.join([" %11.8F"] * 3) assert arr.sum(axis=1).sum() == 18 - # tru space delim option -- sep passed + # true space delim option -- sep passed with open(Path(tmp_path, "test.dat"), 'w') as fp: fp.write("3.000 3.00000 03.000\n" "3.0 3.0000 03.000") arr, fmt = _load_array_get_fmt(Path(tmp_path, "test.dat"), sep=' ') assert fmt == "%7.5F" assert arr.sum(axis=1).sum() == 18 - # tru space delim option with sep None + # true space delim option with sep None with open(Path(tmp_path, "test.dat"), 'w') as fp: fp.write("3.000 3.00000 03.000\n" "3.0 3.0000 03.000") diff --git a/autotest/pst_tests.py b/autotest/pst_tests.py index 262958793..450ddbdb4 
100644 --- a/autotest/pst_tests.py +++ b/autotest/pst_tests.py @@ -272,7 +272,7 @@ def derivative_increment_test(): import pyemu pst = pyemu.Pst(os.path.join("pst", "inctest.pst")) - pst.calculate_pertubations() + pst.calculate_perturbations() def pestpp_args_test(tmp_path): diff --git a/autotest/smoother/10par_xsec/_master_stdout.dat b/autotest/smoother/10par_xsec/_master_stdout.dat index f0bb9c54b..f2a0eeaf9 100644 --- a/autotest/smoother/10par_xsec/_master_stdout.dat +++ b/autotest/smoother/10par_xsec/_master_stdout.dat @@ -1,6 +1,6 @@ - pestpp-swp - a parameteric sweep utility + pestpp-swp - a parametric sweep utility for PEST(++) datasets by the PEST++ development team diff --git a/autotest/utils_tests.py b/autotest/utils_tests.py index 3745120ea..14a34a5da 100644 --- a/autotest/utils_tests.py +++ b/autotest/utils_tests.py @@ -2136,7 +2136,7 @@ def maha_pdc_summary_test(tmp_path): # todo add back in? currently super slowww import pyemu Path(tmp_path).mkdir(exist_ok=True) l1_critical_value = 6.4 #chi squared value at df=1,p=0.01 - l2_critical_value = 9.2 #chi sqaured value at df=2,p=0.01 + l2_critical_value = 9.2 #chi squared value at df=2,p=0.01 pst_file = os.path.join("la", "pest.pst") shutil.copy(pst_file, tmp_path) pst = pyemu.Pst(os.path.join(tmp_path, "pest.pst")) @@ -2730,14 +2730,14 @@ def pypestworker_test(): num_workers=5 # looper over and start the workers - in this - # case they dont need unique dirs since they arent writing + # case they dont need unique dirs since they aren't writing # anything procs = [] for i in range(num_workers): pp = mp.Process(target=ppw_worker,args=(i,case,t_d,host,port,frun)) pp.start() procs.append(pp) - # if everyhing worked, the the workers should recieve the + # if everything worked, the the workers should receive the # shutdown signal from the master and exit gracefully... for pp in procs: pp.join() @@ -3221,7 +3221,7 @@ def gpr_zdt1_test(): #return # looper over and start the workers - in this - # case they dont need unique dirs since they arent writing + # case they dont need unique dirs since they aren't writing # anything procs = [] # try this test with 1 worker as an edge case @@ -3230,7 +3230,7 @@ def gpr_zdt1_test(): pp = mp.Process(target=gpr_zdt1_ppw) pp.start() procs.append(pp) - # if everyhing worked, the the workers should recieve the + # if everything worked, the the workers should receive the # shutdown signal from the master and exit gracefully... for pp in procs: pp.join() diff --git a/examples/MatrixCovariance_demo.ipynb b/examples/MatrixCovariance_demo.ipynb index 74bc9ea89..c9a92b5bc 100644 --- a/examples/MatrixCovariance_demo.ipynb +++ b/examples/MatrixCovariance_demo.ipynb @@ -113,7 +113,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# Convience methods of `Matrix`\n", + "# Convenience methods of `Matrix`\n", "\n", "several cool things are implemented in `Matrix` and accessed through `@property` decorated methods. For example, the SVD components of a `Matrix` object are simply accessed by name. The SVD routine is called on demand and the components are cast to `Matrix` objects, all opaque to the user:" ] diff --git a/examples/Schurexample_freyberg.ipynb b/examples/Schurexample_freyberg.ipynb index f2fea745a..33e297fa5 100644 --- a/examples/Schurexample_freyberg.ipynb +++ b/examples/Schurexample_freyberg.ipynb @@ -39,7 +39,7 @@ "## Model background\n", "This example is based on the synthetic classroom model of Freyberg(1988). The model is a 2-dimensional MODFLOW model with 1 layer, 40 rows, and 20 columns. 
The model has 2 stress periods: an initial steady-state stress period used for calibration, and a 5-year transient stress period. The calibration period uses the recharge and well flux of Freyberg(1988); the last stress period use 25% less recharge and 25% more pumping to represent future conditions for a forecast period.\n", "\n", - "The inverse problem has 761 parameters: hydraulic conductivity of each active model cell, calibration and forecast period recharge multipliers, storage and specific yield, calibration and forecast well flux for each of the six wells, and river bed conductance for each 40 cells with river-type boundary conditions. The inverse problem has 12 head obseravtions, measured at the end of the steady-state calibration period. The forecasts of interest include the sw-gw exchange flux during both stress periods (observations named ```sw_gw_0``` and ``sw_gw_1``), and the water level in well cell 6 located in at row 28 column 5 at the end of the stress periods (observations named ```or28c05_0``` and ```or28c05_1```). The forecasts are included in the Jacobian matrix as zero-weight observations. The model files, pest control file and previously-calculated jacobian matrix are in the `freyberg/` folder\n", + "The inverse problem has 761 parameters: hydraulic conductivity of each active model cell, calibration and forecast period recharge multipliers, storage and specific yield, calibration and forecast well flux for each of the six wells, and river bed conductance for each 40 cells with river-type boundary conditions. The inverse problem has 12 head observations, measured at the end of the steady-state calibration period. The forecasts of interest include the sw-gw exchange flux during both stress periods (observations named ```sw_gw_0``` and ``sw_gw_1``), and the water level in well cell 6 located in at row 28 column 5 at the end of the stress periods (observations named ```or28c05_0``` and ```or28c05_1```). The forecasts are included in the Jacobian matrix as zero-weight observations. The model files, pest control file and previously-calculated jacobian matrix are in the `freyberg/` folder\n", "\n", "\n", "Freyberg, David L. \"AN EXERCISE IN GROUND‐WATER MODEL CALIBRATION AND PREDICTION.\" Groundwater 26.3 (1988): 350-360." @@ -65,7 +65,7 @@ "outputs": [], "source": [ "# Because this model is old -- it predates flopy's modelgrid implementation. \n", - "# And because modelgrid has been implemented without backward compatability \n", + "# And because modelgrid has been implemented without backward compatibility \n", "# the modelgrid object is not constructed properly. 
\n", "# - We will use some sneaky pyemu to get things to how they should be \n", "import pyemu\n", @@ -166,7 +166,7 @@ "source": [ "# just set the path and filename for the jco file\n", "jco = os.path.join(\"Freyberg\",\"freyberg.jcb\") \n", - "# use the jco name with extention \"pst\" for the control file\n", + "# use the jco name with extension \"pst\" for the control file\n", "pst = pyemu.Pst(jco.replace(\".jcb\",\".pst\")) \n", "# get the list of forecast names from the pest++ argument\n", "la = pyemu.Schur(jco=jco, pst=pst, verbose=\"schur_example.log\")\n", diff --git a/examples/Schurexample_henry.ipynb b/examples/Schurexample_henry.ipynb index e168df43d..cc0b5ed50 100644 --- a/examples/Schurexample_henry.ipynb +++ b/examples/Schurexample_henry.ipynb @@ -27,7 +27,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The inverse problem has 603 parameters: 600 hydraulic conductivity pilot points, 1 global hydraulic conductivity, 1 specified flux multiplier for history matching and 1 specified flux multiplier for forecast conditions. The inverse problem has 36 obseravtions (21 heads and 15 concentrations) measured at the end of the steady-state calibration period. The forecasts of interest of the distance from the left model edge to the 10% seawater concentration in the basal model layer and the concentration at location 10. Both of there forecasts are \"measured\" at the end of the forecast stress period. The forecasts are both in the Jacobian matrix as zero-weight observations named `pd_ten` and `C_obs10_2`.I previously calculated the jacobian matrix, which is in the `henry/` folder, along with the PEST control file.\n", + "The inverse problem has 603 parameters: 600 hydraulic conductivity pilot points, 1 global hydraulic conductivity, 1 specified flux multiplier for history matching and 1 specified flux multiplier for forecast conditions. The inverse problem has 36 observations (21 heads and 15 concentrations) measured at the end of the steady-state calibration period. The forecasts of interest of the distance from the left model edge to the 10% seawater concentration in the basal model layer and the concentration at location 10. Both of there forecasts are \"measured\" at the end of the forecast stress period. The forecasts are both in the Jacobian matrix as zero-weight observations named `pd_ten` and `C_obs10_2`.I previously calculated the jacobian matrix, which is in the `henry/` folder, along with the PEST control file.\n", "\n" ] }, @@ -222,7 +222,7 @@ "## parameter contribution to forecast uncertainty\n", "\n", "\n", - "Lets look at which parameters are contributing most to forecast uncertainty, which we estimate as the decrese in forecast uncertainty from \"perfect\" knowledge of one or more parameters. for demostration purposes, lets group the hydraulic conductivity parameters by row." + "Lets look at which parameters are contributing most to forecast uncertainty, which we estimate as the decrease in forecast uncertainty from \"perfect\" knowledge of one or more parameters. for demonstration purposes, lets group the hydraulic conductivity parameters by row." ] }, { diff --git a/examples/errvarexample_freyberg.ipynb b/examples/errvarexample_freyberg.ipynb index 0cd7b4ecd..991997f77 100644 --- a/examples/errvarexample_freyberg.ipynb +++ b/examples/errvarexample_freyberg.ipynb @@ -24,7 +24,7 @@ "## Model background\n", "Here is an example based on the model of Freyberg, 1988. The synthetic model is a 2-dimensional MODFLOW model with 1 layer, 40 rows, and 20 columns. 
The model has 2 stress periods: an initial steady-state stress period used for calibration, and a 5-year transient stress period. The calibration period uses the recharge and well flux of Freyberg, 1988; the last stress period use 25% less recharge and 25% more pumping.\n", "\n", - "The inverse problem has 761 parameters: hydraulic conductivity of each active model cell, calibration and forecast period recharge multipliers, storage and specific yield, calibration and forecast well flux for each of the six wells, and river bed conductance for each 40 cells with river-type boundary conditions. The inverse problem has 12 head obseravtions, measured at the end of the steady-state calibration period. The forecasts of interest include the sw-gw exchange flux during both stress periods (observations named ```sw_gw_0``` and ``sw_gw_1``), and the water level in well cell 6 located in at row 28 column 5 at the end of the stress periods (observations named ```or28c05_0``` and ```or28c05_1```). The forecasts are included in the Jacobian matrix as zero-weight observations. The model files, pest control file and previously-calculated jacobian matrix are in the `freyberg/` folder\n", + "The inverse problem has 761 parameters: hydraulic conductivity of each active model cell, calibration and forecast period recharge multipliers, storage and specific yield, calibration and forecast well flux for each of the six wells, and river bed conductance for each 40 cells with river-type boundary conditions. The inverse problem has 12 head observations, measured at the end of the steady-state calibration period. The forecasts of interest include the sw-gw exchange flux during both stress periods (observations named ```sw_gw_0``` and ``sw_gw_1``), and the water level in well cell 6 located in at row 28 column 5 at the end of the stress periods (observations named ```or28c05_0``` and ```or28c05_1```). The forecasts are included in the Jacobian matrix as zero-weight observations. The model files, pest control file and previously-calculated jacobian matrix are in the `freyberg/` folder\n", "\n", "\n", "Freyberg, David L. \"AN EXERCISE IN GROUND‐WATER MODEL CALIBRATION AND PREDICTION.\" Groundwater 26.3 (1988): 350-360." @@ -50,7 +50,7 @@ "outputs": [], "source": [ "# Because this model is old -- it predates flopy's modelgrid implementation. \n", - "# And because modelgrid has been implemented without backward compatability \n", + "# And because modelgrid has been implemented without backward compatibility \n", "# the modelgrid object is not constructed properly. \n", "# - We will use some sneaky pyemu to get things to how they should be \n", "import pyemu\n", @@ -127,7 +127,7 @@ "source": [ "First create a linear_analysis object. We will use `ErrVar` derived type, which replicates the behavior of the `PREDVAR` suite of PEST as well as `ident_par` utility. We pass it the name of the jacobian matrix file. Since we don't pass an explicit argument for `parcov` or `obscov`, `pyemu` attempts to build them from the parameter bounds and observation weights in a pest control file (.pst) with the same base case name as the jacobian. Since we are interested in forecast uncertainty as well as parameter uncertainty, we also pass the names of the forecast sensitivity vectors we are interested in, which are stored in the jacobian as well. Note that the `forecasts` argument can be a mixed list of observation names, other jacobian files or PEST-compatible ASCII matrix files. 
Remember you can pass a filename to the `verbose` argument to write log file.\n", "\n", - "Since most groundwater model history-matching analyses focus on adjusting hetergeneous hydraulic properties and not boundary condition elements, let's identify the well flux and recharge parameters as `omitted` in the error variance analysis. We can conceptually think of this action as excluding these parameters from the history-matching process. Later we will explicitly calculate the penalty for not adjusting these parameters." + "Since most groundwater model history-matching analyses focus on adjusting heterogeneous hydraulic properties and not boundary condition elements, let's identify the well flux and recharge parameters as `omitted` in the error variance analysis. We can conceptually think of this action as excluding these parameters from the history-matching process. Later we will explicitly calculate the penalty for not adjusting these parameters." ] }, { @@ -153,7 +153,7 @@ "metadata": {}, "source": [ "# Parameter identifiability\n", - "The `errvar` dervied type exposes a method to get a `pandas` dataframe of parameter identifiability information. Recall that parameter identifiability is expressed as $d_i = \\Sigma(\\mathbf{V}_{1i})^2$, where $d_i$ is the parameter identifiability, which ranges from 0 (not identified by the data) to 1 (full identified by the data), and $\\mathbf{V}_1$ are the right singular vectors corresonding to non-(numerically) zero singular values. First let's look at the singular spectrum of $\\mathbf{Q}^{\\frac{1}{2}}\\mathbf{J}$, where $\\mathbf{Q}$ is the cofactor matrix and $\\mathbf{J}$ is the jacobian:" + "The `errvar` derived type exposes a method to get a `pandas` dataframe of parameter identifiability information. Recall that parameter identifiability is expressed as $d_i = \\Sigma(\\mathbf{V}_{1i})^2$, where $d_i$ is the parameter identifiability, which ranges from 0 (not identified by the data) to 1 (full identified by the data), and $\\mathbf{V}_1$ are the right singular vectors corresponding to non-(numerically) zero singular values. First let's look at the singular spectrum of $\\mathbf{Q}^{\\frac{1}{2}}\\mathbf{J}$, where $\\mathbf{Q}$ is the cofactor matrix and $\\mathbf{J}$ is the jacobian:" ] }, { @@ -186,7 +186,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We see that the singluar spectrum decays rapidly (not uncommon) and that we can really only support about 12 right singular vectors even though we have 700+ parameters in the inverse problem. \n", + "We see that the singular spectrum decays rapidly (not uncommon) and that we can really only support about 12 right singular vectors even though we have 700+ parameters in the inverse problem. \n", "\n", "Let's get the identifiability dataframe at 12 singular vectors:" ] @@ -315,11 +315,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Here we see the trade off between getting a good fit to push down the null-space (1st) term and the penalty for overfitting (the rise of the solution space (2nd) term)). The sum of the first two terms in the \"appearent\" error variance (e.g. the uncertainty that standard analyses would yield) without considering the contribution from the omitted parameters. You can verify this by checking prior uncertainty from the Schur's complement notebook against the zero singular value result using only terms 1 and 2. 
Note that the top of the green bar is the limit of traditional uncertainty/error variance analysis: accounting for parameter and observation\n", + "Here we see the trade off between getting a good fit to push down the null-space (1st) term and the penalty for overfitting (the rise of the solution space (2nd) term)). The sum of the first two terms in the \"apparent\" error variance (e.g. the uncertainty that standard analyses would yield) without considering the contribution from the omitted parameters. You can verify this by checking prior uncertainty from the Schur's complement notebook against the zero singular value result using only terms 1 and 2. Note that the top of the green bar is the limit of traditional uncertainty/error variance analysis: accounting for parameter and observation\n", "\n", "We also see the added penalty for not adjusting the well flux and recharge parameters. For the water level at the end of the calibration period forecast (``or28c05_0``), the fact the we have left parameters out doesn't matter - the parameter compensation associated with fixing uncertain model inputs can be \"calibrated out\" beyond 2 singular values. For the water level forecast during forecast period (``or28c05_1``), the penalty for fixed parameters persists -it s nearly constant over the range of singular values. \n", "\n", - "For ``sw_gw_0``, the situation is much worse: not only are we greatly underestimating uncertainty by omitting parameters, worse, calibration increases the uncertainty for this forecast because the adjustable parametres are compensating for the omitted, uncertaint parameters in ways that are damanaging to the forecast. \n", + "For ``sw_gw_0``, the situation is much worse: not only are we greatly underestimating uncertainty by omitting parameters, worse, calibration increases the uncertainty for this forecast because the adjustable parameters are compensating for the omitted, uncertaint parameters in ways that are damanaging to the forecast. \n", "\n", "For the forecast period sw-gw exchange (``sw_gw_1``), calibration doesn't help or hurt - this forecast depend entirely on null space parameter components. But treating the recharge and well pumpage as \"fixed\" (omitted) results in greatly underestimated uncertainty. \n", "\n", @@ -353,7 +353,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We see that the prior from ```schur``` class matches the two-term ```errvar``` result at zero singular values. We also see, as expected, the posterior from ```schur``` is slightly lower than the two-term ```errvar``` result. This shows us that the \"appearent\" uncertainty in these predictions, as found through application of Bayes equation, is being under estimated because if the ill effects of the omitted parameters." + "We see that the prior from ```schur``` class matches the two-term ```errvar``` result at zero singular values. We also see, as expected, the posterior from ```schur``` is slightly lower than the two-term ```errvar``` result. This shows us that the \"apparent\" uncertainty in these predictions, as found through application of Bayes equation, is being under estimated because if the ill effects of the omitted parameters." 
] } ], diff --git a/examples/errvarexample_henry.ipynb b/examples/errvarexample_henry.ipynb index 861516568..b667f6352 100644 --- a/examples/errvarexample_henry.ipynb +++ b/examples/errvarexample_henry.ipynb @@ -27,7 +27,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The inverse problem has 603 parameters: 600 hydraulic conductivity pilot points, 1 global hydraulic conductivity, 1 specified flux multiplier for history matching and 1 specified flux multiplier for forecast conditions. The inverse problem has 36 obseravtions (21 heads and 15 concentrations) measured at the end of the steady-state calibration period. The forecasts of interest of the distance from the left model edge to the 10% seawater concentration in the basal model layer and the concentration at location 10. Both of there forecasts are \"measured\" at the end of the forecast stress period. The forecasts are both in the Jacobian matrix as zero-weight observations named `pd_ten` and `C_obs10_2`.I previously calculated the jacobian matrix, which is in the `henry/` folder, along with the PEST control file.\n", + "The inverse problem has 603 parameters: 600 hydraulic conductivity pilot points, 1 global hydraulic conductivity, 1 specified flux multiplier for history matching and 1 specified flux multiplier for forecast conditions. The inverse problem has 36 observations (21 heads and 15 concentrations) measured at the end of the steady-state calibration period. The forecasts of interest of the distance from the left model edge to the 10% seawater concentration in the basal model layer and the concentration at location 10. Both of there forecasts are \"measured\" at the end of the forecast stress period. The forecasts are both in the Jacobian matrix as zero-weight observations named `pd_ten` and `C_obs10_2`.I previously calculated the jacobian matrix, which is in the `henry/` folder, along with the PEST control file.\n", "\n", "Unlike the Schur's complement example notebook, here we will examine the consequences of not adjusting the specified flux multiplier parameters (```mult1``` and ```mult2```) during inversion, since these types of model inputs are not typically considered for adjustment.\n" ] @@ -54,7 +54,7 @@ "source": [ "First create a linear_analysis object. We will use `err_var` derived type, which replicates the behavior of the `PREDVAR` suite of PEST as well as `ident_par` utility. We pass it the name of the jacobian matrix file. Since we don't pass an explicit argument for `parcov` or `obscov`, `pyemu` attempts to build them from the parameter bounds and observation weights in a pest control file (.pst) with the same base case name as the jacobian. Since we are interested in forecast uncertainty as well as parameter uncertainty, we also pass the names of the forecast sensitivity vectors we are interested in, which are stored in the jacobian as well. Note that the `forecasts` argument can be a mixed list of observation names, other jacobian files or PEST-compatible ASCII matrix files. Remember you can pass a filename to the `verbose` argument to write log file.\n", "\n", - "Since most groundwater model history-matching analyses focus on adjusting hetergeneous hydraulic properties and not boundary condition elements, let's identify the ```mult1``` and ```mult2``` parameters as `omitted` in the error variance analysis. We can conceptually think of this action as excluding the ```mult1``` and ```mult2``` parameters from the history-matching process. 
Later we will explicitly calculate the penalty for not adjusting this parameter." + "Since most groundwater model history-matching analyses focus on adjusting heterogeneous hydraulic properties and not boundary condition elements, let's identify the ```mult1``` and ```mult2``` parameters as `omitted` in the error variance analysis. We can conceptually think of this action as excluding the ```mult1``` and ```mult2``` parameters from the history-matching process. Later we will explicitly calculate the penalty for not adjusting this parameter." ] }, { @@ -74,7 +74,7 @@ "metadata": {}, "source": [ "#Parameter identifiability\n", - "The `errvar` dervied type exposes a method to get a `pandas` dataframe of parameter identifiability information. Recall that parameter identifiability is expressed as $d_i = \\Sigma(\\mathbf{V}_{1i})^2$, where $d_i$ is the parameter identifiability, which ranges from 0 (not identified by the data) to 1 (full identified by the data), and $\\mathbf{V}_1$ are the right singular vectors corresonding to non-(numerically) zero singular values. First let's look at the singular spectrum of $\\mathbf{Q}^{\\frac{1}{2}}\\mathbf{J}$, where $\\mathbf{Q}$ is the cofactor matrix and $\\mathbf{J}$ is the jacobian:" + "The `errvar` derived type exposes a method to get a `pandas` dataframe of parameter identifiability information. Recall that parameter identifiability is expressed as $d_i = \\Sigma(\\mathbf{V}_{1i})^2$, where $d_i$ is the parameter identifiability, which ranges from 0 (not identified by the data) to 1 (full identified by the data), and $\\mathbf{V}_1$ are the right singular vectors corresponding to non-(numerically) zero singular values. First let's look at the singular spectrum of $\\mathbf{Q}^{\\frac{1}{2}}\\mathbf{J}$, where $\\mathbf{Q}$ is the cofactor matrix and $\\mathbf{J}$ is the jacobian:" ] }, { @@ -107,7 +107,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We see that the singluar spectrum decays rapidly (not uncommon) and that we can really only support about 3 right singular vectors even though we have 600+ parameters in the inverse problem. \n", + "We see that the singular spectrum decays rapidly (not uncommon) and that we can really only support about 3 right singular vectors even though we have 600+ parameters in the inverse problem. \n", "\n", "Let's get the identifiability dataframe at 15 singular vectors:" ] @@ -164,7 +164,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The `errvar` derived type exposes a convience method to get a multi-index pandas dataframe with each of the terms of the error variance equation:" + "The `errvar` derived type exposes a convenience method to get a multi-index pandas dataframe with each of the terms of the error variance equation:" ] }, { @@ -222,7 +222,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Here we see the trade off between getting a good fit to push down the null-space (1st) term and the penalty for overfitting (the rise of the solution space (2nd) term)). The sum of the first two terms in the \"appearent\" error variance (e.g. the uncertainty that standard analyses would yield) without considering the contribution from the omitted parameters. You can verify this be checking prior uncertainty from the Schur's complement notebook against the zero singular value result using only terms 1 and 2.\n", + "Here we see the trade off between getting a good fit to push down the null-space (1st) term and the penalty for overfitting (the rise of the solution space (2nd) term)). 
The sum of the first two terms in the \"apparent\" error variance (e.g. the uncertainty that standard analyses would yield) without considering the contribution from the omitted parameters. You can verify this be checking prior uncertainty from the Schur's complement notebook against the zero singular value result using only terms 1 and 2.\n", "\n", "We also see the added penalty for not adjusting the `mult1` and `mult2` parameters (3rd term). The ability to forecast the distance from the left edge of the model to the 10% saltwater concentration and the forecast the concentration at location 10 has been compromised by not adjusting `mult1` and `mult2` during calibration. \n", "\n", @@ -256,7 +256,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We see that the prior from ```schur``` class matches the two-term ```errvar``` result at zero singular values. We also see, as expected, the posterior from ```schur``` is slightly lower than the two-term ```errvar``` result. This shows us that the \"appearent\" uncertainty in these predictions, as found through application of Bayes equation, is being under estimated because if the ill effects of the omitted ```mult1``` and ```mult2``` parameters." + "We see that the prior from ```schur``` class matches the two-term ```errvar``` result at zero singular values. We also see, as expected, the posterior from ```schur``` is slightly lower than the two-term ```errvar``` result. This shows us that the \"apparent\" uncertainty in these predictions, as found through application of Bayes equation, is being under estimated because if the ill effects of the omitted ```mult1``` and ```mult2``` parameters." ] } ], diff --git a/examples/gpr_emulation_hosaki.ipynb b/examples/gpr_emulation_hosaki.ipynb index 096dcd45a..e4203ad29 100644 --- a/examples/gpr_emulation_hosaki.ipynb +++ b/examples/gpr_emulation_hosaki.ipynb @@ -320,7 +320,7 @@ "id": "3d2f2533-8920-45cd-b33c-e082d69cb0d9", "metadata": {}, "source": [ - "One important consideration here: In this simple demo, we only have one objective. That means we have to be careful not to drive the emulator to a single best-fit solution (which we do by using only a few generations when we run mou), but we also need to sure that we start the emulator-driven mou runs with a fully dispersed initial population. But! In a multiobjective run, the final generation for mou is naturally disperesed because of the pareto frontier search, so in the multiobjective setting, we probably want to start the emulated mou run with the existing population to keep us from having to research all of decision variable space for the nondominated solutions. This isnt a hard-and-fast rule, but it seems to be generally applicable\n", + "One important consideration here: In this simple demo, we only have one objective. That means we have to be careful not to drive the emulator to a single best-fit solution (which we do by using only a few generations when we run mou), but we also need to sure that we start the emulator-driven mou runs with a fully dispersed initial population. But! In a multiobjective run, the final generation for mou is naturally disperesed because of the pareto frontier search, so in the multiobjective setting, we probably want to start the emulated mou run with the existing population to keep us from having to research all of decision variable space for the nondominated solutions. 
This isn't a hard-and-fast rule, but it seems to be generally applicable\n", "\n", "The GPR helper in pyemu accepts a list of input and output filenames to make it easier to repeatedly retrain the GPR emulators:" ] @@ -386,7 +386,7 @@ "id": "2c83d09e-98e0-4025-bb05-28decb6b26b2", "metadata": {}, "source": [ - "two very important files in that dir: the new forward run python script and a series of pickle files, one per objective and one per active constraint in the optimization problem. Essentially, we need to build a GPR-based emulator for each output that is relavent to the optimization problem. We also dont want to rebuild these emulators everytime we run the model, so we store the trained GPR emulators in pickle files and load them up as needed:" + "two very important files in that dir: the new forward run python script and a series of pickle files, one per objective and one per active constraint in the optimization problem. Essentially, we need to build a GPR-based emulator for each output that is relevant to the optimization problem. We also dont want to rebuild these emulators every time we run the model, so we store the trained GPR emulators in pickle files and load them up as needed:" ] }, { @@ -406,7 +406,7 @@ "id": "4b60a402-d9bb-419c-92ca-df0cc8919945", "metadata": {}, "source": [ - "So we simply loop over all relavent model outputs that have a GPR emulator and \"emulate\" the value of the model output given the current decision variable values. easy as!" + "So we simply loop over all relevant model outputs that have a GPR emulator and \"emulate\" the value of the model output given the current decision variable values. easy as!" ] }, { @@ -494,7 +494,7 @@ "id": "9668435c-ccb4-486b-8423-d864251d55d0", "metadata": {}, "source": [ - "OK! now we can see whats happening - the emulated objective function surface is strongly controlled by the location of the training data points, and, is this case, its not a good representation of the truth surface...yet...it should also be clear that the uncertainty in the GPR emulation is lowest near the training points but is highly uncertain as we move away from the training - just like geostatistics!\n", + "OK! now we can see what's happening - the emulated objective function surface is strongly controlled by the location of the training data points, and, is this case, its not a good representation of the truth surface...yet...it should also be clear that the uncertainty in the GPR emulation is lowest near the training points but is highly uncertain as we move away from the training - just like geostatistics!\n", "\n", "But now lets run pestpp-mou on the GPR emulated model. This is usually quite fast, especially if the process model that is being emulated takes more than a few mins to run..." ] @@ -574,7 +574,7 @@ "id": "738f6e14-b896-48db-8e1e-59dcbf270245", "metadata": {}, "source": [ - "So just what you expected? Essentially pestpp-mou converged to the minimum of the objective function we gave it, which is the emulated objective function...at this stage the emulated objective function is a poor represetation of the truth objective function....\n", + "So just what you expected? 
Essentially pestpp-mou converged to the minimum of the objective function we gave it, which is the emulated objective function...at this stage the emulated objective function is a poor representation of the truth objective function....\n", "\n", "Now this where some more cleverness happens: Lets take that last emulated decision variable population and actually run it thru the complex \"model\" (which in this case is just the hosaki function...). This is so that we can \"in-fill\" our GPR emulator with this new points in decision variable space. In practice, a lot more cleverness needs to happen to actually decide which points, but for this lil demo, it works..." ] @@ -943,7 +943,7 @@ "id": "7ab00c33-17a8-48a6-ae19-7e5c63136bc1", "metadata": {}, "source": [ - "And one last time...and this time with more emulator-based pestpp-mou generations to polish things off since we no longer want to keep a dispersed population (since we arent doing any more in-filling and retraining)" + "And one last time...and this time with more emulator-based pestpp-mou generations to polish things off since we no longer want to keep a dispersed population (since we aren't doing any more in-filling and retraining)" ] }, { diff --git a/examples/pst_demo.ipynb b/examples/pst_demo.ipynb index 54f63026a..bc41c1aac 100644 --- a/examples/pst_demo.ipynb +++ b/examples/pst_demo.ipynb @@ -94,7 +94,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The `pst` class has some `@decorated` convience methods related to the residuals:" + "The `pst` class has some `@decorated` convenience methods related to the residuals:" ] }, { @@ -110,7 +110,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Some additional `@decorated` convience methods:" + "Some additional `@decorated` convenience methods:" ] }, { @@ -166,7 +166,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The `Pst` class also exposes a method to get a new `Pst` instance with a subset of parameters and or obseravtions. Note this method does not propogate prior information to the new instance:" + "The `Pst` class also exposes a method to get a new `Pst` instance with a subset of parameters and or observations. Note this method does not propagate prior information to the new instance:" ] }, { diff --git a/examples/pstfrom_mf6.ipynb b/examples/pstfrom_mf6.ipynb index f1e12d6e8..c043ce20c 100644 --- a/examples/pstfrom_mf6.ipynb +++ b/examples/pstfrom_mf6.ipynb @@ -562,7 +562,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Not bad! We have everything we need, including our special post processing function...except we didnt set a command to run the model! Doh! \n", + "Not bad! We have everything we need, including our special post processing function...except we didn't set a command to run the model! Doh! \n", "\n", "Let's add that:" ] @@ -687,7 +687,7 @@ "outputs": [], "source": [ "# build up a container of stress period start datetimes - this will\n", - "# be used to specify the datetime of each multipler parameter\n", + "# be used to specify the datetime of each multiplier parameter\n", "dts = pd.to_datetime(pf.start_datetime) + pd.to_timedelta(np.cumsum(sim.tdis.perioddata.array[\"perlen\"]),unit='d')\n", "\n", "for wel_file in wel_files:\n", @@ -782,7 +782,7 @@ "source": [ "### Additional parameters in existing template files\n", "\n", - "In many cases, you will have additional odd-ball parameters that arent in list or array file format that you want to include in the pest control. 
To demonstrate how this works, lets make up a template file:" + "In many cases, you will have additional odd-ball parameters that aren't in list or array file format that you want to include in the pest control. To demonstrate how this works, lets make up a template file:" ] }, { diff --git a/examples/pstfrom_mf6_ppu.ipynb b/examples/pstfrom_mf6_ppu.ipynb index 4d9d30ccd..9283543d9 100644 --- a/examples/pstfrom_mf6_ppu.ipynb +++ b/examples/pstfrom_mf6_ppu.ipynb @@ -455,7 +455,7 @@ "source": [ "## \"These go to 11\" - amp'ing things up with categorization\n", "\n", - "Sometimes, the world we want to simulate might be better represented as categorical instead continuous. That is, rather than smoothly varying property fields, we want fields that are either a high value or a low value (please dont ask for more than 2 categories!). In this case, depending on how you plan to assimilate data (that is, what inversion algorithm you are planning to you), we can accomodate this preference for categorical fields. \n", + "Sometimes, the world we want to simulate might be better represented as categorical instead continuous. That is, rather than smoothly varying property fields, we want fields that are either a high value or a low value (please dont ask for more than 2 categories!). In this case, depending on how you plan to assimilate data (that is, what inversion algorithm you are planning to you), we can accommodate this preference for categorical fields. \n", "\n", "This is pretty advanced and also dense. There is another example notebook the describes the categorization process in detail. Here we will just blast thru it....\n", "\n", @@ -788,7 +788,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Stunning isnt it?! There is clealy a lot subjectivity in the form of defining the prior for the hyper parameters required to use these non-stationary geostats, but they do afford more opportunities to express (stochastic) expert knowledge. To be honest, there was a lot of experimenting with this notebook to get these figures to look this way - playing with variograms and parameter inital values and bounds a lot. You encouraged to do the same! scroll back up, change things, and \"restart kernel and run all\" - this will help build some better intution, promise...." + "Stunning isn't it?! There is clearly a lot subjectivity in the form of defining the prior for the hyper parameters required to use these non-stationary geostats, but they do afford more opportunities to express (stochastic) expert knowledge. To be honest, there was a lot of experimenting with this notebook to get these figures to look this way - playing with variograms and parameter initial values and bounds a lot. You encouraged to do the same! scroll back up, change things, and \"restart kernel and run all\" - this will help build some better intution, promise...." ] }, { diff --git a/examples/pyemu_ensemble_basics.ipynb b/examples/pyemu_ensemble_basics.ipynb index 103806220..8630c09b1 100644 --- a/examples/pyemu_ensemble_basics.ipynb +++ b/examples/pyemu_ensemble_basics.ipynb @@ -21,7 +21,7 @@ "## Model background\n", "Here is an example based on the model of Freyberg, 1988. The synthetic model is a 2-dimensional MODFLOW model with 1 layer, 40 rows, and 20 columns. The model has 2 stress periods: an initial steady-state stress period used for calibration, and a 5-year transient stress period. 
The calibration period uses the recharge and well flux of Freyberg, 1988; the last stress period use 25% less recharge and 25% more pumping.\n", "\n", - "The inverse problem has 761 parameters: hydraulic conductivity of each active model cell, calibration and forecast period recharge multipliers, storage and specific yield, calibration and forecast well flux for each of the six wells, and river bed conductance for each 40 cells with river-type boundary conditions. The inverse problem has 12 head obseravtions, measured at the end of the steady-state calibration period. The forecasts of interest include the sw-gw exchange flux during both stress periods (observations named ```sw_gw_0``` and ``sw_gw_1``), and the water level in well cell 6 located in at row 28 column 5 at the end of the stress periods (observations named ```or28c05_0``` and ```or28c05_1```). The forecasts are included in the Jacobian matrix as zero-weight observations. The model files, pest control file and previously-calculated jacobian matrix are in the `freyberg/` folder\n", + "The inverse problem has 761 parameters: hydraulic conductivity of each active model cell, calibration and forecast period recharge multipliers, storage and specific yield, calibration and forecast well flux for each of the six wells, and river bed conductance for each 40 cells with river-type boundary conditions. The inverse problem has 12 head observations, measured at the end of the steady-state calibration period. The forecasts of interest include the sw-gw exchange flux during both stress periods (observations named ```sw_gw_0``` and ``sw_gw_1``), and the water level in well cell 6 located in at row 28 column 5 at the end of the stress periods (observations named ```or28c05_0``` and ```or28c05_1```). The forecasts are included in the Jacobian matrix as zero-weight observations. The model files, pest control file and previously-calculated jacobian matrix are in the `freyberg/` folder\n", "\n", "\n", "Freyberg, David L. \"AN EXERCISE IN GROUND‐WATER MODEL CALIBRATION AND PREDICTION.\" Groundwater 26.3 (1988): 350-360." @@ -47,7 +47,7 @@ "outputs": [], "source": [ "# Because this model is old -- it predates flopy's modelgrid implementation. \n", - "# And because modelgrid has been implemented without backward compatability \n", + "# And because modelgrid has been implemented without backward compatibility \n", "# the modelgrid object is not constructed properly. \n", "# - We will use some sneaky pyemu to get things to how they should be \n", "import pyemu\n", @@ -105,7 +105,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "The plot shows the Freyberg (1988) model domain. The colorflood is the hydraulic conductivity ($\\frac{m}{d}$). Red and green cells coorespond to well-type and river-type boundary conditions. Blue dots indicate the locations of water levels used for calibration." + "The plot shows the Freyberg (1988) model domain. The colorflood is the hydraulic conductivity ($\\frac{m}{d}$). Red and green cells correspond to well-type and river-type boundary conditions. Blue dots indicate the locations of water levels used for calibration." ] }, { diff --git a/examples/understanding_array_thresholding.ipynb b/examples/understanding_array_thresholding.ipynb index 0e69c4b9f..f063b0f9d 100644 --- a/examples/understanding_array_thresholding.ipynb +++ b/examples/understanding_array_thresholding.ipynb @@ -248,7 +248,7 @@ "id": "968ee1a0-6d37-4f14-bcd3-c00a4c9e87f1", "metadata": {}, "source": [ - "So there it is! 
The original array for reference, the \"thresholding array\" (which is just a scaled and normed verson of the original array) and the resulting \"new array\".\n", + "So there it is! The original array for reference, the \"thresholding array\" (which is just a scaled and normed version of the original array) and the resulting \"new array\".\n", "\n", "\n", "Now let's experiment - feel free to change the quantities in `new_df`:" diff --git a/misc/kriging/Kriging_testing.py b/misc/kriging/Kriging_testing.py index 0484b4f52..34dbaac0a 100644 --- a/misc/kriging/Kriging_testing.py +++ b/misc/kriging/Kriging_testing.py @@ -48,7 +48,7 @@ def __init__(self, namfile, structfile, pointsfile, zonefile, factor_file, # get zone map self.pp_zones = np.loadtxt(zonefile, dtype=int) - # keep track of number of points to interpolate to and search radius and current zone of iterest + # keep track of number of points to interpolate to and search radius and current zone of interest self.minpts_interp = minpts_interp self.maxpts_interp = maxpts_interp self.search_radius = search_radius diff --git a/pyemu/eds.py b/pyemu/eds.py index f71f45ede..5b68f2611 100644 --- a/pyemu/eds.py +++ b/pyemu/eds.py @@ -379,7 +379,7 @@ def get_posterior_prediction_moments(self, obslist_dict=None,sim_ensemble=None,i - **dict**: dictionary of first-moment dataframes. Keys are `obslist_dict` keys. If `include_first_moment` is None, this is an empty dict. - **pd.DataFrame**: prediction standard deviation summary - - **pd.DataFrame**: precent prediction standard deviation reduction summary + - **pd.DataFrame**: percent prediction standard deviation reduction summary Example:: @@ -447,7 +447,7 @@ def get_posterior_prediction_moments(self, obslist_dict=None,sim_ensemble=None,i dd += self.obscov.get(onames,onames).x self.logger.log("adding noise cov to data block") - #todo: test for inveribility and shrink if needed... + #todo: test for invertibility and shrink if needed... self.logger.log("inverting data cov block") dd = np.linalg.inv(dd) self.logger.log("inverting data cov block") @@ -514,7 +514,7 @@ def prep_for_dsi(self,sim_ensemble=None,t_d="dsi_template", use_ztz (`bool`): flag to use the condensed ZtZ matrix for SVD. The ZtZ matrix has dimensions nreal*nreal, instead of the nreal*nobs dimensions of Z. This makes the SVD computation faster and more memory efficient when nobs >> nreal. Default is `False` - energy (`float`): energy threshold for truncating the sqrt(C) matrix. Default is `1.0` which applys no truncation. + energy (`float`): energy threshold for truncating the sqrt(C) matrix. Default is `1.0` which applies no truncation. Example:: @@ -559,7 +559,7 @@ def prep_for_dsi(self,sim_ensemble=None,t_d="dsi_template", #make sure names are ordered obs = obs.loc[names,:] #TODO: deal with "scale" and user-specified "offset" - obs["offset"] = 0.0 #TODO: more elegant? incase all 'none' are passed... + obs["offset"] = 0.0 #TODO: more elegant? in case all 'none' are passed... 
obsnmes = obs.loc[obs.obstransform=='log'].obsnme.values if len(obsnmes) > 0: for name in obsnmes: @@ -883,7 +883,7 @@ def moving_average_with_endpoints(y_values, window_size): # Handle the middle part with full window for i in range(half_window, len(y_values) - half_window): smoothed_y[i] = np.mean(y_values[i - half_window:i + half_window]) - #Enforce endoints + #Enforce endpoints smoothed_y[0] = y_values[0] smoothed_y[-1] = y_values[-1] # Ensure uniqueness by adding small increments if values are duplicated diff --git a/pyemu/en.py b/pyemu/en.py index 3f63b63cc..c612bc2f0 100644 --- a/pyemu/en.py +++ b/pyemu/en.py @@ -402,7 +402,7 @@ def to_dense(self, filename): Note: back transforms `ParameterEnsemble` before writing so that - values are in arithmatic space + values are in arithmetic space """ @@ -732,7 +732,7 @@ def from_gaussian_draw( assumes no correlation (covariates) between observation groups. fill (`bool`): flag to fill in zero-weighted observations with control file values. Default is False. - factor (`str`): how to factorize `cov` to form the projectin matrix. Can + factor (`str`): how to factorize `cov` to form the projection matrix. Can be "eigen" or "svd". The "eigen" option is default and is faster. But for (nearly) singular cov matrices (such as those generated empirically from ensembles), "svd" is the only way. Ignored for diagonal `cov`. @@ -1534,7 +1534,7 @@ def project( base = pyemu.pst_utils.read_parfile(center_on) except: raise Exception( - "'center_on' arg not found in index and couldnt be loaded as a '.par' file" + "'center_on' arg not found in index and couldn't be loaded as a '.par' file" ) else: raise Exception( diff --git a/pyemu/ev.py b/pyemu/ev.py index d9255f44c..29add62fe 100644 --- a/pyemu/ev.py +++ b/pyemu/ev.py @@ -23,7 +23,7 @@ class ErrVar(LinearAnalysis): the noise covariance matrix is loaded from a file using the file extension (".jcb"/".jco" for binary, ".cov"/".mat" for PEST-style ASCII matrix, or ".unc" for uncertainty files). If `None`, the noise covariance matrix is - constructed from the obsevation weights in `LinearAnalysis.pst`. Can also be a `pyemu.Cov` instance + constructed from the observation weights in `LinearAnalysis.pst`. Can also be a `pyemu.Cov` instance forecasts (varies, optional): forecast sensitivity vectors. If `str`, first an observation name is assumed (a row in `LinearAnalysis.jco`). If that is not found, a filename is assumed and predictions are loaded from a file using the file extension. If [`str`], a list of observation names is assumed. @@ -301,7 +301,7 @@ def get_errvar_dataframe(self, singular_values=None): Returns: `pandas.DataFrame`: a multi-indexed pandas dataframe summarizing each of the - error variance terms for each nominated forecast. Rows are the singluar values + error variance terms for each nominated forecast. Rows are the singular values tested, columns are a multi-index of forecast name and error variance term number (e.g. 1,2 or (optionally) 3). 
@@ -360,7 +360,7 @@ def get_identifiability_dataframe(self, singular_value=None, precondition=False) def variance_at(self, singular_value): """get the error variance of all three error variance terms at a - given singluar value + given singular value Args: singular_value (`int`): singular value to test diff --git a/pyemu/la.py b/pyemu/la.py index 1bbf35a68..a3ef8f6da 100644 --- a/pyemu/la.py +++ b/pyemu/la.py @@ -32,7 +32,7 @@ class LinearAnalysis(object): the noise covariance matrix is loaded from a file using the file extension (".jcb"/".jco" for binary, ".cov"/".mat" for PEST-style ASCII matrix, or ".unc" for uncertainty files). If `None`, the noise covariance matrix is - constructed from the obsevation weights in `LinearAnalysis.pst`. Can also be a `pyemu.Cov` instance + constructed from the observation weights in `LinearAnalysis.pst`. Can also be a `pyemu.Cov` instance forecasts (varies, optional): forecast sensitivity vectors. If `str`, first an observation name is assumed (a row in `LinearAnalysis.jco`). If that is not found, a filename is assumed and predictions are loaded from a file using the file extension. If [`str`], a list of observation names is assumed. @@ -469,7 +469,7 @@ def __load_predictions(self): else: raise Exception( "linear_analysis.__load_predictions(): " - + "ndarray passed for predicitons " + + "ndarray passed for predictions " + "requires jco or parcov to get " + "parameter names" ) diff --git a/pyemu/legacy/__init__.py b/pyemu/legacy/__init__.py index e439684a2..625f075e2 100644 --- a/pyemu/legacy/__init__.py +++ b/pyemu/legacy/__init__.py @@ -1,4 +1,4 @@ """This module contains legacy, deprecated classes and methods, -including PstFromFlopyModel -- superseeded by `pyemu.utils.PstFrom()`""" +including PstFromFlopyModel -- superseded by `pyemu.utils.PstFrom()`""" from .pstfromflopy import * diff --git a/pyemu/legacy/pstfromflopy.py b/pyemu/legacy/pstfromflopy.py index 8cecb2c9b..f64985520 100644 --- a/pyemu/legacy/pstfromflopy.py +++ b/pyemu/legacy/pstfromflopy.py @@ -296,7 +296,7 @@ def apply_list_pars(): org_files = os.listdir(org_dir) # for fname in df.filename.unique(): for fname in org_files: - # need to get the PAK name to handle stupid horrible expceptions for HFB... + # need to get the PAK name to handle stupid horrible exceptions for HFB... # try: # pakspat = sum([True if fname in i else False for i in spat_df.filename]) # if pakspat: @@ -397,8 +397,8 @@ def setup_temporal_diff_obs( the differencing. The order of the observations matters for the differencing. If False, then the control file order is used. If observation names have a datetime suffix, make sure the format is year-month-day to use this sorting. Default is True - long_names (`bool`, optional): flag to use long, descriptive names by concating the two observation names - that are being differenced. This will produce names that are too long for tradtional PEST(_HP). + long_names (`bool`, optional): flag to use long, descriptive names by concatenating the two observation names + that are being differenced. This will produce names that are too long for traditional PEST(_HP). Default is True. prefix (`str`, optional): prefix to prepend to observation names and group names. Default is "dif". 
@@ -433,7 +433,7 @@ def setup_temporal_diff_obs( out_file = ins_file.replace(".ins", "") # find obs groups from the obs names in the ins that have more than one observation - # (cant diff single entry groups) + # (can't diff single entry groups) obs = pst.observation_data if include_zero_weight: group_vc = pst.observation_data.loc[ins.obs_name_set, "obgnme"].value_counts() @@ -677,7 +677,7 @@ class PstFromFlopyModel(object): sfr_pars (`bool`): setup parameters for the stream flow routing modflow package. If list is passed it defines the parameters to set up. sfr_temporal_pars (`bool`) - flag to include stress-period level spatially-global multipler parameters in addition to + flag to include stress-period level spatially-global multiplier parameters in addition to the spatially-discrete `sfr_pars`. Requires `sfr_pars` to be passed. Default is False grid_geostruct (`pyemu.geostats.GeoStruct`): the geostatistical structure to build the prior parameter covariance matrix elements for grid-based parameters. If None, a generic GeoStruct is created @@ -705,14 +705,14 @@ class PstFromFlopyModel(object): 2.0. For parameters not found in par_bounds_dict, `pyemu.helpers.wildass_guess_par_bounds_dict` is used to set somewhat meaningful bounds. Default is None - temporal_list_geostruct (`pyemu.geostats.GeoStruct`): the geostastical struture to + temporal_list_geostruct (`pyemu.geostats.GeoStruct`): the geostatistical structure to build the prior parameter covariance matrix for time-varying list-type multiplier parameters. This GeoStruct express the time correlation so that the 'a' parameter is the length of time that boundary condition multiplier parameters are correlated across. If None, then a generic GeoStruct is created that uses an 'a' parameter of 3 stress periods. Default is None - spatial_list_geostruct (`pyemu.geostats.GeoStruct`): the geostastical struture to + spatial_list_geostruct (`pyemu.geostats.GeoStruct`): the geostatistical structure to build the prior parameter covariance matrix for spatially-varying list-type multiplier parameters. If None, a generic GeoStruct is created using an "a" parameter that @@ -1190,7 +1190,7 @@ def _setup_mult_dirs(self): self.log("setting up '{0}' dir".format(d)) def _setup_model(self, model, org_model_ws, new_model_ws): - """setup the flopy.mbase instance for use with multipler parameters. + """setup the flopy.mbase instance for use with multiplier parameters. Changes model_ws, sets external_path and writes new MODFLOW input files @@ -1222,7 +1222,7 @@ def _setup_model(self, model, org_model_ws, new_model_ws): self.new_model_ws = new_model_ws try: self.sr = self.m.sr - except AttributeError: # if sr doesnt exist anymore! + except AttributeError: # if sr doesn't exist anymore! # assume that we have switched to model grid self.sr = SpatialReference.from_namfile( os.path.join(self.org_model_ws, self.m.namefile), @@ -1259,7 +1259,7 @@ def _get_count(self, name): return c def _prep_mlt_arrays(self): - """prepare multipler arrays. Copies existing model input arrays and + """prepare multiplier arrays. Copies existing model input arrays and writes generic (ones) multiplier arrays """ @@ -1737,7 +1737,7 @@ def _kl_prep(self, mlt_df): # calc factors for each layer def _setup_array_pars(self): - """main entry point for setting up array multipler parameters""" + """main entry point for setting up array multiplier parameters""" mlt_df = self._prep_mlt_arrays() if mlt_df is None: return @@ -2155,7 +2155,7 @@ def build_pst(self, filename=None): observations.
Args: - filename (`str`): the filename to save the contorl file to. If None, the + filename (`str`): the filename to save the control file to. If None, the name if formed from the model namfile name. Default is None. The control is saved in the `PstFromFlopy.m.model_ws` directory. Note: diff --git a/pyemu/mat/mat_handler.py b/pyemu/mat/mat_handler.py index 863913d62..5d193caf2 100644 --- a/pyemu/mat/mat_handler.py +++ b/pyemu/mat/mat_handler.py @@ -587,7 +587,7 @@ def hadamard_product(self, other): if `Matrix` and other (if applicable) have `autoalign` set to `True`, both `Matrix` and `other` are aligned based on row and column names. If names are not common between the two, this may result in a smaller - returned `Matrix`. If not common elements are shared, an excetion is raised + returned `Matrix`. If no common elements are shared, an exception is raised Example:: @@ -1023,7 +1023,7 @@ def as_2d(self): Returns: `numpy.ndarray` : numpy.ndarray - Exmaple:: + Example:: # A diagonal cov cov = pyemu.Cov.from_parameter_data @@ -1043,7 +1043,7 @@ def to_2d(self): Returns: `Martrix`: non-diagonal form of `Matrix` - Exmaple:: + Example:: # A diagonal cov cov = pyemu.Cov.from_parameter_data @@ -1067,7 +1067,7 @@ def shape(self): Returns: `int`: length of 2 tuple - Exmaple:: + Example:: jco = pyemu.Jco.from_binary("pest.jcb") shape = jco.shape @@ -1214,7 +1214,7 @@ def get_maxsing_from_s(s, eigthresh=1.0e-5): singular value, the index of this singular is returned. Returns: - `int`: the index of the singular value whos ratio with the + `int`: the index of the singular value whose ratio with the first singular value is less than or equal to `eigthresh` @@ -1245,7 +1245,7 @@ def get_maxsing(self, eigthresh=1.0e-5): singular value, the index of this singular is returned. Returns: - `int`: the index of the singular value whos ratio with the + `int`: the index of the singular value whose ratio with the first singular value is less than or equal to `eigthresh` Note: @@ -1487,7 +1487,7 @@ def zero2d(self): @staticmethod def find_rowcol_indices(names, row_names, col_names, axis=None): - """fast(er) look of row and colum names indices + """fast(er) look of row and column names indices Args: names ([`str`]): list of names to look for in `row_names` and/or `col_names` names @@ -2188,7 +2188,7 @@ def get_dense_binary_info(filename): Parameters ---------- - fileanme (`str`): dense binary filename + filename (`str`): dense binary filename Returns: @@ -2836,7 +2836,7 @@ class Jco(Matrix): """ def __init(self, **kwargs): - """Jco constuctor takes the same arguments as Matrix. + """Jco constructor takes the same arguments as Matrix. Args: **kwargs (`dict`): constructor arguments for `Matrix` @@ -2931,7 +2931,7 @@ class Cov(Matrix): mat.to_binary("mat.jco") Note: - `row_names` and `col_names` args are supported in the contructor + `row_names` and `col_names` args are supported in the constructor so support inheritance. However, users should only pass `names` @@ -3236,7 +3236,7 @@ def from_parbounds(cls, pst_file, sigma_range=4.0, scale_offset=True): represent 4 * sigma. Default is 4.0, representing approximately 95% confidence of implied normal distribution scale_offset (`bool`): flag to apply scale and offset to parameter upper and lower - bounds before calculating varaince. In some cases, not applying scale and + bounds before calculating variance. In some cases, not applying scale and offset can result in undefined (log) variance. Default is True.
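A minimal sketch of the `sigma_range` and `scale_offset` behavior described above, using `Cov.from_parameter_data`; the control file name is an illustrative placeholder:

    import pyemu

    pst = pyemu.Pst("pest.pst")
    # parameter bounds are taken to span ~6 standard deviations, with scale and
    # offset applied before the (log) variances are computed
    parcov = pyemu.Cov.from_parameter_data(pst, sigma_range=6.0, scale_offset=True)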
Returns: @@ -3265,7 +3265,7 @@ def from_parameter_data(cls, pst, sigma_range=4.0, scale_offset=True, represent 4 * sigma. Default is 4.0, representing approximately 95% confidence of implied normal distribution scale_offset (`bool`): flag to apply scale and offset to parameter upper and lower - bounds before calculating varaince. In some cases, not applying scale and + bounds before calculating variance. In some cases, not applying scale and offset can result in undefined (log) variance. Default is True. subset (`list`-like, optional): Subset of parameters to draw diff --git a/pyemu/mc.py b/pyemu/mc.py index 000d42ef9..750edf4a1 100644 --- a/pyemu/mc.py +++ b/pyemu/mc.py @@ -106,7 +106,7 @@ def get_null_proj(self, nsing=None): ---------- nsing: int optional number of singular components to use - If Nonte, then nsing is determined from + If None, then nsing is determined from call to MonteCarlo.get_nsing() Returns diff --git a/pyemu/plot/plot_utils.py b/pyemu/plot/plot_utils.py index a7129ff2e..21247dd74 100644 --- a/pyemu/plot/plot_utils.py +++ b/pyemu/plot/plot_utils.py @@ -54,7 +54,7 @@ def plot_summary_distributions( Args: df (`pandas.DataFrame`): a dataframe and csv file. Must have columns named: 'prior_mean','prior_stdev','post_mean','post_stdev'. If loaded - from a csv file, column 0 is assumed to tbe the index + from a csv file, column 0 is assumed to be the index ax (`matplotlib.pyplot.axis`): If None, and not subplots, then one is created and all distributions are plotted on a single plot label_post (`bool`): flag to add text labels to the peak of the posterior @@ -269,7 +269,7 @@ def phi_progress(pst, logger=None, filename=None, **kwargs): """ if logger is None: - logger = Logger("Default_Loggger.log", echo=False) + logger = Logger("Default_Logger.log", echo=False) logger.log("plot phi_progress") iobj_file = pst.filename.replace(".pst", ".iobj") @@ -327,7 +327,7 @@ def res_1to1( """ if logger is None: - logger = Logger("Default_Loggger.log", echo=False) + logger = Logger("Default_Logger.log", echo=False) logger.log("plot res_1to1") if "ensemble" in kwargs: @@ -517,7 +517,7 @@ def plot_id_bar(id_df, nsv=None, logger=None, **kwargs): Args: id_df (`pandas.DataFrame`) : dataframe of identifiability nsv (`int`): number of singular values to consider - logger (`pyemu.Logger`, optonal): a logger. If None, a generic + logger (`pyemu.Logger`, optional): a logger. If None, a generic one is created kwargs (`dict`): a dict of keyword arguments to pass to the plotting function @@ -535,7 +535,7 @@ def plot_id_bar(id_df, nsv=None, logger=None, **kwargs): """ if logger is None: - logger = Logger("Default_Loggger.log", echo=False) + logger = Logger("Default_Logger.log", echo=False) logger.log("plot id bar") df = id_df.copy() @@ -633,7 +633,7 @@ def res_phi_pie(pst, logger=None, **kwargs): """ if logger is None: - logger = Logger("Default_Loggger.log", echo=False) + logger = Logger("Default_Logger.log", echo=False) logger.log("plot res_phi_pie") if "ensemble" in kwargs: @@ -711,7 +711,7 @@ def pst_prior(pst, logger=None, filename=None, **kwargs): If None, return figs without saving. Default is None. kwargs (`dict`): additional plotting options. Accepts 'grouper' as dict to group parameters on to a single axis (use - parameter groups if not passed),'unqiue_only' to only show unique + parameter groups if not passed),'unique_only' to only show unique mean-stdev combinations within a given group. Any additional args are passed to `matplotlib`. 
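A minimal usage sketch of `pst_prior` with the `unique_only` keyword noted above; the control file and output file names are illustrative placeholders:

    import pyemu
    from pyemu.plot import plot_utils

    pst = pyemu.Pst("pest.pst")
    # plot prior parameter distributions grouped by parameter group,
    # collapsing duplicate mean-stdev combinations within each group
    plot_utils.pst_prior(pst, filename="prior.pdf", unique_only=True)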
@@ -726,7 +726,7 @@ def pst_prior(pst, logger=None, filename=None, **kwargs): """ if logger is None: - logger = Logger("Default_Loggger.log", echo=False) + logger = Logger("Default_Logger.log", echo=False) logger.log("plot pst_prior") par = pst.parameter_data @@ -889,7 +889,7 @@ def ensemble_helper( flag to use the same bin edges for all ensembles. Only applies if more than one ensemble is being plotted. Default is True deter_vals : dict - dict of deterministic values to plot as a vertical line. key is ensemble columnn name + dict of deterministic values to plot as a vertical line. key is ensemble column name std_window : float the number of standard deviations around the mean to mark as vertical lines. If None, nothing happens. Default is None @@ -1145,7 +1145,7 @@ def ensemble_change_summary( """ if logger is None: - logger = Logger("Default_Loggger.log", echo=False) + logger = Logger("Default_Logger.log", echo=False) logger.log("plot ensemble change") if isinstance(ensemble1, str): @@ -1380,7 +1380,7 @@ def ensemble_res_1to1( Args: ensemble (varies): the ensemble argument can be a pandas.DataFrame or derived type or a str, which - is treated as a fileanme. Optionally, ensemble can be a list of these types or + is treated as a filename. Optionally, ensemble can be a list of these types or a dict, in which case, the keys are treated as facecolor str (e.g., 'b', 'y', etc). pst (`pyemu.Pst`): a control file instance facecolor (`str`): the histogram facecolor. Only applies if `ensemble` is a single thing @@ -1423,7 +1423,7 @@ def _get_plotlims(oen, ben, obsnames): oemin = np.nanmin([oemin, oeni.min().min()]) oemax = np.nanmax([oemax, oeni.max().max()]) # get min and max of mean sim vals - # (incase we want plot to ignore extremes) + # (in case we want plot to ignore extremes) oemeanmin = np.nanmin([oemeanmin, oeni.mean().min()]) oemeanmax = np.nanmax([oemeanmax, oeni.mean().max()]) for _, beni in ben.items(): # same with base ensemble/obsval @@ -1466,7 +1466,7 @@ def _get_plotlims(oen, ben, obsnames): if logger is None: - logger = Logger("Default_Loggger.log", echo=False) + logger = Logger("Default_Logger.log", echo=False) logger.log("plot res_1to1") obs = pst.observation_data ensembles = _process_ensemble_arg(ensemble, facecolor, logger) @@ -1677,7 +1677,7 @@ def plot_jac_test( with pyemu using: pest_object.pestpp_options["sweep_parameter_csv_file"] = jactest_in_file.csv pest_object.pestpp_options["sweep_output_csv_file"] = jactest_out_file.csv targetobs ([`str`]): list of observation file names to plot, each parameter used for jactest can - have up to 32 observations plotted per page, throws a warning if more tha + have up to 32 observations plotted per page, throws a warning if more than 10 pages of output are requested per parameter. If none, all observations in the output csv file are used. filetype (`str`): file type to store output, if None, plt.show() is called. 
diff --git a/pyemu/pst/pst_handler.py b/pyemu/pst/pst_handler.py index c0d50fa9b..1e59e18c2 100644 --- a/pyemu/pst/pst_handler.py +++ b/pyemu/pst/pst_handler.py @@ -479,7 +479,7 @@ def nnz_obs_groups(self): @property def adj_par_groups(self): - """get the parameter groups with atleast one adjustable parameter + """get the parameter groups with at least one adjustable parameter Returns: [`str`]: a list of parameter groups with @@ -1103,7 +1103,7 @@ def _load_version2(self, filename): != self.control_data.ntplfle + self.control_data.ninsfle ): raise Exception( - "didnt find the right number of '* model input/output' lines," + "didn't find the right number of '* model input/output' lines," + "expecting {0} template files and {1} instruction files".format( self.control_data.ntplfle, self.control_data.ninsfle ) @@ -1235,7 +1235,7 @@ def _load_version2(self, filename): "* model input" in sections_found or "* model output" in sections_found ): raise Exception( - "'* model input/output cant be used with '* model input' or '* model output'" + "'* model input/output can't be used with '* model input' or '* model output'" ) def load(self, filename): @@ -1245,7 +1245,7 @@ def load(self, filename): filename (`str`): pst filename Note: - This method is called from the `Pst` construtor unless the `load` arg is `False`. + This method is called from the `Pst` constructor unless the `load` arg is `False`. @@ -1580,7 +1580,7 @@ def sanity_checks(self, forgive=False): Note: - checks for duplicate names, atleast 1 adjustable parameter + checks for duplicate names, at least 1 adjustable parameter and at least 1 non-zero-weighted observation Not nearly as comprehensive as pestchek @@ -2089,7 +2089,7 @@ def bounds_report(self, iterations=None): ) except FileNotFoundError: raise Exception( - "iteration {} does not have a paramter file associated with it in {}".format( + "iteration {} does not have a parameter file associated with it in {}".format( citer, pstdir ) ) @@ -2140,7 +2140,7 @@ def get(self, par_names=None, obs_names=None): If None, all parameters are in the new Pst instance. Default is None obs_names ([`str`]): a list of observation names to have in the new Pst instance. - If None, all observations are in teh new Pst instance. Default + If None, all observations are in the new Pst instance. Default is None Returns: @@ -2228,8 +2228,8 @@ def parrep( parfile (`str`, optional): parameter file to use. If None, try to find and use a parameter file that corresponds to the case name. If parfile has extension '.par' a single realization parameter file is used - If parfile has extention '.csv' an ensemble parameter file is used which invokes real_name - If parfile has extention '.jcb' a binary ensemble parameter file is used which invokes real_name + If parfile has extension '.csv' an ensemble parameter file is used which invokes real_name + If parfile has extension '.jcb' a binary ensemble parameter file is used which invokes real_name Default is None enforce_bounds (`bool`, optional): flag to enforce parameter bounds after parameter values are updated. This is useful because PEST and PEST++ round the parameter values in the @@ -2584,35 +2584,35 @@ def proportional_weights(self, fraction_stdev=1.0, wmax=100.0, leave_zero=True): new_weights.append(ow) self.observation_data.weight = new_weights - def calculate_pertubations(self): + def calculate_perturbations(self): """experimental method to calculate finite difference parameter - pertubations. + perturbations. 
Note: - The pertubation values are added to the + The perturbation values are added to the `Pst.parameter_data` attribute - user beware! """ self.build_increments() - self.parameter_data.loc[:, "pertubation"] = ( + self.parameter_data.loc[:, "perturbation"] = ( self.parameter_data.parval1 + self.parameter_data.increment ) self.parameter_data.loc[:, "out_forward"] = ( - self.parameter_data.loc[:, "pertubation"] + self.parameter_data.loc[:, "perturbation"] > self.parameter_data.loc[:, "parubnd"] ) out_forward = self.parameter_data.groupby("out_forward").groups if True in out_forward: - self.parameter_data.loc[out_forward[True], "pertubation"] = ( + self.parameter_data.loc[out_forward[True], "perturbation"] = ( self.parameter_data.loc[out_forward[True], "parval1"] - self.parameter_data.loc[out_forward[True], "increment"] ) self.parameter_data.loc[:, "out_back"] = ( - self.parameter_data.loc[:, "pertubation"] + self.parameter_data.loc[:, "perturbation"] < self.parameter_data.loc[:, "parlbnd"] ) out_back = self.parameter_data.groupby("out_back").groups @@ -2621,14 +2621,14 @@ def calculate_pertubations(self): print(self.parameter_data.loc[still_out, :], flush=True) raise Exception( - "Pst.calculate_pertubations(): " - + "can't calc pertubations for the following " + "Pst.calculate_perturbations(): " + + "can't calc perturbations for the following " + "Parameters {0}".format(",".join(still_out)) ) def build_increments(self): """experimental method to calculate parameter increments for use - in the finite difference pertubation calculations + in the finite difference perturbation calculations Note: user beware! @@ -2744,12 +2744,12 @@ def from_io_files( Default is None pst_path ('str'): the path from the control file to the IO files. For example, if the control will be in the same directory as the IO files, then `pst_path` should be '.'. - Default is None, which doesnt do any path manipulation on the I/O file names + Default is None, which doesn't do any path manipulation on the I/O file names Returns: `Pst`: new control file instance with parameter and observation names - found in `tpl_files` and `ins_files`, repsectively. + found in `tpl_files` and `ins_files`, respectively. Note: calls `pyemu.helpers.pst_from_io_files()` @@ -2934,7 +2934,7 @@ def drop_parameters(self, tpl_file, pst_path=None): `pandas.DataFrame`: the parameter data for the parameters that were removed. Note: - This method does not check for multiple occurences of the same parameter name(s) in + This method does not check for multiple occurrences of the same parameter name(s) in across template files so if you have the same parameter in multiple template files, this is not the method you are looking for @@ -2974,7 +2974,7 @@ def drop_parameters(self, tpl_file, pst_path=None): par_names = set(self.par_names) drop_pars_present = [p for p in drop_pars if p in par_names] - # check that other pars arent tied to the dropping pars + # check that other pars aren't tied to the dropping pars if "partied" in self.parameter_data.columns: par_tied = set( self.parameter_data.loc[ @@ -3264,7 +3264,7 @@ def write_par_summary_table( Args: - filename (`str`): filename. If None, use .par.tex to write as LaTeX. If filename extention is '.xls' or '.xlsx', + filename (`str`): filename. If None, use .par.tex to write as LaTeX. If filename extension is '.xls' or '.xlsx', tries to write as an Excel file. If `filename` is "none", no table is written Default is None group_names (`dict`): par group names : table names. 
For example {"w0":"well stress period 1"}. @@ -3404,13 +3404,13 @@ def write_par_summary_table( def write_obs_summary_table(self, filename=None, group_names=None): """write a stand alone observation summary latex table or Excel shet - filename (`str`): filename. If None, use .par.tex to write as LaTeX. If filename extention is '.xls' or '.xlsx', + filename (`str`): filename. If None, use .par.tex to write as LaTeX. If filename extension is '.xls' or '.xlsx', tries to write as an Excel file. If `filename` is "none", no table is written Default is None Args: filename (`str`): filename. If `filename` is "none", no table is written. - If None, use .obs.tex. If filename extention is '.xls' or '.xlsx', + If None, use .obs.tex. If filename extension is '.xls' or '.xlsx', tries to write as an Excel file. Default is None group_names (`dict`): obs group names : table names. For example {"hds":"simulated groundwater level"}. @@ -3607,7 +3607,7 @@ def greater_than_obs_constraints(self): are listed as active (non-zero weight) greater than inequality constraints. Returns: - `pandas.Series`: names obseravtions that are non-zero weighted + `pandas.Series`: names observations that are non-zero weighted greater than constraints (`obgnme` startsiwth "g_" or "greater") Note: @@ -3694,7 +3694,7 @@ def get_par_change_limits(self): change_df.loc[nfpars, "fac_upper"] = base_vals / fpm change_df.loc[nfpars, "fac_lower"] = base_vals * fpm - # postive fac pars + # positive fac pars pfpars = par.loc[base_vals.apply(lambda x: x > 0)].index change_df.loc[pfpars, "fac_upper"] = base_vals * fpm change_df.loc[pfpars, "fac_lower"] = base_vals / fpm @@ -3961,7 +3961,7 @@ def _multiprocess_obspar_rename(sys_file, map_dict, rex=None): # def _multiprocess_obspar_rename_v1(sys_file, map_dict, rex=None): # # memory intensive as whole file is read into memory -# # maybe faster than v2 when file is big but map_dict is relativly small +# # maybe faster than v2 when file is big but map_dict is relatively small # # but look out for memory # if rex is None: # rex = re.compile("|".join( diff --git a/pyemu/pst/pst_utils.py b/pyemu/pst/pst_utils.py index 74a02b0ac..e484ac3c7 100644 --- a/pyemu/pst/pst_utils.py +++ b/pyemu/pst/pst_utils.py @@ -898,7 +898,7 @@ def try_process_output_pst(pst): Note: This function first tries to process the output files using the - InstructionFile class, If that failes, then it tries to run + InstructionFile class, If that fails, then it tries to run INSCHEK. If an instructionfile is processed successfully, the extract simulated values are used to populate the `pst.observation_data.obsval` attribute. @@ -1071,7 +1071,7 @@ def csv_to_ins_file( `csv_filename` Note: - resulting observation names in `ins_filename` are a combiation of index and + resulting observation names in `ins_filename` are a combination of index and header values. 
@@ -1088,7 +1088,7 @@ def csv_to_ins_file( if only_cols is None: only_cols = set(df.columns.map(lambda x: x.lower().strip()).tolist()) else: - if isinstance(only_cols, str): # incase it is a single name + if isinstance(only_cols, str): # in case it is a single name only_cols = [only_cols] only_cols = set(only_cols) only_cols = {c.lower() if isinstance(c, str) else c for c in only_cols} @@ -1096,7 +1096,7 @@ def csv_to_ins_file( if only_rows is None: only_rows = set(df.index.map(lambda x: x.lower().strip()).tolist()) else: - if isinstance(only_rows, str): # incase it is a single name + if isinstance(only_rows, str): # in case it is a single name only_rows = [only_rows] only_rows = set(only_rows) only_rows = {r.lower() if isinstance(r, str) else r for r in only_rows} @@ -1191,7 +1191,7 @@ def csv_to_ins_file( oname = f"{nname}_{rlabel}" onames.append(oname) # append list of obs ovals.append(vals[i, j]) # store current obs val - # defin group name + # define group name if gpname is False or gpname[c_count] is False: # keeping consistent behaviour ngpname = None # nname @@ -1214,7 +1214,7 @@ def csv_to_ins_file( c_count += 1 elif ( j < len(clabels) - 1 - ): # this isnt a row-col to observationalize (nice word!) + ): # this isn't a row-col to observationalize (nice word!) if sep == ",": line += f" {marker},{marker} " else: @@ -1279,7 +1279,7 @@ def read_ins_file(self): first_line = self._readline_ins() if len(first_line) < 2: raise Exception( - "first line of ins file must have atleast two entries, not '{0}'".format( + "first line of ins file must have at least two entries, not '{0}'".format( ",".join(first_line) ) ) @@ -1494,7 +1494,7 @@ def _execute_ins_line(self, ins_line, ins_lcount): elif ins == "w": # whole string comparison raw = rline[cursor_pos : cursor_pos + maxsearch].split( None, 2 - ) # TODO: maybe slow for long strings -- hopefuly maxsearch helps + ) # TODO: maybe slow for long strings -- hopefully maxsearch helps if line[cursor_pos] in line_seps: raw.insert(0, "") if len(raw) == 1: @@ -1510,7 +1510,7 @@ def _execute_ins_line(self, ins_line, ins_lcount): # raw[1] # ) - elif i1 == "!": # indicates obs instruction folows + elif i1 == "!": # indicates obs instruction follows oname = ins.replace("!", "") # look a head for a second/closing marker if ii < n_ins - 1 and ins_line[ii + 1] == self._marker: @@ -1579,7 +1579,7 @@ def _execute_ins_line(self, ins_line, ins_lcount): raw = ins.split(")")[1] if ":" not in raw: self.throw_ins_error( - "couldnt find ':' in semi-fixed instruction: '{0}'".format(ins), + "couldn't find ':' in semi-fixed instruction: '{0}'".format(ins), lcount=self._instruction_lcount, ) raw = raw.split(":") @@ -1650,7 +1650,7 @@ def _execute_ins_line(self, ins_line, ins_lcount): raw = ins.split("]")[1] if ":" not in raw: self.throw_ins_error( - "couldnt find ':' in fixed instruction: '{0}'".format(ins), + "couldn't find ':' in fixed instruction: '{0}'".format(ins), lcount=self._instruction_lcount, ) raw = raw.split(":") diff --git a/pyemu/sc.py b/pyemu/sc.py index 563f2f895..0008af2e8 100644 --- a/pyemu/sc.py +++ b/pyemu/sc.py @@ -28,7 +28,7 @@ class Schur(LinearAnalysis): the noise covariance matrix is loaded from a file using the file extension (".jcb"/".jco" for binary, ".cov"/".mat" for PEST-style ASCII matrix, or ".unc" for uncertainty files). If `None`, the noise covariance matrix is - constructed from the obsevation weights in `LinearAnalysis.pst`. Can also be a `pyemu.Cov` instance + constructed from the observation weights in `LinearAnalysis.pst`. 
Can also be a `pyemu.Cov` instance forecasts (varies, optional): forecast sensitivity vectors. If `str`, first an observation name is assumed (a row in `LinearAnalysis.jco`). If that is not found, a filename is assumed and predictions are loaded from a file using the file extension. If [`str`], a list of observation names is assumed. @@ -367,7 +367,7 @@ def get_conditional_instance(self, parameter_names): if len(keep_names) == 0: raise Exception( "Schur.contribution_from_Parameters " - + "atleast one parameter must remain uncertain" + + "at least one parameter must remain uncertain" ) # get the reduced predictions if self.predictions is None: diff --git a/pyemu/utils/geostats.py b/pyemu/utils/geostats.py index d03ce17e1..8f938e5f5 100644 --- a/pyemu/utils/geostats.py +++ b/pyemu/utils/geostats.py @@ -887,9 +887,9 @@ def calc_factors_grid( entries with a zone value not found in zone_array will be skipped. If None, then all `point_data` will (potentially) be used for interpolating each grid node. Default is None - minpts_interp (`int`): minimum number of `point_data` entires to use for interpolation at + minpts_interp (`int`): minimum number of `point_data` entries to use for interpolation at a given grid node. grid nodes with less than `minpts_interp` - `point_data` found will be skipped (assigned np.nan). Defaut is 1 + `point_data` found will be skipped (assigned np.nan). Default is 1 maxpts_interp (`int`) maximum number of `point_data` entries to use for interpolation at a given grid node. A larger `maxpts_interp` will yield "smoother" interplation, but using a large `maxpts_interp` will slow the @@ -901,7 +901,7 @@ def calc_factors_grid( Default is False var_filename (`str`): a filename to save the kriging variance for each interpolated grid node. Default is None. - forgive (`bool`): flag to continue if inversion of the kriging matrix failes at one or more + forgive (`bool`): flag to continue if inversion of the kriging matrix fails at one or more grid nodes. Inversion usually fails if the kriging matrix is singular, resulting from `point_data` entries closer than EPSILON distance. If True, warnings are issued for each failed inversion. If False, an exception @@ -978,7 +978,7 @@ def calc_factors_grid( znt = zone_array.ravel().astype(int) #reset any missing values in znt to a zns value - - # doesnt matter in the end, just results in more nodes + # doesn't matter in the end, just results in more nodes # being solved for... znt_unique = np.unique(znt) zns_unique = np.unique(zns) @@ -1209,10 +1209,10 @@ def calc_factors( Args: x ([`float`]): x-coordinates to calculate kriging factors for y (([`float`]): y-coordinates to calculate kriging factors for - minpts_interp (`int`): minimum number of point_data entires to use for interpolation at + minpts_interp (`int`): minimum number of point_data entries to use for interpolation at a given x,y interplation point. interpolation points with less than `minpts_interp` `point_data` found will be skipped - (assigned np.nan). Defaut is 1 + (assigned np.nan). Default is 1 maxpts_interp (`int`): maximum number of point_data entries to use for interpolation at a given x,y interpolation point. A larger `maxpts_interp` will yield "smoother" interplation, but using a large `maxpts_interp` @@ -1222,7 +1222,7 @@ def calc_factors( interpolation point to search for `point_data` entries. Default is 1.0e+10 verbose (`bool`): a flag to echo process to stdout during the interpolatino process. 
Default is False - forgive (`bool`): flag to continue if inversion of the kriging matrix failes at one or more + forgive (`bool`): flag to continue if inversion of the kriging matrix fails at one or more interpolation points. Inversion usually fails if the kriging matrix is singular, resulting from `point_data` entries closer than EPSILON distance. If True, warnings are issued for each failed inversion. If False, an exception @@ -1800,7 +1800,7 @@ def bearing_rads(self): @property def rotation_coefs(self): - """get the rotation coefficents in radians + """get the rotation coefficients in radians Returns: [`float`]: the rotation coefficients implied by `Vario2d.bearing` @@ -1954,7 +1954,7 @@ def covariance_points(self, x0, y0, xother, yother): return self._h_function(h) def covariance(self, pt0, pt1): - """get the covarince between two points implied by Vario2d + """get the covariance between two points implied by Vario2d Args: pt0 : ([`float`]): first point x and y diff --git a/pyemu/utils/gw_utils.py b/pyemu/utils/gw_utils.py index 7e1e9913b..cb5d71c9d 100644 --- a/pyemu/utils/gw_utils.py +++ b/pyemu/utils/gw_utils.py @@ -202,7 +202,7 @@ def modflow_read_hydmod_file(hydmod_file, hydmod_outfile=None): vc = hyd_df.obsnme.value_counts().sort_values() vc = list(vc.loc[vc > 1].index.values) if len(vc) > 0: - hyd_df.to_csv("hyd_df.duplciates.csv") + hyd_df.to_csv("hyd_df.duplicates.csv") obs.get_dataframe().to_csv("hyd_org.duplicates.csv") raise Exception("duplicates in obsnme:{0}".format(vc)) # assert hyd_df.obsnme.value_counts().max() == 1,"duplicates in obsnme" @@ -547,11 +547,11 @@ def setup_hds_timeseries( text (`str`): the text record entry in the binary file (e.g. "constant_head"). Used to indicate that the binary file is a MODFLOW cell-by-cell budget file. If None, headsave or MT3D unformatted concentration file - is assummed. Default is None + is assumed. Default is None fill (`float`): fill value for NaNs in the extracted timeseries dataframe. If `None`, no filling is done, which may yield model run failures as the resulting processed timeseries CSV file (produced at runtime) may have missing values and - can't be processed with the cooresponding instruction file. Default is `None`. + can't be processed with the corresponding instruction file. Default is `None`. precision (`str`): the precision of the binary file. Can be "single" or "double". Default is "single". @@ -1007,7 +1007,7 @@ def setup_hds_obs( returns np.nan if the value should be skipped. prefix (`str`): the prefix to use for the observation names. default is "hds". text (`str`): the text tag the flopy HeadFile instance. Default is "head" - precison (`str`): the precision string for the flopy HeadFile instance. Default is "single" + precision (`str`): the precision string for the flopy HeadFile instance. Default is "single" include_path (`bool`, optional): flag to setup the binary file processing in directory where the hds_file is located (if different from where python is running). This is useful for setting up the process in separate directory for where python is running. @@ -1207,7 +1207,7 @@ def apply_hds_obs(hds_file, inact_abs_val=1.0e20, precision="single", text="head Args: hds_file (`str`): a modflow head save filename. if hds_file ends with 'ucn', then the file is treated as a UcnFile type. - inact_abs_val (`float`, optional): the value that marks the mininum and maximum + inact_abs_val (`float`, optional): the value that marks the minimum and maximum active value. 
values in the headsave file greater than `inact_abs_val` or less than -`inact_abs_val` are reset to `inact_abs_val` Returns: @@ -1230,7 +1230,7 @@ def apply_hds_obs(hds_file, inact_abs_val=1.0e20, precision="single", text="head df = pd.DataFrame({"obsnme": pst_utils.parse_ins_file(ins_file)}) df.index = df.obsnme - # populate metdata + # populate metadata items = ["k", "i", "j", "kper"] for i, item in enumerate(items): df.loc[:, item] = df.obsnme.apply(lambda x: int(x.split("_")[i + 1])) @@ -1412,7 +1412,7 @@ def setup_sfr_seg_parameters( available as pathed in the nam_file. Optionally, `nam_file` can be an existing `flopy.modflow.Modflow`. model_ws (`str`): model workspace for flopy to load the MODFLOW model from - par_cols ([`str`]): a list of segment data entires to parameterize + par_cols ([`str`]): a list of segment data entries to parameterize tie_hcond (`bool`): flag to use same mult par for hcond1 and hcond2 for a given segment. Default is `True`. include_temporal_pars ([`str`]): list of spatially-global multipliers to set up for @@ -1480,7 +1480,7 @@ def setup_sfr_seg_parameters( for kper, seg_data in m.sfr.segment_data.items(): assert ( seg_data.shape == shape - ), "cannot use: seg data must have the same number of entires for all kpers" + ), "cannot use: seg data must have the same number of entries for all kpers" seg_data_col_order = list(seg_data.dtype.names) # convert segment_data dictionary to multi index df - this could get ugly reform = { @@ -1523,7 +1523,7 @@ def setup_sfr_seg_parameters( ), PyemuWarning, ) - seg_data = seg_data[seg_data_col_order] # reset column orders to inital + seg_data = seg_data[seg_data_col_order] # reset column orders to initial seg_data_org = seg_data.copy() seg_data.to_csv(os.path.join(model_ws, "sfr_seg_pars.dat"), sep=",") @@ -1647,7 +1647,7 @@ def setup_sfr_seg_parameters( def setup_sfr_reach_parameters(nam_file, model_ws=".", par_cols=["strhc1"]): - """Setup multiplier paramters for reach data, when reachinput option is specififed in sfr. + """Setup multiplier parameters for reach data, when reachinput option is specified in sfr. Args: @@ -1655,7 +1655,7 @@ def setup_sfr_reach_parameters(nam_file, model_ws=".", par_cols=["strhc1"]): available as pathed in the nam_file. Optionally, `nam_file` can be an existing `flopy.modflow.Modflow`. model_ws (`str`): model workspace for flopy to load the MODFLOW model from - par_cols ([`str`]): a list of segment data entires to parameterize + par_cols ([`str`]): a list of segment data entries to parameterize tie_hcond (`bool`): flag to use same mult par for hcond1 and hcond2 for a given segment. Default is `True`. include_temporal_pars ([`str`]): list of spatially-global multipliers to set up for @@ -1692,7 +1692,7 @@ def setup_sfr_reach_parameters(nam_file, model_ws=".", par_cols=["strhc1"]): ) # get reachdata as dataframe reach_data = pd.DataFrame.from_records(m.sfr.reach_data) - # write inital reach_data as csv + # write initial reach_data as csv reach_data_orig = reach_data.copy() reach_data.to_csv(os.path.join(m.model_ws, "sfr_reach_pars.dat"), sep=",") @@ -1782,7 +1782,7 @@ def setup_sfr_reach_parameters(nam_file, model_ws=".", par_cols=["strhc1"]): def apply_sfr_seg_parameters(seg_pars=True, reach_pars=False): - """apply the SFR segement multiplier parameters. + """apply the SFR segment multiplier parameters. Args: seg_pars (`bool`, optional): flag to apply segment-based parameters. 
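A minimal sketch of the SFR segment-parameter setup/apply pair referenced above; the nam file, workspace, and `par_cols` entries are illustrative placeholders:

    import pyemu

    # at setup time: write template and config files for segment-data multipliers
    pyemu.utils.gw_utils.setup_sfr_seg_parameters("model.nam", model_ws="temp_ws",
                                                  par_cols=["flow", "hcond1"])
    # at run time (from inside the model directory): apply the multipliers
    pyemu.utils.gw_utils.apply_sfr_seg_parameters(seg_pars=True, reach_pars=False)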
@@ -1910,7 +1910,7 @@ def setup_sfr_obs( Args: sft_out_file (`str`): the name and path to an existing SFR output file - seg_group_dict (`dict`): a dictionary of SFR segements to aggregate together for a single obs. + seg_group_dict (`dict`): a dictionary of SFR segments to aggregate together for a single obs. the key value in the dict is the base observation name. If None, all segments are used as individual observations. Default is None model (`flopy.mbase`): a flopy model. If passed, the observation names will have @@ -2033,7 +2033,7 @@ def apply_sfr_obs(): None Returns: - **pandas.DataFrame**: a dataframe of aggregrated sfr segment aquifer and outflow + **pandas.DataFrame**: a dataframe of aggregated sfr segment aquifer and outflow Note: This is the companion function of `gw_utils.setup_sfr_obs()`. @@ -2232,7 +2232,7 @@ def setup_sfr_reach_obs( seg_reach = [seg_reach] assert ( np.shape(seg_reach)[1] == 2 - ), "varible seg_reach expected shape (n,2), received {0}".format( + ), "variable seg_reach expected shape (n,2), received {0}".format( np.shape(seg_reach) ) seg_reach = pd.DataFrame(seg_reach, columns=["segment", "reach"]) @@ -2529,7 +2529,7 @@ def setup_gage_obs(gage_file, ins_file=None, start_datetime=None, times=None): obs_ids[col] = "g{0}{1}".format(gage_type[0], col[0:2]) with open( "_gage_obs_ids.csv", "w" - ) as f: # write file relating obs names to meaningfull keys! + ) as f: # write file relating obs names to meaningful keys! [f.write("{0},{1}\n".format(key, obs)) for key, obs in obs_ids.items()] # find passed times in df if times is None: diff --git a/pyemu/utils/helpers.py b/pyemu/utils/helpers.py index 12ecd5261..d60886fc9 100644 --- a/pyemu/utils/helpers.py +++ b/pyemu/utils/helpers.py @@ -418,7 +418,7 @@ def geostatistical_draws( scale_offset (`bool`,optional): flag to apply scale and offset to parameter bounds when calculating variances - this is passed through to `pyemu.Cov.from_parameter_data()`. Default is True. - subset (`array-like`, optional): list, array, set or pandas index defining subset of paramters + subset (`array-like`, optional): list, array, set or pandas index defining subset of parameters for draw. Returns @@ -770,7 +770,7 @@ def calc_observation_ensemble_quantiles( tuple containing - **pandas DataFrame**: same ens object that was input but with quantile realizations - appended as new rows labelled with 'q_#' where '#' is the slected quantile + appended as new rows labelled with 'q_#' where '#' is the selected quantile - **dict**: dictionary with keys being quantiles and values being realizations corresponding to each realization """ @@ -1631,7 +1631,7 @@ def pst_from_io_files( Returns: `Pst`: new control file instance with parameter and observation names - found in `tpl_files` and `ins_files`, repsectively. + found in `tpl_files` and `ins_files`, respectively. Note: calls `pyemu.helpers.pst_from_io_files()` @@ -1885,7 +1885,7 @@ def _process_array_file(model_file, df): def apply_array_pars(arr_par="arr_pars.csv", arr_par_file=None, chunk_len=50): - """a function to apply array-based multipler parameters. + """a function to apply array-based multiplier parameters. Args: arr_par (`str` or `pandas.DataFrame`): if type `str`, @@ -2038,8 +2038,8 @@ def setup_temporal_diff_obs(*args, **kwargs): the differencing. The order of the observations matters for the differencing. If False, then the control file order is used. If observation names have a datetime suffix, make sure the format is year-month-day to use this sorting. 
Default is True - long_names (`bool`, optional): flag to use long, descriptive names by concating the two observation names - that are being differenced. This will produce names that are too long for tradtional PEST(_HP). + long_names (`bool`, optional): flag to use long, descriptive names by concatenating the two observation names + that are being differenced. This will produce names that are too long for traditional PEST(_HP). Default is True. prefix (`str`, optional): prefix to prepend to observation names and group names. Default is "dif". @@ -2314,7 +2314,7 @@ def _process_list_file(model_file, df): lambda x: [str(i) for i in x] ) - # if writen by PstFrom this should always be comma delim - tidy + # if written by PstFrom this should always be comma delim - tidy org_data = pd.read_csv(org_file, skiprows=datastrtrow, header=header, dtype='object') # mult columns will be string type, so to make sure they align @@ -2427,10 +2427,10 @@ def build_jac_test_csv(pst, num_steps, par_names=None, forward=True): Args: pst (`pyemu.Pst`): existing control file - num_steps (`int`): number of pertubation steps for each parameter + num_steps (`int`): number of perturbation steps for each parameter par_names [`str`]: list of parameter names of pars to test. If None, all adjustable pars are used. Default is None - forward (`bool`): flag to start with forward pertubations. + forward (`bool`): flag to start with forward perturbations. Default is True Returns: @@ -2631,7 +2631,7 @@ class SpatialReference(object): """ a class to locate a structured model grid in x-y space. Lifted wholesale from Flopy, and preserved here... - ...maybe slighlty over-engineered for here + ...maybe slightly over-engineered for here Args: @@ -3830,7 +3830,7 @@ def parse_rmr_file(rmr_file): def setup_threshold_pars(orgarr_file,cat_dict,testing_workspace=".",inact_arr=None): - """setup a thresholding 2-category binary array prcoess. + """setup a thresholding 2-category binary array process. Parameters: orgarr_file (`str`): the input array that will ultimately be created at runtime @@ -3850,7 +3850,7 @@ def setup_threshold_pars(orgarr_file,cat_dict,testing_workspace=".",inact_arr=No """ assert os.path.exists(orgarr_file) - #atleast 2d for xsections + #at least 2d for xsections org_arr = np.atleast_2d(np.loadtxt(orgarr_file)) if len(cat_dict) != 2: @@ -3904,9 +3904,9 @@ def setup_threshold_pars(orgarr_file,cat_dict,testing_workspace=".",inact_arr=No def apply_threshold_pars(csv_file): """apply the thresholding process. everything keys off of csv_file name... - Note: if the standard deviation of the continous thresholding array is too low, + Note: if the standard deviation of the continuous thresholding array is too low, the line search will fail. Currently, if this stdev is less than 1.e-10, - then a homogenous array of the first category fill value will be created. User + then a homogeneous array of the first category fill value will be created. User beware! 
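A minimal sketch of the two-category thresholding setup described above; the array file name is an illustrative placeholder and the `cat_dict` structure ({category: [proportion, fill value]}) is an assumption:

    import pyemu

    pyemu.utils.helpers.setup_threshold_pars(
        "hk_layer1.dat",
        cat_dict={1: [0.5, 0.01], 2: [0.5, 10.0]},  # assumed {category: [proportion, fill value]}
        testing_workspace=".", inact_arr=None)
    # at run time, apply_threshold_pars() is called with the csv file written by the
    # setup call (name omitted here since it is generated by setup_threshold_pars)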
""" @@ -3945,7 +3945,7 @@ def apply_threshold_pars(csv_file): if tarr.std() < 1e-5: print("WARNING: thresholding array {0} has very low standard deviation".format(thresarr_file)) - print(" using a homogenous array with first category fill value {0}".format(tfill[0])) + print(" using a homogeneous array with first category fill value {0}".format(tfill[0])) farr = np.zeros_like(tarr) + tfill[0] if iarr is not None: @@ -4224,7 +4224,7 @@ def prep_for_gpr(pst_fname,input_fnames,output_fnames,gpr_t_d="gpr_template",gp_ vdf = pd.DataFrame({"y_verf":y_verf,"y_pred":pred_mean,"y_pred_std":pred_std}) verf_fname = os.path.join(gpr_t_d,"{0}_gpr_verf.csv".format(output_name)) vdf.to_csv(verf_fname) - print("saved ",output_fname,"verfication csv to",verf_fname) + print("saved ",output_fname,"verification csv to",verf_fname) mabs = np.abs(vdf.y_verf - vdf.y_pred).mean() print("...mean abs error",mabs) if plot_fits: @@ -4270,7 +4270,7 @@ def prep_for_gpr(pst_fname,input_fnames,output_fnames,gpr_t_d="gpr_template",gp_ assert len(set(par_names).symmetric_difference(set(gpst.par_names))) == 0 for col in pst.parameter_data.columns: # this gross thing is to avoid a future error warning in pandas - - # why is it getting so strict?! isnt python duck-typed? + # why is it getting so strict?! isn't python duck-typed? if col in gpst.parameter_data.columns and\ gpst.parameter_data.dtypes[col] != pst.parameter_data.dtypes[col]: gpst.parameter_data[col] = gpst.parameter_data[col].astype(pst.parameter_data.dtypes[col]) @@ -4278,7 +4278,7 @@ def prep_for_gpr(pst_fname,input_fnames,output_fnames,gpr_t_d="gpr_template",gp_ for col in pst.observation_data.columns: # this gross thing is to avoid a future error warning in pandas - - # why is it getting so strict?! isnt python duck-typed? + # why is it getting so strict?! isn't python duck-typed? if col in gpst.observation_data.columns and \ gpst.observation_data.dtypes[col] != pst.observation_data.dtypes[col]: gpst.observation_data[col] = gpst.obsveration_data[col].astype(pst.observation_data.dtypes[col]) @@ -4347,7 +4347,7 @@ def gpr_pyworker(pst,host,port,input_df=None,mdf=None): import numpy as np import pickle - # if explicit args werent passed, get the default ones... + # if explicit args weren't passed, get the default ones... if input_df is None: input_df = pd.read_csv("gpr_input.csv",index_col=0) if mdf is None: @@ -4382,7 +4382,7 @@ def gpr_pyworker(pst,host,port,input_df=None,mdf=None): # do the emulation simdf = emulate_with_gpr(input_df,mdf,gpr_model_dict) - # replace the emulated quantites in the obs series + # replace the emulated quantities in the obs series obs.loc[simdf.index] = simdf.sim.values obs.loc[simdf.index.map(lambda x: x+"_gprstd")] = simdf.sim_std.values #send the obs series to the master @@ -4476,7 +4476,7 @@ def dsi_pyworker(pst,host,port,pmat=None,ovals=None,pvals=None): import numpy as np - # if explicit args werent passed, get the default ones... + # if explicit args weren't passed, get the default ones... 
if pvals is None: pvals = pd.read_csv("dsi_pars.csv",index_col=0) if pmat is None: @@ -4506,7 +4506,7 @@ def dsi_pyworker(pst,host,port,pmat=None,ovals=None,pvals=None): # do the emulation simdf = dsi_forward_run(pmat=pmat,ovals=ovals,pvals=pvals,write_csv=False) - # replace the emulated quantites in the obs series + # replace the emulated quantities in the obs series obs.loc[simdf.index] = simdf.mn.values #send the obs series to the master @@ -4599,7 +4599,7 @@ def normal_score_transform(nstval, val, value): rank = np.searchsorted(val, value, side='right') - 1 if rank == len(val) - 1: return nstval[-1], len(val) - # if the value conincides with a value in the table, return the corresponding normal score + # if the value coincides with a value in the table, return the corresponding normal score nstdiff = nstval[rank + 1] - nstval[rank] diff = val[rank + 1] - val[rank] if nstdiff <= 0.0 or diff <= 0.0: diff --git a/pyemu/utils/metrics.py b/pyemu/utils/metrics.py index c50c6e0cc..05db7bfa9 100644 --- a/pyemu/utils/metrics.py +++ b/pyemu/utils/metrics.py @@ -295,7 +295,7 @@ def calc_metric_res(res, metric="all", bygroups=True, drop_zero_weight=True): ret_df = pd.DataFrame(index=["single_realization"]) - # calculate the matrics + # calculate the metrics for cm in metric: f = ALLMETRICS[cm.lower()] ret_df["{}_total".format(cm.upper())] = [ diff --git a/pyemu/utils/optimization.py b/pyemu/utils/optimization.py index 59b1a0710..926704cb2 100644 --- a/pyemu/utils/optimization.py +++ b/pyemu/utils/optimization.py @@ -49,7 +49,7 @@ def add_pi_obj_func(pst, obj_func_dict=None, out_pst_name=None): # def get_added_obs_importance(pst,obslist_dict=None,base_obslist=None, # reset_zero_weight=1.0): -# """get a dataframe fo the objective function +# """get a dataframe of the objective function # as a results of added some observations # Parameters # ---------- @@ -208,7 +208,7 @@ def add_pi_obj_func(pst, obj_func_dict=None, out_pst_name=None): # const_groups = [grp for grp in pst.obs_groups if grp.lower() in OPERATOR_WORDS] # if len(const_groups) == 0: # raise Exception("to_mps(): obs_constraint_sense is None and no "+\ -# "obseravtion groups in {0}".format(','.join(pst.obs_groups))) +# "observation groups in {0}".format(','.join(pst.obs_groups))) # obs_constraint_sense = {} # obs_groups = pst.observation_data.groupby(pst.observation_data.obgnme).groups # for og,obs_names in obs_groups.items(): @@ -240,7 +240,7 @@ def add_pi_obj_func(pst, obj_func_dict=None, out_pst_name=None): # obs_constraint_sense[obs_name.lower()] = obs_constraint_sense.\ # pop(obs_name) # -# #build a list of constaint names in order WRT jco row order +# #build a list of constraint names in order WRT jco row order # # order_obs_constraints = [name for name in jco.row_names if name in # # obs_constraint_sense] # diff --git a/pyemu/utils/os_utils.py b/pyemu/utils/os_utils.py index 4d98ec3a5..c7eefc3f2 100644 --- a/pyemu/utils/os_utils.py +++ b/pyemu/utils/os_utils.py @@ -593,7 +593,7 @@ def recv(self,s,dtype=None): # return -1 data = self.nonblocking_recv(s,self.header_size) if data is None: - raise Exception("didnt recv header after security message") + raise Exception("didn't recv header after security message") self.buf_size = int.from_bytes(data[self.buf_idx[0]:self.buf_idx[1]], "little") self.mtype = int.from_bytes(data[self.type_idx[0]:self.type_idx[1]], "little") self.group = int.from_bytes(data[self.group_idx[0]:self.group_idx[1]], "little") @@ -604,7 +604,7 @@ def recv(self,s,dtype=None): if data_len > 0: raw_data = 
self.nonblocking_recv(s,data_len) if raw_data is None: - raise Exception("didnt recv data pack after header of {0} bytes".format(data_len)) + raise Exception("didn't recv data pack after header of {0} bytes".format(data_len)) if dtype is None and self.mtype == 10: dtype = float self.data_pak = self.deserialize_data(raw_data,dtype) @@ -799,7 +799,7 @@ def listen(self,lock=None,send_lock=None): self.message("master is requesting run kill...") else: - print("WARNING: unsupported request recieved: {0}".format(NetPack.netpack_type[self.net_pack.mtype])) + print("WARNING: unsupported request received: {0}".format(NetPack.netpack_type[self.net_pack.mtype])) def get_parameters(self): diff --git a/pyemu/utils/pp_utils.py b/pyemu/utils/pp_utils.py index 774e7ba23..e14dcd47b 100644 --- a/pyemu/utils/pp_utils.py +++ b/pyemu/utils/pp_utils.py @@ -41,7 +41,7 @@ def setup_pilotpoints_grid( """setup a regularly-spaced (gridded) pilot point parameterization Args: - ml (`flopy.mbase`, optional): a flopy mbase dervied type. If None, `sr` must not be None. + ml (`flopy.mbase`, optional): a flopy mbase derived type. If None, `sr` must not be None. sr (`flopy.utils.reference.SpatialReference`, optional): a spatial reference use to locate the model grid in space. If None, `ml` must not be None. Default is None ibound (`numpy.ndarray`, optional): the modflow ibound integer array. THis is used to @@ -642,9 +642,9 @@ def get_zoned_ppoints_for_vertexgrid(spacing, zone_array, mg, zone_number=None, spacing (`float`): spacing in model length units between pilot points. zone_array (`numpy.ndarray`): the modflow 6 idomain integer array. This is used to set pilot points only in active areas and to assign zone numbers. - mg (`flopy.discretization.vertexgrid.VertexGrid`): a VertexGrid flopy discretization dervied type. + mg (`flopy.discretization.vertexgrid.VertexGrid`): a VertexGrid flopy discretization derived type. zone_number (`int`): zone number - add_buffer (`boolean`): specifies whether pilot points ar eplaced wihtin a buffer zone of size `distance` around the zone/active domain + add_buffer (`boolean`): specifies whether pilot points are placed within a buffer zone of size `distance` around the zone/active domain Returns: `list`: a list of tuples with pilot point x and y coordinates diff --git a/pyemu/utils/pst_from.py b/pyemu/utils/pst_from.py index 0f776ffab..3a142f3a6 100644 --- a/pyemu/utils/pst_from.py +++ b/pyemu/utils/pst_from.py @@ -83,7 +83,7 @@ def _load_array_get_fmt(fname, sep=None, fullfile=False, logger=None): lens.append(ilens) prec.append(iprec) N = np.sum(~np.isnan(arr[:len(lines)])) - # try to catch if file is infact fixed format (like old mt3d files) + # try to catch if file is in fact fixed format (like old mt3d files) firsts = np.ravel([line.pop(0) for line in lens]) # first entry on each line rest = np.array(lens).ravel() # the len of the rest of the entries # if input file is fake free-format (actually fixed) then: @@ -95,7 +95,7 @@ def _load_array_get_fmt(fname, sep=None, fullfile=False, logger=None): # space + sign + unit + dec + 4(for exp) # if float format -- we can't know the width.precision relationship that # will leave us with enough space for space and sign. precision needs to be - # max width-3 but this wont allow for any growth of the LHS for float. + # max width-3 but this won't allow for any growth of the LHS for float.
fmax = max(firsts) # max of first column rmax = max(rest) if len(rest) > 0 else 0 # max of rest of cols width = max([fmax, rmax]) # max len to max of these @@ -180,7 +180,7 @@ class PstFrom(object): new_d (`str` or Path): the path to where the model files and PEST interface files will be copied/built longnames (`bool`): flag to use longer-than-PEST-likes parameter and observation names. Default is True remove_existing (`bool`): flag to destroy any existing files and folders in `new_d`. Default is False - spatial_reference (varies): an object that faciliates geo-locating model cells based on index. Default is None + spatial_reference (varies): an object that facilitates geo-locating model cells based on index. Default is None zero_based (`bool`): flag if the model uses zero-based indices, Default is True start_datetime (`str` or Timestamp): a string that can be case to a datatime instance the represents the starting datetime of the model @@ -686,12 +686,12 @@ def draw(self, num_reals=100, sigma_range=6, use_specsim=False, scale_offset=Tru def build_pst(self, filename=None, update=False, version=1): """Build control file from i/o files in PstFrom object. Warning: This builds a pest control file from scratch, overwriting - anything already in self.pst object and anything already writen to `filename` + anything already in self.pst object and anything already written to `filename` Args: filename (`str`): the filename to save the control file to. If None, the name is formed from the `PstFrom.original_d` - ,the orginal directory name from which the forward model + ,the original directory name from which the forward model was extracted. Default is None. The control file is saved in the `PstFrom.new_d` directory. update (`bool`) or (str): flag to add to existing Pst object and @@ -704,7 +704,7 @@ def build_pst(self, filename=None, update=False, version=1): again before running. Note: This builds a pest control file from scratch, overwriting anything already - in self.pst object and anything already writen to `filename` + in self.pst object and anything already written to `filename` The new pest control file is assigned an NOPTMAX value of 0 @@ -747,7 +747,7 @@ def build_pst(self, filename=None, update=False, version=1): uupdate = False pst = pyemu.Pst(filename, load=False) - # TODO should this be under an if:? incase updating and prior info has been set + # TODO should this be under an if:? 
in case updating and prior info has been set pst.prior_information = pst.null_prior.merge(pd.DataFrame( data=[], columns=pst.prior_fieldnames)) @@ -779,7 +779,7 @@ def build_pst(self, filename=None, update=False, version=1): shtmx = 0 gshtmx = 0 if pst.parameter_data is not None: - # copy existing par data (incase it has been edited) + # copy existing par data (in case it has been edited) par_data_orig = pst.parameter_data.copy() if "longname" in par_data_orig.columns: # Support existing long names mapping @@ -864,7 +864,7 @@ def build_pst(self, filename=None, update=False, version=1): shtmx = 0 gshtmx = 0 if pst.observation_data is not None: - # copy existing obs data (incase it has been edited) + # copy existing obs data (in case it has been edited) obs_data_orig = pst.observation_data.copy() if "longname" in obs_data_orig.columns: # Support existing long names mapping @@ -1266,11 +1266,11 @@ def add_py_function( ) if "(" not in call_str or ")" not in call_str: self.logger.lraise( - "add_py_function(): call_str '{0}' missing paretheses".format(call_str) + "add_py_function(): call_str '{0}' missing parentheses".format(call_str) ) function_name = call_str[ : call_str.find("(") - ] # strip to first occurance of '(' + ] # strip to first occurrence of '(' if function_name in self.py_functions: # todo: could add more duplication options here: override, increment warnings.warn( @@ -1285,7 +1285,7 @@ def add_py_function( else: if not os.path.exists(file_name): self.logger.lraise( - "add_py_function(): couldnt find python source file '{0}'".format( + "add_py_function(): couldn't find python source file '{0}'".format( file_name ) ) @@ -1440,7 +1440,7 @@ def add_observations( Args: filename (`str`): model output file name(s) to set up as observations. By default filename should give relative - loction from top level of pest template directory + location from top level of pest template directory (`new_d` as passed to `PstFrom()`). insfile (`str`): desired instructions file filename index_cols (`list`-like or `int`): columns to denote are indices for obs @@ -1573,7 +1573,7 @@ def add_observations( inssep = [inssep] # rectify df? # if iloc[0] are strings and index_cols are ints, - # can we assume that there were infact column headers? + # can we assume that there were in fact column headers? if all(isinstance(c, str) for c in df.iloc[0]) and all( isinstance(a, (int, np.integer)) for a in index_cols ): @@ -1635,7 +1635,7 @@ def add_observations( if obsgp is not None: if use_cols_psd is None: # no use_cols defined (all are setup) if len([obsgp] if isinstance(obsgp, str) else obsgp) == 1: - # only 1 group provided, assume passed obsgp applys + # only 1 group provided, assume passed obsgp applies # to all use_cols fill = "first" else: @@ -1857,7 +1857,7 @@ def add_parameters( Args: filenames (`str`): Model input filenames to parameterize. By default filename should give relative - loction from top level of pest template directory + location from top level of pest template directory (`new_d` as passed to `PstFrom()`). par_type (`str`): One of `grid` - for every element, `constant` - for single parameter applied to every element, `zone` - for zone-based @@ -1880,13 +1880,13 @@ def add_parameters( index_cols (`list`-like): if not None, will attempt to parameterize expecting a tabular-style model input file. `index_cols` defines the unique columns used to set up pars. 
If passed as a - list of `str`, stings are expected to denote the columns + list of `str`, strings are expected to denote the columns headers in tabular-style parameter files; if `i` and `j` in list, these columns will be used to define spatial position for spatial correlations (if required). WARNING: If passed as list of `int`, `i` and `j` will be assumed to be in last two entries in the list. Can be passed as a dictionary using the keys - `i` and `j` to explicitly speficy the columns that relate to + `i` and `j` to explicitly specify the columns that relate to model rows and columns to be identified and processed to x,y. use_cols (`list`-like or `int`): for tabular-style model input file, defines the columns to be parameterised @@ -1896,7 +1896,7 @@ def add_parameters( If ndim(use_rows) < 2: use_rows is assumed to represent the row number, index slicer (equiv df.iloc), for all passed files (after headers stripped). So use_rows=[0,3,5], will parameterise the 1st, 4th and 6th rows of each passed list-like file. - If ndim(use_rows) = 2: use_rows represent the index value to paramterise according to index_cols. + If ndim(use_rows) = 2: use_rows represent the index value to parameterise according to index_cols. e.g. [(3,5,6)] or [[3,5,6]] would attempt to set parameters where the model file values for 3 `index_cols` are 3,5,6. N.B. values in tuple are the actual model file entry values. @@ -1911,7 +1911,7 @@ def add_parameters( DEPRECATED : use pp_options['pp_space'] instead. use_pp_zones (`bool`): a flag to use the greater-than-zero values DEPRECATED : use pp_options['use_pp_zones'] instead. - num_eig_kl: TODO - impliment with KL pars + num_eig_kl: TODO - implement with KL pars spatial_reference (`pyemu.helpers.SpatialReference`): If different spatial reference required for pilotpoint setup. If None spatial reference passed to `PstFrom()` will be used @@ -1926,7 +1926,7 @@ def add_parameters( when reading and reapply when writing. Can optionally be `str` in which case `mf_skip` will be treated as a `comment_char`. mfile_sep (`str`): separator/delimiter in model input file. - If None, separator will be interpretted from file name extension. + If None, separator will be interpreted from file name extension. `.csv` is assumed to be comma separator. Default is None ult_ubound (`float`): Ultimate upper bound for model input parameter once all mults are applied - ensure physical model par vals. If not passed, @@ -2023,12 +2023,12 @@ def add_parameters( if par_style in ["a", "add", "addend"]: transform = 'none' self.logger.statement( - "par_style is 'add' and transform was not passed, setting tranform to 'none'" + "par_style is 'add' and transform was not passed, setting transform to 'none'" ) else: transform = 'log' self.logger.statement( - "transform was not passed, setting default tranform to 'log'" + "transform was not passed, setting default transform to 'log'" ) if transform.lower().strip() not in ["none", "log", "fixed"]: self.logger.lraise( @@ -2038,7 +2038,7 @@ def add_parameters( ) if transform == "fixed" and geostruct is not None: self.logger.lraise( - "geostruct is not 'None', cant draw values for fixed pars" + "geostruct is not 'None', can't draw values for fixed pars" ) # some checks for direct parameters @@ -2121,7 +2121,7 @@ def add_parameters( self.logger.warn( "-) Better to pass an appropriately transformed geostruct" ) - # big sr and zone dependancy checker here: todo - tidy? + # big sr and zone dependency checker here: todo - tidy? 
checker = ( self._spatial_reference is not None and not isinstance(self._spatial_reference, dict) @@ -2209,7 +2209,7 @@ def add_parameters( f"len use_cols, not '{str(par_name_base)}'" ) - # otherewise, things get tripped up in the ensemble/cov stuff + # otherwise, things get tripped up in the ensemble/cov stuff if pargp is not None: if isinstance(pargp, list): pargp = [pg.lower() for pg in pargp] @@ -2849,7 +2849,7 @@ def add_parameters( self.par_struct_dict[geostruct].update({gp: gppars}) else: # if gp already assigned to this geostruct append par - # list to approprate group key + # list to appropriate group key self.par_struct_dict[geostruct][gp].extend(gppars) # self.par_struct_dict_l[geostruct].extend(list(gp_dict.values())) else: # TODO some rules for if geostruct is not passed.... @@ -2990,7 +2990,7 @@ def _prep_pp_args(self, zone_array, pp_kwargs=None): if isinstance(pp_kwargs['spatial_reference'], dict): # then we are unstructured and need pp_locs self.logger.lraise( "pilot point type parameters with an unstructured grid requires 'pp_space' " - "contain explict pilot point information" + "contain explicit pilot point information" ) else: # check pp_locs (if not None) cols = pp_locs.columns.tolist() @@ -3238,19 +3238,19 @@ def _prep_arg_list_lengths( reading options and setup columns for passing sequentially to load_listtype Args: - filenames (`str`) or (`list`): names for files ot eventually read - fmts (`str`) or (`list`): of column formaters for input file. + filenames (`str`) or (`list`): names for files to eventually read + fmts (`str`) or (`list`): of column formatters for input file. If `None`, free-formatting is assumed seps (`str`) or (`list`): column separator free formatter files. If `None`, a list of `None`s is returned and the delimiter is eventually governed by the file extension (`,` for .csv) skip_rows (`str`) or (`list`): Number of rows in file header to not form part of the dataframe - index_cols (`int`) or (`list`): Columns in tabular file to use as indicies + index_cols (`int`) or (`list`): Columns in tabular file to use as indices use_cols (`int`) or (`list`): Columns in tabular file to use as par or obs cols Returns: - algined lists of: + aligned lists of: filenames, fmts, seps, skip_rows, index_cols, use_cols for squentially passing to `_load_listtype_file()` @@ -3355,7 +3355,7 @@ def write_list_tpl( If None, pars are set up for all columns apart from index cols. use_rows (`list` of `int` or `tuple`): Setup parameters for only specific rows in list-style model input file. - If list of `int` -- assumed to be a row index selction (zero-based). + If list of `int` -- assumed to be a row index selection (zero-based). If list of `tuple` -- assumed to be selection based `index_cols` values. e.g. [(3,5,6)] would attempt to set parameters where the model file values for 3 `index_cols` are 3,5,6. N.B. values in @@ -3369,7 +3369,7 @@ def write_list_tpl( suffix (`str`): Optional par name suffix zone_array (`np.ndarray`): Array defining zone divisions. If not None and `par_type` is `grid` or `zone` it is expected that - `index_cols` provide the indicies for + `index_cols` provide the indices for querying `zone_array`. Therefore, array dimension should equal `len(index_cols)`. 
get_xy (`pyemu.PstFrom` method): Can be specified to get real-world xy @@ -3377,7 +3377,7 @@ def write_list_tpl( ij_in_idx (`list` or `array`): defining which `index_cols` contain i,j xy_in_idx (`list` or `array`): defining which `index_cols` contain x,y zero_based (`boolean`): IMPORTANT - pass as False if `index_cols` - are NOT zero-based indicies (e.g. MODFLOW row/cols). + are NOT zero-based indices (e.g. MODFLOW row/cols). If False 1 with be subtracted from `index_cols`. input_filename (`str`): Path to input file (paired with tpl file) par_style (`str`): either 'd','a', or 'm' @@ -3436,7 +3436,7 @@ def write_list_tpl( ) df_tpl = df_tpl.loc[use_rows, :] # direct pars done in direct function # can we just slice df_tpl here - for col in use_cols: # corellations flagged using pargp + for col in use_cols: # correlations flagged using pargp df_tpl["covgp{0}".format(col)] = df_tpl.loc[:, "pargp{0}".format(col)].values # needs modifying if colocated pars in same group if par_type == "grid" and "x" in df_tpl.columns: @@ -3554,7 +3554,7 @@ def _write_direct_df_tpl( tpl_filename (`str` ): template filename df (`pandas.DataFrame`): DataFrame of list-style input file name (`str`): Parameter name prefix - index_cols (`str` or `list`): columns of dataframes to use as indicies + index_cols (`str` or `list`): columns of dataframes to use as indices typ (`str`): 'constant','zone', or 'grid' used in parname generation. If `constant`, one par is set up for each `use_cols`. If `zone`, one par is set up for each zone for each `use_cols`. @@ -3565,14 +3565,14 @@ def _write_direct_df_tpl( suffix (`str`): Optional par name suffix. zone_array (`np.ndarray`): Array defining zone divisions. If not None and `par_type` is `grid` or `zone` it is expected that - `index_cols` provide the indicies for querying `zone_array`. + `index_cols` provide the indices for querying `zone_array`. Therefore, array dimension should equal `len(index_cols)`. get_xy (`pyemu.PstFrom` method): Can be specified to get real-world xy from `index_cols` passed (to include in obs/par name) ij_in_idx (`list` or `array`): defining which `index_cols` contain i,j xy_in_idx (`list` or `array`): defining which `index_cols` contain x,y zero_based (`boolean`): IMPORTANT - pass as False if `index_cols` - are NOT zero-based indicies (e.g. MODFLOW row/cols). + are NOT zero-based indices (e.g. MODFLOW row/cols). If False 1 with be subtracted from `index_cols`. 
Returns: @@ -3583,11 +3583,11 @@ def _write_direct_df_tpl( """ from pyemu.utils.helpers import _try_pdcol_numeric - # TODO much of this duplicates what is in _get_tpl_or_ins_df() -- could posssibly be consolidated + # TODO much of this duplicates what is in _get_tpl_or_ins_df() -- could possibly be consolidated # work out the union of indices across all dfs df_ti = df[index_cols].copy() - # adjust int-like indicies to zero-base + # adjust int-like indices to zero-base df_ti = df_ti.apply(_try_pdcol_numeric, intadj=0 if zero_based else -1, downcast='integer') @@ -3654,7 +3654,7 @@ def _get_use_rows(tpldf, idxcolvals, use_rows, zero_based, fnme, use_rows = [tuple(r) for r in use_rows] nxs = [len(set(use_rows).intersection(idx)) for idx in idxcolvals] orig_use_rows = use_rows.copy() - if not zero_based: # assume passed indicies are 1 based + if not zero_based: # assume passed indices are 1 based use_rows = [ tuple([i - 1 if isinstance(i, (int, np.integer)) else i for i in r]) if not isinstance(r, str) @@ -3885,7 +3885,7 @@ def _get_tpl_or_ins_df( dfs (`pandas.DataFrame` or `list`): DataFrames (can be list of DataFrames) to set up parameters or observations name (`str`): Parameter name or Observation name prefix - index_cols (`str` or `list`): columns of dataframes to use as indicies + index_cols (`str` or `list`): columns of dataframes to use as indices typ (`str`): 'obs' to set up observation names or, 'constant','zone', or 'grid' used in parname generation. If `constant`, one par is set up for each `use_cols`. @@ -3895,17 +3895,17 @@ def _get_tpl_or_ins_df( use_cols (`list`): Columns to parameterise. If None, pars are set up for all columns apart from index cols. Not used if `typ`==`obs`. suffix (`str`): Optional par name suffix. Not used if `typ`==`obs`. - zone_array (`np.ndarray`): Only used for paremeters (`typ` != `obs`). + zone_array (`np.ndarray`): Only used for parameters (`typ` != `obs`). Array defining zone divisions. If not None and `par_type` is `grid` or `zone` it is expected that - `index_cols` provide the indicies for querying `zone_array`. + `index_cols` provide the indices for querying `zone_array`. Therefore, array dimension should equal `len(index_cols)`. get_xy (`pyemu.PstFrom` method): Can be specified to get real-world xy from `index_cols` passed (to include in obs/par name) ij_in_idx (`list` or `array`): defining which `index_cols` contain i,j xy_in_idx (`list` or `array`): defining which `index_cols` contain x,y zero_based (`boolean`): IMPORTANT - pass as False if `index_cols` - are NOT zero-based indicies (e.g. MODFLOW row/cols). + are NOT zero-based indices (e.g. MODFLOW row/cols). 
If False 1 with be subtracted from `index_cols`.= par_fill_value (float): value to use as `parval1`,Default is 1.0 @@ -3927,7 +3927,7 @@ def _get_tpl_or_ins_df( # order matters for obs idxs = [df[index_cols] for df in dfs] df_ti = pd.concat(idxs).drop_duplicates() - # adjust int-like indicies to zero-base + # adjust int-like indices to zero-base df_ti = df_ti.apply(_try_pdcol_numeric, intadj=0 if zero_based else -1, downcast='integer') diff --git a/pyemu/utils/smp_utils.py b/pyemu/utils/smp_utils.py index 0fff2711f..0990d7c0b 100644 --- a/pyemu/utils/smp_utils.py +++ b/pyemu/utils/smp_utils.py @@ -175,7 +175,7 @@ def smp_to_dataframe(smp_filename, datetime_format=None): """load an smp file into a pandas dataframe Args: - smp_filename (`str`): path and nane of existing smp filename to load + smp_filename (`str`): path and name of existing smp filename to load datetime_format (`str`, optional): The format of the datetime strings in the smp file. Can be either "%m/%d/%Y %H:%M:%S" or "%d/%m/%Y %H:%M:%S" If None, then we will try to deduce the format for you, which diff --git a/pyproject.toml b/pyproject.toml index c85d1e5c6..2b5b36732 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -71,3 +71,25 @@ style = "pep440" versionfile_source = "pyemu/_version.py" versionfile_build = "pyemu/_version.py" tag_prefix = "" + +[tool.codespell] +check-filenames = true +#skip = "*.grb,*.pdf,./dependencies" +skip = "*.fpr,*.grb,*.pdf,*.pfm,*.pst,*.tex,*.list,*.rec,./docs/_build,./codespell.ignore" +ignore-regex = "[A-Za-z0-9+/\\\\]{100,}" # base64-encoded data in *.ipynb files +ignore-words-list = [ + "afile", + "coo", + "fo", + "fpr", + "dum", + "ect", + "ons", + "oen", + "nam", + "ptd", + "delt", + "parin", + "gage", + "wel", +] diff --git a/verification/Freyberg/verify_unc_results.ipynb b/verification/Freyberg/verify_unc_results.ipynb index 17d5209df..78e9850c1 100644 --- a/verification/Freyberg/verify_unc_results.ipynb +++ b/verification/Freyberg/verify_unc_results.ipynb @@ -601,7 +601,7 @@ "metadata": {}, "source": [ "##PREDVAR1b\n", - "write the nessecary files to run ```predvar1b```" + "write the necessary files to run ```predvar1b```" ] }, { diff --git a/verification/henry/verify_unc_results.ipynb b/verification/henry/verify_unc_results.ipynb index 26b48ae4d..92f6d5362 100644 --- a/verification/henry/verify_unc_results.ipynb +++ b/verification/henry/verify_unc_results.ipynb @@ -342,7 +342,7 @@ "metadata": {}, "source": [ "##PREDVAR1b\n", - "write the nessecary files to run ```predvar1b```" + "write the necessary files to run ```predvar1b```" ] }, { @@ -1727,7 +1727,7 @@ "#print(diff)\n", "axt.plot(diff)\n", "axt.set_ylim(-1,1)\n", - "ax.set_xlabel(\"parmaeter\")\n", + "ax.set_xlabel(\"parameter\")\n", "ax.set_ylabel(\"identifiability\")\n", "axt.set_ylabel(\"difference\")" ] diff --git a/verification/henry/verify_unc_results.tex b/verification/henry/verify_unc_results.tex index 151dec8a1..0dd3fa9e2 100644 --- a/verification/henry/verify_unc_results.tex +++ b/verification/henry/verify_unc_results.tex @@ -87,7 +87,7 @@ % Define a nice break command that doesn't care if a line doesn't already % exist. 
\def\br{\hspace*{\fill} \\* } - % Math Jax compatability definitions + % Math Jax compatibility definitions \def\gt{>} \def\lt{<} % Document parameters @@ -408,7 +408,7 @@ \subsection{PREDVAR1b}\label{predvar1b} -write the nessecary files to run \texttt{predvar1b} +write the necessary files to run \texttt{predvar1b} \begin{Verbatim}[commandchars=\\\{\}] {\color{incolor}In [{\color{incolor}12}]:} \PY{n}{f} \PY{o}{=} \PY{n+nb}{open}\PY{p}{(}\PY{l+s}{\PYZdq{}}\PY{l+s}{pred\PYZus{}list.dat}\PY{l+s}{\PYZdq{}}\PY{p}{,}\PY{l+s}{\PYZsq{}}\PY{l+s}{w}\PY{l+s}{\PYZsq{}}\PY{p}{)}