diff --git a/NAMESPACE b/NAMESPACE index b8036ad4..2043afd5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -31,11 +31,17 @@ export(fHMM_parameters) export(fit_model) export(ll_hmm) export(npar) +export(par2parCon) export(par2parUncon) +export(parCon2par) +export(parCon2parUncon) +export(parUncon2par) +export(parUncon2parCon) export(prepare_data) export(reorder_states) export(set_controls) export(simulate_hmm) +export(viterbi) importFrom(Rcpp,evalCpp) importFrom(checkmate,assert_integerish) importFrom(checkmate,assert_number) diff --git a/NEWS.md b/NEWS.md index 67e8f44b..5ce2e013 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,8 @@ * Two more state-dependent distributions were added: `normal` and `poisson`. +* The Viterbi algorithm can be directly accessed via `viterbi()`. + * Renamed `simulate_data()` -> `simulate_hmm()` to make the functionality clearer. Furthermore, this function is now exported and can be used outside of the package to simulate HMM data. * `download_data()` no longer saves a .csv-file but returns the data as a `data.frame`. Its `verbose` argument is removed because the function no longer prints any messages. @@ -19,46 +21,63 @@ # fHMM 1.1.0 * Extended the time horizon of saved data and updated models for demonstration. + * The `download_data()` function now returns the data as a `data.frame` by default. However, specifying argument `file` still allows for saving the data as a .csv file. + * The `plot.fHMM_model()` function now has the additional argument `ll_relative` (default is `TRUE`) to plot the relative log-likelihood values when `plot_type = "ll"`. + * Significantly increased the test coverage and fixed minor bugs. + * Changed color of time series plot from `"lightgray"` to `"black"` for better readability. + * Added a title to the time series plot when calling `plot.fHMM_model(plot_type = "ts")`. Additionally, a time interval with arguments `from` and `to` can be selected to zoom into the data. # fHMM 1.0.3 * Added the following methods for an `fHMM_model` object: `AIC()`, `BIC()`, `logLik()`, `nobs()`, `npar()`, `residuals()`. + * The log-normal distribution can now be estimated by setting `sdds = "lnorm"` in the `controls` object. # fHMM 1.0.2 * Fixed bug in `reorder_states()` that did not order the fine-scale parameter sets when the coarse-scale order was changed. + * Fixed bug in `parameter_labels()` that returned the wrong order of parameter labels. + * Changed plot type of simulated data to lines. # fHMM 1.0.1 * In the vignette on controls, in the section about example specifications for `controls`, corrected `sdds = "gamma(mu = -1|1)"` to `sdds = "gamma(mu = 0.5|2)"` because mean of the Gamma distribution must be positive. + * Added `digits` argument to `print.fHMM_predict()`. + * Fixed bug in `reorder_states()` that allowed for misspecification of `state_order`. + * Added option to `fit_model()` to initialize at the estimates of another model (#73). # fHMM 1.0.0 * Enhanced the package by S3 classes. + * Added more `controls` specifications. + * Included a prediction function. + * Improved documentations. # fHMM 0.3.0 * Added vignettes. + * Improved specification of `controls`. + * Fixed minor bugs. # fHMM 0.2.0 * Improved documentation of functions and README. + * Improved specification of `controls`. (#37 and #38) # fHMM 0.1.0 diff --git a/R/decode_states.R b/R/decode_states.R index 9c055940..eb157f8b 100644 --- a/R/decode_states.R +++ b/R/decode_states.R @@ -32,52 +32,6 @@ decode_states <- function(x, verbose = TRUE) { stop("'verbose' must be either TRUE or FALSE.", call. = FALSE) } - ### definition of the Viterbi algorithm for state decoding - viterbi <- function(observations, nstates, Gamma, mu, sigma, df, sdd) { - T <- length(observations) - delta <- oeli::stationary_distribution(Gamma, soft_fail = TRUE) - allprobs <- matrix(0, nstates, T) - for (n in seq_len(nstates)) { - if (sdd == "t") { - allprobs[n, ] <- (1 / sigma[n]) * stats::dt( - (observations - mu[n]) / sigma[n], - df[n] - ) - } - if (sdd == "gamma") { - allprobs[n, ] <- stats::dgamma(observations, - shape = mu[n]^2 / sigma[n]^2, - scale = sigma[n]^2 / mu[n] - ) - } - if (sdd == "normal") { - allprobs[n, ] <- stats::dnorm(observations, mean = mu[n], sd = sigma[n]) - } - if (sdd == "lognormal") { - allprobs[n, ] <- stats::dlnorm(observations, meanlog = mu[n], - sdlog = sigma[n]) - } - if (sdd == "poisson") { - allprobs[n, ] <- stats::dpois(observations, lambda = mu[n]) - } - } - xi <- matrix(0, nstates, T) - for (n in seq_len(nstates)) { - xi[n, 1] <- log(delta[n]) + log(allprobs[n, 1]) - } - for (t in seq_len(T)[-1]) { - for (n in seq_len(nstates)) { - xi[n, t] <- max(xi[, t - 1] + log(Gamma[, n])) + log(allprobs[n, t]) - } - } - iv <- numeric(T) - iv[T] <- which.max(xi[, T]) - for (t in rev(seq_len(T - 1))) { - iv[t] <- which.max(xi[, t] + log(Gamma[, iv[t + 1]])) - } - return(iv) - } - ### apply Viterbi algorithm par <- parUncon2par(x$estimate, x$data$controls) if (!x$data$controls$hierarchy) { @@ -120,3 +74,102 @@ decode_states <- function(x, verbose = TRUE) { x$decoding <- decoding return(x) } + +#' @rdname decode_states +#' @param observations +#' A \code{numeric} \code{vector} of state-dependent observations. +#' @param nstates +#' The number of states. +#' @param sdd +#' A \code{character}, specifying the state-dependent distribution. One of +#' \itemize{ +#' \item \code{"normal"} (the normal distribution), +#' \item \code{"lognormal"} (the log-normal distribution), +#' \item \code{"t"} (the t-distribution), +#' \item \code{"gamma"} (the gamma distribution), +#' \item \code{"poisson"} (the Poisson distribution). +#' } +#' @param Gamma +#' A transition probability \code{matrix} of dimension \code{nstates}. +#' @param mu +#' A \code{numeric} vector of expected values for the state-dependent +#' distribution in the different states of length \code{nstates}. +#' +#' For the gamma- or Poisson-distribution, \code{mu} must be positive. +#' +#' @param sigma +#' A positive \code{numeric} vector of standard deviations for the +#' state-dependent distribution in the different states of length \code{nstates}. +#' +#' Not relevant in case of a state-dependent Poisson distribution. +#' +#' @param df +#' A positive \code{numeric} vector of degrees of freedom for the +#' state-dependent distribution in the different states of length \code{nstates}. +#' +#' Only relevant in case of a state-dependent t-distribution. +#' +#' @examples +#' viterbi( +#' observations = c(1, 1, 1, 10, 10, 10), +#' nstates = 2, +#' sdd = "poisson", +#' Gamma = matrix(0.5, 2, 2), +#' mu = c(1, 10) +#' ) +#' +#' @export + +viterbi <- function( + observations, nstates, sdd, Gamma, mu, sigma = NULL, df = NULL + ) { + T <- length(observations) + delta <- oeli::stationary_distribution(Gamma, soft_fail = TRUE) + allprobs <- matrix(0, nstates, T) + for (n in seq_len(nstates)) { + if (sdd == "t") { + allprobs[n, ] <- (1 / sigma[n]) * stats::dt( + (observations - mu[n]) / sigma[n], + df[n] + ) + } + if (sdd == "gamma") { + allprobs[n, ] <- stats::dgamma(observations, + shape = mu[n]^2 / sigma[n]^2, + scale = sigma[n]^2 / mu[n] + ) + } + if (sdd == "normal") { + allprobs[n, ] <- stats::dnorm(observations, mean = mu[n], sd = sigma[n]) + } + if (sdd == "lognormal") { + allprobs[n, ] <- stats::dlnorm(observations, meanlog = mu[n], + sdlog = sigma[n]) + } + if (sdd == "poisson") { + allprobs[n, ] <- stats::dpois(observations, lambda = mu[n]) + } + } + xi <- matrix(0, nstates, T) + for (n in seq_len(nstates)) { + xi[n, 1] <- log(delta[n]) + log(allprobs[n, 1]) + } + for (t in seq_len(T)[-1]) { + for (n in seq_len(nstates)) { + xi[n, t] <- max(xi[, t - 1] + log(Gamma[, n])) + log(allprobs[n, t]) + } + } + iv <- numeric(T) + iv[T] <- which.max(xi[, T]) + for (t in rev(seq_len(T - 1))) { + iv[t] <- which.max(xi[, t] + log(Gamma[, iv[t + 1]])) + } + return(iv) +} + + + + + + + diff --git a/R/fHMM_parameters.R b/R/fHMM_parameters.R index 65c4e5f2..897c06c3 100644 --- a/R/fHMM_parameters.R +++ b/R/fHMM_parameters.R @@ -612,6 +612,7 @@ par2parUncon <- function(par, controls, use_parameter_labels = TRUE) { #' @rdname parameter_transformations #' @return #' For \code{parUncon2parCon}: a vector of constrained model parameters. +#' @export parUncon2parCon <- function(parUncon, controls, use_parameter_labels = TRUE) { stopifnot(inherits(parUncon, "parUncon")) @@ -722,6 +723,7 @@ parUncon2parCon <- function(parUncon, controls, use_parameter_labels = TRUE) { #' @rdname parameter_transformations #' @return #' For \code{parCon2par}: an object of class \code{\link{fHMM_parameters}}. +#' @export parCon2par <- function(parCon, controls, use_parameter_labels = TRUE) { stopifnot(inherits(parCon, "parCon")) @@ -816,6 +818,7 @@ parCon2par <- function(parCon, controls, use_parameter_labels = TRUE) { #' @rdname parameter_transformations #' @return #' For \code{par2parCon}: a vector of constrained model parameters. +#' @export par2parCon <- function(par, controls, use_parameter_labels = TRUE) { stopifnot(inherits(par, "fHMM_parameters")) @@ -830,6 +833,7 @@ par2parCon <- function(par, controls, use_parameter_labels = TRUE) { #' @rdname parameter_transformations #' @return #' For \code{parCon2parUncon}: a vector of unconstrained model parameters. +#' @export parCon2parUncon <- function(parCon, controls, use_parameter_labels = TRUE) { stopifnot(inherits(parCon, "parCon")) @@ -844,6 +848,7 @@ parCon2parUncon <- function(parCon, controls, use_parameter_labels = TRUE) { #' @rdname parameter_transformations #' @return #' For \code{parUncon2par}: an object of class \code{fHMM_parameters}. +#' @export parUncon2par <- function( parUncon, controls, use_parameter_labels = TRUE diff --git a/README.Rmd b/README.Rmd index 511c72fd..8c8a3b2a 100644 --- a/README.Rmd +++ b/README.Rmd @@ -4,12 +4,13 @@ output: github_document -```{r, include = FALSE} +```{r, setup, include = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.path = "man/figures/README-" ) +set.seed(1) library("fHMM") model <- fHMM::dax_model_3t data <- model$data @@ -23,28 +24,31 @@ data <- model$data [![R-CMD-check](https://github.com/loelschlaeger/fHMM/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/loelschlaeger/fHMM/actions/workflows/R-CMD-check.yaml) -The `{fHMM}` R package allows for the detection and characterization of financial market regimes in time series data by applying hidden Markov Models (HMMs). The [detailed documentation](https://loelschlaeger.de/fHMM/articles/) outlines the functionality and the model formulation. Below, we provide an initial application to the German stock index [DAX](https://en.wikipedia.org/wiki/DAX). +The `{fHMM}` R package allows for the detection and characterization of financial market regimes in time series data by applying hidden Markov Models (HMMs). The [vignettes](https://loelschlaeger.de/fHMM/articles/) outline the package functionality and the model formulation. For a reference on the method, see + +> Oelschläger, L., and T. Adam. 2021. “Detecting Bearish and Bullish Markets in Financial Time Series Using Hierarchical Hidden Markov Models.” Statistical Modelling. https://doi.org/10.1177/1471082X211034048 + +Below, we illustrate an application to the German stock index [DAX](https://en.wikipedia.org/wiki/DAX). We also show how to use the package to simulate HMM data, compute the model likelihood, and decode the hidden states using the Viterbi algorithm. ## Installation -You can install the released version of `{fHMM}` from [CRAN](https://CRAN.R-project.org) with: +You can install the released package version from [CRAN](https://CRAN.R-project.org) with: ```{r, eval = FALSE} install.packages("fHMM") ``` -And the development version from [GitHub](https://github.com/) with: +## Contributing -```{r, eval = FALSE} -# install.packages("devtools") -devtools::install_github("loelschlaeger/fHMM") -``` +We are open to contributions and would appreciate your input: -## Contributing +- If you encounter any issues, please [submit bug reports as issues](https://github.com/loelschlaeger/fHMM/issues/new?assignees=&labels=bug&template=bug.md). + +- If you have any ideas for new features, please submit them as [feature requests](https://github.com/loelschlaeger/fHMM/issues/new?assignees=&labels=future&template=suggestion.md). -We are open to contributions and would appreciate your input! If you encounter any issues, please [submit bug reports as issues](https://github.com/loelschlaeger/fHMM/issues/new?assignees=&labels=bug&template=bug.md). If you have any ideas for new features, please submit them as [feature requests](https://github.com/loelschlaeger/fHMM/issues/new?assignees=&labels=future&template=suggestion.md). If you would like to add extensions to the package, please fork the "master" branch and submit merge requests. We look forward to your contributions! +- If you would like to add extensions to the package, please fork the `master` branch and submit a merge request. -## Example 1: Fitting an HMM to the DAX +## Example: Fitting an HMM to the DAX We fit a 3-state HMM with state-dependent t-distributions to the DAX log-returns from 2000 to 2022. The states can be interpreted as proxies for bearish (green below) and bullish markets (red) and an "in-between" market state (yellow). @@ -55,7 +59,7 @@ library("fHMM") The package has a build-in function to download financial data from [Yahoo Finance](https://finance.yahoo.com/): ```{r data download, eval = FALSE} -dax <- download_data(symbol = "^GDAXI", file = NULL, verbose = FALSE) +dax <- download_data(symbol = "^GDAXI") ``` We first need to define the model: @@ -115,52 +119,77 @@ The (pseudo-) residuals help to evaluate the model fit: plot(model, plot_type = "pr") ``` -## Example 2: Simulating HMM data and maximizing the likelihood +## Simulating HMM data -```{r, eval = FALSE} -### define the model: 3-state HMM with normal sdds -controls <- list(states = 3, sdds = "normal") - -### define the true parameters (un-specified parameters are set at random) -Gamma <- matrix(c(0.9, 0.05, 0.05, 0.05, 0.9, 0.05, 0.05, 0.05, 0.9), 3, 3) -mu <- -1:1 -sigma <- c(1, 1.5, 2) -true_parameters <- fHMM_parameters( - controls = controls, Gamma = Gamma, mu = mu, sigma = sigma +The `{fHMM}` package supports data simulation from an HMM and access to the model likelihood function for model fitting and the Viterbi algorithm for state decoding. + +1. As an example, we consider a 2-state HMM with state-dependent Gamma distributions and a time horizon of 1000 data points. + +```{r, define controls} +controls <- set_controls( + states = 2, + sdds = "gamma", + horizon = 1000 +) +``` + +2. Define the model parameters via the `fHMM_parameters()` function (unspecified parameters would be set at random). + +```{r, define model parameters} +par <- fHMM_parameters( + controls = controls, + Gamma = matrix(c(0.95, 0.05, 0.05, 0.95), 2, 2), + mu = c(1, 3), + sigma = c(1, 3) ) +``` + +3. Simulate data points from this model via the `simulate_hmm()` function. + +```{r, simulate-hmm-data, fig.dim = c(10,6)} +sim <- simulate_hmm( + controls = controls, + true_parameters = par +) +plot(sim$data, col = sim$markov_chain, type = "b") +``` + +4. The log-likelihood function `ll_hmm()` is evaluated at the identified and unconstrained parameter values, they can be derived via the `par2parUncon()` function. -### simulate 1000 data points -data <- simulate_hmm(controls, horizon = 1000, true_parameters = true_parameters) +```{r, unconstrained and identified parameters} +(parUncon <- par2parUncon(par, controls)) +``` -### plot the data just for fun -plot(data$data, col = data$markov_chain, type = "h") +Note that this transformation takes care of the restrictions, that `Gamma` must be a transition probability matrix (which we can ensure via the logit link) and that `mu` and `sigma` must be positive (an assumption for the Gamma distribution, which we can ensure via the exponential link). -### transform the parameters to a vector of the identified and unconstrained -### parameters -parUncon <- par2parUncon(true_parameters, controls) +```{r, evaluate likelihood} +ll_hmm(parUncon, sim$data, controls) +ll_hmm(parUncon, sim$data, controls, negative = TRUE) +``` -### evaluate the (negative) log-likelihood -ll_hmm(parUncon, data$observations, controls) -ll_hmm(parUncon, data$observations, controls, negative = TRUE) +5. For maximum likelihood estimation of the model parameters, we can numerically optimize `ll_hmm()` over `parUncon` (or rather minimize the negative log-likelihood). -### optimize the likelihood (should only take some seconds) -estimate <- nlm( - f = ll_hmm, p = parUncon, observations = data$observations, - controls = controls, negative = TRUE - )$estimate +```{r, maximize likelihood} +optimization <- nlm( + f = ll_hmm, p = parUncon, observations = sim$data, controls = controls, negative = TRUE +) -### transform the identified and unconstrained parameters back to the full and -### interpretable parameters +(estimate <- optimization$estimate) +``` + +6. To interpret the estimate, it needs to be back transformed to the constrained parameter space via the `parUncon2par()` function. The state-labeling is not identified. + +```{r, backtransform parameters} class(estimate) <- "parUncon" -estimated_parameters <- fHMM:::parUncon2par(estimate, controls) +estimate <- parUncon2par(estimate, controls) -### we can compare the true parameters with the estimated parameters (should be similar) -true_parameters$Gamma -estimated_parameters$Gamma +par$Gamma +estimate$Gamma -true_parameters$mu -estimated_parameters$mu +par$mu +estimate$mu -true_parameters$sigma -estimated_parameters$sigma +par$sigma +estimate$sigma ``` + diff --git a/README.md b/README.md index 81afd714..0cce0850 100644 --- a/README.md +++ b/README.md @@ -14,90 +14,43 @@ downloads](https://cranlogs.r-pkg.org/badges/last-month/fHMM)](https://cran.r-pr The `{fHMM}` R package allows for the detection and characterization of financial market regimes in time series data by applying hidden Markov -Models (HMMs). The [detailed -documentation](https://loelschlaeger.de/fHMM/articles/) outlines the -functionality and the model formulation. Below, we provide an initial -application to the German stock index -[DAX](https://en.wikipedia.org/wiki/DAX). +Models (HMMs). The [vignettes](https://loelschlaeger.de/fHMM/articles/) +outline the package functionality and the model formulation. For a +reference on the method, see + +> Oelschläger, L., and T. Adam. 2021. “Detecting Bearish and Bullish +> Markets in Financial Time Series Using Hierarchical Hidden Markov +> Models.” Statistical Modelling. +> + +Below, we illustrate an application to the German stock index +[DAX](https://en.wikipedia.org/wiki/DAX). We also show how to use the +package to simulate HMM data, compute the model likelihood, and decode +the hidden states using the Viterbi algorithm. ## Installation -You can install the released version of `{fHMM}` from +You can install the released package version from [CRAN](https://CRAN.R-project.org) with: ``` r install.packages("fHMM") ``` -And the development version from [GitHub](https://github.com/) with: - -``` r -# install.packages("devtools") -devtools::install_github("loelschlaeger/fHMM") -``` - ## Contributing -We are open to contributions and would appreciate your input! If you -encounter any issues, please [submit bug reports as -issues](https://github.com/loelschlaeger/fHMM/issues/new?assignees=&labels=bug&template=bug.md). -If you have any ideas for new features, please submit them as [feature -requests](https://github.com/loelschlaeger/fHMM/issues/new?assignees=&labels=future&template=suggestion.md). -If you would like to add extensions to the package, please fork the -“master” branch and submit merge requests. We look forward to your -contributions! - -## Example 1: Simulating HMM data and maximizing the likelihood - -``` r -### define the model: 3-state HMM with normal sdds -controls <- list(states = 3, sdds = "normal") - -### define the true parameters (un-specified parameters are set at random) -Gamma <- matrix(c(0.9, 0.05, 0.05, 0.05, 0.9, 0.05, 0.05, 0.05, 0.9), 3, 3) -mu <- -1:1 -sigma <- c(1, 1.5, 2) -true_parameters <- fHMM_parameters( - controls = controls, Gamma = Gamma, mu = mu, sigma = sigma -) - -### simulate 1000 data points -data <- simulate_hmm(controls, horizon = 1000, true_parameters = true_parameters) - -### plot the data just for fun -plot(data$data, col = data$markov_chain, type = "h") - -### transform the parameters to a vector of the identified and unconstrained -### parameters -parUncon <- par2parUncon(true_parameters, controls) +We are open to contributions and would appreciate your input: -### evaluate the (negative) log-likelihood -ll_hmm(parUncon, data$observations, controls) -ll_hmm(parUncon, data$observations, controls, negative = TRUE) +- If you encounter any issues, please [submit bug reports as + issues](https://github.com/loelschlaeger/fHMM/issues/new?assignees=&labels=bug&template=bug.md). -### optimize the likelihood (should only take some seconds) -estimate <- nlm( - f = ll_hmm, p = parUncon, observations = data$observations, - controls = controls, negative = TRUE - )$estimate +- If you have any ideas for new features, please submit them as [feature + requests](https://github.com/loelschlaeger/fHMM/issues/new?assignees=&labels=future&template=suggestion.md). -### transform the identified and unconstrained parameters back to the full and -### interpretable parameters -class(estimate) <- "parUncon" -estimated_parameters <- fHMM:::parUncon2par(estimate, controls) - -### we can compare the true parameters with the estimated parameters (should be similar) -true_parameters$Gamma -estimated_parameters$Gamma +- If you would like to add extensions to the package, please fork the + `master` branch and submit a merge request. -true_parameters$mu -estimated_parameters$mu - -true_parameters$sigma -estimated_parameters$sigma -``` - -## Example 2: Fitting an HMM to the DAX +## Example: Fitting an HMM to the DAX We fit a 3-state HMM with state-dependent t-distributions to the DAX log-returns from 2000 to 2022. The states can be interpreted as proxies @@ -112,23 +65,22 @@ The package has a build-in function to download financial data from [Yahoo Finance](https://finance.yahoo.com/): ``` r -dax <- download_data(symbol = "^GDAXI", file = NULL, verbose = FALSE) +dax <- download_data(symbol = "^GDAXI") ``` We first need to define the model: ``` r -controls <- list( - states = 3, - sdds = "t", - data = list(file = dax, - date_column = "Date", - data_column = "Close", - logreturns = TRUE, - from = "2000-01-01", - to = "2022-12-31") +controls <- set_controls( + states = 3, + sdds = "t", + file = dax, + date_column = "Date", + data_column = "Close", + logreturns = TRUE, + from = "2000-01-01", + to = "2022-12-31" ) -controls <- set_controls(controls) ``` The function `prepare_data()` then prepares the data for estimation: @@ -217,3 +169,117 @@ plot(model, plot_type = "pr") ``` ![](man/figures/README-plot_model_residuals-1.png) + +## Simulating HMM data + +The `{fHMM}` package supports data simulation from an HMM and access to +the model likelihood function for model fitting and the Viterbi +algorithm for state decoding. + +1. As an example, we consider a 2-state HMM with state-dependent Gamma + distributions and a time horizon of 1000 data points. + +``` r +controls <- set_controls( + states = 2, + sdds = "gamma", + horizon = 1000 +) +``` + +2. Define the model parameters via the `fHMM_parameters()` function + (unspecified parameters would be set at random). + +``` r +par <- fHMM_parameters( + controls = controls, + Gamma = matrix(c(0.95, 0.05, 0.05, 0.95), 2, 2), + mu = c(1, 3), + sigma = c(1, 3) +) +``` + +3. Simulate data points from this model via the `simulate_hmm()` + function. + +``` r +sim <- simulate_hmm( + controls = controls, + true_parameters = par +) +plot(sim$data, col = sim$markov_chain, type = "b") +``` + +![](man/figures/README-simulate-hmm-data-1.png) + +4. The log-likelihood function `ll_hmm()` is evaluated at the + identified and unconstrained parameter values, they can be derived + via the `par2parUncon()` function. + +``` r +(parUncon <- par2parUncon(par, controls)) +#> gammasUncon_21 gammasUncon_12 muUncon_1 muUncon_2 sigmaUncon_1 +#> -2.944439 -2.944439 0.000000 1.098612 0.000000 +#> sigmaUncon_2 +#> 1.098612 +#> attr(,"class") +#> [1] "parUncon" "numeric" +``` + +Note that this transformation takes care of the restrictions, that +`Gamma` must be a transition probability matrix (which we can ensure via +the logit link) and that `mu` and `sigma` must be positive (an +assumption for the Gamma distribution, which we can ensure via the +exponential link). + +``` r +ll_hmm(parUncon, sim$data, controls) +#> [1] -1620.515 +ll_hmm(parUncon, sim$data, controls, negative = TRUE) +#> [1] 1620.515 +``` + +5. For maximum likelihood estimation of the model parameters, we can + numerically optimize `ll_hmm()` over `parUncon` (or rather minimize + the negative log-likelihood). + +``` r +optimization <- nlm( + f = ll_hmm, p = parUncon, observations = sim$data, controls = controls, negative = TRUE +) + +(estimate <- optimization$estimate) +#> [1] -3.46338914 -3.44065510 0.05999846 1.06452908 0.11517809 1.07946253 +``` + +6. To interpret the estimate, it needs to be back transformed to the + constrained parameter space via the `parUncon2par()` function. The + state-labeling is not identified. + +``` r +class(estimate) <- "parUncon" +estimate <- parUncon2par(estimate, controls) + +par$Gamma +#> state_1 state_2 +#> state_1 0.95 0.05 +#> state_2 0.05 0.95 +estimate$Gamma +#> state_1 state_2 +#> state_1 0.96895123 0.03104877 +#> state_2 0.03037207 0.96962793 + +par$mu +#> muCon_1 muCon_2 +#> 1 3 +estimate$mu +#> muCon_1 muCon_2 +#> 1.061835 2.899473 + +par$sigma +#> sigmaCon_1 sigmaCon_2 +#> 1 3 +estimate$sigma +#> sigmaCon_1 sigmaCon_2 +#> 1.122073 2.943097 +``` diff --git a/_pkgdown.yml b/_pkgdown.yml index d7967f1d..51b29d14 100644 --- a/_pkgdown.yml +++ b/_pkgdown.yml @@ -22,9 +22,18 @@ reference: - download_data - simulate_hmm - prepare_data - - fHMM_parameters - fHMM_data - plot.fHMM_data +- title: Model parameters + desc: Use these functions to define and transform model parameters. + contents: + - fHMM_parameters + - par2parCon + - par2parUncon + - parCon2par + - parCon2parUncon + - parUncon2par + - parUncon2parCon - title: Model estimation desc: Use these function for model estimation and state decoding. contents: @@ -32,6 +41,7 @@ reference: - fit_model - fHMM_model - decode_states + - viterbi - reorder_states - title: Model evaluation desc: Use these functions to evaluate a fitted model. diff --git a/man/decode_states.Rd b/man/decode_states.Rd index 41ea517d..854f13f5 100644 --- a/man/decode_states.Rd +++ b/man/decode_states.Rd @@ -2,14 +2,47 @@ % Please edit documentation in R/decode_states.R \name{decode_states} \alias{decode_states} +\alias{viterbi} \title{Decode the underlying hidden state sequence} \usage{ decode_states(x, verbose = TRUE) + +viterbi(observations, nstates, sdd, Gamma, mu, sigma = NULL, df = NULL) } \arguments{ \item{x}{An object of class \code{\link{fHMM_model}}.} \item{verbose}{Set to \code{TRUE} to print progress messages.} + +\item{observations}{A \code{numeric} \code{vector} of state-dependent observations.} + +\item{nstates}{The number of states.} + +\item{sdd}{A \code{character}, specifying the state-dependent distribution. One of +\itemize{ +\item \code{"normal"} (the normal distribution), +\item \code{"lognormal"} (the log-normal distribution), +\item \code{"t"} (the t-distribution), +\item \code{"gamma"} (the gamma distribution), +\item \code{"poisson"} (the Poisson distribution). +}} + +\item{Gamma}{A transition probability \code{matrix} of dimension \code{nstates}.} + +\item{mu}{A \code{numeric} vector of expected values for the state-dependent +distribution in the different states of length \code{nstates}. + +For the gamma- or Poisson-distribution, \code{mu} must be positive.} + +\item{sigma}{A positive \code{numeric} vector of standard deviations for the +state-dependent distribution in the different states of length \code{nstates}. + +Not relevant in case of a state-dependent Poisson distribution.} + +\item{df}{A positive \code{numeric} vector of degrees of freedom for the +state-dependent distribution in the different states of length \code{nstates}. + +Only relevant in case of a state-dependent t-distribution.} } \value{ An object of class \code{\link{fHMM_model}} with decoded state sequence @@ -22,6 +55,14 @@ applying the Viterbi algorithm for global decoding. \examples{ decode_states(dax_model_3t) plot(dax_model_3t, type = "ts") +viterbi( + observations = c(1, 1, 1, 10, 10, 10), + nstates = 2, + sdd = "poisson", + Gamma = matrix(0.5, 2, 2), + mu = c(1, 10) +) + } \references{ \url{https://en.wikipedia.org/wiki/Viterbi_algorithm} diff --git a/man/figures/README-simulate-hmm-data-1.png b/man/figures/README-simulate-hmm-data-1.png new file mode 100644 index 00000000..ab25b102 Binary files /dev/null and b/man/figures/README-simulate-hmm-data-1.png differ diff --git a/src/fHMM.dll b/src/fHMM.dll index 6abd17d2..9ae8b559 100644 Binary files a/src/fHMM.dll and b/src/fHMM.dll differ diff --git a/vignettes/fHMM-decoded-ts-1.png b/vignettes/fHMM-decoded-ts-1.png index 69350d19..699450ff 100644 Binary files a/vignettes/fHMM-decoded-ts-1.png and b/vignettes/fHMM-decoded-ts-1.png differ diff --git a/vignettes/fHMM-events-1.png b/vignettes/fHMM-events-1.png index d87fc10b..392401c8 100644 Binary files a/vignettes/fHMM-events-1.png and b/vignettes/fHMM-events-1.png differ diff --git a/vignettes/fHMM-plot-lls-1.png b/vignettes/fHMM-plot-lls-1.png index f7d8e3ee..6e6c1755 100644 Binary files a/vignettes/fHMM-plot-lls-1.png and b/vignettes/fHMM-plot-lls-1.png differ diff --git a/vignettes/fHMM-plot-res-1.png b/vignettes/fHMM-plot-res-1.png index 097590d9..754a1e32 100644 Binary files a/vignettes/fHMM-plot-res-1.png and b/vignettes/fHMM-plot-res-1.png differ diff --git a/vignettes/fHMM-plot-sdds-1.png b/vignettes/fHMM-plot-sdds-1.png index 04b2b771..d7e23fd3 100644 Binary files a/vignettes/fHMM-plot-sdds-1.png and b/vignettes/fHMM-plot-sdds-1.png differ diff --git a/vignettes/fHMM.Rmd b/vignettes/fHMM.Rmd index d813fda3..b791d8d5 100644 --- a/vignettes/fHMM.Rmd +++ b/vignettes/fHMM.Rmd @@ -17,17 +17,17 @@ knitr::opts_chunk$set( ) ``` -Welcome to {fHMM}, an R package for modeling financial time series data with hidden Markov models (HMMs). This introduction [motivates the approach](#motivation), gives an [overview](#package-and-vignettes-overview) of the package functionality and the included vignettes, and [places the approach in the existing literature](#placement-in-the-literature). +Welcome to `{fHMM}`, an R package for modeling financial time series data with hidden Markov models (HMMs). This introduction [motivates the approach](#motivation), gives an [overview](#package-and-vignettes-overview) of the package functionality and the included vignettes, and [places the approach in the existing literature](#placement-in-the-literature). ## Motivation Earning money with stock trading is simple: one only needs to buy and sell stocks at the right moment. In general, stock traders seek to invest at the beginning of upward trends (hereon termed as bullish markets) and repel their stocks just in time before the prices fall again (hereon termed as bearish markets). As stock prices depend on a variety of environmental factors [@hum09; @coh13], chance certainly plays a fundamental role in hitting those exact moments. However, investigating market behavior can lead to a better understanding of how trends alternate and thereby increases the chance of making profitable investment decisions. -The {fHMM} package aims at contributing to those investigations by applying HMMs to detect bearish and bullish markets in financial time series. It also implemented the hierarchical model extension presented in @oel21, which improves the model's capability for distinguishing between short- and long-term trends and allows to interpret market dynamics at multiple time scales. +The `{fHMM}` package aims at contributing to those investigations by applying HMMs to detect bearish and bullish markets in financial time series. It also implemented the hierarchical model extension presented in @oel21, which improves the model's capability for distinguishing between short- and long-term trends and allows to interpret market dynamics at multiple time scales. ## Package and vignettes overview -The functionality of the {fHMM} package can be classified into functions for data preparation, model estimation, and model evaluation. The following flowchart visualizes their dependencies: +The functionality of the `{fHMM}` package can be classified into functions for data preparation, model estimation, and model evaluation. The following flowchart visualizes their dependencies: ![A flowchart of the {fHMM} package: Functions are boxed and classes displayed as circles.](flowchart.png){width=80%} diff --git a/vignettes/ref.bib b/vignettes/ref.bib index 87bfc4dc..7156872a 100644 --- a/vignettes/ref.bib +++ b/vignettes/ref.bib @@ -277,7 +277,8 @@ @article{oel21 author = {Oelschläger, L. and Adam, T.}, title ={Detecting bearish and bullish markets in financial time series using hierarchical hidden Markov models}, journal = {Statistical Modelling}, - year = {2021} + year = {2021}, + doi = {10.1177/1471082X211034048} } @article{pla08, diff --git a/vignettes/v02_controls.Rmd b/vignettes/v02_controls.Rmd index f15218ae..bc2a73bf 100644 --- a/vignettes/v02_controls.Rmd +++ b/vignettes/v02_controls.Rmd @@ -29,10 +29,11 @@ The `{fHMM}` philosophy is to start the modeling process by setting all data, mo ## Example specifications -For demonstration, we list example specifications using data from the Deutscher Aktienindex DAX^[The `download_data()` function is explained in the vignette on data management.] [@jan92]: +For demonstration, we list example specifications using data from the Deutscher Aktienindex DAX^[The `download_data()` function is explained in [the vignette on data management](https://loelschlaeger.de/fHMM/articles/v03_data_management.html).] [@jan92]: ```{r} -download_data(symbol = "^GDAXI", file = "dax.csv") +dax <- download_data(symbol = "^GDAXI") +head(dax) ``` ### HMMs for empirical data @@ -43,7 +44,7 @@ The following lines of code specify a 3-state HMM with state-dependent t-distrib controls <- list( states = 3, sdds = "t", - data = list(file = "dax.csv", + data = list(file = dax, date_column = "Date", data_column = "Close", logreturns = TRUE),