From 8b62c664f37234172ef80f4da88d451fa96e87d1 Mon Sep 17 00:00:00 2001 From: Max Date: Mon, 25 Sep 2023 09:21:02 -0500 Subject: [PATCH 1/3] feat: added halfnormal & lognormal Signed-off-by: Max --- pomegranate/distributions/halfnormal.py | 308 ++++++++++++++++++++ pomegranate/distributions/lognormal.py | 303 +++++++++++++++++++ pomegranate/distributions/lognormal_old.pyx | 112 +++++++ 3 files changed, 723 insertions(+) create mode 100644 pomegranate/distributions/halfnormal.py create mode 100644 pomegranate/distributions/lognormal.py create mode 100644 pomegranate/distributions/lognormal_old.pyx diff --git a/pomegranate/distributions/halfnormal.py b/pomegranate/distributions/halfnormal.py new file mode 100644 index 00000000..b3fa0f9d --- /dev/null +++ b/pomegranate/distributions/halfnormal.py @@ -0,0 +1,308 @@ +# normal.py +# Contact: Jacob Schreiber + +import torch + +from .._utils import _cast_as_tensor +from .._utils import _cast_as_parameter +from .._utils import _update_parameter +from .._utils import _check_parameter +from .._utils import _check_shapes + +from ._distribution import Distribution + + +# Define some useful constants +NEGINF = float("-inf") +INF = float("inf") +SQRT_2_PI = 2.50662827463 +LOG_2_PI = 1.83787706641 + + +class HalfNormal(Distribution): + """A half-normal distribution object. + + A half-normal distribution is a distribution over positive real numbers that + is zero for negative numbers. It is defined by a single parameter, sigma, + which is the standard deviation of the distribution. The mean of the + distribution is sqrt(2/pi) * sigma, and the variance is (1 - 2/pi) * sigma^2. + + This distribution can assume that features are independent of the others if + the covariance type is 'diag' or 'sphere', but if the type is 'full' then + the features are not independent. + + There are two ways to initialize this object. The first is to pass in + the tensor of probablity parameters, at which point they can immediately be + used. The second is to not pass in the rate parameters and then call + either `fit` or `summary` + `from_summaries`, at which point the probability + parameter will be learned from data. + + + Parameters + ---------- + covs: list, numpy.ndarray, torch.Tensor, or None, optional + The variances and covariances of the distribution. If covariance_type + is 'full', the shape should be (self.d, self.d); if 'diag', the shape + should be (self.d,); if 'sphere', it should be (1,). Note that this is + the variances or covariances in all settings, and not the standard + deviation, as may be more common for diagonal covariance matrices. + Default is None. + + covariance_type: str, optional + The type of covariance matrix. Must be one of 'full', 'diag', or + 'sphere'. Default is 'full'. + + min_cov: float or None, optional + The minimum variance or covariance. + + inertia: float, [0, 1], optional + Indicates the proportion of the update to apply to the parameters + during training. When the inertia is 0.0, the update is applied in + its entirety and the previous parameters are ignored. When the + inertia is 1.0, the update is entirely ignored and the previous + parameters are kept, equivalently to if the parameters were frozen. + + frozen: bool, optional + Whether all the parameters associated with this distribution are frozen. + If you want to freeze individual pameters, or individual values in those + parameters, you must modify the `frozen` attribute of the tensor or + parameter directly. Default is False. 
+ """ + + def __init__( + self, + covs=None, + covariance_type="full", + min_cov=None, + inertia=0.0, + frozen=False, + check_data=True, + ): + super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) + self.name = "HalfNormal" + + self.covs = _check_parameter(_cast_as_parameter(covs), "covs", ndim=(1, 2)) + + _check_shapes([self.means, self.covs], ["means", "covs"]) + + self.min_cov = _check_parameter(min_cov, "min_cov", min_value=0, ndim=0) + self.covariance_type = covariance_type + + self._initialized = covs is not None + self.d = self.means.shape[-1] if self._initialized else None + self._reset_cache() + + def _initialize(self, d): + """Initialize the probability distribution. + + This method is meant to only be called internally. It initializes the + parameters of the distribution and stores its dimensionality. For more + complex methods, this function will do more. + + + Parameters + ---------- + d: int + The dimensionality the distribution is being initialized to. + """ + if self.covariance_type == "full": + self.covs = _cast_as_parameter( + torch.zeros(d, d, dtype=self.dtype, device=self.device) + ) + elif self.covariance_type == "diag": + self.covs = _cast_as_parameter( + torch.zeros(d, dtype=self.dtype, device=self.device) + ) + elif self.covariance_type == "sphere": + self.covs = _cast_as_parameter( + torch.tensor(0, dtype=self.dtype, device=self.device) + ) + + self._initialized = True + super()._initialize(d) + + def _reset_cache(self): + """Reset the internally stored statistics. + + This method is meant to only be called internally. It resets the + stored statistics used to update the model parameters as well as + recalculates the cached values meant to speed up log probability + calculations. + """ + + if self._initialized == False: + return + + self.register_buffer( + "_w_sum", torch.zeros(self.d, dtype=self.dtype, device=self.device) + ) + self.register_buffer( + "_xw_sum", torch.zeros(self.d, dtype=self.dtype, device=self.device) + ) + + if self.covariance_type == "full": + self.register_buffer( + "_xxw_sum", + torch.zeros(self.d, self.d, dtype=self.dtype, device=self.device), + ) + + if self.covs.sum() > 0.0: + chol = torch.linalg.cholesky(self.covs) + _inv_cov = torch.linalg.solve_triangular( + chol, + torch.eye(len(self.covs), dtype=self.dtype, device=self.device), + upper=False, + ).T + _log_det = -0.5 * torch.linalg.slogdet(self.covs)[1] + _theta = _log_det - 0.5 * (self.d * LOG_2_PI) + + self.register_buffer("_inv_cov", _inv_cov) + self.register_buffer("_log_det", _log_det) + self.register_buffer("_theta", _theta) + + elif self.covariance_type in ("diag", "sphere"): + self.register_buffer( + "_xxw_sum", torch.zeros(self.d, dtype=self.dtype, device=self.device) + ) + + if self.covs.sum() > 0.0: + _log_sigma_sqrt_2pi = -torch.log(torch.sqrt(self.covs) * SQRT_2_PI) + _inv_two_sigma = 1.0 / (2 * self.covs) + + self.register_buffer("_log_sigma_sqrt_2pi", _log_sigma_sqrt_2pi) + self.register_buffer("_inv_two_sigma", _inv_two_sigma) + + if torch.any(self.covs < 0): + raise ValueError("Variances must be positive.") + + def sample(self, n): + """Sample from the probability distribution. + + This method will return `n` samples generated from the underlying + probability distribution. + + + Parameters + ---------- + n: int + The number of samples to generate. + + + Returns + ------- + X: torch.tensor, shape=(n, self.d) + Randomly generated samples. 
+ """ + + if self.covariance_type == "diag": + return torch.distributions.HalfNormal(self.covs).sample([n]) + elif self.covariance_type == "full": + return torch.distributions.HalfNormal(self.covs).sample([n]) + + def log_probability(self, X): + """Calculate the log probability of each example. + + This method calculates the log probability of each example given the + parameters of the distribution. The examples must be given in a 2D + format. + + Note: This differs from some other log probability calculation + functions, like those in torch.distributions, because it is not + returning the log probability of each feature independently, but rather + the total log probability of the entire example. + + + Parameters + ---------- + X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) + A set of examples to evaluate. + + + Returns + ------- + logp: torch.Tensor, shape=(-1,) + The log probability of each example. + """ + + X = _check_parameter( + _cast_as_tensor(X, dtype=self.means.dtype), + "X", + ndim=2, + shape=(-1, self.d), + check_parameter=self.check_data, + ) + + # if self.covariance_type == 'full': + # # logp = torch.matmul(X, self._inv_cov) - self._inv_cov_dot_mu + # # logp = self.d * LOG_2_PI + torch.sum(logp ** 2, dim=-1) + # # logp = self._log_det - 0.5 * logp + # # return logp + return 0.5 * LOG_2_PI + (X**2 / 2).sum(dim=-1) + + # elif self.covariance_type in ('diag', 'sphere'): + # return torch.sum(self._log_sigma_sqrt_2pi - ((X - self.means) ** 2) + # * self._inv_two_sigma, dim=-1) + + def summarize(self, X, sample_weight=None): + """Extract the sufficient statistics from a batch of data. + + This method calculates the sufficient statistics from optionally + weighted data and adds them to the stored cache. The examples must be + given in a 2D format. Sample weights can either be provided as one + value per example or as a 2D matrix of weights for each feature in + each example. + + + Parameters + ---------- + X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) + A set of examples to summarize. + + sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional + A set of weights for the examples. This can be either of shape + (-1, self.d) or a vector of shape (-1,). Default is ones. + """ + + if self.frozen == True: + return + + X, sample_weight = super().summarize(X, sample_weight=sample_weight) + X = _cast_as_tensor(X, dtype=self.means.dtype) + + if self.covariance_type == "full": + self._w_sum += torch.sum(sample_weight, dim=0) + self._xw_sum += torch.sum(X * sample_weight, axis=0) + self._xxw_sum += torch.matmul((X * sample_weight).T, X) + + elif self.covariance_type in ("diag", "sphere"): + self._w_sum[:] = self._w_sum + torch.sum(sample_weight, dim=0) + self._xw_sum[:] = self._xw_sum + torch.sum(X * sample_weight, dim=0) + self._xxw_sum[:] = self._xxw_sum + torch.sum(X**2 * sample_weight, dim=0) + + def from_summaries(self): + """Update the model parameters given the extracted statistics. + + This method uses calculated statistics from calls to the `summarize` + method to update the distribution parameters. Hyperparameters for the + update are passed in at initialization time. + + Note: Internally, a call to `fit` is just a successive call to the + `summarize` method followed by the `from_summaries` method. 
+ """ + + if self.frozen == True: + return + + if self.covariance_type == "full": + v = self._xw_sum.unsqueeze(0) * self._xw_sum.unsqueeze(1) + covs = self._xxw_sum / self._w_sum - v / self._w_sum**2.0 + + elif self.covariance_type in ["diag", "sphere"]: + covs = ( + self._xxw_sum / self._w_sum - self._xw_sum**2.0 / self._w_sum**2.0 + ) + if self.covariance_type == "sphere": + covs = covs.mean(dim=-1) + + _update_parameter(self.covs, covs, self.inertia) + self._reset_cache() diff --git a/pomegranate/distributions/lognormal.py b/pomegranate/distributions/lognormal.py new file mode 100644 index 00000000..307f5c76 --- /dev/null +++ b/pomegranate/distributions/lognormal.py @@ -0,0 +1,303 @@ +# normal.py +# Contact: Jacob Schreiber + +import torch + +from .._utils import _cast_as_tensor +from .._utils import _cast_as_parameter +from .._utils import _update_parameter +from .._utils import _check_parameter +from .._utils import _check_shapes + +from ._distribution import Distribution + + +# Define some useful constants +NEGINF = float("-inf") +INF = float("inf") +SQRT_2_PI = 2.50662827463 +LOG_2_PI = 1.83787706641 + + +class LogNormal(Distribution): + """A lognormal object. + + The parameters are the mu and sigma of the normal distribution, which + is the the exponential of the log normal distribution. This + distribution can assume that features are independent of the others if + the covariance type is 'diag' or 'sphere', but if the type is 'full' then + the features are not independent. + + There are two ways to initialize this object. The first is to pass in + the tensor of probablity parameters, at which point they can immediately be + used. The second is to not pass in the rate parameters and then call + either `fit` or `summary` + `from_summaries`, at which point the probability + parameter will be learned from data. + + + Parameters + ---------- + means: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional + The mean values of the normal distributions. Default is None. + + covs: list, numpy.ndarray, torch.Tensor, or None, optional + The variances and covariances of the distribution. If covariance_type + is 'full', the shape should be (self.d, self.d); if 'diag', the shape + should be (self.d,); if 'sphere', it should be (1,). Note that this is + the variances or covariances in all settings, and not the standard + deviation, as may be more common for diagonal covariance matrices. + Default is None. + + covariance_type: str, optional + The type of covariance matrix. Must be one of 'full', 'diag', or + 'sphere'. Default is 'full'. + + min_cov: float or None, optional + The minimum variance or covariance. + + inertia: float, [0, 1], optional + Indicates the proportion of the update to apply to the parameters + during training. When the inertia is 0.0, the update is applied in + its entirety and the previous parameters are ignored. When the + inertia is 1.0, the update is entirely ignored and the previous + parameters are kept, equivalently to if the parameters were frozen. + + frozen: bool, optional + Whether all the parameters associated with this distribution are frozen. + If you want to freeze individual pameters, or individual values in those + parameters, you must modify the `frozen` attribute of the tensor or + parameter directly. Default is False. 
+ """ + + def __init__(self, means=None, covs=None, covariance_type='full', + min_cov=None, inertia=0.0, frozen=False, check_data=True): + super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) + self.name = "LogNormal" + + self.means = _check_parameter(_cast_as_parameter(means), "means", + ndim=1) + self.covs = _check_parameter(_cast_as_parameter(covs), "covs", + ndim=(1, 2)) + + _check_shapes([self.means, self.covs], ["means", "covs"]) + + self.min_cov = _check_parameter(min_cov, "min_cov", min_value=0, ndim=0) + self.covariance_type = covariance_type + + self._initialized = (means is not None) and (covs is not None) + self.d = self.means.shape[-1] if self._initialized else None + self._reset_cache() + + def _initialize(self, d): + """Initialize the probability distribution. + + This method is meant to only be called internally. It initializes the + parameters of the distribution and stores its dimensionality. For more + complex methods, this function will do more. + + + Parameters + ---------- + d: int + The dimensionality the distribution is being initialized to. + """ + + self.means = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, + device=self.device)) + + if self.covariance_type == 'full': + self.covs = _cast_as_parameter(torch.zeros(d, d, + dtype=self.dtype, device=self.device)) + elif self.covariance_type == 'diag': + self.covs = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, + device=self.device)) + elif self.covariance_type == 'sphere': + self.covs = _cast_as_parameter(torch.tensor(0, dtype=self.dtype, + device=self.device)) + + self._initialized = True + super()._initialize(d) + + def _reset_cache(self): + """Reset the internally stored statistics. + + This method is meant to only be called internally. It resets the + stored statistics used to update the model parameters as well as + recalculates the cached values meant to speed up log probability + calculations. + """ + + if self._initialized == False: + return + + self.register_buffer("_w_sum", torch.zeros(self.d, dtype=self.dtype, + device=self.device)) + self.register_buffer("_xw_sum", torch.zeros(self.d, dtype=self.dtype, + device=self.device)) + + if self.covariance_type == 'full': + self.register_buffer("_xxw_sum", torch.zeros(self.d, self.d, + dtype=self.dtype, device=self.device)) + + if self.covs.sum() > 0.0: + chol = torch.linalg.cholesky(self.covs) + _inv_cov = torch.linalg.solve_triangular(chol, torch.eye( + len(self.covs), dtype=self.dtype, device=self.device), + upper=False).T + _inv_cov_dot_mu = torch.matmul(self.means, _inv_cov) + _log_det = -0.5 * torch.linalg.slogdet(self.covs)[1] + _theta = _log_det - 0.5 * (self.d * LOG_2_PI) + + self.register_buffer("_inv_cov", _inv_cov) + self.register_buffer("_inv_cov_dot_mu", _inv_cov_dot_mu) + self.register_buffer("_log_det", _log_det) + self.register_buffer("_theta", _theta) + + elif self.covariance_type in ('diag', 'sphere'): + self.register_buffer("_xxw_sum", torch.zeros(self.d, + dtype=self.dtype, device=self.device)) + + if self.covs.sum() > 0.0: + _log_sigma_sqrt_2pi = -torch.log(torch.sqrt(self.covs) * + SQRT_2_PI) + _inv_two_sigma = 1. / (2 * self.covs) + + self.register_buffer("_log_sigma_sqrt_2pi", _log_sigma_sqrt_2pi) + self.register_buffer("_inv_two_sigma", _inv_two_sigma) + + if any(self.covs < 0): + raise ValueError("Variances must be positive.") + + def sample(self, n): + """Sample from the probability distribution. + + This method will return `n` samples generated from the underlying + probability distribution. 
+ + + Parameters + ---------- + n: int + The number of samples to generate. + + + Returns + ------- + X: torch.tensor, shape=(n, self.d) + Randomly generated samples. + """ + + if self.covariance_type == 'diag': + return torch.distributions.Normal(self.means, self.covs).sample([n]).exp() + elif self.covariance_type == 'full': + return torch.distributions.MultivariateNormal(self.means, + self.covs).sample([n]).exp() + + def log_probability(self, X): + """Calculate the log probability of each example. + + This method calculates the log probability of each example given the + parameters of the distribution. The examples must be given in a 2D + format. + + Note: This differs from some other log probability calculation + functions, like those in torch.distributions, because it is not + returning the log probability of each feature independently, but rather + the total log probability of the entire example. + + + Parameters + ---------- + X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) + A set of examples to evaluate. + + + Returns + ------- + logp: torch.Tensor, shape=(-1,) + The log probability of each example. + """ + + X = _check_parameter(_cast_as_tensor(X, dtype=self.means.dtype), "X", + ndim=2, shape=(-1, self.d), check_parameter=self.check_data) + + # take the log of X + x_log = X.log() + + if self.covariance_type == 'full': + logp = torch.matmul(x_log, self._inv_cov) - self._inv_cov_dot_mu + logp = self.d * LOG_2_PI + torch.sum(logp ** 2, dim=-1) + logp = self._log_det - 0.5 * logp + return logp + + elif self.covariance_type in ('diag', 'sphere'): + return torch.sum(self._log_sigma_sqrt_2pi - ((x_log - self.means) ** 2) + * self._inv_two_sigma, dim=-1) + + def summarize(self, X, sample_weight=None): + """Extract the sufficient statistics from a batch of data. + + This method calculates the sufficient statistics from optionally + weighted data and adds them to the stored cache. The examples must be + given in a 2D format. Sample weights can either be provided as one + value per example or as a 2D matrix of weights for each feature in + each example. + + + Parameters + ---------- + X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) + A set of examples to summarize. + + sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional + A set of weights for the examples. This can be either of shape + (-1, self.d) or a vector of shape (-1,). Default is ones. + """ + + if self.frozen is True: + return + + X = _cast_as_tensor(X, dtype=self.means.dtype) + X = X.log() + + X, sample_weight = super().summarize(X, sample_weight=sample_weight) + X = _cast_as_tensor(X, dtype=self.means.dtype) + + if self.covariance_type == 'full': + self._w_sum += torch.sum(sample_weight, dim=0) + self._xw_sum += torch.sum(X * sample_weight, axis=0) + self._xxw_sum += torch.matmul((X * sample_weight).T, X) + + elif self.covariance_type in ('diag', 'sphere'): + self._w_sum[:] = self._w_sum + torch.sum(sample_weight, dim=0) + self._xw_sum[:] = self._xw_sum + torch.sum(X * sample_weight, dim=0) + self._xxw_sum[:] = self._xxw_sum + torch.sum(X ** 2 * + sample_weight, dim=0) + + def from_summaries(self): + """Update the model parameters given the extracted statistics. + + This method uses calculated statistics from calls to the `summarize` + method to update the distribution parameters. Hyperparameters for the + update are passed in at initialization time. + + Note: Internally, a call to `fit` is just a successive call to the + `summarize` method followed by the `from_summaries` method. 
+ """ + + if self.frozen == True: + return + + means = self._xw_sum / self._w_sum + + if self.covariance_type == 'full': + v = self._xw_sum.unsqueeze(0) * self._xw_sum.unsqueeze(1) + covs = self._xxw_sum / self._w_sum - v / self._w_sum ** 2.0 + + elif self.covariance_type == 'diag': + covs = self._xxw_sum / self._w_sum - \ + self._xw_sum ** 2.0 / self._w_sum ** 2.0 + + _update_parameter(self.means, means, self.inertia) + _update_parameter(self.covs, covs, self.inertia) + self._reset_cache() diff --git a/pomegranate/distributions/lognormal_old.pyx b/pomegranate/distributions/lognormal_old.pyx new file mode 100644 index 00000000..7cd4a8e5 --- /dev/null +++ b/pomegranate/distributions/lognormal_old.pyx @@ -0,0 +1,112 @@ +#!python +#cython: boundscheck=False +#cython: cdivision=True +# LogNormalDistribution.pyx +# Contact: Jacob Schreiber + +import numpy + +from ..utils cimport _log +from ..utils cimport isnan +from ..utils import check_random_state + +from libc.math cimport sqrt as csqrt + +# Define some useful constants +DEF NEGINF = float("-inf") +DEF INF = float("inf") +DEF SQRT_2_PI = 2.50662827463 +DEF LOG_2_PI = 1.83787706641 + +cdef class LogNormalDistribution(Distribution): + """A lognormal distribution over non-negative floats. + + The parameters are the mu and sigma of the normal distribution, which + is the the exponential of the log normal distribution. + """ + + property parameters: + def __get__(self): + return [self.mu, self.sigma] + def __set__(self, parameters): + self.mu, self.sigma = parameters + + def __init__(self, double mu, double sigma, double min_std=0.0, frozen=False): + self.mu = mu + self.sigma = sigma + self.summaries = [0, 0, 0] + self.name = "LogNormalDistribution" + self.frozen = frozen + self.min_std = min_std + + def __reduce__(self): + """Serialize distribution for pickling.""" + return self.__class__, (self.mu, self.sigma, self.frozen) + + cdef void _log_probability(self, double* X, double* log_probability, int n) nogil: + cdef int i + for i in range(n): + if isnan(X[i]): + log_probability[i] = 0. + else: + log_probability[i] = -_log(X[i] * self.sigma * SQRT_2_PI) - 0.5\ + * ((_log(X[i]) - self.mu) / self.sigma) ** 2 + + def sample(self, n=None, random_state=None): + random_state = check_random_state(random_state) + return random_state.lognormal(self.mu, self.sigma, n) + + cdef double _summarize(self, double* items, double* weights, int n, + int column_idx, int d) nogil: + """Cython function to get the MLE estimate for a Gaussian.""" + + cdef int i + cdef double x_sum = 0.0, x2_sum = 0.0, w_sum = 0.0 + cdef double item, log_item + + for i in range(n): + item = items[i*d + column_idx] + if isnan(item): + continue + + log_item = _log(item) + w_sum += weights[i] + x_sum += weights[i] * log_item + x2_sum += weights[i] * log_item * log_item + + with gil: + self.summaries[0] += w_sum + self.summaries[1] += x_sum + self.summaries[2] += x2_sum + + def from_summaries(self, inertia=0.0): + """ + Takes in a series of summaries, represented as a mean, a variance, and + a weight, and updates the underlying distribution. Notes on how to do + this for a Gaussian distribution were taken from here: + http://math.stackexchange.com/questions/453113/how-to-merge-two-gaussians + """ + + # If no summaries stored or the summary is frozen, don't do anything. 
+ if self.summaries[0] == 0 or self.frozen == True: + return + + mu = self.summaries[1] / self.summaries[0] + var = self.summaries[2] / self.summaries[0] - self.summaries[1] ** 2.0 / self.summaries[0] ** 2.0 + + sigma = csqrt(var) + if sigma < self.min_std: + sigma = self.min_std + + self.mu = self.mu*inertia + mu*(1-inertia) + self.sigma = self.sigma*inertia + sigma*(1-inertia) + self.summaries = [0, 0, 0] + + def clear_summaries(self): + """Clear the summary statistics stored in the object.""" + + self.summaries = [0, 0, 0] + + @classmethod + def blank(cls): + return cls(0, 1) From 70c7dd7a251c1a58921ebbacd5e7ced423766189 Mon Sep 17 00:00:00 2001 From: mschrader15 Date: Mon, 25 Sep 2023 11:10:33 -0500 Subject: [PATCH 2/3] feat: halfnormal and lognormal Signed-off-by: mschrader15 --- pomegranate/distributions/__init__.py | 2 + pomegranate/distributions/halfnormal.py | 121 +++------------ pomegranate/distributions/lognormal.py | 159 ++------------------ pomegranate/distributions/lognormal_old.pyx | 112 -------------- 4 files changed, 29 insertions(+), 365 deletions(-) delete mode 100644 pomegranate/distributions/lognormal_old.pyx diff --git a/pomegranate/distributions/__init__.py b/pomegranate/distributions/__init__.py index c73fe65d..0c57f351 100644 --- a/pomegranate/distributions/__init__.py +++ b/pomegranate/distributions/__init__.py @@ -11,3 +11,5 @@ from .student_t import StudentT from .uniform import Uniform from .zero_inflated import ZeroInflated +from .lognormal import LogNormal +from .halfnormal import HalfNormal \ No newline at end of file diff --git a/pomegranate/distributions/halfnormal.py b/pomegranate/distributions/halfnormal.py index b3fa0f9d..93322968 100644 --- a/pomegranate/distributions/halfnormal.py +++ b/pomegranate/distributions/halfnormal.py @@ -10,6 +10,7 @@ from .._utils import _check_shapes from ._distribution import Distribution +from .normal import Normal # Define some useful constants @@ -17,9 +18,10 @@ INF = float("inf") SQRT_2_PI = 2.50662827463 LOG_2_PI = 1.83787706641 +LOG_2 = 0.6931471805599453 -class HalfNormal(Distribution): +class HalfNormal(Normal): """A half-normal distribution object. A half-normal distribution is a distribution over positive real numbers that @@ -78,20 +80,12 @@ def __init__( frozen=False, check_data=True, ): - super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) - self.name = "HalfNormal" - - self.covs = _check_parameter(_cast_as_parameter(covs), "covs", ndim=(1, 2)) - - _check_shapes([self.means, self.covs], ["means", "covs"]) - - self.min_cov = _check_parameter(min_cov, "min_cov", min_value=0, ndim=0) - self.covariance_type = covariance_type - - self._initialized = covs is not None - self.d = self.means.shape[-1] if self._initialized else None - self._reset_cache() + self.name = "HalfNormal" + super().__init__(means=None, covs=covs, min_cov=min_cov, + covariance_type=covariance_type, inertia=inertia, frozen=frozen, + check_data=check_data) + def _initialize(self, d): """Initialize the probability distribution. @@ -105,20 +99,6 @@ def _initialize(self, d): d: int The dimensionality the distribution is being initialized to. 
""" - if self.covariance_type == "full": - self.covs = _cast_as_parameter( - torch.zeros(d, d, dtype=self.dtype, device=self.device) - ) - elif self.covariance_type == "diag": - self.covs = _cast_as_parameter( - torch.zeros(d, dtype=self.dtype, device=self.device) - ) - elif self.covariance_type == "sphere": - self.covs = _cast_as_parameter( - torch.tensor(0, dtype=self.dtype, device=self.device) - ) - - self._initialized = True super()._initialize(d) def _reset_cache(self): @@ -129,51 +109,7 @@ def _reset_cache(self): recalculates the cached values meant to speed up log probability calculations. """ - - if self._initialized == False: - return - - self.register_buffer( - "_w_sum", torch.zeros(self.d, dtype=self.dtype, device=self.device) - ) - self.register_buffer( - "_xw_sum", torch.zeros(self.d, dtype=self.dtype, device=self.device) - ) - - if self.covariance_type == "full": - self.register_buffer( - "_xxw_sum", - torch.zeros(self.d, self.d, dtype=self.dtype, device=self.device), - ) - - if self.covs.sum() > 0.0: - chol = torch.linalg.cholesky(self.covs) - _inv_cov = torch.linalg.solve_triangular( - chol, - torch.eye(len(self.covs), dtype=self.dtype, device=self.device), - upper=False, - ).T - _log_det = -0.5 * torch.linalg.slogdet(self.covs)[1] - _theta = _log_det - 0.5 * (self.d * LOG_2_PI) - - self.register_buffer("_inv_cov", _inv_cov) - self.register_buffer("_log_det", _log_det) - self.register_buffer("_theta", _theta) - - elif self.covariance_type in ("diag", "sphere"): - self.register_buffer( - "_xxw_sum", torch.zeros(self.d, dtype=self.dtype, device=self.device) - ) - - if self.covs.sum() > 0.0: - _log_sigma_sqrt_2pi = -torch.log(torch.sqrt(self.covs) * SQRT_2_PI) - _inv_two_sigma = 1.0 / (2 * self.covs) - - self.register_buffer("_log_sigma_sqrt_2pi", _log_sigma_sqrt_2pi) - self.register_buffer("_inv_two_sigma", _inv_two_sigma) - - if torch.any(self.covs < 0): - raise ValueError("Variances must be positive.") + super()._reset_cache() def sample(self, n): """Sample from the probability distribution. @@ -193,10 +129,7 @@ def sample(self, n): X: torch.tensor, shape=(n, self.d) Randomly generated samples. """ - - if self.covariance_type == "diag": - return torch.distributions.HalfNormal(self.covs).sample([n]) - elif self.covariance_type == "full": + if self.covariance_type in ["diag", "full"]: return torch.distributions.HalfNormal(self.covs).sample([n]) def log_probability(self, X): @@ -225,23 +158,15 @@ def log_probability(self, X): """ X = _check_parameter( - _cast_as_tensor(X, dtype=self.means.dtype), + _cast_as_tensor(X, dtype=self.covs.dtype), "X", ndim=2, shape=(-1, self.d), check_parameter=self.check_data, ) + return super().log_probability(X) + LOG_2 + - # if self.covariance_type == 'full': - # # logp = torch.matmul(X, self._inv_cov) - self._inv_cov_dot_mu - # # logp = self.d * LOG_2_PI + torch.sum(logp ** 2, dim=-1) - # # logp = self._log_det - 0.5 * logp - # # return logp - return 0.5 * LOG_2_PI + (X**2 / 2).sum(dim=-1) - - # elif self.covariance_type in ('diag', 'sphere'): - # return torch.sum(self._log_sigma_sqrt_2pi - ((X - self.means) ** 2) - # * self._inv_two_sigma, dim=-1) def summarize(self, X, sample_weight=None): """Extract the sufficient statistics from a batch of data. @@ -263,21 +188,7 @@ def summarize(self, X, sample_weight=None): (-1, self.d) or a vector of shape (-1,). Default is ones. 
""" - if self.frozen == True: - return - - X, sample_weight = super().summarize(X, sample_weight=sample_weight) - X = _cast_as_tensor(X, dtype=self.means.dtype) - - if self.covariance_type == "full": - self._w_sum += torch.sum(sample_weight, dim=0) - self._xw_sum += torch.sum(X * sample_weight, axis=0) - self._xxw_sum += torch.matmul((X * sample_weight).T, X) - - elif self.covariance_type in ("diag", "sphere"): - self._w_sum[:] = self._w_sum + torch.sum(sample_weight, dim=0) - self._xw_sum[:] = self._xw_sum + torch.sum(X * sample_weight, dim=0) - self._xxw_sum[:] = self._xxw_sum + torch.sum(X**2 * sample_weight, dim=0) + super().summarize(X, sample_weight=sample_weight) def from_summaries(self): """Update the model parameters given the extracted statistics. @@ -293,6 +204,9 @@ def from_summaries(self): if self.frozen == True: return + # the means are always zero for a half normal distribution + means = torch.zeros(self.d, dtype=self.covs.dtype) + if self.covariance_type == "full": v = self._xw_sum.unsqueeze(0) * self._xw_sum.unsqueeze(1) covs = self._xxw_sum / self._w_sum - v / self._w_sum**2.0 @@ -305,4 +219,5 @@ def from_summaries(self): covs = covs.mean(dim=-1) _update_parameter(self.covs, covs, self.inertia) + _update_parameter(self.means, means, self.inertia) self._reset_cache() diff --git a/pomegranate/distributions/lognormal.py b/pomegranate/distributions/lognormal.py index 307f5c76..92673507 100644 --- a/pomegranate/distributions/lognormal.py +++ b/pomegranate/distributions/lognormal.py @@ -9,7 +9,7 @@ from .._utils import _check_parameter from .._utils import _check_shapes -from ._distribution import Distribution +from .normal import Normal # Define some useful constants @@ -19,7 +19,7 @@ LOG_2_PI = 1.83787706641 -class LogNormal(Distribution): +class LogNormal(Normal): """A lognormal object. The parameters are the mu and sigma of the normal distribution, which @@ -71,102 +71,10 @@ class LogNormal(Distribution): def __init__(self, means=None, covs=None, covariance_type='full', min_cov=None, inertia=0.0, frozen=False, check_data=True): - super().__init__(inertia=inertia, frozen=frozen, check_data=check_data) - self.name = "LogNormal" - - self.means = _check_parameter(_cast_as_parameter(means), "means", - ndim=1) - self.covs = _check_parameter(_cast_as_parameter(covs), "covs", - ndim=(1, 2)) - - _check_shapes([self.means, self.covs], ["means", "covs"]) - - self.min_cov = _check_parameter(min_cov, "min_cov", min_value=0, ndim=0) - self.covariance_type = covariance_type - - self._initialized = (means is not None) and (covs is not None) - self.d = self.means.shape[-1] if self._initialized else None - self._reset_cache() - - def _initialize(self, d): - """Initialize the probability distribution. - - This method is meant to only be called internally. It initializes the - parameters of the distribution and stores its dimensionality. For more - complex methods, this function will do more. - - - Parameters - ---------- - d: int - The dimensionality the distribution is being initialized to. 
- """ - - self.means = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, - device=self.device)) - if self.covariance_type == 'full': - self.covs = _cast_as_parameter(torch.zeros(d, d, - dtype=self.dtype, device=self.device)) - elif self.covariance_type == 'diag': - self.covs = _cast_as_parameter(torch.zeros(d, dtype=self.dtype, - device=self.device)) - elif self.covariance_type == 'sphere': - self.covs = _cast_as_parameter(torch.tensor(0, dtype=self.dtype, - device=self.device)) - - self._initialized = True - super()._initialize(d) - - def _reset_cache(self): - """Reset the internally stored statistics. - - This method is meant to only be called internally. It resets the - stored statistics used to update the model parameters as well as - recalculates the cached values meant to speed up log probability - calculations. - """ - - if self._initialized == False: - return - - self.register_buffer("_w_sum", torch.zeros(self.d, dtype=self.dtype, - device=self.device)) - self.register_buffer("_xw_sum", torch.zeros(self.d, dtype=self.dtype, - device=self.device)) - - if self.covariance_type == 'full': - self.register_buffer("_xxw_sum", torch.zeros(self.d, self.d, - dtype=self.dtype, device=self.device)) - - if self.covs.sum() > 0.0: - chol = torch.linalg.cholesky(self.covs) - _inv_cov = torch.linalg.solve_triangular(chol, torch.eye( - len(self.covs), dtype=self.dtype, device=self.device), - upper=False).T - _inv_cov_dot_mu = torch.matmul(self.means, _inv_cov) - _log_det = -0.5 * torch.linalg.slogdet(self.covs)[1] - _theta = _log_det - 0.5 * (self.d * LOG_2_PI) - - self.register_buffer("_inv_cov", _inv_cov) - self.register_buffer("_inv_cov_dot_mu", _inv_cov_dot_mu) - self.register_buffer("_log_det", _log_det) - self.register_buffer("_theta", _theta) - - elif self.covariance_type in ('diag', 'sphere'): - self.register_buffer("_xxw_sum", torch.zeros(self.d, - dtype=self.dtype, device=self.device)) - - if self.covs.sum() > 0.0: - _log_sigma_sqrt_2pi = -torch.log(torch.sqrt(self.covs) * - SQRT_2_PI) - _inv_two_sigma = 1. / (2 * self.covs) - - self.register_buffer("_log_sigma_sqrt_2pi", _log_sigma_sqrt_2pi) - self.register_buffer("_inv_two_sigma", _inv_two_sigma) - - if any(self.covs < 0): - raise ValueError("Variances must be positive.") + self.name = "LogNormal" + super().__init__(means=means, covs=covs, covariance_type=covariance_type, + min_cov=min_cov, inertia=inertia, frozen=frozen, check_data=check_data) def sample(self, n): """Sample from the probability distribution. @@ -224,15 +132,9 @@ def log_probability(self, X): # take the log of X x_log = X.log() - if self.covariance_type == 'full': - logp = torch.matmul(x_log, self._inv_cov) - self._inv_cov_dot_mu - logp = self.d * LOG_2_PI + torch.sum(logp ** 2, dim=-1) - logp = self._log_det - 0.5 * logp - return logp - - elif self.covariance_type in ('diag', 'sphere'): - return torch.sum(self._log_sigma_sqrt_2pi - ((x_log - self.means) ** 2) - * self._inv_two_sigma, dim=-1) + return super().log_probability( + x_log + ) def summarize(self, X, sample_weight=None): """Extract the sufficient statistics from a batch of data. 
@@ -256,48 +158,5 @@ def summarize(self, X, sample_weight=None): if self.frozen is True: return - - X = _cast_as_tensor(X, dtype=self.means.dtype) - X = X.log() - - X, sample_weight = super().summarize(X, sample_weight=sample_weight) X = _cast_as_tensor(X, dtype=self.means.dtype) - - if self.covariance_type == 'full': - self._w_sum += torch.sum(sample_weight, dim=0) - self._xw_sum += torch.sum(X * sample_weight, axis=0) - self._xxw_sum += torch.matmul((X * sample_weight).T, X) - - elif self.covariance_type in ('diag', 'sphere'): - self._w_sum[:] = self._w_sum + torch.sum(sample_weight, dim=0) - self._xw_sum[:] = self._xw_sum + torch.sum(X * sample_weight, dim=0) - self._xxw_sum[:] = self._xxw_sum + torch.sum(X ** 2 * - sample_weight, dim=0) - - def from_summaries(self): - """Update the model parameters given the extracted statistics. - - This method uses calculated statistics from calls to the `summarize` - method to update the distribution parameters. Hyperparameters for the - update are passed in at initialization time. - - Note: Internally, a call to `fit` is just a successive call to the - `summarize` method followed by the `from_summaries` method. - """ - - if self.frozen == True: - return - - means = self._xw_sum / self._w_sum - - if self.covariance_type == 'full': - v = self._xw_sum.unsqueeze(0) * self._xw_sum.unsqueeze(1) - covs = self._xxw_sum / self._w_sum - v / self._w_sum ** 2.0 - - elif self.covariance_type == 'diag': - covs = self._xxw_sum / self._w_sum - \ - self._xw_sum ** 2.0 / self._w_sum ** 2.0 - - _update_parameter(self.means, means, self.inertia) - _update_parameter(self.covs, covs, self.inertia) - self._reset_cache() + super().summarize(X.log(), sample_weight=sample_weight) diff --git a/pomegranate/distributions/lognormal_old.pyx b/pomegranate/distributions/lognormal_old.pyx deleted file mode 100644 index 7cd4a8e5..00000000 --- a/pomegranate/distributions/lognormal_old.pyx +++ /dev/null @@ -1,112 +0,0 @@ -#!python -#cython: boundscheck=False -#cython: cdivision=True -# LogNormalDistribution.pyx -# Contact: Jacob Schreiber - -import numpy - -from ..utils cimport _log -from ..utils cimport isnan -from ..utils import check_random_state - -from libc.math cimport sqrt as csqrt - -# Define some useful constants -DEF NEGINF = float("-inf") -DEF INF = float("inf") -DEF SQRT_2_PI = 2.50662827463 -DEF LOG_2_PI = 1.83787706641 - -cdef class LogNormalDistribution(Distribution): - """A lognormal distribution over non-negative floats. - - The parameters are the mu and sigma of the normal distribution, which - is the the exponential of the log normal distribution. - """ - - property parameters: - def __get__(self): - return [self.mu, self.sigma] - def __set__(self, parameters): - self.mu, self.sigma = parameters - - def __init__(self, double mu, double sigma, double min_std=0.0, frozen=False): - self.mu = mu - self.sigma = sigma - self.summaries = [0, 0, 0] - self.name = "LogNormalDistribution" - self.frozen = frozen - self.min_std = min_std - - def __reduce__(self): - """Serialize distribution for pickling.""" - return self.__class__, (self.mu, self.sigma, self.frozen) - - cdef void _log_probability(self, double* X, double* log_probability, int n) nogil: - cdef int i - for i in range(n): - if isnan(X[i]): - log_probability[i] = 0. 
- else: - log_probability[i] = -_log(X[i] * self.sigma * SQRT_2_PI) - 0.5\ - * ((_log(X[i]) - self.mu) / self.sigma) ** 2 - - def sample(self, n=None, random_state=None): - random_state = check_random_state(random_state) - return random_state.lognormal(self.mu, self.sigma, n) - - cdef double _summarize(self, double* items, double* weights, int n, - int column_idx, int d) nogil: - """Cython function to get the MLE estimate for a Gaussian.""" - - cdef int i - cdef double x_sum = 0.0, x2_sum = 0.0, w_sum = 0.0 - cdef double item, log_item - - for i in range(n): - item = items[i*d + column_idx] - if isnan(item): - continue - - log_item = _log(item) - w_sum += weights[i] - x_sum += weights[i] * log_item - x2_sum += weights[i] * log_item * log_item - - with gil: - self.summaries[0] += w_sum - self.summaries[1] += x_sum - self.summaries[2] += x2_sum - - def from_summaries(self, inertia=0.0): - """ - Takes in a series of summaries, represented as a mean, a variance, and - a weight, and updates the underlying distribution. Notes on how to do - this for a Gaussian distribution were taken from here: - http://math.stackexchange.com/questions/453113/how-to-merge-two-gaussians - """ - - # If no summaries stored or the summary is frozen, don't do anything. - if self.summaries[0] == 0 or self.frozen == True: - return - - mu = self.summaries[1] / self.summaries[0] - var = self.summaries[2] / self.summaries[0] - self.summaries[1] ** 2.0 / self.summaries[0] ** 2.0 - - sigma = csqrt(var) - if sigma < self.min_std: - sigma = self.min_std - - self.mu = self.mu*inertia + mu*(1-inertia) - self.sigma = self.sigma*inertia + sigma*(1-inertia) - self.summaries = [0, 0, 0] - - def clear_summaries(self): - """Clear the summary statistics stored in the object.""" - - self.summaries = [0, 0, 0] - - @classmethod - def blank(cls): - return cls(0, 1) From ae0d3d219f9da4220dbae01cb185deb5c4833b7f Mon Sep 17 00:00:00 2001 From: Max Date: Sat, 11 Nov 2023 09:19:16 -0600 Subject: [PATCH 3/3] chore: fix documentation. remove unused vars. format Signed-off-by: Max --- pomegranate/distributions/halfnormal.py | 27 +-- pomegranate/distributions/lognormal.py | 307 ++++++++++++------------ 2 files changed, 172 insertions(+), 162 deletions(-) diff --git a/pomegranate/distributions/halfnormal.py b/pomegranate/distributions/halfnormal.py index 93322968..04cdd548 100644 --- a/pomegranate/distributions/halfnormal.py +++ b/pomegranate/distributions/halfnormal.py @@ -14,10 +14,6 @@ # Define some useful constants -NEGINF = float("-inf") -INF = float("inf") -SQRT_2_PI = 2.50662827463 -LOG_2_PI = 1.83787706641 LOG_2 = 0.6931471805599453 @@ -36,7 +32,7 @@ class HalfNormal(Normal): There are two ways to initialize this object. The first is to pass in the tensor of probablity parameters, at which point they can immediately be used. The second is to not pass in the rate parameters and then call - either `fit` or `summary` + `from_summaries`, at which point the probability + either `fit` or `summarize` + `from_summaries`, at which point the probability parameter will be learned from data. 
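+
+ Equivalently, if Z follows a Normal(0, sigma) distribution then |Z|
+ follows this half-normal distribution, which is why the log probability
+ below is the corresponding zero-mean normal log probability plus
+ log(2).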
@@ -80,12 +76,17 @@ def __init__( frozen=False, check_data=True, ): - self.name = "HalfNormal" - super().__init__(means=None, covs=covs, min_cov=min_cov, - covariance_type=covariance_type, inertia=inertia, frozen=frozen, - check_data=check_data) - + super().__init__( + means=None, + covs=covs, + min_cov=min_cov, + covariance_type=covariance_type, + inertia=inertia, + frozen=frozen, + check_data=check_data, + ) + def _initialize(self, d): """Initialize the probability distribution. @@ -165,8 +166,6 @@ def log_probability(self, X): check_parameter=self.check_data, ) return super().log_probability(X) + LOG_2 - - def summarize(self, X, sample_weight=None): """Extract the sufficient statistics from a batch of data. @@ -212,9 +211,7 @@ def from_summaries(self): covs = self._xxw_sum / self._w_sum - v / self._w_sum**2.0 elif self.covariance_type in ["diag", "sphere"]: - covs = ( - self._xxw_sum / self._w_sum - self._xw_sum**2.0 / self._w_sum**2.0 - ) + covs = self._xxw_sum / self._w_sum - self._xw_sum**2.0 / self._w_sum**2.0 if self.covariance_type == "sphere": covs = covs.mean(dim=-1) diff --git a/pomegranate/distributions/lognormal.py b/pomegranate/distributions/lognormal.py index 92673507..0693023f 100644 --- a/pomegranate/distributions/lognormal.py +++ b/pomegranate/distributions/lognormal.py @@ -12,151 +12,164 @@ from .normal import Normal -# Define some useful constants -NEGINF = float("-inf") -INF = float("inf") -SQRT_2_PI = 2.50662827463 -LOG_2_PI = 1.83787706641 - - class LogNormal(Normal): - """A lognormal object. - - The parameters are the mu and sigma of the normal distribution, which - is the the exponential of the log normal distribution. This - distribution can assume that features are independent of the others if - the covariance type is 'diag' or 'sphere', but if the type is 'full' then - the features are not independent. - - There are two ways to initialize this object. The first is to pass in - the tensor of probablity parameters, at which point they can immediately be - used. The second is to not pass in the rate parameters and then call - either `fit` or `summary` + `from_summaries`, at which point the probability - parameter will be learned from data. - - - Parameters - ---------- - means: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional - The mean values of the normal distributions. Default is None. - - covs: list, numpy.ndarray, torch.Tensor, or None, optional - The variances and covariances of the distribution. If covariance_type - is 'full', the shape should be (self.d, self.d); if 'diag', the shape - should be (self.d,); if 'sphere', it should be (1,). Note that this is - the variances or covariances in all settings, and not the standard - deviation, as may be more common for diagonal covariance matrices. - Default is None. - - covariance_type: str, optional - The type of covariance matrix. Must be one of 'full', 'diag', or - 'sphere'. Default is 'full'. - - min_cov: float or None, optional - The minimum variance or covariance. - - inertia: float, [0, 1], optional - Indicates the proportion of the update to apply to the parameters - during training. When the inertia is 0.0, the update is applied in - its entirety and the previous parameters are ignored. When the - inertia is 1.0, the update is entirely ignored and the previous - parameters are kept, equivalently to if the parameters were frozen. - - frozen: bool, optional - Whether all the parameters associated with this distribution are frozen. 
- If you want to freeze individual pameters, or individual values in those - parameters, you must modify the `frozen` attribute of the tensor or - parameter directly. Default is False. - """ - - def __init__(self, means=None, covs=None, covariance_type='full', - min_cov=None, inertia=0.0, frozen=False, check_data=True): - - self.name = "LogNormal" - super().__init__(means=means, covs=covs, covariance_type=covariance_type, - min_cov=min_cov, inertia=inertia, frozen=frozen, check_data=check_data) - - def sample(self, n): - """Sample from the probability distribution. - - This method will return `n` samples generated from the underlying - probability distribution. - - - Parameters - ---------- - n: int - The number of samples to generate. - - - Returns - ------- - X: torch.tensor, shape=(n, self.d) - Randomly generated samples. - """ - - if self.covariance_type == 'diag': - return torch.distributions.Normal(self.means, self.covs).sample([n]).exp() - elif self.covariance_type == 'full': - return torch.distributions.MultivariateNormal(self.means, - self.covs).sample([n]).exp() - - def log_probability(self, X): - """Calculate the log probability of each example. - - This method calculates the log probability of each example given the - parameters of the distribution. The examples must be given in a 2D - format. - - Note: This differs from some other log probability calculation - functions, like those in torch.distributions, because it is not - returning the log probability of each feature independently, but rather - the total log probability of the entire example. - - - Parameters - ---------- - X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) - A set of examples to evaluate. - - - Returns - ------- - logp: torch.Tensor, shape=(-1,) - The log probability of each example. - """ - - X = _check_parameter(_cast_as_tensor(X, dtype=self.means.dtype), "X", - ndim=2, shape=(-1, self.d), check_parameter=self.check_data) - - # take the log of X - x_log = X.log() - - return super().log_probability( - x_log - ) - - def summarize(self, X, sample_weight=None): - """Extract the sufficient statistics from a batch of data. - - This method calculates the sufficient statistics from optionally - weighted data and adds them to the stored cache. The examples must be - given in a 2D format. Sample weights can either be provided as one - value per example or as a 2D matrix of weights for each feature in - each example. - - - Parameters - ---------- - X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) - A set of examples to summarize. - - sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional - A set of weights for the examples. This can be either of shape - (-1, self.d) or a vector of shape (-1,). Default is ones. - """ - - if self.frozen is True: - return - X = _cast_as_tensor(X, dtype=self.means.dtype) - super().summarize(X.log(), sample_weight=sample_weight) + """A lognormal object. + + The parameters are the mu and sigma of the normal distribution, which + is the the exponential of the log normal distribution. This + distribution can assume that features are independent of the others if + the covariance type is 'diag' or 'sphere', but if the type is 'full' then + the features are not independent. + + There are two ways to initialize this object. The first is to pass in + the tensor of probablity parameters, at which point they can immediately be + used. 
The second is to not pass in the parameters and then call
+ either `fit` or `summarize` + `from_summaries`, at which point the probability
+ parameter will be learned from data.
+
+
+ Parameters
+ ----------
+ means: list, numpy.ndarray, torch.Tensor or None, shape=(d,), optional
+ The mean values of the normal distributions. Default is None.
+
+ covs: list, numpy.ndarray, torch.Tensor, or None, optional
+ The variances and covariances of the distribution. If covariance_type
+ is 'full', the shape should be (self.d, self.d); if 'diag', the shape
+ should be (self.d,); if 'sphere', it should be (1,). Note that this is
+ the variances or covariances in all settings, and not the standard
+ deviation, as may be more common for diagonal covariance matrices.
+ Default is None.
+
+ covariance_type: str, optional
+ The type of covariance matrix. Must be one of 'full', 'diag', or
+ 'sphere'. Default is 'full'.
+
+ min_cov: float or None, optional
+ The minimum variance or covariance.
+
+ inertia: float, [0, 1], optional
+ Indicates the proportion of the update to apply to the parameters
+ during training. When the inertia is 0.0, the update is applied in
+ its entirety and the previous parameters are ignored. When the
+ inertia is 1.0, the update is entirely ignored and the previous
+ parameters are kept, equivalent to the parameters being frozen.
+
+ frozen: bool, optional
+ Whether all the parameters associated with this distribution are frozen.
+ If you want to freeze individual parameters, or individual values in those
+ parameters, you must modify the `frozen` attribute of the tensor or
+ parameter directly. Default is False.
+ """
+
+ def __init__(
+ self,
+ means=None,
+ covs=None,
+ covariance_type="full",
+ min_cov=None,
+ inertia=0.0,
+ frozen=False,
+ check_data=True,
+ ):
+ self.name = "LogNormal"
+ super().__init__(
+ means=means,
+ covs=covs,
+ covariance_type=covariance_type,
+ min_cov=min_cov,
+ inertia=inertia,
+ frozen=frozen,
+ check_data=check_data,
+ )
+
+ def sample(self, n):
+ """Sample from the probability distribution.
+
+ This method will return `n` samples generated from the underlying
+ probability distribution.
+
+
+ Parameters
+ ----------
+ n: int
+ The number of samples to generate.
+
+
+ Returns
+ -------
+ X: torch.tensor, shape=(n, self.d)
+ Randomly generated samples.
+ """
+
+ if self.covariance_type == "diag":
+ return torch.distributions.Normal(self.means, self.covs).sample([n]).exp()
+ elif self.covariance_type == "full":
+ return (
+ torch.distributions.MultivariateNormal(self.means, self.covs)
+ .sample([n])
+ .exp()
+ )
+
+ def log_probability(self, X):
+ """Calculate the log probability of each example.
+
+ This method calculates the log probability of each example given the
+ parameters of the distribution. The examples must be given in a 2D
+ format.
+
+ Note: This differs from some other log probability calculation
+ functions, like those in torch.distributions, because it is not
+ returning the log probability of each feature independently, but rather
+ the total log probability of the entire example.
+
+
+ Parameters
+ ----------
+ X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d)
+ A set of examples to evaluate.
+
+
+ Returns
+ -------
+ logp: torch.Tensor, shape=(-1,)
+ The log probability of each example.
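+
+ Example (an illustrative sketch; by construction this evaluates the
+ parent `Normal` density at `X.log()`, so for the same parameters the
+ two agree):
+
+ >>> import torch
+ >>> from pomegranate.distributions import LogNormal, Normal
+ >>> means, covs = torch.zeros(2), torch.ones(2)
+ >>> d = LogNormal(means=means, covs=covs, covariance_type='diag')
+ >>> n = Normal(means=means, covs=covs, covariance_type='diag')
+ >>> X = torch.tensor([[0.5, 1.0], [2.0, 3.0]])
+ >>> torch.allclose(d.log_probability(X), n.log_probability(X.log()))
+ True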
+ """ + + X = _check_parameter( + _cast_as_tensor(X, dtype=self.means.dtype), + "X", + ndim=2, + shape=(-1, self.d), + check_parameter=self.check_data, + ) + + # take the log of X + x_log = X.log() + + return super().log_probability(x_log) + + def summarize(self, X, sample_weight=None): + """Extract the sufficient statistics from a batch of data. + + This method calculates the sufficient statistics from optionally + weighted data and adds them to the stored cache. The examples must be + given in a 2D format. Sample weights can either be provided as one + value per example or as a 2D matrix of weights for each feature in + each example. + + + Parameters + ---------- + X: list, tuple, numpy.ndarray, torch.Tensor, shape=(-1, self.d) + A set of examples to summarize. + + sample_weight: list, tuple, numpy.ndarray, torch.Tensor, optional + A set of weights for the examples. This can be either of shape + (-1, self.d) or a vector of shape (-1,). Default is ones. + """ + + if self.frozen is True: + return + X = _cast_as_tensor(X, dtype=self.means.dtype) + super().summarize(X.log(), sample_weight=sample_weight)