Source code for flowtorch.analysis.mssa

"""Implementation of multivariate singular spectrum analysis (MSSA).

The MSSA may be used as a preprocessing tool to reduce noise in time
series data. For large state vectors, the memory requirements due to
time delay embedding can be reduced by projecting the data onto a
truncated POD basis before performing MSSA. This variant is referred to
as projected MSSA (PMSSA). The implementation is based on the work_
of Ohmichi et al.

.. _work: https://doi.org/10.1007/s00348-022-03523-5
"""

import torch as pt
from .svd import SVD
from .hodmd import _create_time_delays


[docs] class MSSA(object): """Multivariate singular spectrum analysis (MSSA). Examples >>> import torch as pt >>> from flowtorch.analysis import MSSA >>> dm = pt.rand((10, 100)) >>> mssa = MSSA(dm, rank=20) >>> rec = mssa.reconstruction >>> err = mssa.reconstruction_error.norm() / dm.norm() """ def __init__( self, data_matrix: pt.Tensor, window_size: int = None, rank: int = None ): """Create a MSSA instance. :param data_matrix: data matrix whose columns are formed by the individual snapshots :type data_matrix: pt.Tensor :param window_size: window size (number of snapshots) to shift over the full snapshot sequence; the window size is inversely proportional to the number of time delays, i.e., D = N - L +1 (D - delays, N - number of snapshots, L - window size); if L is not specified, N // 2 is chosen as default :type window_size: int, optional :param rank: truncation rank for the SVD of the Hankel matrix; if unspecified, a suitable value is determined via Optimal Singular Value Hard Thresholding (SVHT) :type rank: int, optional """ self._dm = data_matrix self._rows, self._cols = data_matrix.shape if window_size is None: self._window_size = self._cols // 2 else: self._window_size = min(window_size, self._cols - 1) self._delay = self._cols - self._window_size + 1 self._svd = SVD(_create_time_delays(data_matrix, self._delay, 1), rank) @property def window_size(self) -> int: return self._window_size @property def delay(self) -> int: return self._delay @property def reconstruction(self) -> pt.Tensor: """Compute reconstruction of the original data matrix. In the rank-r approximation of the Hankel matrix, multiple instances of individual snapshots are reconstructed. To reconstruct the original data matrix, averaging over the anti-diagonals is performed. :return: rank-r reconstruction of the original data matrix :rtype: pt.Tensor """ rec = self._svd.reconstruct() m, n, d, l = self._rows, self._cols, self._delay, self._window_size rec_mean = pt.zeros(m * n, dtype=rec.dtype) count = pt.zeros(m * n, dtype=pt.int64) for i in range(l): rec_mean[m * i : m * (i + d)] += rec[:, i] count[m * i : m * (i + d)] += 1 return pt.vstack((rec_mean / count).split(m)).T @property def reconstruction_error(self) -> pt.Tensor: """Compute elementwise reconstruction error. :return: reconstruction error with one value for each element in the data matrix :rtype: pt.Tensor """ return self._dm - self.reconstruction @property def svd(self) -> SVD: """SVD of the Hankel matrix. :return: truncated SVD of the Hankel matrix :rtype: SVD """ return self._svd
[docs] class PMSSA(MSSA): """Projected multivariate singular spectrum analysis (PMSSA). Examples >>> import torch as pt >>> from flowtorch.analysis import PMSSA >>> dm = pt.rand((1000, 200)) >>> pmssa = PMSSA(dm, rank=20) >>> rec = pmssa.reconstruction >>> err = pmssa.reconstruction_error.norm() / dm.norm() """ def __init__( self, data_matrix: pt.Tensor, window_size: int = None, rank: int = None ): """Create a PMSSA instance :param data_matrix: data matrix whose columns are formed by the individual snapshots :type data_matrix: pt.Tensor :param window_size: window size (number of snapshots) to shift over the full snapshot sequence; the window size is inversely proportional to the number of time delays, i.e., D = N - L +1 (D - delays, N - number of snapshots, L - window size); if L is not specified, N // 2 is chosen as default :type window_size: int, optional :param rank: number of POD modes on which to project the data before running MSSA; the truncation rank for the SVD of the Hankel matrix is identical; if unspecified, a suitable value is determined via Optimal Singular Value Hard Thresholding (SVHT) :type rank: int, optional """ self._dm_org = data_matrix self._svd_dr = SVD(data_matrix, rank) super(PMSSA, self).__init__( self._svd_dr.U.T @ data_matrix, window_size, self._svd_dr.rank ) @property def reconstruction(self) -> pt.Tensor: """Compute reconstruction error in the original basis :return: reconstruction of the original data matrix :rtype: pt.Tensor """ return self._svd_dr.U @ super().reconstruction @property def reconstruction_error(self) -> pt.Tensor: """Compute elementwise reconstruction error.""" return self._dm_org - self.reconstruction @property def svd_dr(self) -> SVD: """SVD used for dimensionality reduction. :return: SVD used for the initial projection step in PMSSA :rtype: SVD """ return self._svd_dr