"""Implementation of multivariate singular spectrum analysis (MSSA).
The MSSA may be used as a preprocessing tool to reduce noise in time
series data. For large state vectors, the memory requirements due to
time delay embedding can be reduced by projecting the data onto a
truncated POD basis before performing MSSA. This variant is referred to
as projected MSSA (PMSSA). The implementation is based on the work_
of Ohmichi et al.
.. _work: https://doi.org/10.1007/s00348-022-03523-5
"""
import torch as pt
from .svd import SVD
from .hodmd import _create_time_delays
[docs]
class MSSA(object):
"""Multivariate singular spectrum analysis (MSSA).
Examples
>>> import torch as pt
>>> from flowtorch.analysis import MSSA
>>> dm = pt.rand((10, 100))
>>> mssa = MSSA(dm, rank=20)
>>> rec = mssa.reconstruction
>>> err = mssa.reconstruction_error.norm() / dm.norm()
"""
def __init__(
self, data_matrix: pt.Tensor, window_size: int = None, rank: int = None
):
"""Create a MSSA instance.
:param data_matrix: data matrix whose columns are formed by the individual snapshots
:type data_matrix: pt.Tensor
:param window_size: window size (number of snapshots) to shift over the full snapshot
sequence; the window size is inversely proportional to the number of time delays,
i.e., D = N - L +1 (D - delays, N - number of snapshots, L - window size); if L is
not specified, N // 2 is chosen as default
:type window_size: int, optional
:param rank: truncation rank for the SVD of the Hankel matrix; if unspecified, a suitable
value is determined via Optimal Singular Value Hard Thresholding (SVHT)
:type rank: int, optional
"""
self._dm = data_matrix
self._rows, self._cols = data_matrix.shape
if window_size is None:
self._window_size = self._cols // 2
else:
self._window_size = min(window_size, self._cols - 1)
self._delay = self._cols - self._window_size + 1
self._svd = SVD(_create_time_delays(data_matrix, self._delay, 1), rank)
@property
def window_size(self) -> int:
return self._window_size
@property
def delay(self) -> int:
return self._delay
@property
def reconstruction(self) -> pt.Tensor:
"""Compute reconstruction of the original data matrix.
In the rank-r approximation of the Hankel matrix, multiple instances
of individual snapshots are reconstructed. To reconstruct the original
data matrix, averaging over the anti-diagonals is performed.
:return: rank-r reconstruction of the original data matrix
:rtype: pt.Tensor
"""
rec = self._svd.reconstruct()
m, n, d, l = self._rows, self._cols, self._delay, self._window_size
rec_mean = pt.zeros(m * n, dtype=rec.dtype)
count = pt.zeros(m * n, dtype=pt.int64)
for i in range(l):
rec_mean[m * i : m * (i + d)] += rec[:, i]
count[m * i : m * (i + d)] += 1
return pt.vstack((rec_mean / count).split(m)).T
@property
def reconstruction_error(self) -> pt.Tensor:
"""Compute elementwise reconstruction error.
:return: reconstruction error with one value for each element in the
data matrix
:rtype: pt.Tensor
"""
return self._dm - self.reconstruction
@property
def svd(self) -> SVD:
"""SVD of the Hankel matrix.
:return: truncated SVD of the Hankel matrix
:rtype: SVD
"""
return self._svd
[docs]
class PMSSA(MSSA):
"""Projected multivariate singular spectrum analysis (PMSSA).
Examples
>>> import torch as pt
>>> from flowtorch.analysis import PMSSA
>>> dm = pt.rand((1000, 200))
>>> pmssa = PMSSA(dm, rank=20)
>>> rec = pmssa.reconstruction
>>> err = pmssa.reconstruction_error.norm() / dm.norm()
"""
def __init__(
self, data_matrix: pt.Tensor, window_size: int = None, rank: int = None
):
"""Create a PMSSA instance
:param data_matrix: data matrix whose columns are formed by the individual snapshots
:type data_matrix: pt.Tensor
:param window_size: window size (number of snapshots) to shift over the full snapshot
sequence; the window size is inversely proportional to the number of time delays,
i.e., D = N - L +1 (D - delays, N - number of snapshots, L - window size); if L is
not specified, N // 2 is chosen as default
:type window_size: int, optional
:param rank: number of POD modes on which to project the data before running MSSA; the
truncation rank for the SVD of the Hankel matrix is identical; if unspecified, a suitable
value is determined via Optimal Singular Value Hard Thresholding (SVHT)
:type rank: int, optional
"""
self._dm_org = data_matrix
self._svd_dr = SVD(data_matrix, rank)
super(PMSSA, self).__init__(
self._svd_dr.U.T @ data_matrix, window_size, self._svd_dr.rank
)
@property
def reconstruction(self) -> pt.Tensor:
"""Compute reconstruction error in the original basis
:return: reconstruction of the original data matrix
:rtype: pt.Tensor
"""
return self._svd_dr.U @ super().reconstruction
@property
def reconstruction_error(self) -> pt.Tensor:
"""Compute elementwise reconstruction error."""
return self._dm_org - self.reconstruction
@property
def svd_dr(self) -> SVD:
"""SVD used for dimensionality reduction.
:return: SVD used for the initial projection step in PMSSA
:rtype: SVD
"""
return self._svd_dr