diff --git a/stumpy/sdp.py b/stumpy/sdp.py index 551e8f2f2..bd1d8fea7 100644 --- a/stumpy/sdp.py +++ b/stumpy/sdp.py @@ -1,6 +1,8 @@ import numpy as np from numba import njit -from scipy.signal import convolve +from scipy.fft import next_fast_len +from scipy.fft._pocketfft.basic import c2r, r2c +from scipy.signal import convolve, oaconvolve from . import config @@ -23,11 +25,14 @@ def _njit_sliding_dot_product(Q, T): out : numpy.ndarray Sliding dot product between `Q` and `T`. """ - m = Q.shape[0] - l = T.shape[0] - m + 1 + m = len(Q) + l = len(T) - m + 1 out = np.empty(l) for i in range(l): - out[i] = np.dot(Q, T[i : i + m]) + result = 0.0 + for j in range(m): + result += Q[j] * T[i + j] + out[i] = result return out @@ -49,12 +54,59 @@ def _convolve_sliding_dot_product(Q, T): output : numpy.ndarray Sliding dot product between `Q` and `T`. """ - n = T.shape[0] - m = Q.shape[0] - Qr = np.flipud(Q) # Reverse/flip Q - QT = convolve(Qr, T) + return convolve(np.flipud(Q), T, mode="valid") - return QT.real[m - 1 : n] + +def _oaconvolve_sliding_dot_product(Q, T): + """ + Use scipy's oaconvolve to calculate the sliding dot product. + + Parameters + ---------- + Q : numpy.ndarray + Query array or subsequence + + T : numpy.ndarray + Time series or sequence + + Returns + ------- + output : numpy.ndarray + Sliding dot product between `Q` and `T`. + """ + return oaconvolve(np.ascontiguousarray(Q[::-1]), T, mode="valid") + + +def _pocketfft_sliding_dot_product(Q, T): + """ + Use scipy.fft._pocketfft to compute + the sliding dot product. + + Parameters + ---------- + Q : numpy.ndarray + Query array or subsequence + + T : numpy.ndarray + Time series or sequence + + Returns + ------- + output : numpy.ndarray + Sliding dot product between `Q` and `T`. + """ + n = len(T) + m = len(Q) + next_fast_n = next_fast_len(n, real=True) + + tmp = np.empty((2, next_fast_n)) + tmp[0, :m] = Q[::-1] + tmp[0, m:] = 0.0 + tmp[1, :n] = T + tmp[1, n:] = 0.0 + fft_2d = r2c(True, tmp, axis=-1) + + return c2r(False, np.multiply(fft_2d[0], fft_2d[1]), n=next_fast_n)[m - 1 : n] def _sliding_dot_product(Q, T): diff --git a/tests/test_sdp.py b/tests/test_sdp.py index 41e956274..b8d98eeb9 100644 --- a/tests/test_sdp.py +++ b/tests/test_sdp.py @@ -1,3 +1,7 @@ +import inspect +import warnings +from operator import eq, lt + import naive import numpy as np import pytest @@ -5,32 +9,146 @@ from stumpy import sdp -test_data = [ - (np.array([-1, 1, 2], dtype=np.float64), np.array(range(5), dtype=np.float64)), +# README +# Real FFT algorithm performs more efficiently when the length +# of the input array `arr` is composed of small prime factors. +# The next_fast_len(arr, real=True) function from Scipy returns +# the same length if len(arr) is composed of a subset of +# prime numbers 2, 3, 5. Therefore, these radices are +# considered as the most efficient for the real FFT algorithm. + +# To ensure that the tests cover different cases, the following cases +# are considered: +# 1. len(T) is even, and len(T) == next_fast_len(len(T), real=True) +# 2. len(T) is odd, and len(T) == next_fast_len(len(T), real=True) +# 3. len(T) is even, and len(T) < next_fast_len(len(T), real=True) +# 4. len(T) is odd, and len(T) < next_fast_len(len(T), real=True) +# And 5. a special case of 1, where len(T) is power of 2. + +# Therefore: +# 1. len(T) is composed of 2 and a subset of {3, 5} +# 2. len(T) is composed of a subset of {3, 5} +# 3. len(T) is composed of a subset of {7, 11, 13, ...} and 2 +# 4. len(T) is composed of a subset of {7, 11, 13, ...} +# 5. len(T) is power of 2 + +# In some cases, the prime factors are raised to a power of +# certain degree to increase the length of array to be around +# 1000-2000. This allows us to test sliding_dot_product for +# wider range of query lengths. + +# test cases 1-4 +test_inputs = [ + # Input format: + # ( + # len(T), + # remainder, # from `len(T) % 2` + # comparator, # for len(T) comparator next_fast_len(len(T), real=True) + # ) + ( + 2 * (3**2) * (5**3), + 0, + eq, + ), # = 2250, Even `len(T)`, and `len(T) == next_fast_len(len(T), real=True)` + ( + (3**2) * (5**3), + 1, + eq, + ), # = 1125, Odd `len(T)`, and `len(T) == next_fast_len(len(T), real=True)`. + ( + 2 * 7 * 11 * 13, + 0, + lt, + ), # = 2002, Even `len(T)`, and `len(T) < next_fast_len(len(T), real=True)` ( - np.array([9, 8100, -60], dtype=np.float64), - np.array([584, -11, 23, 79, 1001], dtype=np.float64), - ), - (np.random.uniform(-1000, 1000, [8]), np.random.uniform(-1000, 1000, [64])), + 7 * 11 * 13, + 1, + lt, + ), # = 1001, Odd `len(T)`, and `len(T) < next_fast_len(len(T), real=True)` ] -@pytest.mark.parametrize("Q, T", test_data) -def test_njit_sliding_dot_product(Q, T): - ref_mp = naive.rolling_window_dot_product(Q, T) - comp_mp = sdp._njit_sliding_dot_product(Q, T) - npt.assert_almost_equal(ref_mp, comp_mp) +def get_sdp_function_names(): + out = [] + for func_name, func in inspect.getmembers(sdp, inspect.isfunction): + if func_name.endswith("sliding_dot_product"): + out.append(func_name) + + return out + + +@pytest.mark.parametrize("n_T, remainder, comparator", test_inputs) +def test_sdp(n_T, remainder, comparator): + # test_sdp for cases 1-4 + + n_Q_prime = [ + 2, + 3, + 5, + 7, + 11, + 13, + 17, + 19, + 23, + 29, + 31, + 37, + 41, + 43, + 47, + 53, + 59, + 61, + 67, + 71, + 73, + 79, + 83, + 89, + 97, + ] + n_Q_power2 = [2, 4, 8, 16, 32, 64] + n_Q_values = n_Q_prime + n_Q_power2 + [n_T] + n_Q_values = sorted(n_Q for n_Q in set(n_Q_values) if n_Q <= n_T) + + for n_Q in n_Q_values: + Q = np.random.rand(n_Q) + T = np.random.rand(n_T) + ref = naive.rolling_window_dot_product(Q, T) + for func_name in get_sdp_function_names(): + func = getattr(sdp, func_name) + try: + comp = func(Q, T) + npt.assert_allclose(comp, ref) + except Exception as e: # pragma: no cover + msg = f"Error in {func_name}, with n_Q={n_Q} and n_T={n_T}" + warnings.warn(msg) + raise e + + +def test_sdp_power2(): + # test for case 5. len(T) is power of 2 + pmin = 3 + pmax = 13 + for func_name in get_sdp_function_names(): + func = getattr(sdp, func_name) + try: + for q in range(pmin, pmax + 1): + n_Q = 2**q + for p in range(q, pmax + 1): + n_T = 2**p + Q = np.random.rand(n_Q) + T = np.random.rand(n_T) -@pytest.mark.parametrize("Q, T", test_data) -def test_convolve_sliding_dot_product(Q, T): - ref_mp = naive.rolling_window_dot_product(Q, T) - comp_mp = sdp._convolve_sliding_dot_product(Q, T) - npt.assert_almost_equal(ref_mp, comp_mp) + ref = naive.rolling_window_dot_product(Q, T) + comp = func(Q, T) + npt.assert_allclose(comp, ref) + except Exception as e: # pragma: no cover + msg = f"Error in {func_name}, with q={q} and p={p}" + warnings.warn(msg) + raise e -@pytest.mark.parametrize("Q, T", test_data) -def test_sliding_dot_product(Q, T): - ref_mp = naive.rolling_window_dot_product(Q, T) - comp_mp = sdp._sliding_dot_product(Q, T) - npt.assert_almost_equal(ref_mp, comp_mp) + return