chime-experiment · ssiegelx · Sep 22, 2022 · Jul 8, 2022 · Jul 8, 2022 · Jul 10, 2022
diff --git a/ch_util/cal_utils.py b/ch_util/cal_utils.py
@@ -2056,6 +2056,8 @@ def interpolate_gain(freq, gain, weight, flag=None, length_scale=30.0):
 
     nfreq, ninput = gain.shape
 
+    iscomplex = np.any(np.iscomplex(gain))
+
     interp_gain = gain.copy()
     interp_weight = weight.copy()
 
@@ -2073,9 +2075,13 @@ def interpolate_gain(freq, gain, weight, flag=None, length_scale=30.0):
             xtest = x[test, :]
 
             xtrain = x[train, :]
-            ytrain = np.hstack(
-                (gain[train, ii, np.newaxis].real, gain[train, ii, np.newaxis].imag)
-            )
+            if iscomplex:
+                ytrain = np.hstack(
+                    (gain[train, ii, np.newaxis].real, gain[train, ii, np.newaxis].imag)
+                )
+            else:
+                ytrain = gain[train, ii, np.newaxis].real
+
             # Mean subtract
             ytrain_mu = np.mean(ytrain, axis=0, keepdims=True)
             ytrain = ytrain - ytrain_mu
@@ -2091,23 +2097,31 @@ def interpolate_gain(freq, gain, weight, flag=None, length_scale=30.0):
                 )
                 ** 2
             )
+
             # Define kernel
-            kernel = ConstantKernel(var) * Matern(
-                length_scale=length_scale, length_scale_bounds="fixed", nu=1.5
-            )
+            kernel = ConstantKernel(
+                constant_value=var, constant_value_bounds=(0.01 * var, 100.0 * var)
+            ) * Matern(length_scale=length_scale, length_scale_bounds="fixed", nu=1.5)
 
             # Regress against non-flagged data
             gp = gaussian_process.GaussianProcessRegressor(
                 kernel=kernel, alpha=alpha[train, ii]
             )
+
             gp.fit(xtrain, ytrain)
 
             # Predict error
             ypred, err_ypred = gp.predict(xtest, return_std=True)
 
-            interp_gain[test, ii] = (ypred[:, 0] + ytrain_mu[:, 0]) + 1.0j * (
-                ypred[:, 1] + ytrain_mu[:, 1]
-            )
+            interp_gain[test, ii] = ypred[:, 0] + ytrain_mu[:, 0]
+            if iscomplex:
+                interp_gain[test, ii] += 1.0j * (ypred[:, 1] + ytrain_mu[:, 1])
+
+            if err_ypred.ndim > 1:
+                err_ypred = np.sqrt(
+                    np.sum(err_ypred**2, axis=-1) / err_ypred.shape[-1]
+                )
+
             interp_weight[test, ii] = tools.invert_no_zero(err_ypred**2)
 
         else:

diff --git a/ch_util/rfi.py b/ch_util/rfi.py
@@ -610,6 +610,174 @@ def nanmedian(*args, **kwargs):
         return np.nanmedian(*args, **kwargs)
 
 
+# Iterative HPF masking for identifying narrow-band features in gains or spectra
+def highpass_delay_filter(freq, tau_cut, flag, epsilon=1e-10):
+    """Construct a high-pass delay filter.
+
+    The stop band will range from [-tau_cut, tau_cut].
+    DAYENU is used to construct the filter in the presence
+    of masked frequencies.  See Ewall-Wice et al. 2021
+    (arXiv:2004.11397) for a description.
+
+    Parameters
+    ----------
+    freq : np.ndarray[nfreq,]
+        Frequency in MHz.
+    tau_cut : float
+        The half width of the stop band in micro-seconds.
+    flag : np.ndarray[nfreq,]
+        Boolean flag that indicates what frequencies are valid.
+    epsilon : float
+        The stop-band rejection of the filter.
+
+    Returns
+    -------
+    pinv : np.ndarray[nfreq, nfreq]
+        High pass delay filter.
+    """
+
+    nfreq = freq.size
+    assert (flag.ndim == 1) and (flag.size == nfreq)
+
+    mflag = (flag[:, np.newaxis] & flag[np.newaxis, :]).astype(np.float64)
+
+    cov = np.eye(nfreq, dtype=np.float64)
+    cov += (
+        np.sinc(2.0 * tau_cut * (freq[:, np.newaxis] - freq[np.newaxis, :])) / epsilon
+    )
+
+    pinv = np.linalg.pinv(cov * mflag, hermitian=True) * mflag
+
+    return pinv
+
+
+def iterative_hpf_masking(
+    freq,
+    y,
+    flag=None,
+    tau_cut=0.60,
+    epsilon=1e-10,
+    window=65,
+    threshold=6.0,
+    nperiter=1,
+    niter=40,
+):
+    """Mask features in a spectrum that have significant power at high delays.
+
+    Uses the following iterative procedure to generate the mask:
+
+        - Apply a high-pass filter to the spectrum.
+        - For each frequency channel, calculate the median absolute
+          deviation of nearby frequency channels to get an estimate
+          of the noise.  Divide the high-pass filtered spectrum by
+          the noise estimate.
+        - Mask excursions with the largest signal to noise.
+        - Regenerate the high-pass filter using the new mask.
+        - Repeat.
+
+    The procedure stops when the maximum number of iterations is reached
+    or there are no excursions beyond some threshold.
+
+    Parameters
+    ----------
+    freq: np.ndarray[nfreq,]
+        Frequency in MHz.
+    y: np.ndarray[nfreq,]
+        Spectrum to search for narrowband features.
+    flag: np.ndarray[nfreq,]
+        Boolean flag where True indicates valid data.
+    tau_cut: float
+        Cutoff of the high-pass filter in microseconds.
+    epsilon: float
+        Stop-band rejection of the filter.
+    threshold: float
+        Number of median absolute deviations beyond which
+        a frequency channel is considered an outlier.
+    window: int
+        Width of the window used to estimate the noise
+        (by calculating a local median absolute deviation).
+    nperiter: int
+        Maximum number of frequency channels to flag
+        on any iteration.
+    niter: int
+        Maximum number of iterations.
+
+    Returns
+    -------
+    yhpf: np.ndarray[nfreq,]
+        The high-pass filtered spectrum generated using
+        the mask from the last iteration.
+    flag: np.ndarray[nfreq,]
+        Boolean flag where True indicates valid data.
+        This is the logical complement to the mask
+        from the last iteration.
+    rsigma: np.ndarray[nfreq,]
+        The local median absolute deviation from the last
+        iteration.
+    """
+
+    from caput import weighted_median
+
+    assert y.ndim == 1
+
+    # Make sure the frequencies are float64, otherwise
+    # can have problems with construction of filter
+    freq = freq.astype(np.float64)
+
+    # Make sure the size of the window is odd
+    window = window + int(not (window % 2))
+
+    # If an initial flag was not provided, then use the static rfi mask.
+    if flag is None:
+        flag = ~frequency_mask(freq)
+
+    # We will be updating the flags on each iteration.  Make a copy of
+    # the input so that we do not overwrite.
+    new_flag = flag.copy()
+
+    # Iterate
+    itt = 0
+    while itt < niter:
+
+        # Construct the filter using the current mask
+        NF = highpass_delay_filter(freq, tau_cut, new_flag, epsilon=epsilon)
+
+        # Apply the filter
+        yhpf = np.matmul(NF, y)
+
+        # Calculate the local median absolute deviation
+        ry = np.ascontiguousarray(yhpf.astype(np.float64))
+        w = np.ascontiguousarray(new_flag.astype(np.float64))
+
+        rsigma = 1.48625 * weighted_median.moving_weighted_median(
+            np.abs(ry), w, window, method="split"
+        )
+
+        # Calculate the signal to noise
+        rs2n = np.abs(yhpf * tools.invert_no_zero(rsigma))
+
+        # Identify frequency channels that are above the signal to noise threshold
+        above_threshold = np.flatnonzero(rs2n > threshold)
+
+        if above_threshold.size == 0:
+            break
+
+        # Find the largest nperiter frequency channels that are above the threshold
+        ibad = above_threshold[np.argsort(-np.abs(yhpf[above_threshold]))[0:nperiter]]
+
+        # Flag those frequency channels, increment the counter
+        new_flag[ibad] = False
+
+        itt += 1
+
+    # Construct and apply the filter using the final flag
+    NF = highpass_delay_filter(freq, tau_cut, new_flag, epsilon=epsilon)
+
+    yhpf = np.matmul(NF, y)
+
+    return yhpf, new_flag, rsigma
+
+
 # Scale-invariant rank (SIR) functions
 def sir1d(basemask, eta=0.2):
     """Numpy implementation of the scale-invariant rank (SIR) operator.