Source code for autoarray.dataset.interferometer.dataset

import logging
import numpy as np
from typing import Optional

from autoconf.fitsable import ndarray_via_fits_from
from autoconf import cached_property

from autoarray.dataset.abstract.dataset import AbstractDataset
from autoarray.dataset.grids import GridsDataset
from autoarray.inversion.inversion.interferometer.inversion_interferometer_util import (
    InterferometerSparseOperator,
)
from autoarray.operators.transformer import TransformerDFT
from autoarray.operators.transformer import TransformerNUFFT
from autoarray.mask.mask_2d import Mask2D
from autoarray.structures.visibilities import Visibilities
from autoarray.structures.visibilities import VisibilitiesNoiseMap

from autoarray import exc
from autoarray.inversion.inversion.interferometer import (
    inversion_interferometer_util,
)

logger = logging.getLogger(__name__)


[docs] class Interferometer(AbstractDataset):
[docs] def __init__( self, data: Visibilities, noise_map: VisibilitiesNoiseMap, uv_wavelengths: np.ndarray, real_space_mask: Mask2D, transformer_class=TransformerNUFFT, sparse_operator: Optional[InterferometerSparseOperator] = None, raise_error_dft_visibilities_limit: bool = True, ): """ An interferometer dataset, containing the visibilities data, noise-map, real-space msk, Fourier transformer and associated quantities for calculations like the grid. This object is the input to the `FitInterferometer` object, which fits the dataset with model visibilities and quantifies the goodness-of-fit via a residual map, likelihood, chi-squared and other quantities. The following quantities of the interferometer data are available and used for the following tasks: - `data`: The visibilities data, which shows the signal that is analysed and fitted with model visibilities. - `noise_map`: The RMS standard deviation error in every visibility, which is used to compute the chi-squared value and likelihood of a fit. - `uv_wavelengths`: The baselines of the interferometer which are used to Fourier transform a real space image to the uv-plane. `real_space_mask`: Defines in real space where the signal is present. This mask is used to transform images to Fourier space via the Fourier transform. The grids contained in the settings are aligned with this mask. The dataset also has a number of (y,x) grids of coordinates associated with it, which map to the centres of its image pixels. They are used for performing calculations which map directly to the data and have over sampling calculations built in which approximate the 2D line integral of these calculations within a pixel. This is explained in more detail in the `GridsDataset` class. Parameters ---------- data The array of the visibilities data containing the signal that is fitted. noise_map An array describing the RMS standard deviation error in each visibility used for computing quantities like the chi-squared in a fit. uv_wavelengths The baselines of the interferometer which are used to Fourier transform a real space image to the uv-plane. real_space_mask Defines in real space where the signal is present. This mask is used to transform images to Fourier space via the Fourier transform. The grids contained in the settings are aligned with this mask. noise_covariance_matrix A noise-map covariance matrix representing the covariance between noise in every `data` value, which can be used via a bespoke fit to account for correlated noise in the data. transformer_class The class of the Fourier Transform which maps images from real space to Fourier space visibilities and the uv-plane. sparse_operator A precomputed `InterferometerSparseOperator` containing the NUFFT precision matrix for efficient pixelized source reconstruction. This is computed via `apply_sparse_operator()` and can be passed here directly to avoid recomputing it (e.g. when loading a cached result from disk). raise_error_dft_visibilities_limit If `True`, an exception is raised if the dataset has more than 10,000 visibilities and `transformer_class=TransformerDFT`. The DFT is too slow for large datasets and `TransformerNUFFT` should be used instead. Set to `False` to suppress this check. """ self.real_space_mask = real_space_mask super().__init__( data=data, noise_map=noise_map, over_sample_size_lp=1, over_sample_size_pixelization=1, ) self.uv_wavelengths = uv_wavelengths self.transformer = transformer_class( uv_wavelengths=uv_wavelengths, real_space_mask=real_space_mask, ) self.grids = GridsDataset( mask=self.real_space_mask, over_sample_size_lp=self.over_sample_size_lp, over_sample_size_pixelization=self.over_sample_size_pixelization, ) self.sparse_operator = sparse_operator if raise_error_dft_visibilities_limit: if ( self.uv_wavelengths.shape[0] > 10000 and transformer_class == TransformerDFT ): raise exc.DatasetException( """ Interferometer datasets with more than 10,000 visibilities should use the TransformerNUFFT class for efficient Fourier transforms between real and uv-space. The DFT (Discrete Fourier Transform) is too slow for large datasets. If you are certain you want to use the TransformerDFT class, you can disable this error by passing the input `raise_error_dft_visibilities_limit=False` when loading the Interferometer dataset. """ )
[docs] @classmethod def from_fits( cls, data_path, noise_map_path, uv_wavelengths_path, real_space_mask, visibilities_hdu=0, noise_map_hdu=0, uv_wavelengths_hdu=0, transformer_class=TransformerNUFFT, raise_error_dft_visibilities_limit: bool = True, ): """ Load an interferometer dataset from multiple .fits files. The visibilities (complex-valued Fourier-space data), noise map and uv_wavelengths (baseline coordinates) are each loaded from separate .fits files. A real-space mask defining the sky region used for Fourier transforms must be supplied separately. The visibilities are assumed to be stored as a 2D array of shape (total_visibilities, 2) where column 0 is the real component and column 1 is the imaginary component. The noise map has the same shape. The uv_wavelengths are a 2D array of shape (total_visibilities, 2) with columns corresponding to the (u, v) baseline coordinates in units of wavelengths. Parameters ---------- data_path The path to the .fits file containing the visibility data (e.g. '/path/to/visibilities.fits'). noise_map_path The path to the .fits file containing the visibility noise map (e.g. '/path/to/noise_map.fits'). uv_wavelengths_path The path to the .fits file containing the (u, v) baseline coordinates in units of wavelengths (e.g. '/path/to/uv_wavelengths.fits'). real_space_mask A `Mask2D` defining the real-space region of the sky that contains signal. This mask determines the pixel grid used by the Fourier transformer and the coordinate grids associated with the dataset. visibilities_hdu The HDU index in the visibilities .fits file from which data is loaded. noise_map_hdu The HDU index in the noise map .fits file from which data is loaded. uv_wavelengths_hdu The HDU index in the uv_wavelengths .fits file from which data is loaded. transformer_class The class of the Fourier Transform which maps images from real space to Fourier space visibilities. Defaults to `TransformerNUFFT` for efficiency with large datasets. raise_error_dft_visibilities_limit If True (default), raise a `DatasetException` when ``transformer_class`` is `TransformerDFT` and the dataset has more than 10,000 visibilities. Set to False to opt into the slow DFT path at ALMA-scale (e.g. when profiling the JAX-traceable DFT path before a JIT-friendly NUFFT is available). Returns ------- Interferometer The interferometer dataset loaded from the .fits files. """ visibilities = Visibilities.from_fits(file_path=data_path, hdu=visibilities_hdu) noise_map = VisibilitiesNoiseMap.from_fits( file_path=noise_map_path, hdu=noise_map_hdu ) uv_wavelengths = ndarray_via_fits_from( file_path=uv_wavelengths_path, hdu=uv_wavelengths_hdu ) return Interferometer( real_space_mask=real_space_mask, data=visibilities, noise_map=noise_map, uv_wavelengths=uv_wavelengths, transformer_class=transformer_class, raise_error_dft_visibilities_limit=raise_error_dft_visibilities_limit, )
[docs] def apply_sparse_operator( self, nufft_precision_operator=None, batch_size: int = 128, chunk_k: int = 2048, show_progress: bool = False, show_memory: bool = False, use_jax: bool = False, ): """ Precompute the NUFFT precision operator for efficient pixelized source reconstruction. The sparse linear algebra formalism precomputes the Fourier Transform response matrix for all visibility baselines, enabling fast repeated evaluation during model fitting. This avoids recomputing the full NUFFT on every likelihood call. The resulting `InterferometerSparseOperator` is stored on the returned `Interferometer` dataset and is used automatically by `FitInterferometer` when performing pixelized reconstructions via the inversion module. Computing the NUFFT precision matrix from scratch can be very slow (runtime scales with both the number of visibilities and the real-space mask resolution — potentially hours for large datasets). The result can be cached to disk and reloaded to avoid recomputation. Parameters ---------- nufft_precision_operator An already computed NUFFT precision matrix for this dataset (e.g. loaded from disk via `np.load`) to avoid an expensive recomputation. If `None` it is computed from scratch by calling `psf_precision_operator_from()`. batch_size The number of real-space pixels processed per batch when building the sparse operator. Reducing this lowers peak memory usage at the cost of speed. chunk_k The number of visibilities processed per chunk when computing the NUFFT precision matrix inside `psf_precision_operator_from()`. Reducing this lowers peak memory usage. show_progress If `True`, a progress bar is displayed while computing the NUFFT precision matrix. show_memory If `True`, memory usage statistics are printed while computing the NUFFT precision matrix. use_jax If `True`, JAX is used to accelerate the NUFFT precision matrix computation. Returns ------- Interferometer A new `Interferometer` dataset with the precomputed `InterferometerSparseOperator` attached, enabling efficient pixelized source reconstruction via the sparse linear algebra formalism. """ if nufft_precision_operator is None: logger.info( "INTERFEROMETER - Computing NUFFT Precision Operator; runtime scales with visibility count and mask resolution, CPU run times may exceed hours." ) nufft_precision_operator = self.psf_precision_operator_from( chunk_k=chunk_k, show_progress=show_progress, show_memory=show_memory, use_jax=use_jax, ) dirty_image = self.transformer.image_from( visibilities=self.data.real * self.noise_map.real**-2.0 + 1j * self.data.imag * self.noise_map.imag**-2.0, use_adjoint_scaling=True, ) sparse_operator = inversion_interferometer_util.InterferometerSparseOperator.from_nufft_precision_operator( nufft_precision_operator=nufft_precision_operator, dirty_image=dirty_image.array, batch_size=batch_size, ) return Interferometer( real_space_mask=self.real_space_mask, data=self.data, noise_map=self.noise_map, uv_wavelengths=self.uv_wavelengths, transformer_class=lambda uv_wavelengths, real_space_mask: self.transformer, sparse_operator=sparse_operator, )
[docs] def psf_precision_operator_from( self, chunk_k: int = 2048, show_progress: bool = False, show_memory: bool = False, use_jax: bool = False, ): """ Compute the NUFFT precision matrix for this interferometer dataset. The precision matrix encodes the response of every real-space pixel to every visibility baseline, weighted by the noise map. It is the core precomputed quantity required for efficient pixelized source reconstruction via the sparse linear algebra formalism. This computation can be very slow for large datasets (runtime scales with the number of visibilities multiplied by the number of unmasked real-space pixels). For datasets with tens of thousands of visibilities and high-resolution masks, computation can take hours on a CPU. The result should be saved to disk and reloaded rather than recomputed on each run. Use `apply_sparse_operator(nufft_precision_operator=...)` to attach a cached result. Parameters ---------- chunk_k The number of visibilities processed per chunk. Reducing this lowers peak memory usage during computation at the cost of speed. show_progress If `True`, a progress bar is shown during computation. show_memory If `True`, memory usage statistics are printed during computation. use_jax If `True`, JAX is used to accelerate the computation. Returns ------- np.ndarray The NUFFT precision matrix of shape (total_pixels, total_pixels) where total_pixels is the number of unmasked real-space pixels. """ return inversion_interferometer_util.nufft_precision_operator_from( noise_map_real=self.noise_map.array.real, uv_wavelengths=self.uv_wavelengths, shape_masked_pixels_2d=self.transformer.grid.mask.shape_native_masked_pixels, grid_radians_2d=self.transformer.grid.mask.derive_grid.all_false.in_radians.native.array, chunk_k=chunk_k, show_memory=show_memory, show_progress=show_progress, use_jax=use_jax, )
@property def mask(self): """ The real-space mask of the interferometer dataset. For an interferometer, this is the `real_space_mask` which defines the region of sky that contains signal. It is used as the spatial domain for the Fourier transform, determining the pixel grid size and coordinate grids. """ return self.real_space_mask @property def amplitudes(self): """ The amplitudes of the complex visibilities, defined as the absolute value of each visibility: amplitude = sqrt(real^2 + imag^2). """ return self.data.amplitudes @property def phases(self): """ The phases of the complex visibilities in radians, defined as arctan(imag / real) for each visibility. """ return self.data.phases @property def uv_distances(self): """ The radial distance of each visibility baseline from the origin of the UV-plane, in units of wavelengths. Computed as sqrt(u^2 + v^2) for each (u, v) baseline pair. """ return np.sqrt( np.square(self.uv_wavelengths[:, 0]) + np.square(self.uv_wavelengths[:, 1]) ) @property def dirty_image(self): """ The dirty image, computed as the inverse Fourier transform of the observed visibilities. This is the raw image obtained by back-projecting the visibilities without any deconvolution. It provides a quick visual representation of the data but is convolved with the synthesized beam (the Fourier transform of the UV-plane sampling function). """ return self.transformer.image_from(visibilities=self.data) @property def dirty_noise_map(self): """ The dirty noise map, computed as the inverse Fourier transform of the noise map visibilities. Provides a real-space representation of the noise levels in the dirty image. """ return self.transformer.image_from(visibilities=self.noise_map) @property def dirty_signal_to_noise_map(self): """ The dirty signal-to-noise map, computed as the inverse Fourier transform of the complex signal-to-noise visibility map. """ return self.transformer.image_from(visibilities=self.signal_to_noise_map) @property def signal_to_noise_map(self): """ The complex signal-to-noise map of the visibilities. Computed separately for the real and imaginary components as data / noise_map. Values below zero are clamped to zero, as negative signal-to-noise is not physically meaningful. Unlike the base class implementation (which operates on real-valued data), this override handles the complex nature of interferometric visibilities by treating the real and imaginary parts independently. """ signal_to_noise_map_real = np.divide( np.real(self.data.array), np.real(self.noise_map.array) ) signal_to_noise_map_real[signal_to_noise_map_real < 0] = 0.0 signal_to_noise_map_imag = np.divide( np.imag(self.data.array), np.imag(self.noise_map.array) ) signal_to_noise_map_imag[signal_to_noise_map_imag < 0] = 0.0 return self.data.with_new_array( signal_to_noise_map_real + 1j * signal_to_noise_map_imag ) @property def psf(self): """ Returns `None` for interferometer datasets. Interferometers do not have a Point Spread Function in the same sense as imaging datasets. The equivalent quantity is the synthesized beam, which is determined by the UV-plane coverage and is not stored explicitly. This property exists to satisfy the `AbstractDataset` interface. """ return None