Coverage for python/lsst/images/tests/_minify_for_fixtures.py: 0%

226 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-03 08:08 +0000

1# This file is part of lsst-images. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12"""Minify a real on-disk archive into a small JSON test fixture. 

13 

14Reads a FITS or NDF file via the appropriate input archive, takes a 

15small subset of the in-memory object, and writes JSON via 

16``JsonOutputArchive``. Used to populate ``tests/data/schema_v1/legacy/`` 

17with derived-from-real test data that exercises the full read path 

18including the absence-of-stamp legacy default. 

19 

20Per top-level type the subset rule is: 

21 

22 VisitImage Crop the image/mask/variance planes to a small (~16x16) 

23 corner, keeping the real single-instance structures (PSF 

24 such as Piff, detector frames) that synthetic fixtures 

25 cannot reproduce -- the whole point of deriving a fixture 

26 from real data. Homogeneous repeated collections (detector 

27 amplifiers, aperture-correction entries) are trimmed to a 

28 representative few, since one entry exercises the schema as 

29 well as sixteen. The projection's pixel->sky mapping is 

30 replaced by its linear (affine) approximation over the kept 

31 box: a real TAN-SIP WCS serializes as a ~100 KB AST 

32 polynomial dump, but over a 16x16 box it is linear to far 

33 below a pixel, so the affine form is schema-identical and 

34 orders of magnitude smaller. A Piff PSF's field 

35 interpolation is truncated to a low order (the order-4 

36 solution table is ~225 KB; order 0, the field-averaged PSF, 

37 is schema-identical and ~13x smaller). 

38 

39 CellCoadd Crop to a small block of cells (preferring a block that 

40 includes a missing cell so the sparse-grid path is 

41 exercised) and then *morph* that block onto a tiny cell 

42 grid: each cell's planes are decimated from the native 

43 cell size down to a few pixels and re-stitched, and the 

44 PSF kernels are cropped to a small odd window. The grid 

45 topology (number of cells, the missing-cell set, band, 

46 mask schema and provenance shape) is preserved; the pixel 

47 values and WCS are *not* physically meaningful. This is 

48 the "morph cells in place" fallback: it sidesteps the 

49 outer-ring problem (inputs/PSFs that overlap kept cells) 

50 by rebuilding a self-consistent miniature coadd rather 

51 than trying to carve an accurate subset out of the real 

52 one. An accurate per-cell subset would inline several 

53 150x150 planes per cell and produce multi-megabyte JSON, 

54 which defeats the purpose of a fixture. 

55 

56Run interactively (CellCoadd works with just this package installed; 

57VisitImage needs a full Rubin environment so the real PSF can be read):: 

58 

59 python -c " 

60 from lsst.images.tests._minify_for_fixtures import minify 

61 minify('cell_example.fits', 'tests/data/schema_v1/legacy/cell_coadd.json') 

62 minify('dp1.fits', 'tests/data/schema_v1/legacy/visit_image_dp1.json') 

63 minify('dp2.fits', 'tests/data/schema_v1/legacy/visit_image_dp2.json') 

64 " 

65 

66The helper is invoked manually by developers when they have a real 

67on-disk file to derive from; it is not exercised by CI. 

68""" 

69 

70from __future__ import annotations 

71 

72__all__ = ("minify",) 

73 

74import json 

75import os 

76from collections.abc import Callable 

77from typing import Any 

78 

79import astropy.io.fits 

80import numpy as np 

81 

82from .. import VisitImage 

83from .. import json as images_json 

84from .._cell_grid import CellGrid, CellGridBounds, CellIJ, PatchDefinition 

85from .._geom import YX, Box 

86from .._image import Image 

87from .._mask import Mask 

88from .._transforms import Projection, TractFrame, Transform 

89from .._transforms._ast import PolyMap 

90from ..cells import CellCoadd 

91from ..cells._provenance import CoaddProvenance 

92from ..cells._psf import CellPointSpreadFunction 

93from ..fits import read as fits_read 

94from ..psfs import PiffWrapper 

95from ._creation import make_random_projection 

96 

97# Default morph parameters for CellCoadd. ``CELL_SIZE`` should divide the 

98# native cell size evenly; ``KERNEL_SIZE`` must be odd. ``MAX_INPUTS`` caps 

99# the provenance ``inputs`` table (a real coadd has hundreds of visits); the 

100# full provenance schema is already exercised by the ``coadd_provenance`` 

101# fixture, so here we keep just enough rows to be representative. 

102_CELL_SIZE = 6 

103_KERNEL_SIZE = 5 

104_MAX_INPUTS = 6 

105 

106# Default trim parameters for VisitImage. Amplifiers and aperture-correction 

107# entries are homogeneous collections, so a couple of each cover the schema 

108# just as well as the full set (a real detector has 16 amplifiers and dozens 

109# of aperture corrections). 

110_MAX_AMPLIFIERS = 2 

111_MAX_APERTURE_CORRECTIONS = 2 

112 

113# Field-interpolation order to truncate a Piff PSF to (the solution table of a 

114# real order-4 PixelGrid PSF dominates the fixture at ~225 KB). Order 0 is the 

115# field-averaged PSF; set to `None` to leave the PSF untouched. 

116_PSF_INTERP_ORDER = 0 

117 

118# Maximum permitted deviation (radians) when approximating a projection's 

119# pixel->sky mapping with an affine one. Over a fixture's tiny box the real 

120# mapping is linear well below this, so the fit always succeeds. 

121_PROJECTION_LINEAR_APPROX_TOL = 1e-8 

122 

123 

124def minify(in_path: str, out_path: str, *, schema_name: str | None = None) -> None: 

125 """Read a real archive at ``in_path``, take a small subset, and write JSON. 

126 

127 Parameters 

128 ---------- 

129 in_path 

130 Path to a FITS (``.fits`` / ``.fits.gz``) or NDF (``.sdf`` / ``.ndf``) 

131 file to read. 

132 out_path 

133 Path to the JSON fixture to write. The parent directory is 

134 created if it does not exist. 

135 schema_name 

136 Top-level schema name (e.g. ``"visit_image"`` or ``"cell_coadd"``). 

137 If `None`, it is auto-detected from the file. 

138 

139 Raises 

140 ------ 

141 ValueError 

142 If the file extension is not recognised. 

143 NotImplementedError 

144 If the top-level type is not one this helper knows how to subset. 

145 """ 

146 if in_path.endswith(".fits") or in_path.endswith(".fits.gz"): 

147 backend = "fits" 

148 elif in_path.endswith(".sdf") or in_path.endswith(".ndf"): 

149 backend = "ndf" 

150 else: 

151 raise ValueError(f"Unrecognised file extension: {in_path}") 

152 

153 if schema_name is None: 

154 schema_name = _detect_schema_name(in_path, backend) 

155 

156 cls, subsetter = _dispatch(schema_name) 

157 

158 read = _read_function(backend) 

159 obj, _, _ = read(cls, in_path) 

160 subset = subsetter(obj) 

161 

162 tree = images_json.write(subset) 

163 dumped = tree.model_dump(mode="json") 

164 os.makedirs(os.path.dirname(os.path.abspath(out_path)), exist_ok=True) 

165 with open(out_path, "w") as stream: 

166 stream.write(json.dumps(dumped, indent=2, sort_keys=False) + "\n") 

167 

168 

169def _dispatch(schema_name: str) -> tuple[type, Callable[[Any], Any]]: 

170 """Return the ``(class, subsetter)`` pair for a top-level schema name.""" 

171 registry: dict[str, tuple[type, Callable[[Any], Any]]] = { 

172 "visit_image": (VisitImage, _subset_visit_image), 

173 "cell_coadd": (CellCoadd, _subset_cell_coadd), 

174 } 

175 try: 

176 return registry[schema_name] 

177 except KeyError: 

178 raise NotImplementedError( 

179 f"No minify rule for schema {schema_name!r}; supported: {sorted(registry)}." 

180 ) from None 

181 

182 

183def _read_function(backend: str) -> Callable[..., Any]: 

184 """Return the ``read`` free function for the given backend. 

185 

186 The NDF backend is imported lazily because it requires the optional 

187 ``h5py`` dependency; FITS-only users should not need it installed. 

188 """ 

189 if backend == "fits": 

190 return fits_read 

191 from ..ndf import read as ndf_read 

192 

193 return ndf_read 

194 

195 

196# -- Type detection -------------------------------------------------------- 

197 

198 

199def _detect_schema_name(in_path: str, backend: str) -> str: 

200 """Peek the stored top-level tree and return its schema name. 

201 

202 The schema name is derived from the ``schema_url`` field that every 

203 serialized tree carries, e.g. ``.../schemas/visit_image-1.0.0`` -> 

204 ``visit_image``. 

205 """ 

206 if backend == "fits": 

207 raw = _peek_fits_top_json(in_path) 

208 else: 

209 raw = _peek_ndf_top_json(in_path) 

210 url = raw.get("schema_url") 

211 if not url: 

212 raise NotImplementedError( 

213 f"Could not determine the schema of {in_path!r} (no schema_url " 

214 "in the top-level tree); pass schema_name explicitly." 

215 ) 

216 # schema_url is f".../schemas/{SCHEMA_NAME}-{SCHEMA_VERSION}". 

217 return url.rsplit("/", 1)[-1].rsplit("-", 1)[0] 

218 

219 

220def _peek_fits_top_json(in_path: str) -> dict[str, Any]: 

221 """Return the parsed top-level JSON object from a FITS archive. 

222 

223 Reads only the primary header and the special JSON HDU it points at (via 

224 the ``JSONADDR`` / ``JSONSIZE`` cards), so the potentially large image and 

225 table HDUs are never touched. 

226 """ 

227 with open(in_path, "rb") as stream: 

228 primary = astropy.io.fits.PrimaryHDU.readfrom(stream) 

229 try: 

230 json_address = primary.header["JSONADDR"] 

231 json_size = primary.header["JSONSIZE"] 

232 except KeyError: 

233 raise NotImplementedError( 

234 f"{in_path!r} is not an lsst.images FITS archive " 

235 "(no JSONADDR/JSONSIZE cards); pass schema_name explicitly." 

236 ) from None 

237 stream.seek(json_address) 

238 json_hdu = astropy.io.fits.BinTableHDU.fromstring(stream.read(json_size)) 

239 payload = bytes(json_hdu.data[0][0]) 

240 obj = json.loads(payload.decode("utf-8")) 

241 if not isinstance(obj, dict): 

242 raise NotImplementedError( 

243 f"Top-level JSON tree in {in_path!r} is not an object; pass schema_name explicitly." 

244 ) 

245 return obj 

246 

247 

248def _peek_ndf_top_json(in_path: str) -> dict[str, Any]: 

249 """Return the parsed top-level JSON object from an NDF archive.""" 

250 import h5py 

251 

252 found: dict[str, Any] = {} 

253 

254 def visit(name: str, item: Any) -> Any: 

255 if found: 

256 return True # stop walking 

257 if isinstance(item, h5py.Dataset) and name.rsplit("/", 1)[-1] == "JSON": 

258 try: 

259 payload = bytes(np.asarray(item).tobytes()) 

260 obj = json.loads(payload.decode("utf-8").rstrip("\x00").strip()) 

261 except (UnicodeDecodeError, ValueError, TypeError): 

262 return None 

263 if isinstance(obj, dict) and "schema_url" in obj: 

264 found.update(obj) 

265 return True 

266 return None 

267 

268 with h5py.File(in_path, "r") as handle: 

269 handle.visititems(visit) 

270 if not found: 

271 raise NotImplementedError( 

272 f"Could not locate the top-level JSON tree in {in_path!r}; pass schema_name explicitly." 

273 ) 

274 return found 

275 

276 

277# -- VisitImage ------------------------------------------------------------ 

278 

279 

280def _subset_visit_image( 

281 visit_image: VisitImage, 

282 *, 

283 size: int = 16, 

284 max_amplifiers: int = _MAX_AMPLIFIERS, 

285 max_aperture_corrections: int = _MAX_APERTURE_CORRECTIONS, 

286 linearize_projection: bool = True, 

287 projection_tol: float = _PROJECTION_LINEAR_APPROX_TOL, 

288 psf_interp_order: int | None = _PSF_INTERP_ORDER, 

289) -> VisitImage: 

290 """Crop a VisitImage's pixel planes to a small corner and trim its 

291 homogeneous collections. 

292 

293 The detector frames are a single structure carried through unchanged by 

294 ``__getitem__``. The detector's amplifiers and the aperture-correction map 

295 are repeated, schema-identical entries, so they are trimmed to a 

296 representative few. The projection's pixel->sky mapping is replaced by its 

297 affine approximation over the kept box (see ``_linear_approx_projection``) 

298 unless ``linearize_projection`` is false. A Piff PSF's field interpolation 

299 is truncated to ``psf_interp_order`` (see ``_simplify_piff_psf``) unless 

300 that is `None`. 

301 """ 

302 bbox = visit_image.bbox 

303 y0 = bbox.y.start 

304 x0 = bbox.x.start 

305 y1 = min(y0 + size, bbox.y.stop) 

306 x1 = min(x0 + size, bbox.x.stop) 

307 subset = visit_image[Box.factory[y0:y1, x0:x1]] 

308 

309 # ``subset`` is a fresh throwaway object whose detector amplifier list, 

310 # aperture-correction map and PSF are live, mutable components. Trim them 

311 # in place through the public accessors rather than reaching for private 

312 # attributes. 

313 del subset.detector.amplifiers[max_amplifiers:] 

314 aperture_corrections = subset.aperture_corrections 

315 for key in list(aperture_corrections)[max_aperture_corrections:]: 

316 del aperture_corrections[key] 

317 if psf_interp_order is not None and isinstance(subset.psf, PiffWrapper): 

318 _simplify_piff_psf(subset.psf, order=psf_interp_order) 

319 

320 if not linearize_projection or subset.projection is None: 

321 return subset 

322 

323 # The pixel planes carry the projection immutably (there is no public 

324 # setter for it), so install the affine approximation by rebuilding the 

325 # VisitImage from its public components with re-viewed planes. Only the 

326 # image plane's projection is actually serialized, but keeping all three 

327 # consistent avoids surprises. 

328 linear = _linear_approx_projection(subset.projection, subset.image.bbox, tol=projection_tol) 

329 return VisitImage( 

330 subset.image.view(projection=linear), 

331 mask=subset.mask.view(projection=linear), 

332 variance=subset.variance.view(projection=linear), 

333 projection=linear, 

334 psf=subset.psf, 

335 obs_info=subset.obs_info, 

336 bounds=subset.bounds, 

337 summary_stats=subset.summary_stats, 

338 detector=subset.detector, 

339 photometric_scaling=subset.photometric_scaling, 

340 aperture_corrections=subset.aperture_corrections, 

341 backgrounds=subset.backgrounds, 

342 band=subset.band, 

343 metadata=subset.metadata, 

344 ) 

345 

346 

347def _linear_approx_projection(projection: Projection, bbox: Box, *, tol: float) -> Projection: 

348 """Return a copy of ``projection`` whose pixel->sky mapping is replaced by 

349 its best linear (affine) approximation over ``bbox``. 

350 

351 Real WCS mappings (e.g. TAN-SIP) serialize as large AST polynomial dumps. 

352 Over the small box of a fixture they are linear to far below a pixel, so 

353 an affine approximation is schema-identical but orders of magnitude 

354 smaller. The result carries no FITS approximation (the affine is itself 

355 trivially FITS-representable). 

356 

357 This is written as a self-contained ``projection -> projection`` transform 

358 so it can be promoted to a public ``Projection.linear_approx(bbox, tol)`` 

359 method later with essentially no change. It assumes a 2-D pixel->sky 

360 mapping. 

361 

362 Parameters 

363 ---------- 

364 projection 

365 The projection to approximate. 

366 bbox 

367 Box (in pixel coordinates) over which the approximation must hold. 

368 tol 

369 Maximum permitted deviation from linearity, as a Cartesian 

370 displacement in the output (sky, radians) coordinates. AST raises 

371 ``RuntimeError`` if no fit within ``tol`` exists. 

372 """ 

373 transform = projection.pixel_to_sky_transform 

374 mapping = transform._ast_mapping 

375 lbnd = [bbox.x.start, bbox.y.start] 

376 ubnd = [bbox.x.stop, bbox.y.stop] 

377 # linearApprox yields [offsets; Jacobian] as a (1 + n_out, n_in) array on 

378 # both AST backends (astshim returns the flat buffer in the same order, so 

379 # the reshape recovers the same layout the starlink-pyast bridge returns). 

380 fit = np.asarray(mapping.linearApprox(lbnd, ubnd, tol), dtype=float).reshape(3, 2) 

381 offset = fit[0] # (lon0, lat0), radians 

382 jacobian = fit[1:] # jacobian[i, j] = d(out_i) / d(in_j), in = (x, y) 

383 jacobian_inv = np.linalg.inv(jacobian) 

384 forward = _affine_polymap_coeffs(jacobian, offset) 

385 inverse = _affine_polymap_coeffs(jacobian_inv, -jacobian_inv @ offset) 

386 affine = Transform( 

387 transform.in_frame, 

388 transform.out_frame, 

389 PolyMap(forward, inverse), 

390 in_bounds=projection.pixel_bounds, 

391 ) 

392 return affine.as_projection() 

393 

394 

395def _affine_polymap_coeffs(matrix: np.ndarray, offset: np.ndarray) -> np.ndarray: 

396 """Build AST ``PolyMap`` coefficients for ``out = matrix @ in + offset``. 

397 

398 Each row is ``[coefficient, output_axis (1-based), power_of_in_1, ...]``; 

399 one constant row plus one row per input per output axis. Returned as a 

400 float array, which is the form both AST backends require. 

401 """ 

402 n = len(offset) 

403 coeffs: list[list[float]] = [] 

404 for i in range(n): 

405 coeffs.append([float(offset[i]), i + 1, *([0] * n)]) 

406 for j in range(n): 

407 powers = [1 if k == j else 0 for k in range(n)] 

408 coeffs.append([float(matrix[i][j]), i + 1, *powers]) 

409 return np.array(coeffs, dtype=float) 

410 

411 

412def _simplify_piff_psf(psf: PiffWrapper, *, order: int) -> None: 

413 """Truncate a Piff PSF's field interpolation to ``order``, in place. 

414 

415 A real Piff PSF interpolates a per-pixel model across the focal plane with 

416 a high-order 2-D polynomial; that solution table dominates the serialized 

417 size (a 25x25 PixelGrid x order-4 polynomial is ~225 KB). Truncating to 

418 ``order`` keeps only the lowest-order field terms -- order 0 is the 

419 field-averaged PSF -- which is schema-identical but far smaller, and needs 

420 no stars or refit (the fitted ``stars`` are already dropped on serialize). 

421 

422 Only ``BasisPolynomial``-interpolated PSFs are handled; anything else (a 

423 higher-order model already at/under ``order``, a non-polynomial interp) is 

424 left untouched. 

425 

426 ``piff`` is imported lazily because it is an optional dependency; this is 

427 only ever reached when the PSF being simplified is itself a Piff PSF. 

428 """ 

429 interp = getattr(psf.piff_psf, "interp", None) 

430 if interp is None or type(interp).__name__ != "BasisPolynomial" or interp.q is None: 

431 return 

432 if order >= max(interp._orders): 

433 return 

434 

435 from piff import BasisPolynomial 

436 

437 # ``q`` has one column per active basis term; the terms are the True cells 

438 # of ``_mask`` in row-major (i, j) order (see BasisPolynomial.basis). Make 

439 # the same ordering for a lower-order interp and copy the shared columns. 

440 def _terms(orders: tuple[int, ...], mask: np.ndarray) -> list[tuple[int, ...]]: 

441 grids = np.meshgrid(*[np.arange(o + 1) for o in orders], indexing="ij") 

442 return list(zip(*(grid[mask].tolist() for grid in grids))) 

443 

444 old_terms = _terms(interp._orders, interp._mask) 

445 truncated = BasisPolynomial(order, keys=list(interp._keys)) 

446 new_terms = _terms(truncated._orders, truncated._mask) 

447 column_of = {term: index for index, term in enumerate(old_terms)} 

448 truncated.q = np.ascontiguousarray(interp.q[:, [column_of[term] for term in new_terms]]) 

449 psf.piff_psf.interp = truncated 

450 

451 

452# -- CellCoadd ------------------------------------------------------------- 

453 

454 

455def _subset_cell_coadd( 

456 cell_coadd: CellCoadd, 

457 *, 

458 cell_size: int = _CELL_SIZE, 

459 kernel_size: int = _KERNEL_SIZE, 

460 max_inputs: int = _MAX_INPUTS, 

461) -> CellCoadd: 

462 """Crop a CellCoadd to a small block of cells and morph it onto a tiny 

463 grid (see the module docstring for the rationale). 

464 """ 

465 if kernel_size % 2 == 0: 

466 raise ValueError(f"kernel_size must be odd, got {kernel_size}.") 

467 

468 # 1. Pick a block of (up to) 2x2 cells, preferring one that contains a 

469 # missing cell so the sparse-grid path is exercised. Falls back to the 

470 # first available block when the coadd is fully dense. 

471 block = cell_coadd[_choose_block_bbox(cell_coadd)] 

472 

473 grid = block.grid 

474 cs = grid.cell_shape 

475 start = block.bounds.grid_start 

476 stop = block.bounds.grid_stop 

477 n_i = stop.i - start.i 

478 n_j = stop.j - start.j 

479 

480 # 2. Build a tiny full-patch grid with the same cell *count* as the 

481 # original patch but ``cell_size`` pixels per cell, anchored at (0, 0). 

482 full_shape = grid.grid_shape 

483 new_grid = CellGrid( 

484 bbox=Box.factory[0 : full_shape.i * cell_size, 0 : full_shape.j * cell_size], 

485 cell_shape=YX(y=cell_size, x=cell_size), 

486 ) 

487 new_block_bbox = _scale_box_to_grid(block.bbox, grid, cell_size) 

488 new_bounds = CellGridBounds(grid=new_grid, bbox=new_block_bbox, missing=block.bounds.missing) 

489 

490 # 3. Decimate each plane. Because the block's planes tile the kept cells 

491 # contiguously, a uniform stride that maps one native cell onto 

492 # ``cell_size`` samples is equivalent to per-cell decimation. 

493 step_y = max(1, cs.y // cell_size) 

494 step_x = max(1, cs.x // cell_size) 

495 ny = n_i * cell_size 

496 nx = n_j * cell_size 

497 

498 def shrink2d(array: np.ndarray) -> np.ndarray: 

499 return np.ascontiguousarray(array[::step_y, ::step_x][:ny, :nx]) 

500 

501 def shrink3d(array: np.ndarray) -> np.ndarray: 

502 return np.ascontiguousarray(array[::step_y, ::step_x, :][:ny, :nx, :]) 

503 

504 # 4. Synthetic-but-valid projection over the tiny tract frame. 

505 rng = np.random.default_rng(0) 

506 tract_frame = TractFrame(skymap=cell_coadd.skymap, tract=cell_coadd.tract, bbox=new_grid.bbox) 

507 projection = make_random_projection(rng, tract_frame, new_block_bbox) 

508 

509 unit = cell_coadd.unit 

510 image = Image(shrink2d(block.image.array), bbox=new_block_bbox, unit=unit, projection=projection) 

511 mask = Mask(shrink3d(block.mask.array), schema=block.mask.schema, bbox=new_block_bbox) 

512 variance = Image(shrink2d(block.variance.array), bbox=new_block_bbox, unit=unit**2) 

513 mask_fractions = { 

514 name: Image(shrink2d(plane.array), bbox=new_block_bbox) 

515 for name, plane in block.mask_fractions.items() 

516 } 

517 noise_realizations = [ 

518 Image(shrink2d(plane.array), bbox=new_block_bbox) for plane in block.noise_realizations 

519 ] 

520 

521 # 5. Crop the PSF kernels to a small odd window about their centre, 

522 # keeping the (n_i, n_j) per-cell structure and NaN-for-missing cells. 

523 psf_array = block.psf._array 

524 ky, kx = psf_array.shape[2:] 

525 half = kernel_size // 2 

526 cy, cx = ky // 2, kx // 2 

527 psf_array = np.ascontiguousarray(psf_array[:, :, cy - half : cy + half + 1, cx - half : cx + half + 1]) 

528 psf = CellPointSpreadFunction(psf_array, bounds=new_bounds) 

529 

530 # 6. Patch geometry scaled onto the tiny grid; provenance and backgrounds 

531 # are reused as-is (provenance is cell-indexed and already subset). 

532 patch = PatchDefinition( 

533 id=block.patch.id, 

534 index=block.patch.index, 

535 inner_bbox=_scale_box_to_grid(block.patch.inner_bbox, grid, cell_size), 

536 cells=new_grid, 

537 ) 

538 

539 provenance = block._provenance 

540 if provenance is not None: 

541 provenance = _trim_provenance(provenance, max_inputs=max_inputs) 

542 

543 return CellCoadd( 

544 image, 

545 mask=mask, 

546 variance=variance, 

547 mask_fractions=mask_fractions, 

548 noise_realizations=noise_realizations, 

549 projection=projection, 

550 band=block.band, 

551 psf=psf, 

552 patch=patch, 

553 provenance=provenance, 

554 backgrounds=block._backgrounds, 

555 ) 

556 

557 

558def _trim_provenance(provenance: CoaddProvenance, *, max_inputs: int) -> CoaddProvenance: 

559 """Cap the provenance ``inputs`` table to ``max_inputs`` rows and drop any 

560 contributions that reference the removed inputs. 

561 

562 The two-table structure, polygon arrays and string dictionary-compression 

563 paths are all preserved; only the number of contributing visits shrinks. 

564 """ 

565 inputs = provenance.inputs 

566 if len(inputs) <= max_inputs: 

567 return provenance 

568 kept_inputs = inputs[:max_inputs] 

569 keys = {(str(row["instrument"]), int(row["visit"]), int(row["detector"])) for row in kept_inputs} 

570 contributions = provenance.contributions 

571 mask = np.array( 

572 [ 

573 (str(instrument), int(visit), int(detector)) in keys 

574 for instrument, visit, detector in zip( 

575 contributions["instrument"], contributions["visit"], contributions["detector"] 

576 ) 

577 ], 

578 dtype=bool, 

579 ) 

580 return CoaddProvenance(inputs=kept_inputs, contributions=contributions[mask]) 

581 

582 

583def _choose_block_bbox(cell_coadd: CellCoadd) -> Box: 

584 """Return the pixel bbox of a (up to) 2x2 block of cells to keep. 

585 

586 Prefers a block containing a missing cell; otherwise the block anchored at 

587 the start of the populated region. Never raises if there is no missing 

588 cell. 

589 """ 

590 bounds = cell_coadd.bounds 

591 grid = bounds.grid 

592 start = bounds.grid_start 

593 stop = bounds.grid_stop 

594 span_i = min(2, stop.i - start.i) 

595 span_j = min(2, stop.j - start.j) 

596 

597 target = next(iter(sorted(bounds.missing)), None) 

598 if target is not None: 

599 # Anchor the block so it includes the missing cell, clamped to the 

600 # populated index range. 

601 i0 = min(max(target.i, start.i), stop.i - span_i) 

602 j0 = min(max(target.j, start.j), stop.j - span_j) 

603 else: 

604 i0 = start.i 

605 j0 = start.j 

606 

607 lo = grid.bbox_of(CellIJ(i=i0, j=j0)) 

608 hi = grid.bbox_of(CellIJ(i=i0 + span_i - 1, j=j0 + span_j - 1)) 

609 return Box.factory[lo.y.start : hi.y.stop, lo.x.start : hi.x.stop] 

610 

611 

612def _scale_box_to_grid(box: Box, grid: CellGrid, cell_size: int) -> Box: 

613 """Map a grid-aligned box onto a grid with ``cell_size`` pixels per cell, 

614 anchored at the origin. 

615 """ 

616 cs = grid.cell_shape 

617 s = grid.bbox.start 

618 iy0 = (box.y.start - s.y) // cs.y 

619 iy1 = (box.y.stop - s.y) // cs.y 

620 ix0 = (box.x.start - s.x) // cs.x 

621 ix1 = (box.x.stop - s.x) // cs.x 

622 return Box.factory[iy0 * cell_size : iy1 * cell_size, ix0 * cell_size : ix1 * cell_size]