Coverage for python / lsst / images / fits / formatters.py: 0%

146 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-13 08:46 +0000

1# This file is part of lsst-images. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

# Names exported by ``from <this module> import *``: the public formatters.
__all__ = (
    "CellCoaddFormatter",
    "GenericFormatter",
    "ImageFormatter",
    "MaskedImageFormatter",
    "VisitImageFormatter",
)

21 

22import enum 

23import hashlib 

24import json 

25from typing import Any, ClassVar 

26 

27import astropy.io.fits 

28from astro_metadata_translator import ObservationInfo 

29 

30from lsst.daf.butler import DatasetProvenance, FormatterV2 

31from lsst.resources import ResourcePath 

32 

33from .._geom import Box 

34from .._image import Image 

35from .._mask import Mask 

36from .._masked_image import MaskedImageSerializationModel 

37from .._transforms import Projection, ProjectionSerializationModel 

38from .._visit_image import VisitImageSerializationModel 

39from ..cameras import Detector 

40from ..serialization import ButlerInfo 

41from ._common import FitsCompressionOptions, PointerModel 

42from ._input_archive import FitsInputArchive, read 

43from ._output_archive import write 

44 

45 

class GenericFormatter(FormatterV2):
    """The butler interface to FITS archive serialization.

    Serialized types must meet all the requirements of the `read` and `write`
    functions.

    Notes
    -----
    This formatter just forwards all read parameters it receives as
    ``**kwargs`` to `.read` and hence the ``deserialize`` method of the type it
    is reading.  This may or may not be appropriate.

    This formatter must be subclassed to add component support.

    The write parameter configuration for this formatter is designed to be
    identical to that for the legacy FITS formatters defined in
    `lsst.obs.base`.

    Butler provenance is written to both FITS headers and the archive tree.
    """

    default_extension: ClassVar[str] = ".fits"
    can_read_from_uri: ClassVar[bool] = True
    supported_write_parameters: ClassVar[frozenset[str]] = frozenset({"recipe"})

    # Provenance remembered by `add_provenance` for use when writing; stays
    # `None` if that method is never called.
    butler_provenance: DatasetProvenance | None = None

    def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
        """Read the dataset stored at the given URI.

        Parameters
        ----------
        uri : `lsst.resources.ResourcePath`
            Location of the file to read.
        component : `str` or `None`, optional
            Not consulted by this implementation; subclasses add component
            support.
        expected_size : `int`, optional
            Not consulted by this implementation.

        Returns
        -------
        result : `typing.Any`
            The ``deserialized`` attribute of the object returned by `read`.
        """
        pytype = self.dataset_ref.datasetType.storageClass.pytype
        # All read parameters are passed straight through as keyword
        # arguments; a missing/empty parameter mapping means "no arguments".
        kwargs = self.file_descriptor.parameters or {}
        return read(pytype, uri, **kwargs).deserialized

    def write_local_file(self, in_memory_dataset: Any, uri: ResourcePath) -> None:
        """Write the dataset to a local file.

        Parameters
        ----------
        in_memory_dataset : `typing.Any`
            Object to serialize.
        uri : `lsst.resources.ResourcePath`
            Destination; its ``ospath`` is handed to `write`.
        """
        butler_info = ButlerInfo(
            dataset=self.dataset_ref.to_simple(),
            # Fall back to an empty provenance object when none was attached.
            provenance=self.butler_provenance if self.butler_provenance is not None else DatasetProvenance(),
        )
        write(
            in_memory_dataset,
            uri.ospath,
            update_header=self._update_header,
            compression_options=self._get_compression_options(),
            compression_seed=self._get_compression_seed(),
            butler_info=butler_info,
        )

    def add_provenance(
        self, in_memory_dataset: Any, /, *, provenance: DatasetProvenance | None = None
    ) -> Any:
        """Remember the provenance to record when the dataset is written.

        Parameters
        ----------
        in_memory_dataset : `typing.Any`
            Dataset about to be written; returned unmodified.
        provenance : `lsst.daf.butler.DatasetProvenance` or `None`, optional
            Provenance to store on this formatter.

        Returns
        -------
        in_memory_dataset : `typing.Any`
            The same object that was passed in.
        """
        # Instead of attaching the provenance to the object we remember it on
        # the formatter, since a Formatter instance is only used once.
        self.butler_provenance = provenance
        return in_memory_dataset

    def _get_compression_seed(self) -> int:
        """Return a deterministic compression seed derived from the data ID.

        Returns
        -------
        seed : `int`
            A value in ``[1, 9999]``.
        """
        # Set the seed based on data ID (all logic here duplicated from
        # obs_base).  We can't just use 'hash', since like 'set' that's not
        # deterministic.  And we can't rely on a DimensionPacker because those
        # are only defined for certain combinations of dimensions.  Doing an MD5
        # of the JSON feels like overkill but I don't really see anything much
        # simpler.
        hash_bytes = hashlib.md5(
            json.dumps(list(self.data_id.required_values)).encode(),
            usedforsecurity=False,
        ).digest()
        # And it *really* feels like overkill when we squash that into the [1,
        # 10000] range allowed by FITS.  (The modulus used here actually
        # yields values in [1, 9999]; it is kept as-is to match obs_base.)
        return 1 + int.from_bytes(hash_bytes) % 9999

    def _get_compression_options(self) -> dict[str, FitsCompressionOptions]:
        """Return the compression options selected by the ``recipe`` write
        parameter.

        Returns
        -------
        options : `dict` [`str`, `FitsCompressionOptions`]
            Validated options keyed as in the recipe configuration; empty if
            the ``"default"`` recipe is requested but not configured.

        Raises
        ------
        RuntimeError
            Raised if a recipe other than ``"default"`` is requested but is
            not present in the configured write recipes.
        """
        recipe = self.write_parameters.get("recipe", "default")
        try:
            config = self.write_recipes[recipe]
        except KeyError:
            if recipe == "default":
                # If there's no default recipe just use the software defaults.
                return {}
            # Name the concrete formatter class so the message is accurate
            # for this base class and for every subclass.
            raise RuntimeError(f"Invalid recipe for {type(self).__name__}: {recipe!r}.") from None
        return {k: FitsCompressionOptions.model_validate(v) for k, v in config.items()}

    def _update_header(self, header: astropy.io.fits.Header) -> None:
        """Replace any butler provenance cards in a FITS header.

        Parameters
        ----------
        header : `astropy.io.fits.Header`
            Header to modify in place.
        """
        # Logic here largely lifted from lsst.obs.base.utils, which we
        # can't use directly for dependency and maybe mapping-type
        # (PropertyList vs. astropy) reasons.  We assume we can always add
        # long cards (astropy will CONTINUE them) but not comments
        # (astropy will truncate and warn on long cards).
        for key in list(header):
            if key.startswith("LSST BUTLER"):
                # A keyword that occurs more than once shows up repeatedly in
                # the snapshot above; ``remove_all`` drops every card with the
                # keyword and ``ignore_missing`` makes the repeats harmless.
                header.remove(key, ignore_missing=True, remove_all=True)
        if self.butler_provenance is not None:
            for key, value in self.butler_provenance.to_flat_dict(
                self.dataset_ref, prefix="HIERARCH LSST BUTLER", sep=" ", simple_types=True, max_inputs=3_000
            ).items():
                header.set(key, value)

140 

141 

class ComponentSentinel(enum.Enum):
    """Marker values that `ImageFormatter.read_component` may return in
    place of a component.
    """

    UNRECOGNIZED_COMPONENT = enum.auto()
    """The component name is unknown to this formatter; a subclass may still
    know how to read it.
    """

    INVALID_COMPONENT_MODEL = enum.auto()
    """The component name is known to this formatter, but the attribute it
    expects on the top-level `..serialization.ArchiveTree` was missing or of
    the wrong type.
    """

155 

156 

class ImageFormatter(GenericFormatter):
    """Butler formatter for FITS archive serialization of image-like objects
    that expose ``projection`` and ``bbox`` components.

    Notes
    -----
    The `..serialization.ArchiveTree` of the top-level object is assumed to
    carry a ``projection`` attribute (a `..ProjectionSerializationModel`) and
    a ``bbox`` property (a `..Box`).

    To support more components, a subclass overrides `read_component`, calls
    the `super` implementation first, and deals with the cases in which that
    call returns a `ComponentSentinel` instance.
    """

    def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
        """Read the full dataset or one of its components.

        Parameters
        ----------
        uri : `lsst.resources.ResourcePath`
            Location of the file to read.
        component : `str` or `None`, optional
            Component to read; `None` reads the full dataset.
        expected_size : `int`, optional
            Not consulted by this implementation.

        Returns
        -------
        result : `typing.Any`
            The deserialized dataset or component.

        Raises
        ------
        NotImplementedError
            Raised if `read_component` returns a `ComponentSentinel`.
        RuntimeError
            Raised by `check_unhandled_parameters` if read parameters remain
            after the read.
        """
        storage_pytype: Any = self.file_descriptor.storageClass.pytype
        if component is not None:
            with FitsInputArchive.open(uri, partial=True) as archive:
                tree = archive.get_tree(storage_pytype._get_archive_tree_type(PointerModel))
                loaded = self.read_component(component, tree, archive)
            # Sentinels are compared by identity and turned into errors here.
            if loaded is ComponentSentinel.UNRECOGNIZED_COMPONENT:
                raise NotImplementedError(
                    f"Unrecognized component {component!r} for {type(self).__name__}."
                )
            elif loaded is ComponentSentinel.INVALID_COMPONENT_MODEL:
                raise NotImplementedError(
                    f"Invalid serialization model for component {component!r} for {type(self).__name__}."
                )
        else:
            requested_bbox = self.pop_bbox_from_parameters()
            loaded = read(storage_pytype, uri, bbox=requested_bbox).deserialized
        self.check_unhandled_parameters()
        return loaded

    def pop_bbox_from_parameters(self) -> Box | None:
        """Remove and return the ``bbox`` read parameter.

        Returns
        -------
        bbox : `..Box` or `None`
            The value stored under ``"bbox"``, or `None` if there are no
            parameters or no such entry.
        """
        read_parameters = self.file_descriptor.parameters
        if not read_parameters:
            return None
        return read_parameters.pop("bbox", None)

    def check_unhandled_parameters(self) -> None:
        """Raise if any read parameters are still present.

        Raises
        ------
        RuntimeError
            Raised if the file descriptor's parameter mapping is non-empty.
        """
        if not self.file_descriptor.parameters:
            return
        raise RuntimeError(f"Parameters {list(self.file_descriptor.parameters.keys())} not recognized.")

    def read_component(
        self,
        component: str,
        tree: Any,
        archive: FitsInputArchive,
    ) -> Any:
        """Read a single component from an already-opened archive.

        Parameters
        ----------
        component : `str`
            Name of the component to read.
        tree : `typing.Any`
            Archive tree of the top-level object.
        archive : `FitsInputArchive`
            Archive to read from.

        Returns
        -------
        result : `typing.Any`
            The component, or a `ComponentSentinel` if the component name is
            not handled here or the tree attribute it needs is missing or has
            an unexpected type.
        """
        if component == "projection":
            projection_model = getattr(tree, "projection", None)
            if not isinstance(projection_model, ProjectionSerializationModel):
                return ComponentSentinel.INVALID_COMPONENT_MODEL
            return Projection.deserialize(projection_model, archive)
        if component == "bbox":
            box = getattr(tree, "bbox", None)
            if not isinstance(box, Box):
                return ComponentSentinel.INVALID_COMPONENT_MODEL
            return box
        if component == "obs_info":
            info = getattr(tree, "obs_info", None)
            if not isinstance(info, ObservationInfo):
                return ComponentSentinel.INVALID_COMPONENT_MODEL
            return info
        return ComponentSentinel.UNRECOGNIZED_COMPONENT

224 

225 

class MaskedImageFormatter(ImageFormatter):
    """Butler formatter for FITS archive serialization of the
    `..MaskedImage` class.
    """

    def read_component(
        self,
        component: str,
        tree: Any,
        archive: FitsInputArchive,
    ) -> Any:
        """Read a single component, adding ``image``, ``mask`` and
        ``variance`` to those handled by the parent class.

        Parameters
        ----------
        component : `str`
            Name of the component to read.
        tree : `typing.Any`
            Archive tree of the top-level object.
        archive : `FitsInputArchive`
            Archive to read from.

        Returns
        -------
        result : `typing.Any`
            The component, or a `ComponentSentinel`.

        Notes
        -----
        Any `ComponentSentinel` returned by the parent implementation
        (whichever member it is) causes this method to fall through to its
        own handling.
        """
        inherited = super().read_component(component, tree, archive)
        if not isinstance(inherited, ComponentSentinel):
            # The parent produced a real component.
            return inherited
        if not isinstance(tree, MaskedImageSerializationModel):
            return ComponentSentinel.INVALID_COMPONENT_MODEL
        if component == "image":
            image_model = tree.image
            bbox = self.pop_bbox_from_parameters()
            return Image.deserialize(image_model, archive, bbox=bbox)
        if component == "mask":
            mask_model = tree.mask
            bbox = self.pop_bbox_from_parameters()
            return Mask.deserialize(mask_model, archive, bbox=bbox)
        if component == "variance":
            variance_model = tree.variance
            bbox = self.pop_bbox_from_parameters()
            return Image.deserialize(variance_model, archive, bbox=bbox)
        return ComponentSentinel.UNRECOGNIZED_COMPONENT

252 

253 

class VisitImageFormatter(MaskedImageFormatter):
    """Butler formatter for FITS archive serialization of the
    `..VisitImage` class.
    """

    def read_component(
        self,
        component: str,
        tree: Any,
        archive: FitsInputArchive,
    ) -> Any:
        """Read a single component, adding ``psf``, ``summary_stats``,
        ``detector`` and ``aperture_corrections`` to those handled by the
        parent class.

        Parameters
        ----------
        component : `str`
            Name of the component to read.
        tree : `typing.Any`
            Archive tree of the top-level object.
        archive : `FitsInputArchive`
            Archive to read from.

        Returns
        -------
        result : `typing.Any`
            The component, or a `ComponentSentinel`.
        """
        inherited = super().read_component(component, tree, archive)
        if not isinstance(inherited, ComponentSentinel):
            # The parent produced a real component.
            return inherited
        if not isinstance(tree, VisitImageSerializationModel):
            return ComponentSentinel.INVALID_COMPONENT_MODEL
        if component == "psf":
            return tree.deserialize_psf(archive)
        if component == "summary_stats":
            return tree.summary_stats
        if component == "detector":
            detector_model = tree.detector
            return Detector.deserialize(detector_model, archive)
        if component == "aperture_corrections":
            corrections_model = tree.aperture_corrections
            return corrections_model.deserialize(archive)
        return ComponentSentinel.UNRECOGNIZED_COMPONENT

282 

283 

class CellCoaddFormatter(MaskedImageFormatter):
    """Butler formatter for FITS archive serialization of the
    `..cells.CellCoadd` class.
    """

    def read_component(
        self,
        component: str,
        tree: Any,
        archive: FitsInputArchive,
    ) -> Any:
        """Read a single component, adding ``psf`` and ``provenance`` to
        those handled by the parent class.

        Parameters
        ----------
        component : `str`
            Name of the component to read.
        tree : `typing.Any`
            Archive tree of the top-level object.
        archive : `FitsInputArchive`
            Archive to read from.

        Returns
        -------
        result : `typing.Any`
            The component, or a `ComponentSentinel`.
        """
        # Imported inside the method, before anything else runs, exactly as
        # the module has always done.
        from ..cells import CellCoaddSerializationModel

        inherited = super().read_component(component, tree, archive)
        if not isinstance(inherited, ComponentSentinel):
            # The parent produced a real component.
            return inherited
        if not isinstance(tree, CellCoaddSerializationModel):
            return ComponentSentinel.INVALID_COMPONENT_MODEL
        if component == "psf":
            requested_bbox = self.pop_bbox_from_parameters()
            return tree.deserialize_psf(archive, bbox=requested_bbox)
        if component == "provenance":
            return tree.deserialize_provenance(archive)
        return ComponentSentinel.UNRECOGNIZED_COMPONENT