Coverage for python / lsst / images / fits / formatters.py: 0%

143 statements  

« prev     ^ index     » next       coverage.py v7.14.0, created at 2026-05-15 08:42 +0000

1# This file is part of lsst-images. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (https://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# Use of this source code is governed by a 3-clause BSD-style 

10# license that can be found in the LICENSE file. 

11 

12from __future__ import annotations 

13 

14__all__ = ( 

15 "CellCoaddFormatter", 

16 "GenericFormatter", 

17 "ImageFormatter", 

18 "MaskedImageFormatter", 

19 "VisitImageFormatter", 

20) 

21 

22import enum 

23import hashlib 

24import json 

25from typing import Any, ClassVar 

26 

27import astropy.io.fits 

28from astro_metadata_translator import ObservationInfo 

29 

30from lsst.daf.butler import DatasetProvenance, FormatterV2 

31from lsst.resources import ResourcePath 

32 

33from .._geom import Box 

34from .._masked_image import MaskedImageSerializationModel 

35from .._transforms import ProjectionSerializationModel 

36from .._visit_image import VisitImageSerializationModel 

37from ..serialization import ButlerInfo 

38from ._common import FitsCompressionOptions, PointerModel 

39from ._input_archive import FitsInputArchive, read 

40from ._output_archive import write 

41 

42 

class GenericFormatter(FormatterV2):
    """The butler interface to FITS archive serialization.

    Serialized types must meet all the requirements of the `read` and `write`
    functions.

    Notes
    -----
    This formatter just forwards all read parameters it receives as
    ``**kwargs`` to `.read` and hence the ``deserialize`` method of the type it
    is reading.  This may or may not be appropriate.

    This formatter must be subclassed to add component support.

    The write parameter configuration for this formatter is designed to be
    identical to that for the legacy FITS formatters defined in
    `lsst.obs.base`.

    Butler provenance is written to both FITS headers and the archive tree.
    """

    default_extension: ClassVar[str] = ".fits"
    can_read_from_uri: ClassVar[bool] = True
    supported_write_parameters: ClassVar[frozenset[str]] = frozenset({"recipe"})

    # Provenance remembered by `add_provenance` for use at write time; stays
    # `None` until that method is called.
    butler_provenance: DatasetProvenance | None = None

    def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
        """Read the full dataset from a FITS archive.

        Parameters
        ----------
        uri
            Location of the file to read.
        component
            Not used by this implementation.
        expected_size
            Not used by this implementation.

        Returns
        -------
        Any
            The ``deserialized`` attribute of the object returned by `read`.
        """
        pytype = self.dataset_ref.datasetType.storageClass.pytype
        # All read parameters are passed straight through as keyword
        # arguments; an absent/empty parameter mapping means "no kwargs".
        kwargs = self.file_descriptor.parameters or {}
        return read(pytype, uri, **kwargs).deserialized

    def write_local_file(self, in_memory_dataset: Any, uri: ResourcePath) -> None:
        """Write a dataset to a local FITS archive file.

        Parameters
        ----------
        in_memory_dataset
            The object to serialize.
        uri
            Destination; its ``ospath`` is what is handed to `write`.
        """
        butler_info = ButlerInfo(
            dataset=self.dataset_ref.to_simple(),
            # Fall back to an empty provenance when none was recorded.
            provenance=self.butler_provenance if self.butler_provenance is not None else DatasetProvenance(),
        )
        write(
            in_memory_dataset,
            uri.ospath,
            update_header=self._update_header,
            compression_options=self._get_compression_options(),
            compression_seed=self._get_compression_seed(),
            butler_info=butler_info,
        )

    def add_provenance(
        self, in_memory_dataset: Any, /, *, provenance: DatasetProvenance | None = None
    ) -> Any:
        """Record butler provenance for the upcoming write.

        Parameters
        ----------
        in_memory_dataset
            The dataset about to be written; returned unchanged.
        provenance
            Provenance to remember, or `None`.

        Returns
        -------
        Any
            ``in_memory_dataset``, unmodified.
        """
        # Instead of attaching the provenance to the object we remember it on
        # the formatter, since a Formatter instance is only used once.
        self.butler_provenance = provenance
        return in_memory_dataset

    def _get_compression_seed(self) -> int:
        """Return a deterministic compression seed derived from the data ID.

        Returns
        -------
        int
            A value in the inclusive range [1, 9999].
        """
        # Set the seed based on data ID (all logic here duplicated from
        # obs_base).  We can't just use 'hash', since like 'set' that's not
        # deterministic.  And we can't rely on a DimensionPacker because those
        # are only defined for certain combinations of dimensions.  Doing an
        # MD5 of the JSON feels like overkill but I don't really see anything
        # much simpler.
        hash_bytes = hashlib.md5(
            json.dumps(list(self.data_id.required_values)).encode(),
            usedforsecurity=False,
        ).digest()
        # And it *really* feels like overkill when we squash that into
        # [1, 9999] (a subset of the [1, 10000] range allowed by FITS).  The
        # modulus is deliberately left at 9999 so existing seeds do not
        # change.  The byte order is spelled out ("big" is the default that
        # Python 3.11 introduced) so the result never depends on the
        # interpreter version.
        return 1 + int.from_bytes(hash_bytes, byteorder="big") % 9999

    def _get_compression_options(self) -> dict[str, FitsCompressionOptions]:
        """Return the compression options selected by the ``recipe`` write
        parameter.

        Returns
        -------
        dict [str, FitsCompressionOptions]
            Validated options from the selected write recipe, or an empty
            `dict` when the ``"default"`` recipe is requested (explicitly or
            implicitly) but not configured.

        Raises
        ------
        RuntimeError
            Raised if a recipe other than ``"default"`` is requested but not
            present in the write recipes.
        """
        recipe = self.write_parameters.get("recipe", "default")
        try:
            config = self.write_recipes[recipe]
        except KeyError:
            if recipe == "default":
                # If there's no default recipe just use the software defaults.
                return {}
            # Report the concrete formatter class; this method is inherited by
            # every subclass, so a hard-coded class name would be misleading.
            raise RuntimeError(f"Invalid recipe for {type(self).__name__}: {recipe!r}.") from None
        return {k: FitsCompressionOptions.model_validate(v) for k, v in config.items()}

    def _update_header(self, header: astropy.io.fits.Header) -> None:
        """Replace any butler provenance cards in a FITS header.

        Parameters
        ----------
        header
            Header to modify in place.
        """
        # Logic here largely lifted from lsst.obs.base.utils, which we
        # can't use directly for dependency and maybe mapping-type
        # (PropertyList vs. astropy) reasons.  We assume we can always add
        # long cards (astropy will CONTINUE them) but not comments
        # (astropy will truncate and warn on long cards).
        #
        # Iterate over a snapshot of the keys because cards are deleted from
        # the header inside the loop.
        for key in list(header):
            if key.startswith("LSST BUTLER"):
                del header[key]
        if self.butler_provenance is not None:
            for key, value in self.butler_provenance.to_flat_dict(
                self.dataset_ref, prefix="HIERARCH LSST BUTLER", sep=" ", simple_types=True, max_inputs=3_000
            ).items():
                header.set(key, value)

137 

138 

@enum.unique
class ComponentSentinel(enum.Enum):
    """Marker results that `ImageFormatter.read_component` may return in place
    of an actual component.
    """

    UNRECOGNIZED_COMPONENT = enum.auto()
    """The component name is unknown to this formatter; a subclass may still
    be able to handle it.
    """

    INVALID_COMPONENT_MODEL = enum.auto()
    """The component name is known to this formatter, but the attribute it
    expected on the top-level `..serialization.ArchiveTree` was missing or of
    the wrong type.
    """

152 

153 

class ImageFormatter(GenericFormatter):
    """Butler formatter for FITS archives holding image-like objects that
    expose ``projection`` and ``bbox`` components.

    Notes
    -----
    Component reads rely on the top-level `..serialization.ArchiveTree`
    carrying a ``projection`` attribute (a `..ProjectionSerializationModel`)
    and a ``bbox`` property (a `..Box`).  An ``obs_info`` attribute is also
    served as a component when it is an `ObservationInfo`.

    To support more components, a subclass overrides `read_component`, calls
    the `super` implementation first, and deals itself with any
    `ComponentSentinel` that comes back.
    """

    def read_from_uri(self, uri: ResourcePath, component: str | None = None, expected_size: int = -1) -> Any:
        """Read the full dataset, or a single component of it.

        Parameters
        ----------
        uri
            Location of the file to read.
        component
            Name of the component to read, or `None` for the full dataset.
        expected_size
            Not used by this implementation.

        Raises
        ------
        NotImplementedError
            Raised if `read_component` returned a `ComponentSentinel`.
        RuntimeError
            Raised if read parameters remain after the read.
        """
        pytype: Any = self.file_descriptor.storageClass.pytype
        if component is not None:
            with FitsInputArchive.open(uri, partial=True) as archive:
                tree_type = pytype._get_archive_tree_type(PointerModel)
                tree = archive.get_tree(tree_type)
                result = self.read_component(component, tree, archive)
                # Translate sentinels into errors while the archive is open.
                if result is ComponentSentinel.UNRECOGNIZED_COMPONENT:
                    raise NotImplementedError(
                        f"Unrecognized component {component!r} for {type(self).__name__}."
                    )
                if result is ComponentSentinel.INVALID_COMPONENT_MODEL:
                    raise NotImplementedError(
                        f"Invalid serialization model for component {component!r} for {type(self).__name__}."
                    )
        else:
            subregion = self.pop_bbox_from_parameters()
            result = read(pytype, uri, bbox=subregion).deserialized
        self.check_unhandled_parameters()
        return result

    def pop_bbox_from_parameters(self) -> Box | None:
        """Remove the ``bbox`` read parameter and return it.

        Returns `None` when there are no parameters or no ``bbox`` entry.
        """
        parameters = self.file_descriptor.parameters
        if not parameters:
            return None
        return parameters.pop("bbox", None)

    def check_unhandled_parameters(self) -> None:
        """Raise `RuntimeError` if any read parameters are still present."""
        leftover = self.file_descriptor.parameters
        if leftover:
            raise RuntimeError(f"Parameters {list(leftover.keys())} not recognized.")

    def read_component(
        self,
        component: str,
        tree: Any,
        archive: FitsInputArchive,
    ) -> Any:
        """Extract one component from an archive tree.

        Parameters
        ----------
        component
            Name of the requested component.
        tree
            Top-level archive tree of the stored object.
        archive
            Open archive, passed on to ``deserialize`` where needed.

        Returns
        -------
        Any
            The component, or a `ComponentSentinel` when the name is not
            handled here or the tree attribute has an unexpected type.
        """
        if component == "projection":
            serialized_projection = getattr(tree, "projection", None)
            if not isinstance(serialized_projection, ProjectionSerializationModel):
                return ComponentSentinel.INVALID_COMPONENT_MODEL
            return serialized_projection.deserialize(archive)
        if component == "bbox":
            bbox = getattr(tree, "bbox", None)
            if not isinstance(bbox, Box):
                return ComponentSentinel.INVALID_COMPONENT_MODEL
            return bbox
        if component == "obs_info":
            obs_info = getattr(tree, "obs_info", None)
            if not isinstance(obs_info, ObservationInfo):
                return ComponentSentinel.INVALID_COMPONENT_MODEL
            return obs_info
        return ComponentSentinel.UNRECOGNIZED_COMPONENT

221 

222 

class MaskedImageFormatter(ImageFormatter):
    """Butler formatter for FITS archives holding `..MaskedImage` objects."""

    def read_component(
        self,
        component: str,
        tree: Any,
        archive: FitsInputArchive,
    ) -> Any:
        """Extract one component, adding the ``image``, ``mask`` and
        ``variance`` planes to those handled by the base class.

        Returns the component, or a `ComponentSentinel` when it cannot be
        provided.
        """
        # Give the base class the first chance; anything other than a
        # sentinel is a finished result.
        inherited = super().read_component(component, tree, archive)
        if not isinstance(inherited, ComponentSentinel):
            return inherited
        if not isinstance(tree, MaskedImageSerializationModel):
            return ComponentSentinel.INVALID_COMPONENT_MODEL
        if component in ("image", "mask", "variance"):
            # Each plane is stored on the tree under its component name and
            # honours the optional ``bbox`` read parameter.
            plane = getattr(tree, component)
            return plane.deserialize(archive, bbox=self.pop_bbox_from_parameters())
        return ComponentSentinel.UNRECOGNIZED_COMPONENT

249 

250 

class VisitImageFormatter(MaskedImageFormatter):
    """Butler formatter for FITS archives holding `..VisitImage` objects."""

    def read_component(
        self,
        component: str,
        tree: Any,
        archive: FitsInputArchive,
    ) -> Any:
        """Extract one component, adding ``psf``, ``summary_stats``,
        ``detector`` and ``aperture_corrections`` to those handled by the
        base class.

        Returns the component, or a `ComponentSentinel` when it cannot be
        provided.
        """
        # Give the base class the first chance; anything other than a
        # sentinel is a finished result.
        inherited = super().read_component(component, tree, archive)
        if not isinstance(inherited, ComponentSentinel):
            return inherited
        if not isinstance(tree, VisitImageSerializationModel):
            return ComponentSentinel.INVALID_COMPONENT_MODEL
        if component == "psf":
            return tree.psf.deserialize(archive)
        if component == "summary_stats":
            # Returned as stored on the tree, without a deserialize step.
            return tree.summary_stats
        if component == "detector":
            return tree.detector.deserialize(archive)
        if component == "aperture_corrections":
            return tree.aperture_corrections.deserialize(archive)
        return ComponentSentinel.UNRECOGNIZED_COMPONENT

279 

280 

class CellCoaddFormatter(MaskedImageFormatter):
    """Butler formatter for FITS archives holding `..cells.CellCoadd`
    objects.
    """

    def read_component(
        self,
        component: str,
        tree: Any,
        archive: FitsInputArchive,
    ) -> Any:
        """Extract one component, adding ``psf`` and ``provenance`` to those
        handled by the base class.

        Returns the component, or a `ComponentSentinel` when it cannot be
        provided.
        """
        # Imported inside the method rather than at module level.
        from ..cells import CellCoaddSerializationModel

        # Give the base class the first chance; anything other than a
        # sentinel is a finished result.
        inherited = super().read_component(component, tree, archive)
        if not isinstance(inherited, ComponentSentinel):
            return inherited
        if not isinstance(tree, CellCoaddSerializationModel):
            return ComponentSentinel.INVALID_COMPONENT_MODEL
        if component == "psf":
            # The PSF read honours the optional ``bbox`` read parameter.
            subregion = self.pop_bbox_from_parameters()
            return tree.deserialize_psf(archive, bbox=subregion)
        if component == "provenance":
            return tree.deserialize_provenance(archive)
        return ComponentSentinel.UNRECOGNIZED_COMPONENT

307 return ComponentSentinel.UNRECOGNIZED_COMPONENT