Coverage for python/lsst/images/serialization/_common.py: 55%
103 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 01:09 -0700
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 01:09 -0700
1# This file is part of lsst-images.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (https://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# Use of this source code is governed by a 3-clause BSD-style
10# license that can be found in the LICENSE file.
12from __future__ import annotations
14__all__ = (
15 "ArchiveReadError",
16 "ArchiveTree",
17 "ButlerInfo",
18 "InvalidComponentError",
19 "InvalidParameterError",
20 "JsonRef",
21 "MetadataValue",
22 "OpaqueArchiveMetadata",
23 "ReadResult",
24 "no_header_updates",
25)
27import operator
28from abc import ABC, abstractmethod
29from typing import TYPE_CHECKING, Any, ClassVar, NamedTuple, Protocol, Self
31import astropy.table
32import astropy.units
33import pydantic
35from .._geom import Box
36from ..utils import is_none
38try:
39 from lsst.daf.butler import DatasetProvenance, SerializedDatasetRef
40except ImportError:
41 type DatasetProvenance = Any # type: ignore[no-redef]
42 type SerializedDatasetRef = Any # type: ignore[no-redef]
44if TYPE_CHECKING:
45 import astropy.io.fits
47 from ._input_archive import InputArchive
50type MetadataValue = (
51 pydantic.StrictInt | pydantic.StrictFloat | pydantic.StrictStr | pydantic.StrictBool | None
52)
class ButlerInfo(pydantic.BaseModel):
    """Information about a butler dataset."""

    # Serialized reference to the butler dataset.
    dataset: SerializedDatasetRef

    # Provenance of the dataset; a fresh default-constructed
    # ``DatasetProvenance`` is used when none is supplied.
    # NOTE(review): when ``lsst.daf.butler`` cannot be imported, this module
    # rebinds ``DatasetProvenance`` to a type alias for ``Any``, which is
    # presumably not callable as a default factory -- confirm that instances
    # are always given an explicit ``provenance`` in that configuration.
    provenance: DatasetProvenance = pydantic.Field(default_factory=DatasetProvenance)
class JsonRef(pydantic.BaseModel, serialize_by_alias=True):
    """Pydantic model for JSON Reference / Pointer (IETF RFC 6901).

    Notes
    -----
    This model does not do any of the escaping or special-character
    interpretation required by the spec; it assumes that's already been done,
    so its job is *just* putting a ``$ref`` field inside another model.
    """

    # ``$ref`` is not a valid Python identifier, so the attribute is named
    # ``ref`` and the JSON key is provided through the alias;
    # ``serialize_by_alias=True`` makes serialization emit ``$ref``.
    ref: str = pydantic.Field(alias="$ref")
class ArchiveTree(
    pydantic.BaseModel, ABC, ser_json_inf_nan="constants", ser_json_bytes="base64", val_json_bytes="base64"
):
    """An intermediate base class of `pydantic.BaseModel` that should be used
    for all objects that may be used as the top-level tree models written to
    archives.

    See :ref:`lsst.images-schema-versioning` for how the ``SCHEMA_NAME`` /
    ``SCHEMA_VERSION`` / ``MIN_READ_VERSION`` constants and the
    ``schema_version`` / ``min_read_version`` / ``schema_url`` fields are used.
    """

    # Per-subclass schema constants.  They are deliberately declared without
    # values here: the validator and the subclass hook below skip classes
    # that have not defined them yet.
    SCHEMA_NAME: ClassVar[str]
    SCHEMA_VERSION: ClassVar[str]
    MIN_READ_VERSION: ClassVar[int]

    schema_version: str = pydantic.Field(
        default="1.0.0",
        description="Data-model schema version of this tree (major.minor.patch).",
    )
    min_read_version: int = pydantic.Field(
        default=1,
        description="Smallest reader major that can interpret this tree.",
    )
    # ``exclude_if=operator.not_`` drops the field from serialized output
    # whenever its value is falsy (i.e. an empty container).
    metadata: dict[str, MetadataValue] = pydantic.Field(
        default_factory=dict, description="Additional unstructured metadata.", exclude_if=operator.not_
    )
    butler_info: ButlerInfo | None = pydantic.Field(
        default=None,
        description="Information about the butler dataset backed by this file.",
        exclude_if=is_none,
    )
    indirect: list[Any] = pydantic.Field(
        default_factory=list,
        description="Serialized nested objects that may be saved or read more than once.",
        exclude_if=operator.not_,
    )

    @pydantic.computed_field(description="Canonical schema URL for this tree.")  # type: ignore[prop-decorator]
    @property
    def schema_url(self) -> str:
        """Return the schema URL of this tree's class.

        Computed from ``SCHEMA_NAME`` and ``SCHEMA_VERSION`` ClassVars.
        """
        cls = type(self)
        return f"https://images.lsst.io/schemas/{cls.SCHEMA_NAME}-{cls.SCHEMA_VERSION}"

    @pydantic.model_validator(mode="after")
    def _check_and_normalize_schema_version(self) -> Self:
        """Validate and normalise the schema version fields.

        Compares the on-tree ``schema_version`` / ``min_read_version`` against
        the in-code values from the subclass's ClassVars; raises if
        incompatible, otherwise normalises the fields to the in-code values.

        Raises
        ------
        ArchiveReadError
            Raised (by ``_check_compat``) if the tree requires a newer reader
            than this code provides.
        """
        cls = type(self)
        # ArchiveTree itself is abstract (deserialize is @abstractmethod).
        # Subclasses that haven't yet declared SCHEMA_NAME are skipped — this
        # matters during incremental rollout and remains a safe no-op
        # afterwards (a class-invariants test ensures every concrete subclass
        # has the constants).
        if not hasattr(cls, "SCHEMA_NAME"):
            return self
        _check_compat(
            cls.SCHEMA_NAME,
            self.schema_version,
            self.min_read_version,
            cls.SCHEMA_VERSION,
        )
        # Only assign when the value actually differs, so that fields already
        # holding the in-code value are left untouched.
        if self.schema_version != cls.SCHEMA_VERSION:
            self.schema_version = cls.SCHEMA_VERSION
        if self.min_read_version != cls.MIN_READ_VERSION:
            self.min_read_version = cls.MIN_READ_VERSION
        return self

    @classmethod
    def __pydantic_init_subclass__(cls, **kwargs: Any) -> None:
        """Inject ``$id`` and ``title`` into the subclass's JSON Schema.

        Populates ``model_config['json_schema_extra']`` with values derived
        from the subclass's ``SCHEMA_NAME`` / ``SCHEMA_VERSION`` ClassVars.
        Subclasses that haven't declared the ClassVars in their own class
        body are skipped, as are classes whose ``json_schema_extra`` is not a
        `dict` (e.g. a callable).

        Notes
        -----
        Values for ``$id`` / ``title`` that the subclass configured itself
        are preserved.  Values that are merely inherited from a base class's
        configuration (pydantic merges ``model_config`` from bases) are
        replaced, so that a subclass of another concrete tree gets its own
        identifier rather than its parent's.
        """
        super().__pydantic_init_subclass__(**kwargs)
        name = cls.__dict__.get("SCHEMA_NAME")
        version = cls.__dict__.get("SCHEMA_VERSION")
        if name is None or version is None:
            return
        json_schema_extra = cls.model_config.get("json_schema_extra") or {}
        if not isinstance(json_schema_extra, dict):
            return
        # Collect what the direct bases carry in their configuration; an
        # entry identical to one of these was inherited (typically injected
        # by this very hook for the parent class), not chosen by ``cls``.
        base_extras = []
        for base in cls.__bases__:
            base_extra = getattr(base, "model_config", {}).get("json_schema_extra")
            if isinstance(base_extra, dict):
                base_extras.append(base_extra)
        merged = dict(json_schema_extra)
        generated = (
            ("$id", f"https://images.lsst.io/schemas/{name}-{version}"),
            ("title", name),
        )
        for key, value in generated:
            inherited = key in merged and any(
                key in base_extra and base_extra[key] == merged[key] for base_extra in base_extras
            )
            if key not in merged or inherited:
                merged[key] = value
        cls.model_config = {**cls.model_config, "json_schema_extra": merged}

    @abstractmethod
    def deserialize(self, archive: InputArchive[Any], **kwargs: Any) -> Any:
        """Return the in-memory object that was serialized to this tree.

        Parameters
        ----------
        archive
            The input archive to read from.
        **kwargs
            Additional keyword arguments specific to this type.

        Raises
        ------
        ~lsst.images.serialization.InvalidParameterError
            Raised for unsupported ``**kwargs``.

        Notes
        -----
        Subclass implementations may take additional keyword-only arguments.
        Callers that invoke this method without knowing what those might be
        should catch `TypeError` and re-raise as
        `~lsst.images.serialization.InvalidParameterError` if they pass
        additional keyword arguments.
        """
        raise NotImplementedError()

    def deserialize_component(self, component: str, archive: InputArchive[Any], **kwargs: Any) -> Any:
        """Return a component in-memory object that was serialized to this
        tree.

        Parameters
        ----------
        component
            Name of the component to read.
        archive
            The input archive to read from.
        **kwargs
            Additional keyword arguments specific to this type.

        Raises
        ------
        ~lsst.images.serialization.InvalidComponentError
            Raised if ``component`` is not recognized.
        ~lsst.images.serialization.InvalidParameterError
            Raised for unsupported ``**kwargs``.

        Notes
        -----
        The default implementation for this method tries to get an attribute
        with the component's name from ``self``, and then:

        - returns `None` if it is `None`;
        - calls `deserialize` on that object if it is also an
          `~lsst.images.serialization.ArchiveTree`;
        - returns it directly otherwise.

        If there is no such attribute, it raises
        `~lsst.images.serialization.InvalidComponentError`.

        ``**kwargs`` are forwarded to component `deserialize` methods, but
        are otherwise not checked.  Subclasses are generally expected to
        implement this method to do that checking and handle any components
        for which the default behavior will not work, and then delegate to
        `super` at the end.
        """
        try:
            component_model = getattr(self, component)
        except AttributeError:
            # Suppress the AttributeError context: the component name is the
            # only information relevant to the caller.
            raise InvalidComponentError(
                f"Component {component!r} is not recognized by {type(self).__name__}."
            ) from None
        if component_model is None:
            return None
        if isinstance(component_model, ArchiveTree):
            return component_model.deserialize(archive, **kwargs)
        return component_model
250class ReadResult[T: Any](NamedTuple):
251 """A struct that can be used to return both a deserialized object and
252 metadata associated with it, even when the in-memory type cannot hold
253 metadata.
254 """
256 deserialized: T
257 """The deserialized object itself."""
259 metadata: dict[str, MetadataValue]
260 """Additional flexible metadata stored with the object."""
262 butler_info: ButlerInfo | None
263 """Butler provenance information for the dataset this file backs."""
class ArchiveReadError(RuntimeError):
    """Exception raised when an archive's contents cannot be read."""
class InvalidParameterError(ArchiveReadError):
    """Exception raised by `ArchiveTree.deserialize` or
    `ArchiveTree.deserialize_component` when given a keyword argument that
    is not valid.
    """
class InvalidComponentError(ArchiveReadError):
    """Exception raised by `ArchiveTree.deserialize_component` when passed an
    invalid component name.
    """
283class OpaqueArchiveMetadata(Protocol):
284 """Interface for opaque archive metadata.
286 In addition to implementing the methods defined here, all implementations
287 must be pickleable.
288 """
290 def copy(self) -> Self | None:
291 """Copy, reference, or discard metadata when its holding object is
292 copied.
293 """
294 ...
296 def subset(self, bbox: Box) -> Self | None:
297 """Copy, reference, or discard metadata when a subset of its its
298 holding object is extracted.
299 """
300 ...
def no_header_updates(header: astropy.io.fits.Header) -> None:
    """Leave the given FITS header untouched.

    Parameters
    ----------
    header
        FITS header that would be modified; it is ignored here.
    """
    return None
307def _parse_major(version: str) -> int:
308 """Return the integer major component of a major.minor.patch string.
310 Raises
311 ------
312 ArchiveReadError
313 If ``version`` is not a non-empty string of the form
314 ``major.minor.patch`` with integer components.
315 """
316 if not isinstance(version, str) or not version:
317 raise ArchiveReadError(f"Schema version {version!r} is not a non-empty string.")
318 head = version.split(".", 1)[0]
319 try:
320 return int(head)
321 except ValueError as exc:
322 raise ArchiveReadError(f"Schema version {version!r} has non-integer major.") from exc
def _check_compat(
    name: str,
    on_disk_version: str,
    on_disk_min_read: int,
    in_code_version: str,
) -> None:
    """Raise `ArchiveReadError` if a tree written with the given
    schema_version/min_read_version cannot be read by the current code.

    See :ref:`lsst.images-schema-versioning` for the compatibility rule.

    Parameters
    ----------
    name
        Schema name, used as the prefix of the error message.
    on_disk_version
        Schema version recorded in the tree.  It is accepted for interface
        completeness but is not consulted by the current check.
    on_disk_min_read
        Smallest reader major version the tree declares it needs.
    in_code_version
        Schema version implemented by this code (``major.minor.patch``).
    """
    reader_major = _parse_major(in_code_version)
    if on_disk_min_read <= reader_major:
        # The reader is at least as new as the tree requires.
        return
    raise ArchiveReadError(
        f"{name}: tree requires reader major >= {on_disk_min_read}; this release is {in_code_version}."
    )
343def _check_format_version(name: str, on_disk: int, in_code: int) -> None:
344 """Raise `ArchiveReadError` if a backend file's container layout
345 version is newer than this release knows how to read.
346 """
347 if on_disk > in_code:
348 raise ArchiveReadError(
349 f"{name}: on-disk container format version {on_disk} is "
350 f"newer than this release ({in_code}); cannot read."
351 )