Coverage for python/lsst/daf/butler/datastore/_datastore.py: 57%
290 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-05-27 08:08 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-05-27 08:08 +0000
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
28"""Support for generic data stores."""
30from __future__ import annotations
32from .record_data import DatastoreRecordTable
34__all__ = (
35 "DatasetRefURIs",
36 "Datastore",
37 "DatastoreConfig",
38 "DatastoreOpaqueTable",
39 "DatastoreTransaction",
40 "DatastoreValidationError",
41 "NullDatastore",
42)
44import contextlib
45import dataclasses
46import logging
47import time
48from abc import ABCMeta, abstractmethod
49from collections import abc, defaultdict
50from collections.abc import Callable, Collection, Iterable, Iterator, Mapping
51from typing import TYPE_CHECKING, Any, ClassVar
53from lsst.utils import doImportType
55from .._config import Config, ConfigSubset
56from .._exceptions import DatasetTypeNotSupportedError, ValidationError
57from .._file_dataset import FileDataset
58from .._storage_class import StorageClassFactory
59from ._transfer import FileTransferMap, FileTransferSource
60from .constraints import Constraints
62if TYPE_CHECKING:
63 from lsst.resources import ResourcePath, ResourcePathExpression
65 from .. import ddl
66 from .._config_support import LookupKey
67 from .._dataset_provenance import DatasetProvenance
68 from .._dataset_ref import DatasetId, DatasetRef
69 from .._dataset_type import DatasetType
70 from .._storage_class import StorageClass
71 from ..datastores.file_datastore.get import DatasetLocationInformation
72 from ..datastores.file_datastore.retrieve_artifacts import ArtifactIndexInfo
73 from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager
74 from .record_data import DatastoreRecordData
75 from .stored_file_info import StoredDatastoreItemInfo
77_LOG = logging.getLogger(__name__)
class DatastoreConfig(ConfigSubset):
    """Configuration subset describing a `Datastore`."""

    # File from which default values are read.
    defaultConfigFile = "datastore.yaml"
    # Keys that must be present in the configuration.
    requiredKeys = ("cls",)
    # Name of the component within the parent configuration.
    component = "datastore"
class DatastoreValidationError(ValidationError):
    """Raised when the `Datastore` configuration fails validation."""
@dataclasses.dataclass(frozen=True)
class Event:
    """Immutable record of one undoable action.

    An instance bundles the callable that reverses an action together with
    the positional and keyword arguments that callable should receive.
    """

    __slots__ = {"name", "undoFunc", "args", "kwargs"}

    # Human-readable label for the event (used in log messages).
    name: str
    # Callable invoked to undo the event.
    undoFunc: Callable
    # Positional arguments passed to ``undoFunc``.
    args: tuple
    # Keyword arguments passed to ``undoFunc``.
    kwargs: dict
@dataclasses.dataclass(frozen=True)
class DatastoreOpaqueTable:
    """Describe the opaque table in which a datastore keeps its records.

    The description pairs a `.ddl.TableSpec` for the table with the record
    class used for its rows, which must be a subclass of
    `StoredDatastoreItemInfo`.
    """

    __slots__ = {"table_spec", "record_class"}

    # Specification of the opaque table.
    table_spec: ddl.TableSpec
    # Class of the records stored in the table.
    record_class: type[StoredDatastoreItemInfo]
class IngestPrepData:
    """Base class carrying state between the two phases of an ingest.

    Datastore implementations will usually subclass this to carry their own
    additional information. It should be reached through
    ``Datastore.IngestPrepData`` rather than imported directly.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
    """

    def __init__(self, refs: Iterable[DatasetRef]):
        # Index the references by dataset ID; a later duplicate ID replaces
        # an earlier one.
        by_id = {}
        for dataset_ref in refs:
            by_id[dataset_ref.id] = dataset_ref
        self.refs = by_id
class DatastoreTransaction:
    """Record undoable `Datastore` operations so they can be rolled back.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The enclosing transaction, or `None` for a top-level transaction.

    Notes
    -----
    The design requirement is that this object be thread safe. The
    implementation here uses only plain `list` operations on the event log
    and takes no explicit locks.
    """

    Event: ClassVar[type] = Event

    parent: DatastoreTransaction | None
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: DatastoreTransaction | None = None):
        self._log: list[Event] = []
        self.parent = parent

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Append an undoable event to the transaction log.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : `~collections.abc.Callable`
            Function that reverses the event.
        *args : `tuple`
            Positional arguments to pass to ``undoFunc``.
        **kwargs
            Keyword arguments to pass to ``undoFunc``.
        """
        event = self.Event(name, undoFunc, args, kwargs)
        self._log.append(event)

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register an undo function only if the wrapped block succeeds.

        Intended for wrapping a single undoable statement inside a
        transaction. Statements that can fail independently should each
        get their own `undoWith` block. All arguments are forwarded to
        `registerUndo`.

        Parameters
        ----------
        name : `str`
            The name to associate with this event.
        undoFunc : `~collections.abc.Callable`
            Function that reverses the event.
        *args : `tuple`
            Positional arguments for ``undoFunc``.
        **kwargs : `typing.Any`
            Keyword arguments for ``undoFunc``.
        """
        # An exception raised by the wrapped block propagates out of the
        # ``yield`` and so skips the registration below.
        yield None
        self.registerUndo(name, undoFunc, *args, **kwargs)

    def rollback(self) -> None:
        """Undo every logged event, most recently registered first.

        Errors raised by individual undo functions are logged as warnings
        and otherwise ignored so that the remaining events are still
        processed.
        """
        logger = logging.getLogger(__name__)
        while self._log:
            event = self._log.pop()
            try:
                positional = ",".join(str(arg) for arg in event.args)
                keywords = ",".join(f"{key}={value}" for key, value in event.kwargs.items())
                logger.debug(
                    "Rolling back transaction: %s: %s(%s,%s)",
                    event.name,
                    event.undoFunc,
                    positional,
                    keywords,
                )
            except Exception:
                # Stringifying the arguments failed; fall back to the name.
                logger.warning("Rolling back transaction: %s", event.name)
            try:
                event.undoFunc(*event.args, **event.kwargs)
            except BaseException as exc:
                # Deliberately swallowed: a failing undo must not stop the
                # rest of the rollback.
                logger.warning("Exception: %s caught while unrolling: %s", exc, event.name)

    def commit(self) -> None:
        """Commit this transaction.

        A top-level transaction simply forgets its events, since they have
        already happened. A nested transaction hands its events to the
        parent so the parent can still undo them.
        """
        if self.parent is not None:
            self.parent._log.extend(self._log)
@dataclasses.dataclass
class DatasetRefURIs(abc.Sequence):
    """Primary and component ResourcePath(s) associated with a DatasetRef.

    This is used in places where its members used to be represented as a
    tuple (``primaryURI``, ``componentURIs``). To maintain backward
    compatibility this inherits from Sequence and so instances can be
    treated as a two-item tuple.

    Parameters
    ----------
    primaryURI : `lsst.resources.ResourcePath` or `None`, optional
        The URI to the primary artifact associated with this dataset. If the
        dataset was disassembled within the datastore this may be `None`.
    componentURIs : `dict` [`str`, `~lsst.resources.ResourcePath`] or `None`
        The URIs to any components associated with the dataset artifact
        indexed by component name. This can be empty if there are no
        components.

    Notes
    -----
    No dataclass fields are declared on this class, so the ``__eq__`` that
    `dataclasses.dataclass` would generate compares empty tuples and treats
    every pair of instances as equal. Equality is therefore implemented
    explicitly in terms of the two stored attributes.
    """

    def __init__(
        self,
        primaryURI: ResourcePath | None = None,
        componentURIs: dict[str, ResourcePath] | None = None,
    ):
        self.primaryURI = primaryURI
        # A missing (or empty) mapping is normalized to a fresh empty dict.
        self.componentURIs = componentURIs or {}

    def __eq__(self, other: object) -> bool:
        """Compare the primary URI and component URIs of two instances.

        Returns `NotImplemented` for objects that are not `DatasetRefURIs`
        so that Python can fall back to the other operand's comparison.
        """
        if not isinstance(other, DatasetRefURIs):
            return NotImplemented
        return (self.primaryURI, self.componentURIs) == (other.primaryURI, other.componentURIs)

    def __getitem__(self, index: Any) -> Any:
        """Get primaryURI and componentURIs by index.

        Provides support for tuple-like access: index 0 is the primary URI
        and index 1 is the component mapping. Any other index raises
        `IndexError`.
        """
        if index == 0:
            return self.primaryURI
        elif index == 1:
            return self.componentURIs
        raise IndexError("list index out of range")

    def __len__(self) -> int:
        """Get the number of data members.

        Provides support for tuple-like access.
        """
        return 2

    def __repr__(self) -> str:
        return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})"

    def iter_all(self) -> Iterator[ResourcePath]:
        """Iterate over all URIs without regard to whether they are primary
        or component.

        The primary URI, if not `None`, is yielded first.
        """
        if self.primaryURI is not None:
            yield self.primaryURI
        yield from self.componentURIs.values()
298class Datastore(FileTransferSource, metaclass=ABCMeta):
299 """Datastore interface.
301 Parameters
302 ----------
303 config : `DatastoreConfig` or `str`
304 Load configuration either from an existing config instance or by
305 referring to a configuration file.
306 bridgeManager : `DatastoreRegistryBridgeManager`
307 Object that manages the interface between `Registry` and datastores.
309 See Also
310 --------
311 lsst.daf.butler.Butler
312 """
314 defaultConfigFile: ClassVar[str | None] = None
315 """Path to configuration defaults. Accessed within the ``config`` resource
316 or relative to a search path. Can be None if no defaults specified.
317 """
319 containerKey: ClassVar[str | None] = None
320 """Name of the key containing a list of subconfigurations that also
321 need to be merged with defaults and will likely use different Python
322 datastore classes (but all using DatastoreConfig). Assumed to be a
323 list of configurations that can be represented in a DatastoreConfig
324 and containing a "cls" definition. None indicates that no containers
325 are expected in this Datastore."""
327 isEphemeral: bool = False
328 """Indicate whether this Datastore is ephemeral or not. An ephemeral
329 datastore is one where the contents of the datastore will not exist
330 across process restarts. This value can change per-instance."""
332 config: DatastoreConfig
333 """Configuration used to create Datastore."""
335 name: str
336 """Label associated with this Datastore."""
338 storageClassFactory: StorageClassFactory
339 """Factory for creating storage class instances from name."""
341 constraints: Constraints
342 """Constraints to apply when putting datasets into the datastore."""
344 # MyPy does not like for this to be annotated as any kind of type, because
345 # it can't do static checking on type variables that can change at runtime.
346 IngestPrepData: ClassVar[Any] = IngestPrepData
347 """Helper base class for ingest implementations.
348 """
@classmethod
@abstractmethod
def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
    """Populate filesystem-dependent configuration for a new repository.

    The values set should suit a new, empty repository located at the
    given root.

    Parameters
    ----------
    root : `str`
        Filesystem path to the root of the data repository.
    config : `Config`
        The `Config` to update. Only the part understood by this
        component is changed, and defaults are not expanded.
    full : `Config`
        A complete configuration, with all defaults expanded, that can be
        converted to a `DatastoreConfig`. It is read-only here.
        Repository-specific options that must not come from defaults when
        a Butler is constructed should be copied from ``full`` into
        ``config``.
    overwrite : `bool`, optional
        If `False`, leave any value already present in ``config``
        untouched. The default is to always overwrite using ``root``.

    Notes
    -----
    With ``overwrite`` set to `False`, keys explicitly defined in the
    supplied ``config`` are preserved, which lets values from external
    configurations survive.
    """
    raise NotImplementedError
@staticmethod
def fromConfig(
    config: Config,
    bridgeManager: DatastoreRegistryBridgeManager,
    butlerRoot: ResourcePathExpression | None = None,
) -> Datastore:
    """Instantiate the datastore class named in the configuration.

    Parameters
    ----------
    config : `Config` or `~lsst.resources.ResourcePathExpression`
        Configuration instance.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and
        datastores.
    butlerRoot : `str`, optional
        Butler root directory.

    Returns
    -------
    datastore : `Datastore`
        Instance built by the ``_create_from_config`` class method of the
        class named by the ``cls`` key of the configuration.

    Raises
    ------
    TypeError
        Raised if the imported class is not a `Datastore` subclass.
    """
    datastore_config = DatastoreConfig(config)
    class_name = datastore_config["cls"]
    datastore_type = doImportType(class_name)
    if issubclass(datastore_type, Datastore):
        return datastore_type._create_from_config(
            config=datastore_config, bridgeManager=bridgeManager, butlerRoot=butlerRoot
        )
    raise TypeError(f"Imported child class {class_name} is not a Datastore")
def __init__(
    self,
    config: DatastoreConfig,
    bridgeManager: DatastoreRegistryBridgeManager,
):
    """Set up the state shared by every datastore.

    Parameters
    ----------
    config : `DatastoreConfig`
        Configuration for this datastore.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and
        datastores; its ``universe`` is used to build the constraints.
    """
    # No transaction is active until ``transaction()`` is entered.
    self._transaction: DatastoreTransaction | None = None
    self.name = "ABCDataStore"
    self.config = config

    # Every datastore needs a storage class factory ...
    self.storageClassFactory = StorageClassFactory()

    # ... and the constraints read from the "constraints" config key.
    self.constraints = Constraints(self.config.get("constraints"), universe=bridgeManager.universe)
426 @classmethod
427 @abstractmethod
428 def _create_from_config(
429 cls,
430 config: DatastoreConfig,
431 bridgeManager: DatastoreRegistryBridgeManager,
432 butlerRoot: ResourcePathExpression | None,
433 ) -> Datastore:
434 """`Datastore`.``fromConfig`` calls this to instantiate Datastore
435 subclasses. This is the primary constructor for the individual
436 Datastore subclasses.
437 """
438 raise NotImplementedError()
@abstractmethod
def clone(self, bridgeManager: DatastoreRegistryBridgeManager) -> Datastore:
    """Create an independent copy bound to a different bridge manager.

    Parameters
    ----------
    bridgeManager : `DatastoreRegistryBridgeManager`
        New `DatastoreRegistryBridgeManager` object to use when
        instantiating managers.

    Returns
    -------
    datastore : `Datastore`
        New `Datastore` instance with the same configuration as the
        existing instance.
    """
    raise NotImplementedError
459 def __str__(self) -> str:
460 return self.name
462 def __repr__(self) -> str:
463 return self.name
@property
def names(self) -> tuple[str, ...]:
    """Names associated with this datastore, as a tuple.

    A chaining datastore can report names that differ from ``name``.
    """
    # The default implementation reports only this datastore's own name.
    own_name = self.name
    return (own_name,)
@property
def roots(self) -> dict[str, ResourcePath | None]:
    """Return the root URIs for each named datastore.

    Mapping from datastore name to root URI. The URI can be `None`
    if a datastore has no concept of a root URI.
    (`dict` [`str`, `lsst.resources.ResourcePath` | `None`])
    """
    # The default implementation reports no root for this datastore.
    mapping: dict[str, ResourcePath | None] = {}
    mapping[self.name] = None
    return mapping
@contextlib.contextmanager
def transaction(self) -> Iterator[DatastoreTransaction]:
    """Context manager supporting `Datastore` transactions.

    Transactions can be nested, and are to be used in combination with
    `Registry.transaction`.

    Yields
    ------
    transaction : `DatastoreTransaction`
        The transaction that is active for the duration of the block.

    Notes
    -----
    If the block raises, the new transaction is rolled back and the
    exception re-raised; otherwise the transaction is committed. In both
    cases the previously active transaction (or `None`) is reinstated on
    exit, so that an enclosing transaction rolls back its own events and a
    failed top-level transaction does not remain installed.
    """
    parent = self._transaction
    current = DatastoreTransaction(parent)
    self._transaction = current
    try:
        yield current
    except BaseException:
        current.rollback()
        raise
    else:
        current.commit()
    finally:
        # Always restore the enclosing transaction, including on failure.
        self._transaction = parent
501 def _set_trust_mode(self, mode: bool) -> None:
502 """Set the trust mode for this datastore.
504 Parameters
505 ----------
506 mode : `bool`
507 If `True`, get requests will be attempted even if the datastore
508 does not know about the dataset.
510 Notes
511 -----
512 This is a private method to indicate that trust mode might be a
513 transitory property that we do not want to make fully public. For now
514 only a `~lsst.daf.butler.datastores.FileDatastore` understands this
515 concept. By default this method does nothing.
516 """
517 return
@abstractmethod
def knows(self, ref: DatasetRef) -> bool:
    """Report whether the datastore has a record of the dataset.

    The existence of any artifact is not checked.

    Parameters
    ----------
    ref : `DatasetRef`
        Reference to the required dataset.

    Returns
    -------
    exists : `bool`
        `True` if the dataset is known to the datastore.
    """
    raise NotImplementedError
def knows_these(self, refs: Iterable[DatasetRef]) -> dict[DatasetRef, bool]:
    """Check which of the given datasets are known to this datastore.

    This is like ``mexists()`` but does not check that the file exists.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` [ `DatasetRef` ]
        The datasets to check.

    Returns
    -------
    exists : `dict` [`DatasetRef`, `bool`]
        Mapping of dataset to boolean indicating whether the dataset
        is known to the datastore.
    """
    # Unoptimized default: ask ``knows()`` about each dataset in turn.
    known: dict[DatasetRef, bool] = {}
    for dataset_ref in refs:
        known[dataset_ref] = self.knows(dataset_ref)
    return known
def mexists(
    self, refs: Iterable[DatasetRef], artifact_existence: dict[ResourcePath, bool] | None = None
) -> dict[DatasetRef, bool]:
    """Check the existence of multiple datasets at once.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` of `DatasetRef`
        The datasets to be checked.
    artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
        Optional mapping of datastore artifact to existence, intended to
        be updated with details of all artifacts tested. Can be `None` if
        the caller is not interested. This default implementation does
        not read or update it.

    Returns
    -------
    existence : `dict` of [`DatasetRef`, `bool`]
        Mapping from dataset to boolean indicating existence.
    """
    # Unoptimized default: delegate to ``exists()`` one dataset at a time.
    return {dataset_ref: self.exists(dataset_ref) for dataset_ref in refs}
@abstractmethod
def exists(self, datasetRef: DatasetRef) -> bool:
    """Report whether the dataset exists in the datastore.

    Parameters
    ----------
    datasetRef : `DatasetRef`
        Reference to the required dataset.

    Returns
    -------
    exists : `bool`
        `True` if the entity exists in the `Datastore`.
    """
    message = "Must be implemented by subclass"
    raise NotImplementedError(message)
@abstractmethod
def get(
    self,
    datasetRef: DatasetRef,
    parameters: Mapping[str, Any] | None = None,
    storageClass: StorageClass | str | None = None,
) -> Any:
    """Read a dataset from the store into memory.

    Parameters
    ----------
    datasetRef : `DatasetRef`
        Reference to the required Dataset.
    parameters : `dict`
        `StorageClass`-specific parameters that specify a slice of the
        Dataset to be loaded.
    storageClass : `StorageClass` or `str`, optional
        Storage class used to override the Python type returned by this
        method. By default the returned type matches the dataset type
        definition for this dataset; supplying a read `StorageClass` can
        force a different, compatible type to be returned.

    Returns
    -------
    inMemoryDataset : `object`
        Requested Dataset or slice thereof as an in-memory dataset.
    """
    message = "Must be implemented by subclass"
    raise NotImplementedError(message)
def prepare_get_for_external_client(self, ref: DatasetRef) -> list[DatasetLocationInformation] | None:
    """Return the information an external client needs to run ``get()``.

    Parameters
    ----------
    ref : `DatasetRef`
        Reference to the required dataset.

    Returns
    -------
    payload : `list` [ `DatasetLocationInformation` ] | `None`
        Information needed to perform a get() operation. Returns `None` if
        the dataset is not known to this datastore.

    Raises
    ------
    NotImplementedError
        Always raised by this default implementation.
    """
    raise NotImplementedError
@abstractmethod
def put(
    self, inMemoryDataset: Any, datasetRef: DatasetRef, provenance: DatasetProvenance | None = None
) -> None:
    """Store an in-memory dataset under the given `DatasetRef`.

    Parameters
    ----------
    inMemoryDataset : `object`
        The Dataset to store.
    datasetRef : `DatasetRef`
        Reference to the associated Dataset.
    provenance : `DatasetProvenance` or `None`, optional
        Any provenance that should be attached to the serialized dataset.
        Not supported by all serialization mechanisms.
    """
    message = "Must be implemented by subclass"
    raise NotImplementedError(message)
@abstractmethod
def put_new(self, in_memory_dataset: Any, ref: DatasetRef) -> Mapping[str, DatasetRef]:
    """Store an in-memory dataset and report where it was stored.

    Parameters
    ----------
    in_memory_dataset : `object`
        The Dataset to store.
    ref : `DatasetRef`
        Reference to the associated Dataset.

    Returns
    -------
    datastore_refs : `~collections.abc.Mapping` [`str`, `DatasetRef`]
        Mapping of a datastore name to dataset reference stored in that
        datastore, reference will include datastore records. Only
        non-ephemeral datastores will appear in this mapping.
    """
    message = "Must be implemented by subclass"
    raise NotImplementedError(message)
681 def _overrideTransferMode(self, *datasets: FileDataset, transfer: str | None = None) -> str | None:
682 """Allow ingest transfer mode to be defaulted based on datasets.
684 Parameters
685 ----------
686 *datasets : `FileDataset`
687 Each positional argument is a struct containing information about
688 a file to be ingested, including its path (either absolute or
689 relative to the datastore root, if applicable), a complete
690 `DatasetRef` (with ``dataset_id not None``), and optionally a
691 formatter class or its fully-qualified string name. If a formatter
692 is not provided, this method should populate that attribute with
693 the formatter the datastore would use for `put`. Subclasses are
694 also permitted to modify the path attribute (typically to put it
695 in what the datastore considers its standard form).
696 transfer : `str`, optional
697 How (and whether) the dataset should be added to the datastore.
698 See `ingest` for details of transfer modes.
700 Returns
701 -------
702 newTransfer : `str`
703 Transfer mode to use. Will be identical to the supplied transfer
704 mode unless "auto" is used.
705 """
706 if transfer != "auto":
707 return transfer
708 raise RuntimeError(f"{transfer} is not allowed without specialization.")
710 def _prepIngest(self, *datasets: FileDataset, transfer: str | None = None) -> IngestPrepData:
711 """Process datasets to identify which ones can be ingested.
713 Parameters
714 ----------
715 *datasets : `FileDataset`
716 Each positional argument is a struct containing information about
717 a file to be ingested, including its path (either absolute or
718 relative to the datastore root, if applicable), a complete
719 `DatasetRef` (with ``dataset_id not None``), and optionally a
720 formatter class or its fully-qualified string name. If a formatter
721 is not provided, this method should populate that attribute with
722 the formatter the datastore would use for `put`. Subclasses are
723 also permitted to modify the path attribute (typically to put it
724 in what the datastore considers its standard form).
725 transfer : `str`, optional
726 How (and whether) the dataset should be added to the datastore.
727 See `ingest` for details of transfer modes.
729 Returns
730 -------
731 data : `IngestPrepData`
732 An instance of a subclass of `IngestPrepData`, used to pass
733 arbitrary data from `_prepIngest` to `_finishIngest`. This should
734 include only the datasets this datastore can actually ingest;
735 others should be silently ignored (`Datastore.ingest` will inspect
736 `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if
737 necessary).
739 Raises
740 ------
741 NotImplementedError
742 Raised if the datastore does not support the given transfer mode
743 (including the case where ingest is not supported at all).
744 FileNotFoundError
745 Raised if one of the given files does not exist.
746 FileExistsError
747 Raised if transfer is not `None` but the (internal) location the
748 file would be moved to is already occupied.
750 Notes
751 -----
752 This method (along with `_finishIngest`) should be implemented by
753 subclasses to provide ingest support instead of implementing `ingest`
754 directly.
756 `_prepIngest` should not modify the data repository or given files in
757 any way; all changes should be deferred to `_finishIngest`.
759 When possible, exceptions should be raised in `_prepIngest` instead of
760 `_finishIngest`. `NotImplementedError` exceptions that indicate that
761 the transfer mode is not supported must be raised by `_prepIngest`
762 instead of `_finishIngest`.
763 """
764 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
766 def _finishIngest(
767 self, prepData: IngestPrepData, *, transfer: str | None = None, record_validation_info: bool = True
768 ) -> None:
769 """Complete an ingest operation.
771 Parameters
772 ----------
773 prepData : `IngestPrepData`
774 An instance of a subclass of `IngestPrepData`. Guaranteed to be
775 the direct result of a call to `_prepIngest` on this datastore.
776 transfer : `str`, optional
777 How (and whether) the dataset should be added to the datastore.
778 See `ingest` for details of transfer modes.
779 record_validation_info : `bool`, optional
780 If `True`, the default, the datastore can record validation
781 information associated with the file. If `False` the datastore
782 will not attempt to track any information such as checksums
783 or file sizes. This can be useful if such information is tracked
784 in an external system or if the file is to be compressed in place.
785 It is up to the datastore whether this parameter is relevant.
787 Raises
788 ------
789 FileNotFoundError
790 Raised if one of the given files does not exist.
791 FileExistsError
792 Raised if transfer is not `None` but the (internal) location the
793 file would be moved to is already occupied.
795 Notes
796 -----
797 This method (along with `_prepIngest`) should be implemented by
798 subclasses to provide ingest support instead of implementing `ingest`
799 directly.
800 """
801 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.")
def ingest(
    self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True
) -> None:
    """Ingest one or more files into the datastore.

    Parameters
    ----------
    *datasets : `FileDataset`
        Each positional argument is a struct containing information about
        a file to be ingested, including its path (either absolute or
        relative to the datastore root, if applicable), a complete
        `DatasetRef` (with ``dataset_id not None``), and optionally a
        formatter class or its fully-qualified string name. If a formatter
        is not provided, the one the datastore would use for ``put`` on
        that dataset is assumed.
    transfer : `str`, optional
        How (and whether) the dataset should be added to the datastore.
        If `None` (default), the file must already be in a location
        appropriate for the datastore (e.g. within its root directory),
        and will not be modified. Other choices include "move", "copy",
        "link", "symlink", "relsymlink", and "hardlink". "link" is a
        special transfer mode that will first try to make a hardlink and
        if that fails a symlink will be used instead. "relsymlink" creates
        a relative symlink rather than use an absolute path.
        Most datastores do not support all transfer modes.
        "auto" is a special option that will let the
        data store choose the most natural option for itself.
    record_validation_info : `bool`, optional
        If `True`, the default, the datastore can record validation
        information associated with the file. If `False` the datastore
        will not attempt to track any information such as checksums
        or file sizes. This can be useful if such information is tracked
        in an external system or if the file is to be compressed in place.
        It is up to the datastore whether this parameter is relevant.

    Returns
    -------
    None

    Raises
    ------
    NotImplementedError
        Raised if the datastore does not support the given transfer mode
        (including the case where ingest is not supported at all).
    DatasetTypeNotSupportedError
        Raised if one or more files to be ingested have a dataset type that
        is not supported by the datastore.
    FileNotFoundError
        Raised if one of the given files does not exist.
    FileExistsError
        Raised if transfer is not `None` but the (internal) location the
        file would be moved to is already occupied.

    Notes
    -----
    Subclasses should implement `_prepIngest` and `_finishIngest` instead
    of implementing `ingest` directly. Datastores that hold and
    delegate to child datastores may want to call those methods as well.

    Subclasses are encouraged to document their supported transfer modes
    in their class documentation.
    """
    # Give the datastore the chance to pick a default transfer mode.
    transfer = self._overrideTransferMode(*datasets, transfer=transfer)
    prepData = self._prepIngest(*datasets, transfer=transfer)

    # Index every requested ref by dataset ID for comparison with what the
    # preparation step accepted.
    requested = {}
    for dataset in datasets:
        for dataset_ref in dataset.refs:
            requested[dataset_ref.id] = dataset_ref

    if requested.keys() == prepData.refs.keys():
        self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info)
        return

    # Group the rejected refs by dataset type so that the error message is
    # informative but still concise.
    rejected_by_type = defaultdict(list)
    for dataset_id in requested.keys() - prepData.refs.keys():
        rejected = requested[dataset_id]
        rejected_by_type[rejected.datasetType].append(rejected)
    summary = ", ".join(
        f"{dataset_type.name} ({len(members)} dataset(s))"
        for dataset_type, members in rejected_by_type.items()
    )
    raise DatasetTypeNotSupportedError("DatasetType(s) not supported in ingest: " + summary)
def transfer_from(
    self,
    source_records: FileTransferMap,
    refs: Collection[DatasetRef],
    transfer: str = "auto",
    artifact_existence: dict[ResourcePath, bool] | None = None,
    dry_run: bool = False,
) -> tuple[set[DatasetRef], set[DatasetRef]]:
    """Transfer dataset artifacts from another datastore to this one.

    Parameters
    ----------
    source_records : `FileTransferMap`
        The artifacts to be transferred into this datastore.
    refs : `~collections.abc.Collection` of `DatasetRef`
        The datasets to transfer from the source datastore.
    transfer : `str`, optional
        How (and whether) the dataset should be added to the datastore.
        Choices include "move", "copy",
        "link", "symlink", "relsymlink", and "hardlink". "link" is a
        special transfer mode that will first try to make a hardlink and
        if that fails a symlink will be used instead. "relsymlink" creates
        a relative symlink rather than use an absolute path.
        Most datastores do not support all transfer modes.
        "auto" (the default) is a special option that will let the
        data store choose the most natural option for itself.
        If the source location and transfer location are identical the
        transfer mode will be ignored.
    artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
        Optional mapping of datastore artifact to existence. Updated by
        this method with details of all artifacts tested. Can be `None`
        if the caller is not interested.
    dry_run : `bool`, optional
        Process the supplied source refs without updating the target
        datastore.

    Returns
    -------
    accepted : `set` [`DatasetRef`]
        The datasets that were transferred.
    rejected : `set` [`DatasetRef`]
        The datasets that were rejected due to a constraints violation.

    Raises
    ------
    NotImplementedError
        Always raised by this default implementation.
    TypeError
        Raised if the two datastores are not compatible.
    """
    message = f"Datastore {type(self)} does not implement a transfer_from method."
    raise NotImplementedError(message)
def getManyURIs(
    self,
    refs: Iterable[DatasetRef],
    predict: bool = False,
    allow_missing: bool = False,
) -> dict[DatasetRef, DatasetRefURIs]:
    """Return URIs associated with many datasets.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` of `DatasetRef`
        References to the required datasets.
    predict : `bool`, optional
        If `True`, allow URIs to be returned for datasets that have not
        been written. Forwarded unchanged to `getURIs`.
    allow_missing : `bool`, optional
        If `False`, raise when `getURIs` reports any of the refs as
        missing. If `True`, missing refs are silently left out of the
        result.

    Returns
    -------
    URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
        Primary and component URIs, indexed by the passed-in refs that
        could be resolved.

    Raises
    ------
    FileNotFoundError
        Raised if a URI was requested for at least one dataset that does
        not exist and ``allow_missing`` is `False`.

    Notes
    -----
    In file-based datastores, getManyURIs does not check that the file is
    really there; it assumes that if the datastore is aware of the file
    then it actually exists.
    """
    uris: dict[DatasetRef, DatasetRefURIs] = {}
    num_missing = 0
    for ref in refs:
        try:
            uris[ref] = self.getURIs(ref, predict=predict)
        except FileNotFoundError:
            # Keep going so that the final report covers every ref.
            num_missing += 1
    if num_missing and not allow_missing:
        # Report the predict flag actually in effect rather than assuming
        # it was False; the text is unchanged for the default call.
        raise FileNotFoundError(
            f"Missing {num_missing} refs from datastore out of "
            f"{num_missing + len(uris)} and predict={predict}."
        )
    return uris
@abstractmethod
def getURIs(
    self,
    datasetRef: DatasetRef,
    predict: bool = False,
) -> DatasetRefURIs:
    """Look up the primary and component URIs of a single dataset.

    Parameters
    ----------
    datasetRef : `DatasetRef`
        The dataset whose URIs are wanted.
    predict : `bool`, optional
        Controls whether a predicted URI should be returned when the
        datastore has no knowledge of the dataset.

    Returns
    -------
    uris : `DatasetRefURIs`
        The URI of the primary artifact for this dataset (this may be
        `None` if the datastore disassembled the dataset), plus the URIs
        of any component artifacts (can be empty when there are no
        components).
    """
    raise NotImplementedError()
@abstractmethod
def getURI(
    self,
    datasetRef: DatasetRef,
    predict: bool = False,
) -> ResourcePath:
    """Return the URI of a dataset.

    Parameters
    ----------
    datasetRef : `DatasetRef`
        The dataset whose URI is wanted.
    predict : `bool`, optional
        If `True`, try to predict the URI of a dataset that does not
        exist in the datastore.

    Returns
    -------
    uri : `lsst.resources.ResourcePath`
        URI pointing to the dataset within the datastore. When the
        dataset is not in the datastore the URI may be a guess. For a
        datastore whose entities do not map well onto URIs the returned
        value is descriptive only. The URI is not guaranteed to be
        obtainable.

    Raises
    ------
    FileNotFoundError
        Raised if the dataset does not exist and guessing is not allowed.
    """
    raise NotImplementedError("Must be implemented by subclass")
@abstractmethod
def retrieveArtifacts(
    self,
    refs: Iterable[DatasetRef],
    destination: ResourcePath,
    transfer: str = "auto",
    preserve_path: bool = True,
    overwrite: bool = False,
    write_index: bool = True,
    add_prefix: bool = False,
) -> dict[ResourcePath, ArtifactIndexInfo]:
    """Copy the artifacts behind the given refs to a destination.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` of `DatasetRef`
        Datasets whose artifacts are wanted. One ref may correspond to
        several artifacts. The refs must be resolved.
    destination : `lsst.resources.ResourcePath`
        Where the artifacts are written.
    transfer : `str`, optional
        Transfer method for the artifacts. Must be one of the modes
        accepted by `lsst.resources.ResourcePath.transfer_from`. "move"
        is not allowed.
    preserve_path : `bool`, optional
        If `True`, keep the artifact's full path within the datastore.
        If `False`, use only the final file component of that path.
    overwrite : `bool`, optional
        If `True`, existing files at the destination may be overwritten.
    write_index : `bool`, optional
        If `True`, write a top-level file holding a serialized `ZipIndex`
        for the downloaded datasets.
    add_prefix : `bool`, optional
        If `True` and ``preserve_path`` is `False`, prefix each file name
        with some part of the dataset ref ID. This can be used to
        guarantee uniqueness.

    Returns
    -------
    artifact_map : `dict` [ `lsst.resources.ResourcePath`, `ArtifactIndexInfo` ]
        Mapping from each retrieved file to its index information.

    Notes
    -----
    For datastores that are not file-based, what is written to the
    destination may differ from the internal representation. For example,
    a hierarchical data structure held in a NoSQL database may well be
    stored as a JSON file.
    """
    raise NotImplementedError()
@abstractmethod
def ingest_zip(
    self,
    zip_path: ResourcePath,
    transfer: str | None,
    *,
    dry_run: bool = False,
) -> None:
    """Ingest an indexed Zip file together with its contents.

    The Zip file must contain an index file of the kind written by
    `retrieveArtifacts`.

    Parameters
    ----------
    zip_path : `lsst.resources.ResourcePath`
        Location of the Zip file.
    transfer : `str` or `None`
        Transfer method used to bring the Zip file into the datastore.
    dry_run : `bool`, optional
        If `True`, process the ingest without modifying the target
        datastore, and as though the target datastore held none of the
        datasets.
    """
    raise NotImplementedError()
@abstractmethod
def remove(self, datasetRef: DatasetRef) -> None:
    """Tell the datastore that a dataset can be removed.

    Parameters
    ----------
    datasetRef : `DatasetRef`
        The dataset to remove.

    Raises
    ------
    FileNotFoundError
        Raised when the dataset does not exist.

    Notes
    -----
    A datastore may implement this method as a silent no-op in order to
    disable dataset deletion through the standard interfaces.
    """
    raise NotImplementedError("Must be implemented by subclass")
@abstractmethod
def forget(self, refs: Iterable[DatasetRef]) -> None:
    """Drop all records of the given datasets without deleting them.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` [ `DatasetRef` ]
        The datasets to be forgotten.

    Notes
    -----
    Being asked to forget a `DatasetRef` that the datastore does not hold
    should be a silent no-op rather than an error.
    """
    raise NotImplementedError("Must be implemented by subclass")
@abstractmethod
def trash(
    self,
    ref: DatasetRef | Iterable[DatasetRef],
    ignore_errors: bool = True,
) -> None:
    """Tell the datastore that a dataset can be moved to the trash.

    Parameters
    ----------
    ref : `DatasetRef` or iterable thereof
        The dataset(s) to trash.
    ignore_errors : `bool`, optional
        Whether errors should be ignored. No per-ref check is made when
        several refs are trashed at once.

    Raises
    ------
    FileNotFoundError
        Raised when the dataset does not exist and errors are not being
        ignored. This is only checked when a single ref is supplied (and
        not inside a list).

    Notes
    -----
    A datastore may implement this method as a silent no-op in order to
    disable dataset deletion through the standard interfaces.
    """
    raise NotImplementedError("Must be implemented by subclass")
@abstractmethod
def emptyTrash(
    self,
    ignore_errors: bool = True,
    refs: Collection[DatasetRef] | None = None,
    dry_run: bool = False,
) -> set[ResourcePath]:
    """Remove all datasets from the trash.

    Parameters
    ----------
    ignore_errors : `bool`, optional
        Whether errors should be ignored.
    refs : `collections.abc.Collection` [ `DatasetRef` ] or `None`
        Explicit list of datasets that may be removed from the trash.
        Listed datasets that are not already in the trash table are
        ignored. If `None`, every entry in the trash table is processed.
    dry_run : `bool`, optional
        If `True`, query the trash table and report the results without
        removing any artifacts.

    Returns
    -------
    removed : `set` [ `lsst.resources.ResourcePath` ]
        Artifacts that were removed. May be empty when artifacts cannot
        be represented by URIs.

    Notes
    -----
    A datastore may implement this method as a silent no-op in order to
    disable dataset deletion through the standard interfaces.
    """
    raise NotImplementedError("Must be implemented by subclass")
@abstractmethod
def transfer(
    self,
    inputDatastore: Datastore,
    datasetRef: DatasetRef,
) -> None:
    """Copy one dataset from another datastore into this datastore.

    Parameters
    ----------
    inputDatastore : `Datastore`
        The external `Datastore` that the dataset is retrieved from.
    datasetRef : `DatasetRef`
        The dataset to transfer.
    """
    raise NotImplementedError("Must be implemented by subclass")
def export(
    self,
    refs: Iterable[DatasetRef],
    *,
    directory: ResourcePathExpression | None = None,
    transfer: str | None = "auto",
) -> Iterable[FileDataset]:
    """Export datasets so they can be moved to another data repository.

    The base implementation supports no transfer mode and always raises
    `NotImplementedError`.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` of `DatasetRef`
        The datasets to export.
    directory : `~lsst.resources.ResourcePathExpression`, optional
        Directory that should hold the files for the exported datasets.
        Ignored when ``transfer`` is explicitly `None`.
    transfer : `str`, optional
        Mode used to move datasets out of the repository. The valid
        options match those of the ``transfer`` argument to ``ingest``,
        and, as there, a datastore signals an unsupported mode by raising
        `NotImplementedError`. "auto" with no ``directory`` implies
        `None`.

    Returns
    -------
    dataset : `~collections.abc.Iterable` of `FileDataset`
        Structs describing the exported datasets, in the same order as
        ``refs``.

    Raises
    ------
    NotImplementedError
        Raised if the given transfer mode is not supported.
    """
    message = f"Transfer mode {transfer} not supported."
    raise NotImplementedError(message)
@abstractmethod
def validateConfiguration(
    self,
    entities: Iterable[DatasetRef | DatasetType | StorageClass],
    logFailures: bool = False,
) -> None:
    """Check part of this datastore's configuration.

    Parameters
    ----------
    entities : `~collections.abc.Iterable` [`DatasetRef` | `DatasetType` | `StorageClass`]
        Entities to test against the configuration. The types may be
        mixed.
    logFailures : `bool`, optional
        If `True`, emit a log message for each validation error found.

    Raises
    ------
    DatastoreValidationError
        Raised if a configuration fails validation.

    Notes
    -----
    Each Datastore implementation decides for itself which parts of the
    configuration get validated.
    """
    raise NotImplementedError("Must be implemented by subclass")
@abstractmethod
def validateKey(
    self,
    lookupKey: LookupKey,
    entity: DatasetRef | DatasetType | StorageClass,
) -> None:
    """Check one specific lookup key against the supplied entity.

    Parameters
    ----------
    lookupKey : `LookupKey`
        Key used to fetch information from the datastore configuration.
    entity : `DatasetRef`, `DatasetType`, or `StorageClass`
        Entity compared with the configuration that the lookup key
        retrieves.

    Raises
    ------
    DatastoreValidationError
        Raised if the combination of entity and lookup key is
        problematic.

    Notes
    -----
    This sidesteps the usual selection priorities, so a key that would
    not normally be selected can still be validated.
    """
    raise NotImplementedError("Must be implemented by subclass")
@abstractmethod
def getLookupKeys(self) -> set[LookupKey]:
    """Return every lookup key that is relevant to this datastore.

    Returns
    -------
    keys : `set` of `LookupKey`
        Keys held internally for finding information by `DatasetType`
        name or by `StorageClass`.
    """
    raise NotImplementedError("Must be implemented by subclass")
def needs_expanded_data_ids(
    self,
    transfer: str | None,
    entity: DatasetRef | DatasetType | StorageClass | None = None,
) -> bool:
    """Report whether ingest into this datastore needs expanded data IDs.

    The base implementation ignores its arguments and answers `True`.

    Parameters
    ----------
    transfer : `str` or `None`
        Transfer mode that the ingest will use.
    entity : `DatasetRef` or `DatasetType` or `StorageClass` or `None`, optional
        Object describing what is going to be ingested. When it is
        omitted (or not specific enough), `True` may be returned even
        though expanded data IDs are not actually required.

    Returns
    -------
    needed : `bool`
        `True` if expanded data IDs may be needed. `False` only when
        expansion is definitely unnecessary.
    """
    # Conservative default: claim expansion may be needed.
    return True
@abstractmethod
def import_records(self, data: Mapping[str, DatastoreRecordData]) -> None:
    """Load datastore location and record data from an in-memory
    structure.

    Parameters
    ----------
    data : `~collections.abc.Mapping` [ `str`, `DatastoreRecordData` ]
        Datastore records keyed by datastore name. Entries for other
        `Datastore` instances may be present (typically because they are
        chained to this one) and should be ignored.

    Notes
    -----
    For performance reasons, implementations should generally not verify
    that external resources (e.g. files) named by these records really
    exist; higher-level code is expected to guarantee that they do.

    Implementations are responsible for calling
    `DatastoreRegistryBridge.insert` on all datasets in ``data.locations``
    where the key is in `names`, and for loading any opaque table data.

    Implementations may assume that a dataset is either entirely present
    or entirely absent (single-component exports are not permitted).
    """
    raise NotImplementedError()
@abstractmethod
def export_records(self, refs: Iterable[DatasetIdRef]) -> Mapping[str, DatastoreRecordData]:
    """Dump datastore records and locations into an in-memory structure.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` [ `DatasetIdRef` ]
        Datasets to save. Datasets unknown to this datastore may be
        included and should be ignored. Component datasets may not be
        included.

    Returns
    -------
    data : `~collections.abc.Mapping` [ `str`, `DatastoreRecordData` ]
        The exported datastore records, keyed by datastore name.
    """
    raise NotImplementedError()
def export_table(self, datasets: Collection[DatasetId]) -> DatastoreRecordTable:
    """Export datastore records as an arrow table.

    The base implementation ignores ``datasets`` and hands back an empty
    table.

    Parameters
    ----------
    datasets : `~collections.abc.Collection` [ `DatasetId` ]
        Dataset UUIDs whose records should be exported.

    Returns
    -------
    table : `DatastoreRecordTable`
        Table of datastore records.
    """
    empty_table = DatastoreRecordTable.create_empty()
    return empty_table
def import_table(self, table: DatastoreRecordTable) -> None:
    """Import datastore records from an arrow table.

    The base implementation does nothing with the table.

    Parameters
    ----------
    table : `DatastoreRecordTable`
        Table holding the datastore records to import.

    Raises
    ------
    ValueError
        Raised if the table has entries for a datastore that is not
        known to this Butler.
    """
    return None
def export_predicted_records(
    self,
    refs: Iterable[DatasetRef],
) -> dict[str, DatastoreRecordData]:
    """Dump predicted datastore records and locations into an in-memory
    structure.

    The base implementation always raises `NotImplementedError`.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` [ `DatasetRef` ]
        Datasets for which to produce the records that would be used if
        they existed in this datastore. No attempt is made to find out
        whether the datasets actually exist.

    Returns
    -------
    data : `dict` [ `str`, `DatastoreRecordData` ]
        The exported datastore records, keyed by datastore name.
    """
    raise NotImplementedError()
def set_retrieve_dataset_type_method(
    self,
    method: Callable[[str], DatasetType | None] | None,
) -> None:
    """Register a callable the datastore can use to fetch a
    registry-defined dataset type.

    The base implementation ignores ``method``.

    Parameters
    ----------
    method : `~collections.abc.Callable` | `None`
        Callable that accepts a dataset type name and returns the
        matching `DatasetType` as defined in Registry, or `None` when the
        registry does not know that name.

    Notes
    -----
    Only a Datastore with a "trusted" mode needs this: in that mode it
    has no access to datastore records and must guess a dataset's
    location from its stored dataset type.
    """
    return None
@abstractmethod
def get_opaque_table_definitions(self) -> Mapping[str, DatastoreOpaqueTable]:
    """Build the definitions of the opaque tables this Datastore uses.

    Returns
    -------
    tables : `~collections.abc.Mapping` [ `str`, `DatastoreOpaqueTable` ]
        Mapping from opaque table name to its definition. The mapping can
        be empty when the Datastore does not keep its records in opaque
        tables.
    """
    raise NotImplementedError()
def get_file_info_for_transfer(
    self,
    dataset_ids: Iterable[DatasetId],
) -> FileTransferMap:
    """Describe the files of the given datasets for use in a transfer.

    The base implementation always raises `NotImplementedError`.

    Parameters
    ----------
    dataset_ids : `~collections.abc.Iterable` [ `DatasetId` ]
        IDs of the datasets of interest.

    Returns
    -------
    transfer_map : `FileTransferMap`
        NOTE(review): presumably the same structure that `transfer_from`
        accepts as ``source_records`` (the types match) -- confirm against
        an implementing datastore.

    Raises
    ------
    NotImplementedError
        Raised by this base implementation, which does not support
        transferring files.
    """
    message = f"Transferring files is not supported by datastore {self}"
    raise NotImplementedError(message)
def locate_missing_files_for_transfer(
    self,
    refs: Iterable[DatasetRef],
    artifact_existence: dict[ResourcePath, bool],
) -> FileTransferMap:
    """Find transferable files for datasets lacking datastore records.

    The base implementation ignores both arguments and reports nothing
    by returning an empty mapping.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` [ `DatasetRef` ]
        Datasets to look for.
    artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
        Mapping of artifact to existence. NOTE(review): presumably a
        cache that implementations consult and update, as described for
        `transfer_from` -- confirm against an implementing datastore.

    Returns
    -------
    transfer_map : `FileTransferMap`
        Always empty in this base implementation.
    """
    nothing_found: FileTransferMap = {}
    return nothing_found
class NullDatastore(Datastore):
    """A `Datastore` stand-in that is not backed by any real storage.

    ``knows`` and ``exists`` always answer `False`, the configuration
    validators accept everything, and no opaque tables are declared.
    Every other operation fails: ``get``, ``getURI`` and ``getURIs`` raise
    `FileNotFoundError`, and the remaining methods raise
    `NotImplementedError`.

    Parameters
    ----------
    config : `Config` or `~lsst.resources.ResourcePathExpression` or `None`
        Ignored.
    bridgeManager : `DatastoreRegistryBridgeManager` or `None`
        Ignored.
    butlerRoot : `~lsst.resources.ResourcePathExpression` or `None`
        Ignored.
    """

    # Single copy of the message used by every operation that would need
    # real storage behind it.
    _NO_OP_MESSAGE = "This is a no-op datastore that can not access a real datastore"

    @classmethod
    def _create_from_config(
        cls,
        config: Config,
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: ResourcePathExpression | None = None,
    ) -> NullDatastore:
        """Construct an instance; all arguments are passed to ``__init__``."""
        # Instantiate via ``cls`` so that a subclass gets an instance of
        # itself rather than a plain NullDatastore.
        return cls(config, bridgeManager, butlerRoot)

    def clone(self, bridgeManager: DatastoreRegistryBridgeManager) -> Datastore:
        """Return this same instance; ``bridgeManager`` is ignored."""
        return self

    @classmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        """Do nothing; this is not a real Datastore."""
        pass

    def __init__(
        self,
        config: Config | ResourcePathExpression | None,
        bridgeManager: DatastoreRegistryBridgeManager | None,
        butlerRoot: ResourcePathExpression | None = None,
    ):
        # Name ourselves with the timestamp the datastore
        # was created.
        self.name = f"{type(self).__name__}@{time.time()}"
        _LOG.debug("Creating datastore %s", self.name)
        self._transaction: DatastoreTransaction | None = None

    def knows(self, ref: DatasetRef) -> bool:
        """Return `False`; this datastore holds no datasets."""
        return False

    def exists(self, datasetRef: DatasetRef) -> bool:
        """Return `False`; this datastore holds no datasets."""
        return False

    def get(
        self,
        datasetRef: DatasetRef,
        parameters: Mapping[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        """Always raise `FileNotFoundError`."""
        raise FileNotFoundError(self._NO_OP_MESSAGE)

    def put(
        self, inMemoryDataset: Any, datasetRef: DatasetRef, provenance: DatasetProvenance | None = None
    ) -> None:
        """Always raise `NotImplementedError`."""
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def put_new(self, in_memory_dataset: Any, ref: DatasetRef) -> Mapping[str, DatasetRef]:
        """Always raise `NotImplementedError`."""
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def ingest(
        self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True
    ) -> None:
        """Always raise `NotImplementedError`."""
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def transfer_from(
        self,
        source_records: FileTransferMap,
        refs: Iterable[DatasetRef],
        transfer: str = "auto",
        artifact_existence: dict[ResourcePath, bool] | None = None,
        dry_run: bool = False,
    ) -> tuple[set[DatasetRef], set[DatasetRef]]:
        """Always raise `NotImplementedError`."""
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
        """Always raise `FileNotFoundError`, even if ``predict`` is set."""
        raise FileNotFoundError(self._NO_OP_MESSAGE)

    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
        """Always raise `FileNotFoundError`, even if ``predict`` is set."""
        raise FileNotFoundError(self._NO_OP_MESSAGE)

    def ingest_zip(self, zip_path: ResourcePath, transfer: str | None, *, dry_run: bool = False) -> None:
        """Always raise `NotImplementedError`."""
        raise NotImplementedError("Can only ingest a Zip into a real datastore.")

    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePath,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
        write_index: bool = True,
        add_prefix: bool = False,
    ) -> dict[ResourcePath, ArtifactIndexInfo]:
        """Always raise `NotImplementedError`."""
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def remove(self, datasetRef: DatasetRef) -> None:
        """Always raise `NotImplementedError`."""
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def forget(self, refs: Iterable[DatasetRef]) -> None:
        """Always raise `NotImplementedError`."""
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None:
        """Always raise `NotImplementedError`."""
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def emptyTrash(
        self, ignore_errors: bool = True, refs: Collection[DatasetRef] | None = None, dry_run: bool = False
    ) -> set[ResourcePath]:
        """Always raise `NotImplementedError`."""
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        """Always raise `NotImplementedError`."""
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def export(
        self,
        refs: Iterable[DatasetRef],
        *,
        directory: ResourcePathExpression | None = None,
        transfer: str | None = "auto",
    ) -> Iterable[FileDataset]:
        """Always raise `NotImplementedError`."""
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def validateConfiguration(
        self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False
    ) -> None:
        """Accept any entities: there is no configuration to validate."""
        # No configuration so always validates.
        pass

    def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None:
        """Accept any key/entity combination without checking it."""
        pass

    def getLookupKeys(self) -> set[LookupKey]:
        """Always raise `NotImplementedError`."""
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def import_records(
        self,
        data: Mapping[str, DatastoreRecordData],
    ) -> None:
        """Always raise `NotImplementedError`."""
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def export_records(
        self,
        refs: Iterable[DatasetIdRef],
    ) -> Mapping[str, DatastoreRecordData]:
        """Always raise `NotImplementedError`."""
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def get_opaque_table_definitions(self) -> Mapping[str, DatastoreOpaqueTable]:
        """Return an empty mapping; no opaque tables are used."""
        return {}