Coverage for python/lsst/daf/butler/datastore/_datastore.py: 57%

290 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-05-29 08:15 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28"""Support for generic data stores.""" 

29 

30from __future__ import annotations 

31 

32from .record_data import DatastoreRecordTable 

33 

34__all__ = ( 

35 "DatasetRefURIs", 

36 "Datastore", 

37 "DatastoreConfig", 

38 "DatastoreOpaqueTable", 

39 "DatastoreTransaction", 

40 "DatastoreValidationError", 

41 "NullDatastore", 

42) 

43 

44import contextlib 

45import dataclasses 

46import logging 

47import time 

48from abc import ABCMeta, abstractmethod 

49from collections import abc, defaultdict 

50from collections.abc import Callable, Collection, Iterable, Iterator, Mapping 

51from typing import TYPE_CHECKING, Any, ClassVar 

52 

53from lsst.utils import doImportType 

54 

55from .._config import Config, ConfigSubset 

56from .._exceptions import DatasetTypeNotSupportedError, ValidationError 

57from .._file_dataset import FileDataset 

58from .._storage_class import StorageClassFactory 

59from ._transfer import FileTransferMap, FileTransferSource 

60from .constraints import Constraints 

61 

62if TYPE_CHECKING: 

63 from lsst.resources import ResourcePath, ResourcePathExpression 

64 

65 from .. import ddl 

66 from .._config_support import LookupKey 

67 from .._dataset_provenance import DatasetProvenance 

68 from .._dataset_ref import DatasetId, DatasetRef 

69 from .._dataset_type import DatasetType 

70 from .._storage_class import StorageClass 

71 from ..datastores.file_datastore.get import DatasetLocationInformation 

72 from ..datastores.file_datastore.retrieve_artifacts import ArtifactIndexInfo 

73 from ..registry.interfaces import DatasetIdRef, DatastoreRegistryBridgeManager 

74 from .record_data import DatastoreRecordData 

75 from .stored_file_info import StoredDatastoreItemInfo 

76 

77_LOG = logging.getLogger(__name__) 

78 

79 

class DatastoreConfig(ConfigSubset):
    """Configuration subset used to describe a `Datastore`.

    Only class-level settings consumed by `ConfigSubset` are defined here.
    """

    # Entry that must be present; ``Datastore.fromConfig`` reads ``cls`` to
    # find the datastore class to import.
    requiredKeys = ("cls",)

    # Presumably the key under which datastore settings live in a parent
    # configuration -- confirm against ``ConfigSubset``.
    component = "datastore"

    # Name of the file holding default values.
    defaultConfigFile = "datastore.yaml"

86 

87 

class DatastoreValidationError(ValidationError):
    """Exception indicating a problem with the Datastore configuration."""

92 

93 

@dataclasses.dataclass(frozen=True)
class Event:
    """Immutable record of a single action that can be rolled back.

    Parameters
    ----------
    name : `str`
        Label identifying the event.
    undoFunc : `~collections.abc.Callable`
        Function that reverses the event.
    args : `tuple`
        Positional arguments to hand to ``undoFunc``.
    kwargs : `dict`
        Keyword arguments to hand to ``undoFunc``.
    """

    # Explicit slots: instances do not carry a per-instance ``__dict__``.
    __slots__ = {"args", "kwargs", "name", "undoFunc"}

    # Field order defines the positional order of the generated constructor.
    name: str
    undoFunc: Callable
    args: tuple
    kwargs: dict

103 

104 

@dataclasses.dataclass(frozen=True)
class DatastoreOpaqueTable:
    """Describe the opaque table in which a datastore keeps its records.

    Parameters
    ----------
    table_spec : `.ddl.TableSpec`
        Specification of the table.
    record_class : `type` [ `StoredDatastoreItemInfo` ]
        Class of the records held in the table; it must be a subclass of
        `StoredDatastoreItemInfo`.
    """

    # Explicit slots: instances do not carry a per-instance ``__dict__``.
    __slots__ = {"record_class", "table_spec"}

    # Field order defines the positional order of the generated constructor.
    table_spec: ddl.TableSpec
    record_class: type[StoredDatastoreItemInfo]

116 

117 

class IngestPrepData:
    """Base class for data passed between the steps of a `Datastore` ingest.

    Datastore implementations will generally need their own subclass.

    Access this class as ``Datastore.IngestPrepData`` rather than importing
    it directly.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` of `DatasetRef`
        References for the datasets that can be ingested by this datastore.
    """

    def __init__(self, refs: Iterable[DatasetRef]):
        # Index the references by dataset ID; a later reference with the
        # same ID replaces an earlier one.
        indexed = {}
        for ref in refs:
            indexed[ref.id] = ref
        self.refs = indexed

135 

136 

class DatastoreTransaction:
    """Record `Datastore` activity so that it can be rolled back.

    Parameters
    ----------
    parent : `DatastoreTransaction`, optional
        The parent transaction (if any).

    Notes
    -----
    This transaction object must be thread safe.
    """

    Event: ClassVar[type] = Event

    parent: DatastoreTransaction | None
    """The parent transaction. (`DatastoreTransaction`, optional)"""

    def __init__(self, parent: DatastoreTransaction | None = None):
        # Events are appended in order and undone from the end.
        self._log: list[Event] = []
        self.parent = parent

    def registerUndo(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> None:
        """Record an event together with the function that undoes it.

        Parameters
        ----------
        name : `str`
            Name of the event.
        undoFunc : `~collections.abc.Callable`
            Function to undo this event.
        *args : `tuple`
            Positional arguments to ``undoFunc``.
        **kwargs
            Keyword arguments to ``undoFunc``.
        """
        event = self.Event(name, undoFunc, args, kwargs)
        self._log.append(event)

    @contextlib.contextmanager
    def undoWith(self, name: str, undoFunc: Callable, *args: Any, **kwargs: Any) -> Iterator[None]:
        """Register an undo function only if the wrapped operation succeeds.

        Use this to wrap a single undo-able statement inside a
        DatastoreTransaction block. Statements that can fail independently
        should each get their own `undoWith` block.

        All arguments are forwarded directly to `registerUndo`.

        Parameters
        ----------
        name : `str`
            The name to associate with this event.
        undoFunc : `~collections.abc.Callable`
            Function to undo this event.
        *args : `tuple`
            Positional arguments for ``undoFunc``.
        **kwargs : `typing.Any`
            Keyword arguments for ``undoFunc``.
        """
        # An exception raised by the wrapped code propagates out of the
        # ``yield``, so the registration below is skipped on failure.
        yield None
        self.registerUndo(name, undoFunc, *args, **kwargs)

    def rollback(self) -> None:
        """Undo every recorded event, most recent first."""
        logger = logging.getLogger(__name__)
        while self._log:
            event = self._log.pop()
            try:
                positional = ",".join(str(a) for a in event.args)
                keywords = ",".join(f"{k}={v}" for k, v in event.kwargs.items())
                logger.debug(
                    "Rolling back transaction: %s: %s(%s,%s)",
                    event.name,
                    event.undoFunc,
                    positional,
                    keywords,
                )
            except Exception:
                # Converting the arguments to strings failed; report the
                # event name only.
                logger.warning("Rolling back transaction: %s", event.name)
            try:
                event.undoFunc(*event.args, **event.kwargs)
            except BaseException as e:
                # Errors raised while undoing are deliberately swallowed so
                # that the remaining events are still processed.
                logger.warning("Exception: %s caught while unrolling: %s", e, event.name)

    def commit(self) -> None:
        """Commit this transaction."""
        # Without a parent there is nothing to do: the events have already
        # happened and are simply forgotten. With a parent, the events are
        # handed over so the parent can still roll them back.
        if self.parent is not None:
            self.parent._log.extend(self._log)

237 

238 

@dataclasses.dataclass
class DatasetRefURIs(abc.Sequence):
    """Represents the primary and component ResourcePath(s) associated with a
    DatasetRef.

    This is used in places where its members used to be represented as a tuple
    (``primaryURI``, ``componentURIs``). To maintain backward compatibility
    this inherits from Sequence and so instances can be treated as a two-item
    tuple.

    Parameters
    ----------
    primaryURI : `lsst.resources.ResourcePath` or `None`, optional
        The URI to the primary artifact associated with this dataset. If the
        dataset was disassembled within the datastore this may be `None`.
    componentURIs : `dict` [`str`, `~lsst.resources.ResourcePath`] or `None`
        The URIs to any components associated with the dataset artifact
        indexed by component name. This can be empty if there are no
        components.

    Notes
    -----
    Two instances compare equal when both ``primaryURI`` and
    ``componentURIs`` are equal.
    """

    # Declared as dataclass fields so that the generated ``__eq__`` compares
    # them. With no declared fields the generated ``__eq__`` compares two
    # empty field tuples and every pair of instances is reported as equal.
    # The hand-written ``__init__`` and ``__repr__`` below are kept because
    # ``dataclass`` does not replace methods defined in the class body.
    primaryURI: ResourcePath | None
    componentURIs: dict[str, ResourcePath]

    def __init__(
        self,
        primaryURI: ResourcePath | None = None,
        componentURIs: dict[str, ResourcePath] | None = None,
    ):
        self.primaryURI = primaryURI
        # Normalize a missing (or empty) mapping to a fresh empty dict.
        self.componentURIs = componentURIs or {}

    def __getitem__(self, index: Any) -> Any:
        """Get primaryURI and componentURIs by index.

        Provides support for tuple-like access.

        Raises
        ------
        IndexError
            Raised if ``index`` is neither 0 nor 1.
        """
        if index == 0:
            return self.primaryURI
        elif index == 1:
            return self.componentURIs
        raise IndexError("list index out of range")

    def __len__(self) -> int:
        """Get the number of data members.

        Provides support for tuple-like access.
        """
        return 2

    def __repr__(self) -> str:
        return f"DatasetRefURIs({repr(self.primaryURI)}, {repr(self.componentURIs)})"

    def iter_all(self) -> Iterator[ResourcePath]:
        """Iterate over all URIs without regard to whether they are primary
        or component.

        Yields
        ------
        uri : `lsst.resources.ResourcePath`
            The primary URI first (if it is not `None`), then each component
            URI.
        """
        if self.primaryURI is not None:
            yield self.primaryURI
        yield from self.componentURIs.values()

296 

297 

298class Datastore(FileTransferSource, metaclass=ABCMeta): 

299 """Datastore interface. 

300 

301 Parameters 

302 ---------- 

303 config : `DatastoreConfig` or `str` 

304 Load configuration either from an existing config instance or by 

305 referring to a configuration file. 

306 bridgeManager : `DatastoreRegistryBridgeManager` 

307 Object that manages the interface between `Registry` and datastores. 

308 

309 See Also 

310 -------- 

311 lsst.daf.butler.Butler 

312 """ 

313 

314 defaultConfigFile: ClassVar[str | None] = None 

315 """Path to configuration defaults. Accessed within the ``config`` resource 

316 or relative to a search path. Can be None if no defaults specified. 

317 """ 

318 

319 containerKey: ClassVar[str | None] = None 

320 """Name of the key containing a list of subconfigurations that also 

321 need to be merged with defaults and will likely use different Python 

322 datastore classes (but all using DatastoreConfig). Assumed to be a 

323 list of configurations that can be represented in a DatastoreConfig 

324 and containing a "cls" definition. None indicates that no containers 

325 are expected in this Datastore.""" 

326 

327 isEphemeral: bool = False 

328 """Indicate whether this Datastore is ephemeral or not. An ephemeral 

329 datastore is one where the contents of the datastore will not exist 

330 across process restarts. This value can change per-instance.""" 

331 

332 config: DatastoreConfig 

333 """Configuration used to create Datastore.""" 

334 

335 name: str 

336 """Label associated with this Datastore.""" 

337 

338 storageClassFactory: StorageClassFactory 

339 """Factory for creating storage class instances from name.""" 

340 

341 constraints: Constraints 

342 """Constraints to apply when putting datasets into the datastore.""" 

343 

344 # MyPy does not like for this to be annotated as any kind of type, because 

345 # it can't do static checking on type variables that can change at runtime. 

346 IngestPrepData: ClassVar[Any] = IngestPrepData 

347 """Helper base class for ingest implementations. 

348 """ 

349 

@classmethod
@abstractmethod
def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
    """Set filesystem-dependent config options for this datastore.

    The chosen options should suit a new, empty repository located at the
    given root.

    Parameters
    ----------
    root : `str`
        Filesystem path to the root of the data repository.
    config : `Config`
        The `Config` to update. Only the subset understood by this
        component is updated, and defaults are not expanded.
    full : `Config`
        A complete config, with all defaults expanded, that can be
        converted to a `DatastoreConfig`. It is read-only and is not
        modified by this method. Repository-specific options that should
        not come from defaults when Butler instances are constructed
        should be copied from ``full`` to ``config``.
    overwrite : `bool`, optional
        If `False`, a value already present in ``config`` is left alone.
        The default is to always overwrite with the provided ``root``.

    Notes
    -----
    A keyword explicitly defined in the supplied ``config`` is not
    overridden by this method when ``overwrite`` is `False`, which lets
    explicit values set in external configs be retained.

    This base implementation is abstract and always raises
    `NotImplementedError`.
    """
    raise NotImplementedError()

385 

@staticmethod
def fromConfig(
    config: Config,
    bridgeManager: DatastoreRegistryBridgeManager,
    butlerRoot: ResourcePathExpression | None = None,
) -> Datastore:
    """Create a datastore of the type named in the configuration.

    Parameters
    ----------
    config : `Config` or `~lsst.resources.ResourcePathExpression`
        Configuration instance.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Object that manages the interface between `Registry` and
        datastores.
    butlerRoot : `str`, optional
        Butler root directory.

    Returns
    -------
    datastore : `Datastore`
        Whatever the imported class's ``_create_from_config`` returns.

    Raises
    ------
    TypeError
        Raised if the class named by the ``cls`` entry is not a subclass
        of `Datastore`.
    """
    config = DatastoreConfig(config)
    datastore_class = doImportType(config["cls"])
    if issubclass(datastore_class, Datastore):
        return datastore_class._create_from_config(
            config=config, bridgeManager=bridgeManager, butlerRoot=butlerRoot
        )
    raise TypeError(f"Imported child class {config['cls']} is not a Datastore")

409 

def __init__(
    self,
    config: DatastoreConfig,
    bridgeManager: DatastoreRegistryBridgeManager,
):
    """Store the configuration and set up state shared by all datastores.

    Parameters
    ----------
    config : `DatastoreConfig`
        Configuration for this datastore.
    bridgeManager : `DatastoreRegistryBridgeManager`
        Only its ``universe`` attribute is read here; it is passed on to
        `Constraints`.
    """
    # No transaction is active until ``transaction()`` is entered.
    self._transaction: DatastoreTransaction | None = None
    self.name = "ABCDataStore"
    self.config = config

    # Every datastore needs storage classes ...
    self.storageClassFactory = StorageClassFactory()

    # ... and the constraints read from the "constraints" config entry.
    self.constraints = Constraints(self.config.get("constraints"), universe=bridgeManager.universe)

425 

426 @classmethod 

427 @abstractmethod 

428 def _create_from_config( 

429 cls, 

430 config: DatastoreConfig, 

431 bridgeManager: DatastoreRegistryBridgeManager, 

432 butlerRoot: ResourcePathExpression | None, 

433 ) -> Datastore: 

434 """`Datastore`.``fromConfig`` calls this to instantiate Datastore 

435 subclasses. This is the primary constructor for the individual 

436 Datastore subclasses. 

437 """ 

438 raise NotImplementedError() 

439 

@abstractmethod
def clone(self, bridgeManager: DatastoreRegistryBridgeManager) -> Datastore:
    """Make an independent copy of this Datastore that uses a different
    `DatastoreRegistryBridgeManager` instance.

    Parameters
    ----------
    bridgeManager : `DatastoreRegistryBridgeManager`
        New `DatastoreRegistryBridgeManager` object to use when
        instantiating managers.

    Returns
    -------
    datastore : `Datastore`
        New `Datastore` instance with the same configuration as the
        existing instance.

    Notes
    -----
    The base implementation is abstract and always raises
    `NotImplementedError`.
    """
    raise NotImplementedError()

458 

459 def __str__(self) -> str: 

460 return self.name 

461 

462 def __repr__(self) -> str: 

463 return self.name 

464 

@property
def names(self) -> tuple[str, ...]:
    """Names associated with this datastore, returned as a tuple.

    Can be different to ``name`` for a chaining datastore.
    """
    # By default the only name is the datastore's own.
    own_name = self.name
    return (own_name,)

473 

@property
def roots(self) -> dict[str, ResourcePath | None]:
    """Return the root URIs for each named datastore.

    Mapping from datastore name to root URI. The URI can be `None`
    if a datastore has no concept of a root URI.
    (`dict` [`str`, `lsst.resources.ResourcePath` | `None`])
    """
    # The default implementation reports no root for this datastore.
    root_by_name: dict[str, ResourcePath | None] = {}
    root_by_name[self.name] = None
    return root_by_name

483 

@contextlib.contextmanager
def transaction(self) -> Iterator[DatastoreTransaction]:
    """Context manager supporting `Datastore` transactions.

    Transactions can be nested, and are to be used in combination with
    `Registry.transaction`.

    Yields
    ------
    transaction : `DatastoreTransaction`
        The newly created transaction, whose parent is the transaction
        that was current on entry (or `None`).

    Notes
    -----
    If the block raises, the new transaction is rolled back and the
    exception is re-raised; otherwise the transaction is committed. In
    both cases the previously current transaction is made current again
    on exit.
    """
    # Keep a local reference so that rollback/commit always act on the
    # transaction opened by this call, whatever ``self._transaction``
    # refers to by the time the block exits.
    txn = DatastoreTransaction(self._transaction)
    self._transaction = txn
    try:
        yield txn
    except BaseException:
        txn.rollback()
        raise
    else:
        txn.commit()
    finally:
        # Restore the parent on failure as well as on success. Without
        # this a failed nested transaction would remain current and an
        # enclosing ``transaction()`` block would roll back the (already
        # empty) child instead of its own events.
        self._transaction = txn.parent

500 

501 def _set_trust_mode(self, mode: bool) -> None: 

502 """Set the trust mode for this datastore. 

503 

504 Parameters 

505 ---------- 

506 mode : `bool` 

507 If `True`, get requests will be attempted even if the datastore 

508 does not know about the dataset. 

509 

510 Notes 

511 ----- 

512 This is a private method to indicate that trust mode might be a 

513 transitory property that we do not want to make fully public. For now 

514 only a `~lsst.daf.butler.datastores.FileDatastore` understands this 

515 concept. By default this method does nothing. 

516 """ 

517 return 

518 

@abstractmethod
def knows(self, ref: DatasetRef) -> bool:
    """Check if the dataset is known to the datastore.

    No check is made that any artifact exists.

    Parameters
    ----------
    ref : `DatasetRef`
        Reference to the required dataset.

    Returns
    -------
    exists : `bool`
        `True` if the dataset is known to the datastore.

    Notes
    -----
    The base implementation is abstract and always raises
    `NotImplementedError`.
    """
    raise NotImplementedError()

536 

def knows_these(self, refs: Iterable[DatasetRef]) -> dict[DatasetRef, bool]:
    """Check which of the given datasets are known to this datastore.

    This is like ``mexists()`` but does not check that the file exists.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` [ `DatasetRef` ]
        The datasets to check.

    Returns
    -------
    exists : `dict` [`DatasetRef`, `bool`]
        Mapping of dataset to boolean indicating whether the dataset
        is known to the datastore.
    """
    # Unoptimized default: ask ``knows()`` about each dataset in turn.
    known: dict[DatasetRef, bool] = {}
    for ref in refs:
        known[ref] = self.knows(ref)
    return known

555 

def mexists(
    self, refs: Iterable[DatasetRef], artifact_existence: dict[ResourcePath, bool] | None = None
) -> dict[DatasetRef, bool]:
    """Check the existence of multiple datasets at once.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` of `DatasetRef`
        The datasets to be checked.
    artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
        Optional mapping of datastore artifact to existence. Intended to
        be updated with details of all artifacts tested, and can be `None`
        if the caller is not interested. This default implementation
        neither reads nor updates it.

    Returns
    -------
    existence : `dict` of [`DatasetRef`, `bool`]
        Mapping from dataset to boolean indicating existence.
    """
    # Unoptimized default: ask ``exists()`` about each dataset in turn.
    return {ref: self.exists(ref) for ref in refs}

580 

@abstractmethod
def exists(self, datasetRef: DatasetRef) -> bool:
    """Check if the dataset exists in the datastore.

    Parameters
    ----------
    datasetRef : `DatasetRef`
        Reference to the required dataset.

    Returns
    -------
    exists : `bool`
        `True` if the entity exists in the `Datastore`.

    Notes
    -----
    The base implementation is abstract and always raises
    `NotImplementedError`.
    """
    raise NotImplementedError("Must be implemented by subclass")

596 

@abstractmethod
def get(
    self,
    datasetRef: DatasetRef,
    parameters: Mapping[str, Any] | None = None,
    storageClass: StorageClass | str | None = None,
) -> Any:
    """Load an in-memory dataset from the store.

    Parameters
    ----------
    datasetRef : `DatasetRef`
        Reference to the required Dataset.
    parameters : `dict`
        `StorageClass`-specific parameters that specify a slice of the
        Dataset to be loaded.
    storageClass : `StorageClass` or `str`, optional
        The storage class to be used to override the Python type
        returned by this method. By default the returned type matches
        the dataset type definition for this dataset. Specifying a
        read `StorageClass` can force a different type to be returned.
        This type must be compatible with the original type.

    Returns
    -------
    inMemoryDataset : `object`
        Requested Dataset or slice thereof as an in-memory dataset.

    Notes
    -----
    The base implementation is abstract and always raises
    `NotImplementedError`.
    """
    raise NotImplementedError("Must be implemented by subclass")

626 

def prepare_get_for_external_client(self, ref: DatasetRef) -> list[DatasetLocationInformation] | None:
    """Retrieve data that can be used to execute a ``get()``.

    Parameters
    ----------
    ref : `DatasetRef`
        Reference to the required dataset.

    Returns
    -------
    payload : `list` [ `DatasetLocationInformation` ] | `None`
        Information needed to perform a get() operation. Returns `None` if
        the dataset is not known to this datastore.

    Notes
    -----
    This default implementation always raises `NotImplementedError`.
    """
    raise NotImplementedError()

642 

@abstractmethod
def put(
    self, inMemoryDataset: Any, datasetRef: DatasetRef, provenance: DatasetProvenance | None = None
) -> None:
    """Write an in-memory dataset with a given `DatasetRef` to the store.

    Parameters
    ----------
    inMemoryDataset : `object`
        The Dataset to store.
    datasetRef : `DatasetRef`
        Reference to the associated Dataset.
    provenance : `DatasetProvenance` or `None`, optional
        Any provenance that should be attached to the serialized dataset.
        Not supported by all serialization mechanisms.

    Notes
    -----
    The base implementation is abstract and always raises
    `NotImplementedError`.
    """
    raise NotImplementedError("Must be implemented by subclass")

660 

@abstractmethod
def put_new(self, in_memory_dataset: Any, ref: DatasetRef) -> Mapping[str, DatasetRef]:
    """Write an in-memory dataset with a given `DatasetRef` to the store.

    Parameters
    ----------
    in_memory_dataset : `object`
        The Dataset to store.
    ref : `DatasetRef`
        Reference to the associated Dataset.

    Returns
    -------
    datastore_refs : `~collections.abc.Mapping` [`str`, `DatasetRef`]
        Mapping of a datastore name to dataset reference stored in that
        datastore, reference will include datastore records. Only
        non-ephemeral datastores will appear in this mapping.

    Notes
    -----
    The base implementation is abstract and always raises
    `NotImplementedError`.
    """
    raise NotImplementedError("Must be implemented by subclass")

680 

681 def _overrideTransferMode(self, *datasets: FileDataset, transfer: str | None = None) -> str | None: 

682 """Allow ingest transfer mode to be defaulted based on datasets. 

683 

684 Parameters 

685 ---------- 

686 *datasets : `FileDataset` 

687 Each positional argument is a struct containing information about 

688 a file to be ingested, including its path (either absolute or 

689 relative to the datastore root, if applicable), a complete 

690 `DatasetRef` (with ``dataset_id not None``), and optionally a 

691 formatter class or its fully-qualified string name. If a formatter 

692 is not provided, this method should populate that attribute with 

693 the formatter the datastore would use for `put`. Subclasses are 

694 also permitted to modify the path attribute (typically to put it 

695 in what the datastore considers its standard form). 

696 transfer : `str`, optional 

697 How (and whether) the dataset should be added to the datastore. 

698 See `ingest` for details of transfer modes. 

699 

700 Returns 

701 ------- 

702 newTransfer : `str` 

703 Transfer mode to use. Will be identical to the supplied transfer 

704 mode unless "auto" is used. 

705 """ 

706 if transfer != "auto": 

707 return transfer 

708 raise RuntimeError(f"{transfer} is not allowed without specialization.") 

709 

710 def _prepIngest(self, *datasets: FileDataset, transfer: str | None = None) -> IngestPrepData: 

711 """Process datasets to identify which ones can be ingested. 

712 

713 Parameters 

714 ---------- 

715 *datasets : `FileDataset` 

716 Each positional argument is a struct containing information about 

717 a file to be ingested, including its path (either absolute or 

718 relative to the datastore root, if applicable), a complete 

719 `DatasetRef` (with ``dataset_id not None``), and optionally a 

720 formatter class or its fully-qualified string name. If a formatter 

721 is not provided, this method should populate that attribute with 

722 the formatter the datastore would use for `put`. Subclasses are 

723 also permitted to modify the path attribute (typically to put it 

724 in what the datastore considers its standard form). 

725 transfer : `str`, optional 

726 How (and whether) the dataset should be added to the datastore. 

727 See `ingest` for details of transfer modes. 

728 

729 Returns 

730 ------- 

731 data : `IngestPrepData` 

732 An instance of a subclass of `IngestPrepData`, used to pass 

733 arbitrary data from `_prepIngest` to `_finishIngest`. This should 

734 include only the datasets this datastore can actually ingest; 

735 others should be silently ignored (`Datastore.ingest` will inspect 

736 `IngestPrepData.refs` and raise `DatasetTypeNotSupportedError` if 

737 necessary). 

738 

739 Raises 

740 ------ 

741 NotImplementedError 

742 Raised if the datastore does not support the given transfer mode 

743 (including the case where ingest is not supported at all). 

744 FileNotFoundError 

745 Raised if one of the given files does not exist. 

746 FileExistsError 

747 Raised if transfer is not `None` but the (internal) location the 

748 file would be moved to is already occupied. 

749 

750 Notes 

751 ----- 

752 This method (along with `_finishIngest`) should be implemented by 

753 subclasses to provide ingest support instead of implementing `ingest` 

754 directly. 

755 

756 `_prepIngest` should not modify the data repository or given files in 

757 any way; all changes should be deferred to `_finishIngest`. 

758 

759 When possible, exceptions should be raised in `_prepIngest` instead of 

760 `_finishIngest`. `NotImplementedError` exceptions that indicate that 

761 the transfer mode is not supported must be raised by `_prepIngest` 

762 instead of `_finishIngest`. 

763 """ 

764 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.") 

765 

766 def _finishIngest( 

767 self, prepData: IngestPrepData, *, transfer: str | None = None, record_validation_info: bool = True 

768 ) -> None: 

769 """Complete an ingest operation. 

770 

771 Parameters 

772 ---------- 

773 prepData : `IngestPrepData` 

774 An instance of a subclass of `IngestPrepData`. Guaranteed to be 

775 the direct result of a call to `_prepIngest` on this datastore. 

776 transfer : `str`, optional 

777 How (and whether) the dataset should be added to the datastore. 

778 See `ingest` for details of transfer modes. 

779 record_validation_info : `bool`, optional 

780 If `True`, the default, the datastore can record validation 

781 information associated with the file. If `False` the datastore 

782 will not attempt to track any information such as checksums 

783 or file sizes. This can be useful if such information is tracked 

784 in an external system or if the file is to be compressed in place. 

785 It is up to the datastore whether this parameter is relevant. 

786 

787 Raises 

788 ------ 

789 FileNotFoundError 

790 Raised if one of the given files does not exist. 

791 FileExistsError 

792 Raised if transfer is not `None` but the (internal) location the 

793 file would be moved to is already occupied. 

794 

795 Notes 

796 ----- 

797 This method (along with `_prepIngest`) should be implemented by 

798 subclasses to provide ingest support instead of implementing `ingest` 

799 directly. 

800 """ 

801 raise NotImplementedError(f"Datastore {self} does not support direct file-based ingest.") 

802 

def ingest(
    self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True
) -> None:
    """Ingest one or more files into the datastore.

    Parameters
    ----------
    *datasets : `FileDataset`
        Each positional argument is a struct containing information about
        a file to be ingested, including its path (either absolute or
        relative to the datastore root, if applicable), a complete
        `DatasetRef` (with ``dataset_id not None``), and optionally a
        formatter class or its fully-qualified string name. If a formatter
        is not provided, the one the datastore would use for ``put`` on
        that dataset is assumed.
    transfer : `str`, optional
        How (and whether) the dataset should be added to the datastore.
        If `None` (default), the file must already be in a location
        appropriate for the datastore (e.g. within its root directory),
        and will not be modified. Other choices include "move", "copy",
        "link", "symlink", "relsymlink", and "hardlink". "link" is a
        special transfer mode that will first try to make a hardlink and
        if that fails a symlink will be used instead. "relsymlink" creates
        a relative symlink rather than use an absolute path.
        Most datastores do not support all transfer modes.
        "auto" is a special option that will let the
        data store choose the most natural option for itself.
    record_validation_info : `bool`, optional
        If `True`, the default, the datastore can record validation
        information associated with the file. If `False` the datastore
        will not attempt to track any information such as checksums
        or file sizes. This can be useful if such information is tracked
        in an external system or if the file is to be compressed in place.
        It is up to the datastore whether this parameter is relevant.

    Returns
    -------
    None

    Raises
    ------
    NotImplementedError
        Raised if the datastore does not support the given transfer mode
        (including the case where ingest is not supported at all).
    DatasetTypeNotSupportedError
        Raised if one or more files to be ingested have a dataset type that
        is not supported by the datastore.
    FileNotFoundError
        Raised if one of the given files does not exist.
    FileExistsError
        Raised if transfer is not `None` but the (internal) location the
        file would be moved to is already occupied.

    Notes
    -----
    Subclasses should implement `_prepIngest` and `_finishIngest` instead
    of implementing `ingest` directly. Datastores that hold and
    delegate to child datastores may want to call those methods as well.

    Subclasses are encouraged to document their supported transfer modes
    in their class documentation.
    """
    # Give the datastore a chance to choose a default transfer mode.
    transfer = self._overrideTransferMode(*datasets, transfer=transfer)
    prepData = self._prepIngest(*datasets, transfer=transfer)

    # Index every requested ref by dataset ID.
    requested = {}
    for dataset in datasets:
        for ref in dataset.refs:
            requested[ref.id] = ref

    accepted_ids = prepData.refs.keys()
    if requested.keys() != accepted_ids:
        # Count the rejected refs per DatasetType so the error message is
        # informative but still concise.
        rejected_counts = defaultdict(int)
        for dataset_id in requested.keys() - accepted_ids:
            rejected_counts[requested[dataset_id].datasetType] += 1
        summary = ", ".join(
            f"{dataset_type.name} ({count} dataset(s))" for dataset_type, count in rejected_counts.items()
        )
        raise DatasetTypeNotSupportedError("DatasetType(s) not supported in ingest: " + summary)

    self._finishIngest(prepData, transfer=transfer, record_validation_info=record_validation_info)

882 

def transfer_from(
    self,
    source_records: FileTransferMap,
    refs: Collection[DatasetRef],
    transfer: str = "auto",
    artifact_existence: dict[ResourcePath, bool] | None = None,
    dry_run: bool = False,
) -> tuple[set[DatasetRef], set[DatasetRef]]:
    """Bring dataset artifacts from a different datastore into this one.

    Parameters
    ----------
    source_records : `FileTransferMap`
        Description of the artifacts that are to be transferred into
        this datastore.
    refs : `~collections.abc.Collection` of `DatasetRef`
        The datasets to be transferred from the source datastore.
    transfer : `str`, optional
        How (and whether) each dataset should be added to the datastore.
        Possible values include "move", "copy", "link", "symlink",
        "relsymlink", and "hardlink". The special mode "link" tries a
        hardlink first and falls back to a symlink if that fails.
        "relsymlink" makes a relative symlink instead of using an
        absolute path. Most datastores support only some of these modes.
        The default, "auto", lets the datastore pick whichever mode is
        most natural for it. The transfer mode is ignored when the
        source location and the transfer location are identical.
    artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
        Optional mapping of datastore artifact to existence. This method
        updates it with details of all artifacts tested. Can be `None`
        if the caller is not interested.
    dry_run : `bool`, optional
        Process the supplied source refs without updating the target
        datastore.

    Returns
    -------
    accepted : `set` [`DatasetRef`]
        The datasets that were transferred.
    rejected : `set` [`DatasetRef`]
        The datasets that were rejected due to a constraints violation.

    Raises
    ------
    TypeError
        Raised if the two datastores are not compatible.
    NotImplementedError
        Raised by this default implementation, which performs no
        transfer.
    """
    # The default implementation does no work; it only reports which
    # datastore type lacks transfer support.
    datastore_type = type(self)
    raise NotImplementedError(f"Datastore {datastore_type} does not implement a transfer_from method.")

932 

def getManyURIs(
    self,
    refs: Iterable[DatasetRef],
    predict: bool = False,
    allow_missing: bool = False,
) -> dict[DatasetRef, DatasetRefURIs]:
    """Return URIs associated with many datasets.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` of `DatasetRef`
        References to the required datasets.
    predict : `bool`, optional
        If `True`, allow URIs to be returned of datasets that have not
        been written.
    allow_missing : `bool`, optional
        If `False` (the default), raise if the URIs of any of the
        datasets could not be obtained. If `True`, such datasets are
        silently left out of the returned mapping.

    Returns
    -------
    URIs : `dict` of [`DatasetRef`, `DatasetRefURIs`]
        A dict of primary and component URIs, indexed by the passed-in
        refs.

    Raises
    ------
    FileNotFoundError
        Raised if a URI has been requested for a dataset that does not
        exist, guessing is not allowed, and ``allow_missing`` is `False`.

    Notes
    -----
    In file-based datastores, getManyURIs does not check that the file is
    really there; it assumes that if the datastore is aware of the file
    then the file actually exists.
    """
    uris: dict[DatasetRef, DatasetRefURIs] = {}
    # Only the number of failures is reported, so there is no need to
    # keep hold of the refs themselves.
    num_missing = 0
    for ref in refs:
        try:
            uris[ref] = self.getURIs(ref, predict=predict)
        except FileNotFoundError:
            num_missing += 1
    if num_missing and not allow_missing:
        # Report the predict value that was really used rather than
        # assuming that it was False.
        raise FileNotFoundError(
            f"Missing {num_missing} refs from datastore out of "
            f"{num_missing + len(uris)} and predict={predict}."
        )
    return uris

984 

@abstractmethod
def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
    """Return the URIs of all artifacts associated with a dataset.

    Parameters
    ----------
    datasetRef : `DatasetRef`
        The dataset whose URIs are required.
    predict : `bool`, optional
        Controls whether a predicted URI should be returned when the
        datastore does not know about the dataset.

    Returns
    -------
    uris : `DatasetRefURIs`
        The URI of the primary artifact associated with this dataset
        (this may be `None` if the dataset was disassembled within the
        datastore), together with the URIs of any components associated
        with the dataset artifact (can be empty if there are no
        components).
    """
    # Abstract: concrete datastores must provide the implementation.
    raise NotImplementedError

1006 

@abstractmethod
def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
    """Return the URI of a dataset.

    Parameters
    ----------
    datasetRef : `DatasetRef`
        The dataset whose URI is required.
    predict : `bool`, optional
        If `True`, attempt to predict the URI of a dataset that does not
        exist in the datastore.

    Returns
    -------
    uri : `lsst.resources.ResourcePath`
        URI pointing to the dataset within the datastore. If the dataset
        does not exist in the datastore, the URI may be a guess. If the
        datastore has no entities that relate well to the concept of a
        URI, the returned URI will be descriptive. The returned URI is
        not guaranteed to be obtainable.

    Raises
    ------
    FileNotFoundError
        Raised if a URI has been requested for a dataset that does not
        exist and guessing is not allowed.
    """
    # Abstract: concrete datastores must provide the implementation.
    reason = "Must be implemented by subclass"
    raise NotImplementedError(reason)

1035 

@abstractmethod
def retrieveArtifacts(
    self,
    refs: Iterable[DatasetRef],
    destination: ResourcePath,
    transfer: str = "auto",
    preserve_path: bool = True,
    overwrite: bool = False,
    write_index: bool = True,
    add_prefix: bool = False,
) -> dict[ResourcePath, ArtifactIndexInfo]:
    """Copy the artifacts belonging to the given refs to a destination.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` of `DatasetRef`
        The datasets whose artifacts are to be retrieved. The refs must
        be resolved. One ref can correspond to several artifacts.
    destination : `lsst.resources.ResourcePath`
        Location to write the artifacts.
    transfer : `str`, optional
        Method to use to transfer the artifacts. Must be one of the
        options supported by
        `lsst.resources.ResourcePath.transfer_from`. "move" is not
        allowed.
    preserve_path : `bool`, optional
        If `True` the full path of the artifact within the datastore is
        preserved. If `False` the final file component of the path is
        used.
    overwrite : `bool`, optional
        If `True` allow transfers to overwrite existing files at the
        destination.
    write_index : `bool`, optional
        If `True` write a file at the top level containing a
        serialization of a `ZipIndex` for the downloaded datasets.
    add_prefix : `bool`, optional
        If `True` and if ``preserve_path`` is `False`, apply a prefix to
        the filenames corresponding to some part of the dataset ref ID.
        This can be used to guarantee uniqueness.

    Returns
    -------
    artifact_map : `dict` [ `lsst.resources.ResourcePath`, \
            `ArtifactIndexInfo` ]
        Mapping of retrieved file to associated index information.

    Notes
    -----
    For non-file datastores the artifacts written to the destination
    may not match the representation inside the datastore. For example
    a hierarchical data structure in a NoSQL database may well be stored
    as a JSON file.
    """
    # Abstract: concrete datastores must provide the implementation.
    raise NotImplementedError

1090 

@abstractmethod
def ingest_zip(self, zip_path: ResourcePath, transfer: str | None, *, dry_run: bool = False) -> None:
    """Ingest an indexed Zip file together with its contents.

    The Zip file must contain an index file of the kind created by
    `retrieveArtifacts`.

    Parameters
    ----------
    zip_path : `lsst.resources.ResourcePath`
        Path to the Zip file.
    transfer : `str` or `None`
        Method to use for transferring the Zip file into the datastore.
    dry_run : `bool`, optional
        If `True` the ingest will be processed without any modifications
        made to the target datastore and as if the target datastore did
        not have any of the datasets.
    """
    # Abstract: concrete datastores must provide the implementation.
    raise NotImplementedError

1109 

@abstractmethod
def remove(self, datasetRef: DatasetRef) -> None:
    """Tell the datastore that a dataset can be removed.

    Parameters
    ----------
    datasetRef : `DatasetRef`
        The dataset that can be removed.

    Raises
    ------
    FileNotFoundError
        Raised when the dataset does not exist.

    Notes
    -----
    Some datastores may implement this method as a silent no-op to
    disable dataset deletion through standard interfaces.
    """
    # Abstract: concrete datastores must provide the implementation.
    reason = "Must be implemented by subclass"
    raise NotImplementedError(reason)

1130 

@abstractmethod
def forget(self, refs: Iterable[DatasetRef]) -> None:
    """Tell the datastore to drop every record of the given datasets
    without actually deleting them.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` [ `DatasetRef` ]
        The datasets that are to be forgotten.

    Notes
    -----
    A request to forget a `DatasetRef` that the datastore does not hold
    should be a silent no-op rather than an error.
    """
    # Abstract: concrete datastores must provide the implementation.
    reason = "Must be implemented by subclass"
    raise NotImplementedError(reason)

1147 

@abstractmethod
def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None:
    """Tell the datastore that a dataset can be moved to the trash.

    Parameters
    ----------
    ref : `DatasetRef` or iterable thereof
        The dataset, or datasets, to be trashed.
    ignore_errors : `bool`, optional
        Whether errors should be ignored. No per-ref check is made when
        multiple refs are being trashed.

    Returns
    -------
    None

    Raises
    ------
    FileNotFoundError
        Raised when the dataset does not exist and errors are not
        ignored. Only checked if a single ref is supplied (and not in a
        list).

    Notes
    -----
    Some datastores may implement this method as a silent no-op to
    disable dataset deletion through standard interfaces.
    """
    # Abstract: concrete datastores must provide the implementation.
    reason = "Must be implemented by subclass"
    raise NotImplementedError(reason)

1176 

@abstractmethod
def emptyTrash(
    self,
    ignore_errors: bool = True,
    refs: Collection[DatasetRef] | None = None,
    dry_run: bool = False,
) -> set[ResourcePath]:
    """Remove all datasets from the trash.

    Parameters
    ----------
    ignore_errors : `bool`, optional
        Whether errors should be ignored.
    refs : `collections.abc.Collection` [ `DatasetRef` ] or `None`
        Explicit list of datasets that can be removed from the trash.
        Listed datasets that are not already stored in the trash table
        are ignored. If `None`, every entry in the trash table is
        processed.
    dry_run : `bool`, optional
        If `True`, the trash table will be queried and results reported
        but no artifacts will be removed.

    Returns
    -------
    removed : `set` [ `lsst.resources.ResourcePath` ]
        The artifacts that were removed. Can be empty if artifacts
        cannot be represented by URIs.

    Notes
    -----
    Some datastores may implement this method as a silent no-op to
    disable dataset deletion through standard interfaces.
    """
    # Abstract: concrete datastores must provide the implementation.
    reason = "Must be implemented by subclass"
    raise NotImplementedError(reason)

1208 

@abstractmethod
def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
    """Copy a single dataset from another datastore into this one.

    Parameters
    ----------
    inputDatastore : `Datastore`
        The external `Datastore` from which the dataset is retrieved.
    datasetRef : `DatasetRef`
        The dataset to be transferred.
    """
    # Abstract: concrete datastores must provide the implementation.
    reason = "Must be implemented by subclass"
    raise NotImplementedError(reason)

1221 

def export(
    self,
    refs: Iterable[DatasetRef],
    *,
    directory: ResourcePathExpression | None = None,
    transfer: str | None = "auto",
) -> Iterable[FileDataset]:
    """Export datasets so they can be transferred to another repository.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` of `DatasetRef`
        The datasets to be exported.
    directory : `~lsst.resources.ResourcePathExpression`, optional
        Path to a directory that should contain files corresponding to
        output datasets. Ignored if ``transfer`` is explicitly `None`.
    transfer : `str`, optional
        Mode that should be used to move datasets out of the repository.
        The valid options match those of the ``transfer`` argument to
        ``ingest``, and a datastore may likewise signal an unsupported
        mode by raising `NotImplementedError`. If "auto" is given and no
        ``directory`` is specified, `None` will be implied.

    Returns
    -------
    dataset : `~collections.abc.Iterable` of `FileDataset`
        Structs containing information about the exported datasets, in
        the same order as ``refs``.

    Raises
    ------
    NotImplementedError
        Raised if the given transfer mode is not supported. This default
        implementation raises it for every transfer mode.
    """
    # The default implementation supports no transfer mode at all.
    message = f"Transfer mode {transfer} not supported."
    raise NotImplementedError(message)

1258 

@abstractmethod
def validateConfiguration(
    self,
    entities: Iterable[DatasetRef | DatasetType | StorageClass],
    logFailures: bool = False,
) -> None:
    """Check part of this datastore's configuration for validity.

    Parameters
    ----------
    entities : `~collections.abc.Iterable` [`DatasetRef` | `DatasetType` |\
            `StorageClass`]
        Entities to test against this configuration. The entities can be
        of differing types.
    logFailures : `bool`, optional
        If `True`, output a log message for every validation error
        detected.

    Raises
    ------
    DatastoreValidationError
        Raised if there is a validation problem with a configuration.

    Notes
    -----
    Each datastore implementation decides for itself which parts of the
    configuration are validated.
    """
    # Abstract: concrete datastores must provide the implementation.
    reason = "Must be implemented by subclass"
    raise NotImplementedError(reason)

1286 

@abstractmethod
def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None:
    """Check one specific lookup key against the supplied entity.

    Parameters
    ----------
    lookupKey : `LookupKey`
        Key to use to retrieve information from the datastore
        configuration.
    entity : `DatasetRef`, `DatasetType`, or `StorageClass`
        Entity to compare with the configuration retrieved using the
        specified lookup key.

    Returns
    -------
    None

    Raises
    ------
    DatastoreValidationError
        Raised if there is a problem with the combination of entity
        and lookup key.

    Notes
    -----
    The normal selection priorities are bypassed, so a key that would
    not normally be selected can still be validated.
    """
    # Abstract: concrete datastores must provide the implementation.
    reason = "Must be implemented by subclass"
    raise NotImplementedError(reason)

1316 

@abstractmethod
def getLookupKeys(self) -> set[LookupKey]:
    """Return every lookup key that is relevant to this datastore.

    Returns
    -------
    keys : `set` of `LookupKey`
        The keys stored internally for looking up information based
        on `DatasetType` name or `StorageClass`.
    """
    # Abstract: concrete datastores must provide the implementation.
    reason = "Must be implemented by subclass"
    raise NotImplementedError(reason)

1328 

def needs_expanded_data_ids(
    self,
    transfer: str | None,
    entity: DatasetRef | DatasetType | StorageClass | None = None,
) -> bool:
    """Report whether ingest into this datastore needs expanded data IDs.

    Parameters
    ----------
    transfer : `str` or `None`
        Transfer mode for ingest.
    entity : `DatasetRef` or `DatasetType` or `StorageClass` or `None`, \
            optional
        Object representing what will be ingested. If not provided (or
        not specific enough), `True` may be returned even if expanded
        data IDs aren't necessary.

    Returns
    -------
    needed : `bool`
        If `True`, expanded data IDs may be needed. `False` only if
        expansion definitely isn't necessary.
    """
    # This default implementation ignores both arguments and always
    # answers that expansion may be needed.
    return True

1353 

@abstractmethod
def import_records(
    self,
    data: Mapping[str, DatastoreRecordData],
) -> None:
    """Load datastore location and record data from an in-memory data
    structure.

    Parameters
    ----------
    data : `~collections.abc.Mapping` [ `str`, `DatastoreRecordData` ]
        Datastore records indexed by datastore name. This may contain
        data for other `Datastore` instances (generally because they are
        chained to this one), which should be ignored.

    Notes
    -----
    For performance reasons, implementations should generally not check
    that any external resources (e.g. files) referred to by these
    records actually exist; higher-level code is expected to guarantee
    that they do.

    Implementations are responsible for calling
    `DatastoreRegistryBridge.insert` on all datasets in
    ``data.locations`` where the key is in `names`, as well as loading
    any opaque table data.

    Implementations may assume that datasets are either fully present or
    not at all (single-component exports are not permitted).
    """
    # Abstract: concrete datastores must provide the implementation.
    raise NotImplementedError

1384 

@abstractmethod
def export_records(
    self,
    refs: Iterable[DatasetIdRef],
) -> Mapping[str, DatastoreRecordData]:
    """Write datastore records and locations to an in-memory data
    structure.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` [ `DatasetIdRef` ]
        Datasets to save. Datasets not known to this datastore may be
        included and should be ignored. Component datasets may not be
        included.

    Returns
    -------
    data : `~collections.abc.Mapping` [ `str`, `DatastoreRecordData` ]
        Exported datastore records indexed by datastore name.
    """
    # Abstract: concrete datastores must provide the implementation.
    raise NotImplementedError

1406 

def export_table(self, datasets: Collection[DatasetId]) -> DatastoreRecordTable:
    """Export datastore records to an arrow table.

    Parameters
    ----------
    datasets : `~collections.abc.Collection` [ `DatasetId` ]
        Dataset UUIDs for the records to export.

    Returns
    -------
    table : `DatastoreRecordTable`
        Datastore records table.
    """
    # This default implementation ignores ``datasets`` and hands back
    # whatever `DatastoreRecordTable.create_empty` produces.
    empty_table = DatastoreRecordTable.create_empty()
    return empty_table

1421 

def import_table(self, table: DatastoreRecordTable) -> None:
    """Import datastore records from an arrow table.

    This default implementation does nothing with the table.

    Parameters
    ----------
    table : `DatastoreRecordTable`
        Table containing the datastore records to import.

    Raises
    ------
    ValueError
        If the given table contains entries for a datastore that is not
        known to this Butler.
    """
    return None

1437 

def export_predicted_records(self, refs: Iterable[DatasetRef]) -> dict[str, DatastoreRecordData]:
    """Write predicted datastore records and locations to an in-memory
    data structure.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` [ `DatasetRef` ]
        The datasets for which to return the datastore records that
        would be used if they were to exist in this datastore. No
        attempt is made to determine if these datasets actually exist.

    Returns
    -------
    data : `dict` [ `str`, `DatastoreRecordData` ]
        Exported datastore records indexed by datastore name.

    Raises
    ------
    NotImplementedError
        Raised by this default implementation.
    """
    raise NotImplementedError

1455 

def set_retrieve_dataset_type_method(self, method: Callable[[str], DatasetType | None] | None) -> None:
    """Provide a method the datastore can use to look up a
    registry-defined dataset type.

    This default implementation does nothing with ``method``.

    Parameters
    ----------
    method : `~collections.abc.Callable` | `None`
        Method that takes the name of a dataset type and returns the
        corresponding `DatasetType` instance as defined in Registry, or
        `None` if the dataset type name is not known to the registry.

    Notes
    -----
    This method is only needed for a Datastore supporting a "trusted"
    mode, in which it has no access to datastore records and has to
    guess the dataset location based on its stored dataset type.
    """
    return None

1474 

@abstractmethod
def get_opaque_table_definitions(self) -> Mapping[str, DatastoreOpaqueTable]:
    """Build the definitions of the opaque tables used by this Datastore.

    Returns
    -------
    tables : `~collections.abc.Mapping` [ `str`, `DatastoreOpaqueTable` ]
        Mapping of opaque table names to their definitions. The mapping
        can be empty if the Datastore does not use opaque tables to keep
        datastore records.
    """
    # Abstract: concrete datastores must provide the implementation.
    raise NotImplementedError

1487 

def get_file_info_for_transfer(self, dataset_ids: Iterable[DatasetId]) -> FileTransferMap:
    """Report that this datastore does not support transferring files.

    Parameters
    ----------
    dataset_ids : `~collections.abc.Iterable` [ `DatasetId` ]
        Not used by this default implementation.

    Raises
    ------
    NotImplementedError
        Always raised by this default implementation.
    """
    message = f"Transferring files is not supported by datastore {self}"
    raise NotImplementedError(message)

1490 

def locate_missing_files_for_transfer(
    self,
    refs: Iterable[DatasetRef],
    artifact_existence: dict[ResourcePath, bool],
) -> FileTransferMap:
    """Return an empty mapping, ignoring both arguments.

    Parameters
    ----------
    refs : `~collections.abc.Iterable` [ `DatasetRef` ]
        Not used by this default implementation.
    artifact_existence : `dict` [`lsst.resources.ResourcePath`, `bool`]
        Not used (and not modified) by this default implementation.

    Returns
    -------
    located : `FileTransferMap`
        Always an empty mapping in this default implementation.
    """
    nothing_located: FileTransferMap = {}
    return nothing_located

1495 

1496 

class NullDatastore(Datastore):
    """A datastore that implements the `Datastore` API but rejects every
    request that would need access to real dataset storage.

    It reports that it neither knows about nor holds any dataset, accepts
    any configuration validation request, defines no opaque tables, and
    raises for everything else.

    Parameters
    ----------
    config : `Config` or `~lsst.resources.ResourcePathExpression` or `None`
        Ignored.
    bridgeManager : `DatastoreRegistryBridgeManager` or `None`
        Ignored.
    butlerRoot : `~lsst.resources.ResourcePathExpression` or `None`
        Ignored.
    """

    # Message shared by every operation that cannot be serviced.
    _NO_OP_MESSAGE = "This is a no-op datastore that can not access a real datastore"

    @classmethod
    def _create_from_config(
        cls,
        config: Config,
        bridgeManager: DatastoreRegistryBridgeManager,
        butlerRoot: ResourcePathExpression | None = None,
    ) -> NullDatastore:
        # Construct via ``cls`` so that a subclass gets an instance of
        # its own type rather than a plain NullDatastore.
        return cls(config, bridgeManager, butlerRoot)

    def clone(self, bridgeManager: DatastoreRegistryBridgeManager) -> Datastore:
        # The bridge manager is never used by this class, so the same
        # instance is handed back.
        return self

    @classmethod
    def setConfigRoot(cls, root: str, config: Config, full: Config, overwrite: bool = True) -> None:
        # Nothing to do. This is not a real Datastore.
        pass

    def __init__(
        self,
        config: Config | ResourcePathExpression | None,
        bridgeManager: DatastoreRegistryBridgeManager | None,
        butlerRoot: ResourcePathExpression | None = None,
    ):
        # NOTE: the base-class constructor is not called and all the
        # arguments are ignored.
        # Name ourselves with the timestamp the datastore
        # was created.
        self.name = f"{type(self).__name__}@{time.time()}"
        _LOG.debug("Creating datastore %s", self.name)
        self._transaction: DatastoreTransaction | None = None

    def knows(self, ref: DatasetRef) -> bool:
        # No dataset is ever known to this datastore.
        return False

    def exists(self, datasetRef: DatasetRef) -> bool:
        # No dataset ever exists in this datastore.
        return False

    def get(
        self,
        datasetRef: DatasetRef,
        parameters: Mapping[str, Any] | None = None,
        storageClass: StorageClass | str | None = None,
    ) -> Any:
        raise FileNotFoundError(self._NO_OP_MESSAGE)

    def put(
        self, inMemoryDataset: Any, datasetRef: DatasetRef, provenance: DatasetProvenance | None = None
    ) -> None:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def put_new(self, in_memory_dataset: Any, ref: DatasetRef) -> Mapping[str, DatasetRef]:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def ingest(
        self, *datasets: FileDataset, transfer: str | None = None, record_validation_info: bool = True
    ) -> None:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def transfer_from(
        self,
        source_records: FileTransferMap,
        refs: Iterable[DatasetRef],
        transfer: str = "auto",
        artifact_existence: dict[ResourcePath, bool] | None = None,
        dry_run: bool = False,
    ) -> tuple[set[DatasetRef], set[DatasetRef]]:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def getURIs(self, datasetRef: DatasetRef, predict: bool = False) -> DatasetRefURIs:
        raise FileNotFoundError(self._NO_OP_MESSAGE)

    def getURI(self, datasetRef: DatasetRef, predict: bool = False) -> ResourcePath:
        raise FileNotFoundError(self._NO_OP_MESSAGE)

    def ingest_zip(self, zip_path: ResourcePath, transfer: str | None, *, dry_run: bool = False) -> None:
        raise NotImplementedError("Can only ingest a Zip into a real datastore.")

    def retrieveArtifacts(
        self,
        refs: Iterable[DatasetRef],
        destination: ResourcePath,
        transfer: str = "auto",
        preserve_path: bool = True,
        overwrite: bool = False,
        write_index: bool = True,
        add_prefix: bool = False,
    ) -> dict[ResourcePath, ArtifactIndexInfo]:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def remove(self, datasetRef: DatasetRef) -> None:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def forget(self, refs: Iterable[DatasetRef]) -> None:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def trash(self, ref: DatasetRef | Iterable[DatasetRef], ignore_errors: bool = True) -> None:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def emptyTrash(
        self, ignore_errors: bool = True, refs: Collection[DatasetRef] | None = None, dry_run: bool = False
    ) -> set[ResourcePath]:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def transfer(self, inputDatastore: Datastore, datasetRef: DatasetRef) -> None:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def export(
        self,
        refs: Iterable[DatasetRef],
        *,
        directory: ResourcePathExpression | None = None,
        transfer: str | None = "auto",
    ) -> Iterable[FileDataset]:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def validateConfiguration(
        self, entities: Iterable[DatasetRef | DatasetType | StorageClass], logFailures: bool = False
    ) -> None:
        # No configuration so always validates.
        pass

    def validateKey(self, lookupKey: LookupKey, entity: DatasetRef | DatasetType | StorageClass) -> None:
        # No configuration so every key validates.
        pass

    def getLookupKeys(self) -> set[LookupKey]:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def import_records(
        self,
        data: Mapping[str, DatastoreRecordData],
    ) -> None:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def export_records(
        self,
        refs: Iterable[DatasetIdRef],
    ) -> Mapping[str, DatastoreRecordData]:
        raise NotImplementedError(self._NO_OP_MESSAGE)

    def get_opaque_table_definitions(self) -> Mapping[str, DatastoreOpaqueTable]:
        # This datastore keeps no records so it defines no opaque tables.
        return {}