Coverage for python/lsst/daf/butler/datastores/fileLikeDatastore.py : 90%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
Config, FileDataset, DatasetRef, DatasetTypeNotSupportedError, Datastore, DatastoreConfig, DatastoreValidationError, FileDescriptor, FileTemplates, FileTemplateValidationError, Formatter, FormatterFactory, Location, LocationFactory, StorageClass, StoredFileInfo, )
"""Helper class for FileLikeDatastore ingest implementation.
Parameters
----------
datasets : `list` of `FileDataset`
    Files to be ingested by this datastore.
"""
class DatastoreFileGetInformation:
    """Collection of useful parameters needed to retrieve a file from
    a Datastore.
    """
"""The location from which to read the dataset."""
"""The `Formatter` to use to deserialize the dataset."""
"""Stored information about this file and its formatter."""
"""Parameters to use for post-processing the retrieved dataset."""
"""The component to be retrieved (can be `None`)."""
"""Generic Datastore for file-based implementations.
Should always be sub-classed since key abstract methods are missing.
Parameters
----------
config : `DatastoreConfig` or `str`
    Configuration as either a `Config` object or URI to file.
Raises
------
ValueError
    If root location does not exist and ``create`` is `False` in the
    configuration.
"""
"""Path to configuration defaults. Relative to $DAF_BUTLER_DIR/config or absolute path. Can be None if no defaults specified. """
"""Root directory or URI of this `Datastore`."""
"""Factory for creating locations relative to the datastore root."""
"""Factory for creating instances of formatters."""
"""File templates that can be used by this `Datastore`."""
"""Set any filesystem-dependent config options for this Datastore to be appropriate for a new empty repository with the given root.
Parameters
----------
root : `str`
    URI to the root of the data repository.
config : `Config`
    A `Config` to update. Only the subset understood by this component
    will be updated. Will not expand defaults.
full : `Config`
    A complete config with all defaults expanded that can be converted
    to a `DatastoreConfig`. Read-only and will not be modified by this
    method. Repository-specific options that should not be obtained
    from defaults when Butler instances are constructed should be
    copied from ``full`` to ``config``.
overwrite : `bool`, optional
    If `False`, do not modify a value in ``config`` if the value
    already exists. Default is always to overwrite with the provided
    ``root``.
Notes
-----
If a keyword is explicitly defined in the supplied ``config`` it will
not be overridden by this method if ``overwrite`` is `False`. This
allows explicit values set in external configs to be retained.
"""
toUpdate={"root": root}, toCopy=("cls", ("records", "table")), overwrite=overwrite)
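# --- Illustrative usage sketch (not part of fileLikeDatastore.py) ---
# A minimal example of the calling convention documented above.  The
# concrete class path, table name, and the exact nesting of the config
# keys are assumptions for illustration; only the documented contract is
# shown: ``newConfig`` is updated in place, ``fullConfig`` is read-only.
from lsst.daf.butler import Config
from lsst.daf.butler.datastores.fileLikeDatastore import FileLikeDatastore

fullConfig = Config({"datastore": {
    "cls": "lsst.daf.butler.datastores.posixDatastore.PosixDatastore",
    "records": {"table": "posix_datastore_records"},
}})
newConfig = Config()  # repository-specific subset, filled in by the call
FileLikeDatastore.setConfigRoot("file:///data/repo", newConfig, fullConfig, overwrite=True)
# ``newConfig`` should now carry the new root, plus the "cls" and
# ("records", "table") values copied over from ``fullConfig`` (see ``toCopy``).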
@classmethod
def makeTableSpec(cls):
    return ddl.TableSpec(
        fields=NamedValueSet([
            ddl.FieldSpec(name="dataset_id", dtype=Integer, primaryKey=True),
            ddl.FieldSpec(name="path", dtype=String, length=256, nullable=False),
            ddl.FieldSpec(name="formatter", dtype=String, length=128, nullable=False),
            ddl.FieldSpec(name="storage_class", dtype=String, length=64, nullable=False),
            # TODO: should checksum be Base64Bytes instead?
            ddl.FieldSpec(name="checksum", dtype=String, length=128, nullable=True),
            ddl.FieldSpec(name="file_size", dtype=Integer, nullable=True),
        ]),
        unique=frozenset(),
        foreignKeys=[ddl.ForeignKeySpec(table="dataset", source=("dataset_id",),
                                        target=("dataset_id",), onDelete="CASCADE")]
    )
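# --- Illustrative sketch (not part of fileLikeDatastore.py) ---
# An example of a row satisfying the table spec above; it mirrors the record
# dict built further down in this file.  All values are invented for
# illustration only.
exampleRecord = {
    "dataset_id": 42,                                # primary key; FK to dataset.dataset_id
    "path": "raw/r/raw_r_42.fits",                   # path relative to the datastore root
    "formatter": "lsst.mypkg.formatters.ExampleFormatter",  # fully-qualified formatter name
    "storage_class": "Exposure",
    "checksum": "9e107d9d372bb6826bd81d3542a419d6",  # may be NULL if checksums are disabled
    "file_size": 16070400,                           # bytes; may be NULL
}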
raise ValueError("No root directory specified in configuration")
# Name ourselves either using an explicit name or a name
# derived from the (unexpanded) root
else:
    # We use the unexpanded root in the name to indicate that this
    # datastore can be moved without having to update registry.
    self.config["root"])

# Support repository relocation in config
# Existence of self.root is checked in subclass

# Now associate formatters with storage classes
universe=self.registry.dimensions)

# Read the file naming templates
universe=self.registry.dimensions)
# Storage of paths and formatters, keyed by dataset_id
# Determine whether checksums should be used
# Docstring inherited from GenericBaseDatastore
dict(dataset_id=ref.id, formatter=info.formatter, path=info.path,
     storage_class=info.storageClass.name, checksum=info.checksum,
     file_size=info.file_size)
)

# Docstring inherited from GenericBaseDatastore
# Convert name of StorageClass to instance
path=record["path"], storageClass=storageClass,
checksum=record["checksum"], file_size=record["file_size"])
"""Return all dataset refs associated with the supplied path.
Parameters
----------
pathInStore : `str`
    Path of interest in the data store.

Returns
-------
ids : `set` of `int`
    All `DatasetRef` IDs associated with this path.
"""
# Docstring inherited from GenericBaseDatastore
"""Find the `Location` of the requested dataset in the `Datastore` and the associated stored file information.
Parameters
----------
ref : `DatasetRef`
    Reference to the required `Dataset`.

Returns
-------
location : `Location`
    Location of the dataset within the datastore. Returns `None` if the
    dataset cannot be located.
info : `StoredFileInfo`
    Stored information about this file and its formatter.
"""
# Get the file information (this will fail if no file)
# Use the path to determine the location
"""Check that there is only one dataset associated with the specified artifact.
Parameters
----------
ref : `DatasetRef`
    Dataset to be removed.

Returns
-------
can_remove : `bool`
    `True` if the artifact can be safely removed.
"""
# Get all entries associated with this path
raise RuntimeError(f"Datastore inconsistency error. {storedFileInfo.path} not in registry")
# Get all the refs associated with this dataset if it is a composite
# Remove these refs from all the refs and if there is nothing left
# then we can delete
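# --- Illustrative sketch (not part of fileLikeDatastore.py) ---
# The deletion-safety check described by the comments above, written out with
# hypothetical helpers: ``_idsForPath`` stands in for the registry lookup of
# every dataset_id sharing the artifact's path, and ``_componentIds`` for the
# lookup of a composite's component dataset_ids.
def _canRemoveArtifact(self, ref, storedFileInfo) -> bool:
    # Every dataset_id recorded against this artifact's path in the datastore.
    allIds = self._idsForPath(storedFileInfo.path)       # hypothetical helper
    if ref.id not in allIds:
        raise RuntimeError(f"Datastore inconsistency error. {storedFileInfo.path} not in registry")
    # The dataset being removed plus, for a composite, all of its components.
    removedIds = {ref.id} | self._componentIds(ref)      # hypothetical helper
    # The artifact is safe to delete only if nothing else still refers to it.
    return not (allIds - removedIds)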
"""Check parameters for ``get`` and obtain formatter and location.
Parameters
----------
ref : `DatasetRef`
    Reference to the required Dataset.
parameters : `dict`
    `StorageClass`-specific parameters that specify, for example, a
    slice of the Dataset to be loaded.

Returns
-------
getInfo : `DatastoreFileGetInformation`
    Parameters needed to retrieve the file.
"""
# Get file metadata and internal metadata
# We have a write storage class and a read storage class and they
# can be different for concrete composites.
# Check that the supplied parameters are suitable for the type read
# Is this a component request?
FileDescriptor(location, readStorageClass=readStorageClass, storageClass=writeStorageClass, parameters=parameters), ref.dataId)
assemblerParams, component, readStorageClass)
"""Check the arguments for ``put`` and obtain formatter and location.
Parameters
----------
inMemoryDataset : `object`
    The Dataset to store.
ref : `DatasetRef`
    Reference to the associated Dataset.

Returns
-------
location : `Location`
    The location to write the dataset.
formatter : `Formatter`
    The `Formatter` to use to write the dataset.

Raises
------
TypeError
    Supplied object and storage class are inconsistent.
DatasetTypeNotSupportedError
    The associated `DatasetType` is not handled by this datastore.
"""
# Work out output file name
except KeyError as e:
    raise DatasetTypeNotSupportedError(f"Unable to find template for {ref}") from e

# Get the formatter based on the storage class
FileDescriptor(location, storageClass=storageClass), ref.dataId)
"""Standardize the path of a to-be-ingested file.
Parameters
----------
path : `str`
    Path of a file to be ingested.
transfer : `str`, optional
    How (and whether) the dataset should be added to the datastore. If
    `None` (default), the file must already be in a location appropriate
    for the datastore (e.g. within its root directory), and will not be
    moved. Other choices include "move", "copy", "symlink", and
    "hardlink". This is provided only so `NotImplementedError` can be
    raised if the mode is not supported; actual transfers are deferred
    to `_extractIngestInfo`.

Returns
-------
path : `str`
    New path in what the datastore considers standard form.

Notes
-----
Subclasses of `FileLikeDatastore` should implement this method instead
of `_prepIngest`. It should not modify the data repository or given
file in any way.

Raises
------
NotImplementedError
    Raised if the datastore does not support the given transfer mode
    (including the case where ingest is not supported at all).
FileNotFoundError
    Raised if one of the given files does not exist.
"""
raise NotImplementedError("Must be implemented by subclasses.")
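# --- Illustrative sketch (not part of fileLikeDatastore.py) ---
# Roughly what a POSIX-backed subclass might do in ``_standardizeIngestPath``.
# This is a simplified stand-in written against the contract documented above,
# not the actual implementation of any concrete datastore.
import os
from typing import Optional

SUPPORTED_TRANSFER_MODES = (None, "move", "copy", "symlink", "hardlink")

def _standardizeIngestPath(self, path: str, *, transfer: Optional[str] = None) -> str:
    if transfer not in SUPPORTED_TRANSFER_MODES:
        raise NotImplementedError(f"Transfer mode {transfer} not supported.")
    fullPath = os.path.normpath(os.path.join(self.root, path))
    if not os.path.exists(fullPath):
        raise FileNotFoundError(f"File at '{fullPath}' does not exist.")
    if transfer is None:
        # Without a transfer mode the file must already live inside the
        # datastore root; report it relative to that root.
        if not fullPath.startswith(self.root):
            raise RuntimeError(f"'{path}' is not inside repository root '{self.root}'.")
        return os.path.relpath(fullPath, start=self.root)
    return path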
transfer: Optional[str] = None) -> StoredFileInfo:
"""Relocate (if necessary) and extract `StoredFileInfo` from a
to-be-ingested file.

Parameters
----------
path : `str`
    Path of a file to be ingested.
ref : `DatasetRef`
    Reference for the dataset being ingested. Guaranteed to have
    ``dataset_id`` not `None`.
formatter : `type`
    `Formatter` subclass to use for this dataset.
transfer : `str`, optional
    How (and whether) the dataset should be added to the datastore. If
    `None` (default), the file must already be in a location appropriate
    for the datastore (e.g. within its root directory), and will not be
    modified. Other choices include "move", "copy", "symlink", and
    "hardlink".

Returns
-------
info : `StoredFileInfo`
    Internal datastore record for this file. This will be inserted by
    the caller; `_extractIngestInfo` is only responsible for creating
    and populating the struct.

Raises
------
FileNotFoundError
    Raised if one of the given files does not exist.
FileExistsError
    Raised if transfer is not `None` but the (internal) location the
    file would be moved to is already occupied.
"""
raise NotImplementedError("Must be implemented by subclasses.")
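# --- Illustrative sketch (not part of fileLikeDatastore.py) ---
# The general shape of a POSIX-style ``_extractIngestInfo`` override.  The
# relocation target, the ``useChecksum`` attribute name, and the StoredFileInfo
# keywords (taken from the stored-record fields used elsewhere in this file)
# are assumptions for illustration only.
import hashlib
import os
import shutil

def _extractIngestInfo(self, path, ref, *, formatter, transfer=None) -> StoredFileInfo:
    fullPath = os.path.join(self.root, path)
    if not os.path.exists(fullPath):
        raise FileNotFoundError(f"File at '{fullPath}' does not exist.")
    if transfer == "copy":
        # Hypothetical in-datastore destination; a real implementation would
        # derive it from the file templates for ``ref``.
        tgtPath = os.path.join(self.root, "ingested", os.path.basename(path))
        if os.path.exists(tgtPath):
            raise FileExistsError(f"File '{tgtPath}' already exists.")
        os.makedirs(os.path.dirname(tgtPath), exist_ok=True)
        shutil.copy2(fullPath, tgtPath)
        path, fullPath = os.path.relpath(tgtPath, start=self.root), tgtPath
    fileSize = os.stat(fullPath).st_size
    checksum = None
    if self.useChecksum:
        with open(fullPath, "rb") as fd:
            checksum = hashlib.md5(fd.read()).hexdigest()
    return StoredFileInfo(formatter=formatter, path=path,
                          storageClass=ref.datasetType.storageClass,
                          checksum=checksum, file_size=fileSize)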
# Docstring inherited from Datastore._prepIngest.
else:
else:

# Docstring inherited from Datastore._finishIngest.
# Do ingest as if the first dataset ref is associated with the file
transfer=transfer)
"""URI to the Dataset.
Parameters
----------
ref : `DatasetRef`
    Reference to the required Dataset.
predict : `bool`
    If `True`, allow URIs to be returned of datasets that have not been
    written.

Returns
-------
uri : `str`
    URI string pointing to the Dataset within the datastore. If the
    Dataset does not exist in the datastore, and if ``predict`` is
    `True`, the URI will be a prediction and will include a URI fragment
    "#predicted". If the datastore does not have entities that relate
    well to the concept of a URI the returned URI string will be
    descriptive. The returned URI is not guaranteed to be obtainable.

Raises
------
FileNotFoundError
    A URI has been requested for a dataset that does not exist and
    guessing is not allowed.

Notes
-----
When a predicted URI is requested an attempt will be made to form a
reasonable URI based on file templates and the expected formatter.
"""
# if this has never been written then we have to guess
storageClass=storageClass))
# Try to use the extension attribute but ignore problems if the
# formatter does not define one.
except Exception:
    # Use the default extension
    pass
# Add a URI fragment to indicate this is a guess
# If this is a ref that we have written we can get the path.
# Get file metadata and internal metadata
# Use the path to determine the location
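# --- Illustrative usage sketch (not part of fileLikeDatastore.py) ---
# Requesting a URI and falling back to a predicted one.  ``datastore`` and
# ``ref`` are assumed to exist already, and ``getUri`` is taken to be the
# accessor whose docstring appears above.
try:
    uri = datastore.getUri(ref)
except FileNotFoundError:
    # Nothing stored yet, so ask for a prediction.  Predicted URIs carry a
    # "#predicted" fragment and are not guaranteed to be obtainable.
    uri = datastore.getUri(ref, predict=True)
print(uri)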
"""Validate some of the configuration for this datastore.
Parameters
----------
entities : iterable of `DatasetRef`, `DatasetType`, or `StorageClass`
    Entities to test against this configuration. Can be differing
    types.
logFailures : `bool`, optional
    If `True`, output a log message for every validation error
    detected.

Raises
------
DatastoreValidationError
    Raised if there is a validation problem with a configuration. All
    the problems are reported in a single exception.

Notes
-----
This method checks that all the supplied entities have valid file
templates and also have formatters defined.
"""
except FileTemplateValidationError as e:
    templateFailed = str(e)
messages.append(templateFailed)
# Docstring is inherited from base class
self.constraints.getLookupKeys()

# Docstring is inherited from base class
# The key can be valid in either formatters or templates so we can
# only check the template if it exists
if lookupKey in self.templates:
    try:
        self.templates[lookupKey].validateTemplate(entity)
    except FileTemplateValidationError as e:
        raise DatastoreValidationError(e) from e
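# --- Illustrative usage sketch (not part of fileLikeDatastore.py) ---
# Validating a datastore configuration against a set of entities.
# ``datastore`` and ``datasetTypes`` are assumed to exist already, and
# ``validateConfiguration`` is the method whose docstring appears above.
from lsst.daf.butler import DatastoreValidationError

try:
    datastore.validateConfiguration(datasetTypes, logFailures=True)
except DatastoreValidationError as err:
    # All template and formatter problems are reported in a single exception.
    print(f"Datastore configuration problems:\n{err}")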