Coverage for python/lsst/daf/butler/datastores/fileDatastoreClient.py: 59%
28 statements
coverage.py v7.4.0, created at 2024-01-16 10:43 +0000
__all__ = ("get_dataset_as_python_object", "FileDatastoreGetPayload")

from typing import Any, Literal

import pydantic
from lsst.daf.butler import DatasetRef, DimensionUniverse, Location, SerializedDatasetRef, StorageClass
from lsst.daf.butler.datastore.cache_manager import DatastoreDisabledCacheManager
from lsst.daf.butler.datastore.stored_file_info import SerializedStoredFileInfo, StoredFileInfo
from lsst.daf.butler.datastores.file_datastore.get import (
    DatasetLocationInformation,
    Mapping,
    generate_datastore_get_information,
    get_dataset_as_python_object_from_get_info,
)
from pydantic import AnyHttpUrl


class FileDatastoreGetPayloadFileInfo(pydantic.BaseModel):
    """Information required to read a single file stored in `FileDatastore`."""

    # This is intentionally restricted to HTTP for security reasons. Allowing
    # arbitrary URLs here would allow the server to trick the client into
    # fetching data from any file on its local filesystem or from remote
    # storage using credentials lying around in the environment.
    url: AnyHttpUrl
    """An HTTP URL that can be used to read the file."""

    datastoreRecords: SerializedStoredFileInfo
    """`FileDatastore` metadata records for this file."""


class FileDatastoreGetPayload(pydantic.BaseModel):
    """A serializable representation of the data needed for retrieving an
    artifact and converting it to a Python object.
    """

    datastore_type: Literal["file"]

    file_info: list[FileDatastoreGetPayloadFileInfo]
    """List of retrieval information for each file associated with this
    artifact.
    """

    dataset_ref: SerializedDatasetRef
    """Registry information associated with this artifact."""


def get_dataset_as_python_object(
    payload: FileDatastoreGetPayload,
    *,
    universe: DimensionUniverse,
    parameters: Mapping[str, Any] | None,
    storageClass: StorageClass | str | None,
    component: str | None,
) -> Any:
    """Retrieve an artifact from storage and return it as a Python object.

    Parameters
    ----------
    payload : `FileDatastoreGetPayload`
        Pre-processed information about each file associated with this
        artifact.
    universe : `DimensionUniverse`
        The universe of dimensions associated with the `DatasetRef` contained
        in ``payload``.
    parameters : `Mapping`[`str`, `typing.Any`]
        `StorageClass` and `Formatter` parameters to be used when converting
        the artifact to a Python object.
    storageClass : `StorageClass` | `str` | `None`
        Overrides the `StorageClass` to be used when converting the artifact
        to a Python object.  If `None`, uses the `StorageClass` specified by
        ``payload``.
    component : `str` | `None`
        Selects which component of the artifact to retrieve.

    Returns
    -------
    python_object : `typing.Any`
        The retrieved artifact, converted to a Python object.
    """
    fileLocations: list[DatasetLocationInformation] = [
        (Location(None, str(file_info.url)), StoredFileInfo.from_simple(file_info.datastoreRecords))
        for file_info in payload.file_info
    ]

    ref = DatasetRef.from_simple(payload.dataset_ref, universe=universe)

    # If we have both a component override and a storage class override, the
    # component override has to be applied first.  DatasetRef cares because it
    # is checking compatibility of the storage class with its DatasetType.
    if component is not None:
        ref = ref.makeComponentRef(component)
    if storageClass is not None:
        ref = ref.overrideStorageClass(storageClass)

    datastore_file_info = generate_datastore_get_information(
        fileLocations,
        ref=ref,
        parameters=parameters,
    )
    return get_dataset_as_python_object_from_get_info(
        datastore_file_info, ref=ref, parameters=parameters, cache_manager=DatastoreDisabledCacheManager()
    )
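

# A minimal usage sketch (hypothetical, not part of the original module):
# given a ``payload`` received from a Butler server and the matching
# ``universe`` of dimensions, a client-side call might look like:
#
#     python_object = get_dataset_as_python_object(
#         payload,
#         universe=universe,
#         parameters=None,
#         storageClass=None,
#         component=None,
#     )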