Coverage for python/lsst/daf/butler/datastores/fileDatastoreClient.py: 59%

28 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-25 10:48 +0000

1__all__ = ("get_dataset_as_python_object", "FileDatastoreGetPayload") 

2 

3from typing import Any, Literal 

4 

5import pydantic 

6from lsst.daf.butler import DatasetRef, DimensionUniverse, Location, SerializedDatasetRef, StorageClass 

7from lsst.daf.butler.datastore.cache_manager import DatastoreDisabledCacheManager 

8from lsst.daf.butler.datastore.stored_file_info import SerializedStoredFileInfo, StoredFileInfo 

9from lsst.daf.butler.datastores.file_datastore.get import ( 

10 DatasetLocationInformation, 

11 Mapping, 

12 generate_datastore_get_information, 

13 get_dataset_as_python_object_from_get_info, 

14) 

15from pydantic import AnyHttpUrl 

16 

17 

class FileDatastoreGetPayloadFileInfo(pydantic.BaseModel):
    """Information required to read a single file stored in `FileDatastore`.

    One instance describes one artifact file: where to fetch it from and the
    datastore metadata needed to interpret it once fetched.
    """

    # This is intentionally restricted to HTTP for security reasons. Allowing
    # arbitrary URLs here would allow the server to trick the client into
    # fetching data from any file on its local filesystem or from remote
    # storage using credentials laying around in the environment.
    url: AnyHttpUrl
    """An HTTP URL that can be used to read the file."""

    datastoreRecords: SerializedStoredFileInfo
    """`FileDatastore` metadata records for this file."""

30 

31 

class FileDatastoreGetPayload(pydantic.BaseModel):
    """A serializable representation of the data needed for retrieving an
    artifact and converting it to a python object.
    """

    # Discriminator field: always the literal string "file" for this payload,
    # allowing callers to distinguish it from other datastore payload types.
    datastore_type: Literal["file"]

    file_info: list[FileDatastoreGetPayloadFileInfo]
    """List of retrieval information for each file associated with this
    artifact.
    """

    dataset_ref: SerializedDatasetRef
    """Registry information associated with this artifact."""

46 

47 

def get_dataset_as_python_object(
    payload: FileDatastoreGetPayload,
    *,
    universe: DimensionUniverse,
    parameters: Mapping[str, Any] | None,
    storageClass: StorageClass | str | None,
    component: str | None,
) -> Any:
    """Retrieve an artifact from storage and return it as a Python object.

    Parameters
    ----------
    payload : `FileDatastoreGetPayload`
        Pre-processed information about each file associated with this
        artifact.
    universe : `DimensionUniverse`
        The universe of dimensions associated with the `DatasetRef` contained
        in ``payload``.
    parameters : `Mapping`[`str`, `typing.Any`]
        `StorageClass` and `Formatter` parameters to be used when converting
        the artifact to a Python object.
    storageClass : `StorageClass` | `str` | `None`
        Overrides the `StorageClass` to be used when converting the artifact to
        a Python object. If `None`, uses the `StorageClass` specified by
        ``payload``.
    component : `str` | `None`
        Selects which component of the artifact to retrieve.

    Returns
    -------
    python_object : `typing.Any`
        The retrieved artifact, converted to a Python object.
    """
    # Pair each file's location with its deserialized datastore record.
    file_locations: list[DatasetLocationInformation] = []
    for info in payload.file_info:
        location = Location(None, str(info.url))
        stored_info = StoredFileInfo.from_simple(info.datastoreRecords)
        file_locations.append((location, stored_info))

    ref = DatasetRef.from_simple(payload.dataset_ref, universe=universe)

    # If we have both a component override and a storage class override, the
    # component override has to be applied first. DatasetRef cares because it
    # is checking compatibility of the storage class with its DatasetType.
    if component is not None:
        ref = ref.makeComponentRef(component)
    if storageClass is not None:
        ref = ref.overrideStorageClass(storageClass)

    get_info = generate_datastore_get_information(
        file_locations,
        ref=ref,
        parameters=parameters,
    )
    # Caching is disabled: the client reads each artifact directly.
    return get_dataset_as_python_object_from_get_info(
        get_info,
        ref=ref,
        parameters=parameters,
        cache_manager=DatastoreDisabledCacheManager(),
    )