Coverage for python / lsst / daf / butler / _file_dataset.py: 49%

49 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-28 08:36 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28from __future__ import annotations 

29 

30__all__ = ("FileDataset", "SerializedFileDataset") 

31 

32import uuid 

33from collections.abc import Callable 

34from dataclasses import dataclass 

35from typing import Any, TypeAlias 

36 

37import pydantic 

38 

39from lsst.resources import ResourcePath, ResourcePathExpression 

40 

41from ._dataset_ref import DatasetRef, MinimalistSerializableDatasetRef 

42from ._dataset_type import DatasetType 

43from ._formatter import FormatterParameter 

44from .dimensions import DimensionUniverse 

45 

46 

@dataclass
class FileDataset:
    """Description of one file artifact and the datasets stored in it.

    Parameters
    ----------
    path : `lsst.resources.ResourcePath` or `str`
        Location of the file. A `str` is stored unchanged; any other
        accepted expression is converted to a `lsst.resources.ResourcePath`
        without forcing it to be absolute.

        If the dataset was exported with ``transfer=None`` (i.e. in-place),
        this is relative to the datastore root (only datastores that have a
        well-defined root in the local filesystem can be expected to support
        in-place exports). Otherwise this is relative to the directory passed
        to `Datastore.export`.
    refs : `DatasetRef` or `list` [ `DatasetRef` ]
        Registry information about the dataset(s) held in the file. A single
        `DatasetRef` is wrapped in a one-element list.
    formatter : `Formatter` or `str` or `None`, optional
        A `Formatter` class or fully-qualified name.

    Raises
    ------
    ValueError
        Raised if the supplied refs do not resolve to exactly one run.
    """

    __slots__ = ("refs", "path", "formatter")

    refs: list[DatasetRef]
    path: str | ResourcePath
    formatter: FormatterParameter | None

    def __init__(
        self,
        path: ResourcePathExpression,
        refs: DatasetRef | list[DatasetRef],
        *,
        formatter: FormatterParameter | None = None,
    ):
        # Only `str` and `ResourcePath` are stored; every other flavor of
        # path expression is normalized to a (possibly relative) ResourcePath.
        if isinstance(path, str):
            self.path = path
        else:
            self.path = ResourcePath(path, forceAbsolute=False)

        ref_list = [refs] if isinstance(refs, DatasetRef) else refs

        # All refs attached to one file must live in a single run.
        runs = {ref.run for ref in ref_list}
        if len(runs) != 1:
            raise ValueError(f"Supplied refs must all share the same run. Got: {runs}")

        self.refs = ref_list
        self.formatter = formatter

    def __lt__(self, other: Any) -> bool:
        # Ordering considers only the string form of the path.
        if isinstance(other, type(self)):
            return str(self.path) < str(other.path)
        return NotImplemented

    def to_simple(self) -> SerializedFileDataset:
        """Build the JSON-serializable counterpart of this instance.

        Returns
        -------
        serialized : `SerializedFileDataset`
            Model carrying the path as a string, the formatter as a name (or
            `None`), and the refs keyed by dataset ID.
        """
        formatter_name: str | None
        if self.formatter is None or isinstance(self.formatter, str):
            # Already in serializable form (either absent or a name).
            formatter_name = self.formatter
        else:
            formatter_name = self.formatter.name()

        serialized_refs = {}
        for ref in self.refs:
            serialized_refs[ref.id] = MinimalistSerializableDatasetRef.from_dataset_ref(ref)

        return SerializedFileDataset(
            refs=serialized_refs,
            path=str(self.path),
            formatter=formatter_name,
        )

    @staticmethod
    def from_simple(
        dataset: SerializedFileDataset, *, dataset_type_loader: DatasetTypeLoader, universe: DimensionUniverse
    ) -> FileDataset:
        """Reconstruct a `FileDataset` from its serialized form.

        Parameters
        ----------
        dataset : `SerializedFileDataset`
            Serialized object to convert.
        dataset_type_loader : `~collections.abc.Callable`
            Function called with a dataset type name (`str`) that returns
            the matching `DatasetType`. It is invoked once per serialized
            ref while rebuilding the `DatasetRef` instances.
        universe : `DimensionUniverse`
            Dimension universe associated with the `Butler` instance that
            created the serialized `FileDataset` instance.

        Returns
        -------
        file_dataset : `FileDataset`
            Deserialized equivalent of the input dataset.
        """
        restored_refs = []
        for dataset_id, serialized_ref in dataset.refs.items():
            restored_refs.append(
                serialized_ref.to_dataset_ref(
                    dataset_id,
                    universe=universe,
                    dataset_type=dataset_type_loader(serialized_ref.dataset_type_name),
                )
            )
        return FileDataset(path=dataset.path, refs=restored_refs, formatter=dataset.formatter)

152 

153 

# Signature of the ``dataset_type_loader`` callback accepted by
# `FileDataset.from_simple`.
DatasetTypeLoader: TypeAlias = Callable[[str], DatasetType]
"""Type signature for a function that takes a string dataset type name as its
only parameter, and returns an instance of `DatasetType`.
"""

158 

159 

class SerializedFileDataset(pydantic.BaseModel):
    """Serializable format of `FileDataset` object."""

    # Serialized refs, keyed by dataset ID.
    refs: dict[uuid.UUID, MinimalistSerializableDatasetRef]

    # String form of the `FileDataset` path.
    path: str

    # Formatter name, or `None` when the `FileDataset` had no formatter.
    formatter: str | None = None