Coverage for python/lsst/daf/butler/datastore/record_data.py: 33%

73 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-10-27 09:44 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This software is dual licensed under the GNU General Public License and also 

10# under a 3-clause BSD license. Recipients may choose which of these licenses 

11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt, 

12# respectively. If you choose the GPL option then the following text applies 

13# (but note that there is still no warranty even if you opt for BSD instead): 

14# 

15# This program is free software: you can redistribute it and/or modify 

16# it under the terms of the GNU General Public License as published by 

17# the Free Software Foundation, either version 3 of the License, or 

18# (at your option) any later version. 

19# 

20# This program is distributed in the hope that it will be useful, 

21# but WITHOUT ANY WARRANTY; without even the implied warranty of 

22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

23# GNU General Public License for more details. 

24# 

25# You should have received a copy of the GNU General Public License 

26# along with this program. If not, see <http://www.gnu.org/licenses/>. 

27 

28"""Support for generic data stores.""" 

29 

30from __future__ import annotations 

31 

32__all__ = ("DatastoreRecordData", "SerializedDatastoreRecordData") 

33 

34import dataclasses 

35import uuid 

36from collections.abc import Mapping 

37from typing import TYPE_CHECKING, Any, TypeAlias 

38 

39from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat 

40 

41from .._dataset_ref import DatasetId 

42from ..dimensions import DimensionUniverse 

43from ..persistence_context import PersistenceContextVars 

44from .stored_file_info import StoredDatastoreItemInfo 

45 

46if TYPE_CHECKING: 

47 from ..registry import Registry 

48 

# Pydantic 2 requires we be explicit about the types that are used in
# datastore records. Without this UUID can not be handled. Pydantic v1
# wants the opposite and does not work unless we use Any.
if PYDANTIC_V2:
    # Concrete union of the value types that actually appear in records.
    _Record: TypeAlias = dict[str, int | str | uuid.UUID | None]
else:
    # Pydantic v1 cannot validate the union above; fall back to Any.
    _Record: TypeAlias = dict[str, Any]  # type: ignore

56 

57 

class SerializedDatastoreRecordData(_BaseModelCompat):
    """Representation of a `DatastoreRecordData` suitable for serialization."""

    dataset_ids: list[uuid.UUID]
    """List of dataset IDs"""

    records: Mapping[str, Mapping[str, Mapping[str, list[_Record]]]]
    """List of records indexed by record class name, dataset ID (encoded as
    str, because JSON), and opaque table name.
    """

    @classmethod
    def direct(
        cls,
        *,
        dataset_ids: list[str | uuid.UUID],
        records: dict[str, dict[str, dict[str, list[_Record]]]],
    ) -> SerializedDatastoreRecordData:
        """Construct a `SerializedDatastoreRecordData` directly without
        validators.

        Parameters
        ----------
        dataset_ids : `list` [ `str` | `uuid.UUID` ]
            Dataset IDs; strings (as produced by a JSON round trip) are
            converted back to `uuid.UUID`.
        records : `dict`
            Records indexed by record class name, dataset ID (as a string),
            and opaque table name.

        Notes
        -----
        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        This method should only be called when the inputs are trusted.
        """
        data = cls.model_construct(
            _fields_set={"dataset_ids", "records"},
            # JSON makes strings out of UUIDs, need to convert them back.
            # Use ``dataset_id`` as the loop variable to avoid shadowing the
            # ``id`` builtin.
            dataset_ids=[
                uuid.UUID(dataset_id) if isinstance(dataset_id, str) else dataset_id
                for dataset_id in dataset_ids
            ],
            records=records,
        )

        return data

94 

95 

@dataclasses.dataclass
class DatastoreRecordData:
    """A struct that represents a tabular data export from a single
    datastore.
    """

    records: dict[DatasetId, dict[str, list[StoredDatastoreItemInfo]]] = dataclasses.field(
        default_factory=dict
    )
    """Opaque table data, indexed by dataset ID and grouped by opaque table
    name."""

    def update(self, other: DatastoreRecordData) -> None:
        """Update contents of this instance with data from another instance.

        Parameters
        ----------
        other : `DatastoreRecordData`
            Records to merge into this instance.

        Notes
        -----
        If a ``(dataset_id, table_name)`` combination has any records in
        ``self``, it is assumed that all records for that combination are
        already present. This allows duplicates of the same dataset to be
        handled gracefully.
        """
        for dataset_id, incoming_tables in other.records.items():
            existing_tables = self.records.setdefault(dataset_id, {})
            for table_name, incoming_records in incoming_tables.items():
                existing_records = existing_tables.setdefault(table_name, [])
                # A non-empty list here means we already hold the complete
                # record set for this (dataset_id, table_name) combination;
                # other code guarantees entire (parent) datasets are exported
                # to these structures (never components), so skip the merge.
                if not existing_records:
                    existing_records.extend(incoming_records)

    def subset(self, dataset_ids: set[DatasetId]) -> DatastoreRecordData | None:
        """Extract a subset of the records that match given dataset IDs.

        Parameters
        ----------
        dataset_ids : `set` [ `DatasetId` ]
            Dataset IDs to match.

        Returns
        -------
        record_data : `DatastoreRecordData` or `None`
            `None` is returned if there are no matching refs.

        Notes
        -----
        Records in the returned instance are shared with this instance, clients
        should not update or extend records in the returned instance.
        """
        selected: dict[DatasetId, dict[str, list[StoredDatastoreItemInfo]]] = {
            dataset_id: self.records[dataset_id]
            for dataset_id in dataset_ids
            if dataset_id in self.records
        }
        # Empty selection maps to None rather than an empty instance.
        return DatastoreRecordData(records=selected) if selected else None

    def to_simple(self, minimal: bool = False) -> SerializedDatastoreRecordData:
        """Make representation of the object for serialization.

        Implements `~lsst.daf.butler.json.SupportsSimple` protocol.

        Parameters
        ----------
        minimal : `bool`, optional
            If True produce minimal representation, not used by this method.

        Returns
        -------
        simple : `dict`
            Representation of this instance as a simple dictionary.
        """
        simple_records: dict[str, dict[str, dict[str, list[_Record]]]] = {}
        for dataset_id, table_data in self.records.items():
            for table_name, item_infos in table_data.items():
                class_name, plain_records = StoredDatastoreItemInfo.to_records(item_infos)
                # Group by record class name first, then by the hex form of
                # the dataset UUID (JSON keys must be strings).
                per_dataset = simple_records.setdefault(class_name, {}).setdefault(dataset_id.hex, {})
                per_dataset.setdefault(table_name, []).extend(dict(rec) for rec in plain_records)
        return SerializedDatastoreRecordData(dataset_ids=list(self.records), records=simple_records)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatastoreRecordData,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
    ) -> DatastoreRecordData:
        """Make an instance of this class from serialized data.

        Implements `~lsst.daf.butler.json.SupportsSimple` protocol.

        Parameters
        ----------
        simple : `SerializedDatastoreRecordData`
            Serialized representation returned from `to_simple` method.
        universe : `DimensionUniverse`, optional
            Dimension universe, not used by this method.
        registry : `Registry`, optional
            Registry instance, not used by this method.

        Returns
        -------
        record_data : `DatastoreRecordData`
            De-serialized instance.
        """
        cache = PersistenceContextVars.dataStoreRecords.get()
        cache_key = frozenset(simple.dataset_ids)
        if cache is not None:
            cached = cache.get(cache_key)
            if cached is not None:
                return cached
        # Seed the dict so every dataset ID appears even when it carries no
        # records.
        records: dict[DatasetId, dict[str, list[StoredDatastoreItemInfo]]] = {
            dataset_id: {} for dataset_id in simple.dataset_ids
        }
        for class_name, class_data in simple.records.items():
            for dataset_id_str, dataset_data in class_data.items():
                for table_name, table_records in dataset_data.items():
                    try:
                        infos = StoredDatastoreItemInfo.from_records(class_name, table_records)
                    except TypeError as exc:
                        raise RuntimeError(
                            "The class specified in the SerializedDatastoreRecordData "
                            f"({class_name}) is not a StoredDatastoreItemInfo."
                        ) from exc
                    target = records.setdefault(uuid.UUID(dataset_id_str), {})
                    target.setdefault(table_name, []).extend(infos)
        instance = cls(records=records)
        if cache is not None:
            cache[cache_key] = instance
        return instance