Coverage for python/lsst/daf/butler/core/datastoreRecordData.py: 29%

84 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-08-12 09:20 +0000

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Support for generic data stores.""" 

23 

24from __future__ import annotations 

25 

26__all__ = ("DatastoreRecordData", "SerializedDatastoreRecordData") 

27 

28import dataclasses 

29import uuid 

30from collections.abc import Mapping 

31from typing import TYPE_CHECKING, Any, TypeAlias 

32 

33from lsst.daf.butler._compat import PYDANTIC_V2, _BaseModelCompat 

34from lsst.utils import doImportType 

35from lsst.utils.introspection import get_full_type_name 

36 

37from .datasets import DatasetId 

38from .dimensions import DimensionUniverse 

39from .persistenceContext import PersistenceContextVars 

40from .storedFileInfo import StoredDatastoreItemInfo 

41 

42if TYPE_CHECKING: 

43 from ..registry import Registry 

44 

45# Pydantic 2 requires we be explicit about the types that are used in 

46# datastore records. Without this UUID can not be handled. Pydantic v1 

47# wants the opposite and does not work unless we use Any. 

if not PYDANTIC_V2:
    # Pydantic v1 only validates these records when columns are typed Any.
    _Record: TypeAlias = dict[str, Any]  # type: ignore
else:
    # Pydantic 2 needs the concrete column types (especially UUID) spelled
    # out explicitly for datastore records to round-trip.
    _Record: TypeAlias = dict[str, int | str | uuid.UUID | None]

52 

53 

class SerializedDatastoreRecordData(_BaseModelCompat):
    """Representation of a `DatastoreRecordData` suitable for serialization."""

    dataset_ids: list[uuid.UUID]
    """List of dataset IDs"""

    records: Mapping[str, Mapping[str, list[_Record]]]
    """List of records indexed by record class name and table name."""

    @classmethod
    def direct(
        cls,
        *,
        dataset_ids: list[str | uuid.UUID],
        records: dict[str, dict[str, list[_Record]]],
    ) -> SerializedDatastoreRecordData:
        """Construct a `SerializedDatastoreRecordData` directly without
        validators.

        Parameters
        ----------
        dataset_ids : `list` [ `str` | `uuid.UUID` ]
            Dataset IDs; string values (as produced by JSON serialization)
            are converted back to `uuid.UUID`.
        records : `dict`
            Records indexed by record class name and table name.

        Returns
        -------
        serialized : `SerializedDatastoreRecordData`
            Model constructed without validation.

        Notes
        -----
        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        This method should only be called when the inputs are trusted.
        """
        # See also comments in record_ids_to_uuid()
        for table_data in records.values():
            for table_records in table_data.values():
                for record in table_records:
                    # This only checks the dataset_id value; if there are any
                    # other columns that are UUIDs we'd need a more generic
                    # approach. ("dataset_id" is a local name here to avoid
                    # shadowing the id() builtin.)
                    if (dataset_id := record.get("dataset_id")) is not None:
                        # JSON makes strings out of UUIDs, convert them back.
                        if isinstance(dataset_id, str):
                            record["dataset_id"] = uuid.UUID(dataset_id)

        data = cls.model_construct(
            _fields_set={"dataset_ids", "records"},
            # JSON makes strings out of UUIDs, need to convert them back.
            dataset_ids=[uuid.UUID(did) if isinstance(did, str) else did for did in dataset_ids],
            records=records,
        )

        return data

97 

98 

@dataclasses.dataclass
class DatastoreRecordData:
    """A struct that represents a tabular data export from a single
    datastore.
    """

    records: dict[DatasetId, dict[str, list[StoredDatastoreItemInfo]]] = dataclasses.field(
        default_factory=dict
    )
    """Opaque table data, indexed by dataset ID and grouped by opaque table
    name."""

    def update(self, other: DatastoreRecordData) -> None:
        """Update contents of this instance with data from another instance.

        Parameters
        ----------
        other : `DatastoreRecordData`
            Records to merge into this instance.

        Notes
        -----
        Merged instances can not have identical records.
        """
        for dataset_id, table_records in other.records.items():
            this_table_records = self.records.setdefault(dataset_id, {})
            for table_name, records in table_records.items():
                # Merge lists; duplicates are not detected here (see Notes).
                this_table_records.setdefault(table_name, []).extend(records)

    def subset(self, dataset_ids: set[DatasetId]) -> DatastoreRecordData | None:
        """Extract a subset of the records that match given dataset IDs.

        Parameters
        ----------
        dataset_ids : `set` [ `DatasetId` ]
            Dataset IDs to match.

        Returns
        -------
        record_data : `DatastoreRecordData` or `None`
            `None` is returned if there are no matching refs.

        Notes
        -----
        Records in the returned instance are shared with this instance, clients
        should not update or extend records in the returned instance.
        """
        matching_records: dict[DatasetId, dict[str, list[StoredDatastoreItemInfo]]] = {}
        for dataset_id in dataset_ids:
            if (id_records := self.records.get(dataset_id)) is not None:
                matching_records[dataset_id] = id_records
        if matching_records:
            return DatastoreRecordData(records=matching_records)
        else:
            return None

    def to_simple(self, minimal: bool = False) -> SerializedDatastoreRecordData:
        """Make representation of the object for serialization.

        Implements `~lsst.daf.butler.core.json.SupportsSimple` protocol.

        Parameters
        ----------
        minimal : `bool`, optional
            If True produce minimal representation, not used by this method.

        Returns
        -------
        simple : `SerializedDatastoreRecordData`
            Representation of this instance as a simple serializable model.
        """

        def _class_name(records: list[StoredDatastoreItemInfo]) -> str:
            """Get fully qualified class name for the records. Empty string
            returned if list is empty. Exception is raised if records are of
            different classes.
            """
            if not records:
                return ""
            classes = {record.__class__ for record in records}
            assert len(classes) == 1, f"Records have to be of the same class: {classes}"
            return get_full_type_name(classes.pop())

        records: dict[str, dict[str, list[_Record]]] = {}
        for table_data in self.records.values():
            for table_name, table_records in table_data.items():
                class_name = _class_name(table_records)
                class_records = records.setdefault(class_name, {})
                class_records.setdefault(table_name, []).extend(
                    [record.to_record() for record in table_records]
                )
        return SerializedDatastoreRecordData(dataset_ids=list(self.records.keys()), records=records)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatastoreRecordData,
        universe: DimensionUniverse | None = None,
        registry: Registry | None = None,
    ) -> DatastoreRecordData:
        """Make an instance of this class from serialized data.

        Implements `~lsst.daf.butler.core.json.SupportsSimple` protocol.

        Parameters
        ----------
        simple : `SerializedDatastoreRecordData`
            Serialized representation returned from `to_simple` method.
        universe : `DimensionUniverse`, optional
            Dimension universe, not used by this method.
        registry : `Registry`, optional
            Registry instance, not used by this method.

        Returns
        -------
        record_data : `DatastoreRecordData`
            De-serialized instance of `DatastoreRecordData`.
        """
        # Reuse a previously de-serialized instance for the same set of
        # dataset IDs if a persistence-context cache is active.
        cache = PersistenceContextVars.dataStoreRecords.get()
        key = frozenset(simple.dataset_ids)
        if cache is not None and (cachedRecord := cache.get(key)) is not None:
            return cachedRecord
        records: dict[DatasetId, dict[str, list[StoredDatastoreItemInfo]]] = {}
        # make sure that all dataset IDs appear in the dict even if they don't
        # have records.
        for dataset_id in simple.dataset_ids:
            records[dataset_id] = {}
        for class_name, table_data in simple.records.items():
            klass = doImportType(class_name)
            if not issubclass(klass, StoredDatastoreItemInfo):
                raise RuntimeError(
                    "The class specified in the SerializedDatastoreRecordData "
                    f"({get_full_type_name(klass)}) is not a StoredDatastoreItemInfo."
                )
            for table_name, table_records in table_data.items():
                for record in table_records:
                    info = klass.from_record(record)
                    dataset_type_records = records.setdefault(info.dataset_id, {})
                    dataset_type_records.setdefault(table_name, []).append(info)
        newRecord = cls(records=records)
        if cache is not None:
            cache[key] = newRecord
        return newRecord