Coverage for python/lsst/daf/butler/core/datastoreRecordData.py: 27%

73 statements  

« prev     ^ index     » next       coverage.py v7.2.3, created at 2023-04-19 03:42 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Support for generic data stores.""" 

23 

24from __future__ import annotations 

25 

26__all__ = ("DatastoreRecordData", "SerializedDatastoreRecordData") 

27 

28import dataclasses 

29import uuid 

30from collections import defaultdict 

31from typing import TYPE_CHECKING, AbstractSet, Any, Dict, List, Optional, Union 

32 

33from lsst.utils import doImportType 

34from lsst.utils.introspection import get_full_type_name 

35from pydantic import BaseModel 

36 

37from .datasets import DatasetId 

38from .dimensions import DimensionUniverse 

39from .storedFileInfo import StoredDatastoreItemInfo 

40 

41if TYPE_CHECKING: 

42 from ..registry import Registry 

43 

44_Record = Dict[str, Any] 

45 

46 

class SerializedDatastoreRecordData(BaseModel):
    """Representation of a `DatastoreRecordData` suitable for serialization."""

    dataset_ids: List[uuid.UUID]
    """List of dataset IDs"""

    records: Dict[str, Dict[str, List[_Record]]]
    """List of records indexed by record class name and table name."""

    @classmethod
    def direct(
        cls,
        *,
        dataset_ids: List[Union[str, uuid.UUID]],
        records: Dict[str, Dict[str, List[_Record]]],
    ) -> SerializedDatastoreRecordData:
        """Construct a `SerializedDatastoreRecordData` directly without
        validators.

        Parameters
        ----------
        dataset_ids : `list` [ `str` or `uuid.UUID` ]
            Dataset IDs; strings (as produced by JSON round-trips) are
            converted back to `uuid.UUID`.
        records : `dict`
            Records indexed by record class name and table name. Any string
            ``dataset_id`` column values are converted to `uuid.UUID` in
            place.

        Notes
        -----
        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        This method should only be called when the inputs are trusted.
        """
        data = SerializedDatastoreRecordData.__new__(cls)
        setter = object.__setattr__
        # JSON makes strings out of UUIDs, need to convert them back.
        # (Use ``value`` rather than shadowing the builtin ``id``.)
        setter(
            data,
            "dataset_ids",
            [uuid.UUID(value) if isinstance(value, str) else value for value in dataset_ids],
        )
        # See also comments in record_ids_to_uuid()
        for table_data in records.values():
            for table_records in table_data.values():
                for record in table_records:
                    # This only checks the dataset_id value; if there are any
                    # other columns that are UUIDs we'd need a more generic
                    # approach.
                    if (value := record.get("dataset_id")) is not None and isinstance(value, str):
                        record["dataset_id"] = uuid.UUID(value)
        setter(data, "records", records)
        return data

87 

88 

@dataclasses.dataclass
class DatastoreRecordData:
    """A struct that represents a tabular data export from a single
    datastore.
    """

    records: defaultdict[DatasetId, defaultdict[str, List[StoredDatastoreItemInfo]]] = dataclasses.field(
        default_factory=lambda: defaultdict(lambda: defaultdict(list))
    )
    """Opaque table data, indexed by dataset ID and grouped by opaque table
    name."""

    def update(self, other: DatastoreRecordData) -> None:
        """Update contents of this instance with data from another instance.

        Parameters
        ----------
        other : `DatastoreRecordData`
            Records to merge into this instance.

        Notes
        -----
        Merged instances can not have identical records.
        """
        for dataset_id, table_records in other.records.items():
            this_table_records = self.records[dataset_id]
            for table_name, records in table_records.items():
                this_table_records[table_name].extend(records)

    def subset(self, dataset_ids: AbstractSet[DatasetId]) -> Optional[DatastoreRecordData]:
        """Extract a subset of the records that match given dataset IDs.

        Parameters
        ----------
        dataset_ids : `set` [ `DatasetId` ]
            Dataset IDs to match.

        Returns
        -------
        record_data : `DatastoreRecordData` or `None`
            `None` is returned if there are no matching refs.

        Notes
        -----
        Records in the returned instance are shared with this instance,
        clients should not update or extend records in the returned instance.
        """
        matching_records: defaultdict[
            DatasetId, defaultdict[str, List[StoredDatastoreItemInfo]]
        ] = defaultdict(lambda: defaultdict(list))
        for dataset_id in dataset_ids:
            if (id_records := self.records.get(dataset_id)) is not None:
                matching_records[dataset_id] = id_records
        if matching_records:
            return DatastoreRecordData(records=matching_records)
        else:
            return None

    def to_simple(self, minimal: bool = False) -> SerializedDatastoreRecordData:
        """Make representation of the object for serialization.

        Implements `~lsst.daf.butler.core.json.SupportsSimple` protocol.

        Parameters
        ----------
        minimal : `bool`, optional
            If True produce minimal representation, not used by this method.

        Returns
        -------
        simple : `SerializedDatastoreRecordData`
            Representation of this instance as a simple dictionary.
        """

        def _class_name(records: list[StoredDatastoreItemInfo]) -> str:
            """Get fully qualified class name for the records. Empty string
            returned if list is empty. Exception is raised if records are of
            different classes.
            """
            if not records:
                return ""
            classes = {record.__class__ for record in records}
            assert len(classes) == 1, f"Records have to be of the same class: {classes}"
            return get_full_type_name(classes.pop())

        records: defaultdict[str, defaultdict[str, List[_Record]]] = defaultdict(lambda: defaultdict(list))
        for table_data in self.records.values():
            for table_name, table_records in table_data.items():
                class_name = _class_name(table_records)
                records[class_name][table_name].extend([record.to_record() for record in table_records])
        return SerializedDatastoreRecordData(dataset_ids=list(self.records.keys()), records=records)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatastoreRecordData,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
    ) -> DatastoreRecordData:
        """Make an instance of this class from serialized data.

        Implements `~lsst.daf.butler.core.json.SupportsSimple` protocol.

        Parameters
        ----------
        simple : `SerializedDatastoreRecordData`
            Serialized representation returned from `to_simple` method.
        universe : `DimensionUniverse`, optional
            Dimension universe, not used by this method.
        registry : `Registry`, optional
            Registry instance, not used by this method.

        Returns
        -------
        record_data : `DatastoreRecordData`
            De-serialized instance of `DatastoreRecordData`.
        """
        records: defaultdict[DatasetId, defaultdict[str, List[StoredDatastoreItemInfo]]] = defaultdict(
            lambda: defaultdict(list)
        )
        # Make sure that all dataset IDs appear in the dict even if they
        # don't have records.
        for dataset_id in simple.dataset_ids:
            records[dataset_id] = defaultdict(list)
        for class_name, table_data in simple.records.items():
            klass = doImportType(class_name)
            for table_name, table_records in table_data.items():
                for record in table_records:
                    info = klass.from_record(record)
                    records[info.dataset_id][table_name].append(info)
        return cls(records=records)