Coverage for python/lsst/daf/butler/core/datastoreRecordData.py: 28%

75 statements  

« prev     ^ index     » next       coverage.py v6.5.0, created at 2023-02-01 02:05 -0800

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22"""Support for generic data stores.""" 

23 

24from __future__ import annotations 

25 

26__all__ = ("DatastoreRecordData", "SerializedDatastoreRecordData") 

27 

28import dataclasses 

29import uuid 

30from collections import defaultdict 

31from typing import TYPE_CHECKING, AbstractSet, Any, Dict, List, Optional, Union 

32 

33from lsst.utils import doImportType 

34from lsst.utils.introspection import get_full_type_name 

35from pydantic import BaseModel 

36 

37from .datasets import DatasetId 

38from .dimensions import DimensionUniverse 

39from .storedFileInfo import StoredDatastoreItemInfo 

40 

41if TYPE_CHECKING: 41 ↛ 43line 41 didn't jump to line 43, because the condition on line 41 was never true

42 

43 from ..registry import Registry 

44 

45_Record = Dict[str, Any] 

46 

47 

class SerializedDatastoreRecordData(BaseModel):
    """Representation of a `DatastoreRecordData` suitable for serialization."""

    dataset_ids: List[uuid.UUID]
    """List of dataset IDs"""

    records: Dict[str, Dict[str, List[_Record]]]
    """List of records indexed by record class name and table name."""

    @classmethod
    def direct(
        cls,
        *,
        dataset_ids: List[Union[str, uuid.UUID]],
        records: Dict[str, Dict[str, List[_Record]]],
    ) -> SerializedDatastoreRecordData:
        """Construct a `SerializedDatastoreRecordData` directly without
        validators.

        This differs from the pydantic "construct" method in that the
        arguments are explicitly what the model requires, and it will recurse
        through members, constructing them from their corresponding `direct`
        methods.

        This method should only be called when the inputs are trusted.

        Parameters
        ----------
        dataset_ids : `list` [ `str` or `uuid.UUID` ]
            Dataset IDs; string entries (as produced by JSON round-tripping)
            are converted back to `uuid.UUID`.
        records : `dict`
            Records indexed by record class name and table name.  Any string
            ``dataset_id`` values inside the records are converted to
            `uuid.UUID` *in place* — the caller's dict is mutated.

        Returns
        -------
        data : `SerializedDatastoreRecordData`
            Newly-constructed instance.
        """
        data = SerializedDatastoreRecordData.__new__(cls)
        setter = object.__setattr__
        # JSON makes strings out of UUIDs, need to convert them back.
        # (Named ``value`` rather than ``id`` to avoid shadowing the builtin.)
        setter(
            data,
            "dataset_ids",
            [uuid.UUID(value) if isinstance(value, str) else value for value in dataset_ids],
        )
        # See also comments in record_ids_to_uuid()
        for table_data in records.values():
            for table_records in table_data.values():
                for record in table_records:
                    # This only checks dataset_id value, if there are any
                    # other columns that are UUIDs we'd need a more generic
                    # approach.
                    if (dataset_id := record.get("dataset_id")) is not None:
                        record["dataset_id"] = (
                            uuid.UUID(dataset_id) if isinstance(dataset_id, str) else dataset_id
                        )
        setter(data, "records", records)
        # NOTE(review): pydantic's own ``construct`` also populates
        # ``__fields_set__``; if callers use ``exclude_unset`` serialization
        # this may need to be set here too — confirm against the pydantic
        # version in use.
        return data

88 

89 

@dataclasses.dataclass
class DatastoreRecordData:
    """A struct that represents a tabular data export from a single
    datastore.
    """

    records: defaultdict[DatasetId, defaultdict[str, List[StoredDatastoreItemInfo]]] = dataclasses.field(
        default_factory=lambda: defaultdict(lambda: defaultdict(list))
    )
    """Opaque table data, indexed by dataset ID and grouped by opaque table
    name."""

    def update(self, other: DatastoreRecordData) -> None:
        """Update contents of this instance with data from another instance.

        Parameters
        ----------
        other : `DatastoreRecordData`
            Records to merge into this instance.

        Notes
        -----
        Merged instances can not have identical records.
        """
        for dataset_id, table_records in other.records.items():
            # defaultdict access creates the per-dataset entry if missing.
            this_table_records = self.records[dataset_id]
            for table_name, records in table_records.items():
                this_table_records[table_name].extend(records)

    def subset(self, dataset_ids: AbstractSet[DatasetId]) -> Optional[DatastoreRecordData]:
        """Extract a subset of the records that match given dataset IDs.

        Parameters
        ----------
        dataset_ids : `set` [ `DatasetId` ]
            Dataset IDs to match.

        Returns
        -------
        record_data : `DatastoreRecordData` or `None`
            `None` is returned if there are no matching refs.

        Notes
        -----
        Records in the returned instance are shared with this instance, clients
        should not update or extend records in the returned instance.
        """
        matching_records: defaultdict[
            DatasetId, defaultdict[str, List[StoredDatastoreItemInfo]]
        ] = defaultdict(lambda: defaultdict(list))
        for dataset_id in dataset_ids:
            # ``.get`` (not subscript) so missing IDs do not create entries.
            if (id_records := self.records.get(dataset_id)) is not None:
                matching_records[dataset_id] = id_records
        if matching_records:
            return DatastoreRecordData(records=matching_records)
        else:
            return None

    def to_simple(self, minimal: bool = False) -> SerializedDatastoreRecordData:
        """Make representation of the object for serialization.

        Implements `~lsst.daf.butler.core.json.SupportsSimple` protocol.

        Parameters
        ----------
        minimal : `bool`, optional
            If True produce minimal representation, not used by this method.

        Returns
        -------
        simple : `SerializedDatastoreRecordData`
            Representation of this instance as a simple object.
        """

        def _class_name(records: list[StoredDatastoreItemInfo]) -> str:
            """Get fully qualified class name for the records. Empty string
            returned if list is empty. Exception is raised if records are of
            different classes.
            """
            if not records:
                return ""
            classes = {type(record) for record in records}
            assert len(classes) == 1, f"Records have to be of the same class: {classes}"
            return get_full_type_name(classes.pop())

        # Group the serialized records by record class name, then table name.
        records: defaultdict[str, defaultdict[str, List[_Record]]] = defaultdict(lambda: defaultdict(list))
        for table_data in self.records.values():
            for table_name, table_records in table_data.items():
                class_name = _class_name(table_records)
                records[class_name][table_name].extend([record.to_record() for record in table_records])
        return SerializedDatastoreRecordData(dataset_ids=list(self.records.keys()), records=records)

    @classmethod
    def from_simple(
        cls,
        simple: SerializedDatastoreRecordData,
        universe: Optional[DimensionUniverse] = None,
        registry: Optional[Registry] = None,
    ) -> DatastoreRecordData:
        """Make an instance of this class from serialized data.

        Implements `~lsst.daf.butler.core.json.SupportsSimple` protocol.

        Parameters
        ----------
        simple : `SerializedDatastoreRecordData`
            Serialized representation returned from `to_simple` method.
        universe : `DimensionUniverse`, optional
            Dimension universe, not used by this method.
        registry : `Registry`, optional
            Registry instance, not used by this method.

        Returns
        -------
        record_data : `DatastoreRecordData`
            De-serialized instance of `DatastoreRecordData`.
        """
        records: defaultdict[DatasetId, defaultdict[str, List[StoredDatastoreItemInfo]]] = defaultdict(
            lambda: defaultdict(list)
        )
        # make sure that all dataset IDs appear in the dict even if they don't
        # have records.
        for dataset_id in simple.dataset_ids:
            records[dataset_id] = defaultdict(list)
        for class_name, table_data in simple.records.items():
            klass = doImportType(class_name)
            for table_name, table_records in table_data.items():
                for record in table_records:
                    info = klass.from_record(record)
                    records[info.dataset_id][table_name].append(info)
        return cls(records=records)