Coverage for python/lsst/daf/butler/registry/queries/_readers.py: 35%

105 statements  

« prev     ^ index     » next       coverage.py v7.2.5, created at 2023-05-09 02:11 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21 

22from __future__ import annotations 

23 

# Public API of this module; only these reader classes are exported via
# ``from ... import *`` (the leading-underscore readers are implementation
# details returned through `DataCoordinateReader.make`).
__all__ = (
    "DataCoordinateReader",
    "DatasetRefReader",
    "DimensionRecordReader",
)

29 

30from abc import ABC, abstractmethod 

31from collections.abc import Callable, Mapping, Set 

32from typing import TYPE_CHECKING, Any 

33 

34from lsst.utils.classes import cached_getter 

35 

36from ...core import ( 

37 DataCoordinate, 

38 DatasetColumnTag, 

39 DatasetRef, 

40 DatasetType, 

41 DimensionElement, 

42 DimensionGraph, 

43 DimensionKeyColumnTag, 

44 DimensionRecord, 

45) 

46 

47if TYPE_CHECKING: 

48 from lsst.daf.relation import ColumnTag 

49 

50 

class DataCoordinateReader(ABC):
    """Base class and factory for reader objects that extract `DataCoordinate`
    instances from query result rows.
    """

    __slots__ = ()

    @staticmethod
    def make(
        dimensions: DimensionGraph,
        full: bool = True,
        records: bool = False,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] | None = None,
    ) -> DataCoordinateReader:
        """Construct a concrete reader for a set of dimensions.

        Parameters
        ----------
        dimensions : `DimensionGraph`
            Dimensions of the `DataCoordinate` instances the new reader will
            read.
        full : `bool`, optional
            Whether to expect and extract implied dimensions as well as
            required dimensions.
        records : `bool`, optional
            Whether to attach dimension records.
        record_caches : `Mapping`, optional
            Nested mapping (outer keys are dimension elements, inner keys are
            data IDs for that element) of cached dimension records.  Ignored
            unless ``records=True``.

        Returns
        -------
        reader : `DataCoordinateReader`
            Concrete reader instance.
        """
        # Guard clause: without implied dimensions we can only produce the
        # basic reader, and records cannot be attached.
        if not full:
            assert not records, "Cannot add records unless full=True."
            return _BasicDataCoordinateReader(dimensions)
        full_reader = _FullDataCoordinateReader(dimensions)
        if not records:
            return full_reader
        # Keep only the caches relevant to these dimensions.
        if record_caches is None:
            caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] = {}
        else:
            caches = {
                element: cache
                for element, cache in record_caches.items()
                if element in dimensions.elements
            }
        # Any element without a cache gets a row-based record reader.
        readers = {
            element: DimensionRecordReader(element)
            for element in dimensions.elements
            if element not in caches
        }
        return _ExpandedDataCoordinateReader(full_reader, caches, readers)

    @abstractmethod
    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        """Read a `DataCoordinate` from a query result row.

        Parameters
        ----------
        row : `Mapping`
            Mapping with `ColumnTag` keys representing a query result row.

        Returns
        -------
        data_coordinate : `DataCoordinate`
            New data ID.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def columns_required(self) -> Set[ColumnTag]:
        # Columns a row mapping must contain for `read` to succeed.
        raise NotImplementedError()

127 

class _BasicDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=False`` case,
    reading only the required dimension values.

    Parameters
    ----------
    dimensions : `DimensionGraph`
        Dimensions of the `DataCoordinate` instances read.
    """

    __slots__ = ("_dimensions", "_tags")

    def __init__(self, dimensions: DimensionGraph):
        self._dimensions = dimensions
        # Precompute the column tags once; `read` only does row lookups.
        self._tags = tuple(
            DimensionKeyColumnTag(name) for name in dimensions.required.names
        )

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        values = tuple(row[tag] for tag in self._tags)
        return DataCoordinate.fromRequiredValues(self._dimensions, values)

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return frozenset(self._tags)

154 

class _FullDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=True`` case,
    reading both required and implied dimension values.

    Parameters
    ----------
    dimensions : `DimensionGraph`
        Dimensions of the `DataCoordinate` instances read.
    """

    __slots__ = ("_dimensions", "_tags")

    def __init__(self, dimensions: DimensionGraph):
        self._dimensions = dimensions
        # NOTE(review): relies on the private `_dataCoordinateIndices`
        # attribute of `DimensionGraph` to get the full value ordering.
        self._tags = tuple(
            DimensionKeyColumnTag(name) for name in dimensions._dataCoordinateIndices
        )

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        values = tuple(row[tag] for tag in self._tags)
        return DataCoordinate.fromFullValues(self._dimensions, values)

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return frozenset(self._tags)

181 

class _ExpandedDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` that attaches dimension
    records to the data IDs it reads (the ``records=True`` case).

    Parameters
    ----------
    full_reader : `_FullDataCoordinateReader`
        Reader for full data IDs that don't have records.
    record_caches : `Mapping`
        Nested mapping (outer keys are dimension elements, inner keys are data
        IDs for that element) of cached dimension records.
    record_readers : `Mapping`
        Mapping from `DimensionElement` to `DimensionRecordReader`.  Should
        include all elements in the data coordinate's dimensions that are not
        in ``record_caches``.
    """

    __slots__ = ("_full_reader", "_record_readers", "_record_caches", "_cached_columns_required")

    def __init__(
        self,
        full_reader: _FullDataCoordinateReader,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]],
        record_readers: Mapping[DimensionElement, DimensionRecordReader],
    ):
        self._full_reader = full_reader
        self._record_readers = record_readers
        self._record_caches = record_caches

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        data_id = self._full_reader.read(row)
        # Cached records are looked up by the element's own data ID subset.
        records = {
            element.name: cache[data_id.subset(element.graph)]
            for element, cache in self._record_caches.items()
        }
        # Anything not cached is read directly from the row.
        for element, reader in self._record_readers.items():
            records[element.name] = reader.read(row)
        return data_id.expanded(records)

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        columns = set(self._full_reader.columns_required)
        for reader in self._record_readers.values():
            columns.update(reader.columns_required)
        return columns

227 

228 

class DatasetRefReader:
    """Reader class that extracts `DatasetRef` objects from query result rows.

    Parameters
    ----------
    dataset_type : `DatasetType`
        Dataset type for extracted references.
    full : `bool`, optional
        Whether to expect and extract implied dimensions as well as required
        dimensions.
    translate_collection : `Callable`, optional
        Callable that returns `str` collection names given collection primary
        key values.  Optional only for registries that use names as primary
        keys, or if ``run`` is always passed to `read`.
    records : `bool`, optional
        Whether to attach dimension records to data IDs.
    record_caches : `Mapping`, optional
        Nested mapping (outer keys are dimension element names, inner keys
        are data IDs for that element) of cached dimension records.
        Ignored unless ``records=True``.
    """

    __slots__ = (
        "_data_coordinate_reader",
        "_dataset_type",
        "_translate_collection",
        "_id_tag",
        "_run_tag",
        "_cached_columns_required",
    )

    def __init__(
        self,
        dataset_type: DatasetType,
        *,
        full: bool = True,
        translate_collection: Callable[[Any], str] | None = None,
        records: bool = False,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] | None = None,
    ):
        # Delegate data-ID extraction to a DataCoordinateReader built for
        # this dataset type's dimensions.
        self._data_coordinate_reader = DataCoordinateReader.make(
            dataset_type.dimensions, full=full, records=records, record_caches=record_caches
        )
        self._dataset_type = dataset_type
        self._translate_collection = translate_collection
        self._id_tag = DatasetColumnTag(dataset_type.name, "dataset_id")
        self._run_tag = DatasetColumnTag(dataset_type.name, "run")

    def read(
        self,
        row: Mapping[ColumnTag, Any],
        *,
        run: str | None = None,
        data_id: DataCoordinate | None = None,
    ) -> DatasetRef:
        """Read a `DatasetRef` from a query result row.

        Parameters
        ----------
        row : `Mapping`
            Mapping with `ColumnTag` keys representing a query result row.
        run : `str`, optional
            Name of the `~CollectionType.RUN` collection; when provided the run
            key does not need to be present in the result row, and
            ``translate_collection`` does not need to be provided at
            construction.
        data_id : `DataCoordinate`, optional
            Data ID; when provided the dimensions do not need to be present in
            the result row.
        """
        if data_id is None:
            data_id = self._data_coordinate_reader.read(row)
        if run is None:
            run_key = row[self._run_tag]
            # Translate the primary-key value to a name only when a
            # translator was supplied; otherwise assume keys are names.
            if self._translate_collection is None:
                run = run_key
            else:
                run = self._translate_collection(run_key)
        return DatasetRef(
            self._dataset_type,
            data_id,
            run=run,
            id=row[self._id_tag],
        )

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        columns = set(self._data_coordinate_reader.columns_required)
        columns.update((self._id_tag, self._run_tag))
        return columns

322 

class DimensionRecordReader:
    """Reader class that extracts `DimensionRecord` objects from query result
    rows.

    Parameters
    ----------
    element : `DimensionElement`
        Element whose records are read; determines both the record class and
        the columns expected in each row.
    """

    __slots__ = ("_cls", "_tags")

    def __init__(self, element: DimensionElement):
        record_class = element.RecordClass
        self._cls = record_class
        # Mapping from column tag to record field name.
        self._tags = record_class.fields.columns

    def read(self, row: Mapping[ColumnTag, Any]) -> DimensionRecord:
        # Build the record's keyword arguments by pulling each tagged column
        # out of the row.
        kwargs = {field_name: row[tag] for tag, field_name in self._tags.items()}
        return self._cls(**kwargs)

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return self._tags.keys()