Coverage for python/lsst/daf/butler/registry/queries/_readers.py: 46% (109 statements)

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "DataCoordinateReader",
    "DatasetRefReader",
    "DimensionRecordReader",
)

from abc import ABC, abstractmethod
from collections.abc import Callable, Mapping, Set
from typing import TYPE_CHECKING, Any

from lsst.utils.classes import cached_getter

from ..._column_tags import DatasetColumnTag, DimensionKeyColumnTag
from ..._dataset_ref import DatasetRef
from ..._dataset_type import DatasetType
from ...dimensions import (
    DataCoordinate,
    DimensionElement,
    DimensionGroup,
    DimensionRecord,
    DimensionRecordSet,
)

if TYPE_CHECKING:
    from lsst.daf.relation import ColumnTag

class DataCoordinateReader(ABC):
    """Base class and factory for reader objects that extract `DataCoordinate`
    instances from query result rows.
    """

    @staticmethod
    def make(
        dimensions: DimensionGroup,
        full: bool = True,
        records: bool = False,
        record_caches: Mapping[str, DimensionRecordSet] | None = None,
    ) -> DataCoordinateReader:
        """Construct a concrete reader for a set of dimensions.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions of the `DataCoordinate` instances the new reader will
            read.
        full : `bool`, optional
            Whether to expect and extract implied dimensions as well as
            required dimensions.
        records : `bool`, optional
            Whether to attach dimension records.
        record_caches : `~collections.abc.Mapping`, optional
            Mapping of cached dimension records.  Ignored unless
            ``records=True``.

        Returns
        -------
        reader : `DataCoordinateReader`
            Concrete reader instance.
        """
        if full:
            full_reader = _FullDataCoordinateReader(dimensions)
            if records:
                if record_caches is None:
                    record_caches = {}
                else:
                    record_caches = {
                        e: cache for e, cache in record_caches.items() if e in dimensions.elements
                    }
                record_readers = {}
                for element_name in dimensions.elements:
                    element = dimensions.universe[element_name]
                    if element_name not in record_caches:
                        record_readers[element] = DimensionRecordReader(element)
                return _ExpandedDataCoordinateReader(full_reader, record_caches, record_readers)
            return full_reader
        else:
            assert not records, "Cannot add records unless full=True."
            return _BasicDataCoordinateReader(dimensions)

    __slots__ = ()

    @abstractmethod
    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        """Read a `DataCoordinate` from a query result row.

        Parameters
        ----------
        row : `~collections.abc.Mapping`
            Mapping with `ColumnTag` keys representing a query result row.

        Returns
        -------
        data_coordinate : `DataCoordinate`
            New data ID.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def columns_required(self) -> Set[ColumnTag]:
        """Columns this reader needs to find in each query result row
        (`~collections.abc.Set` [ `ColumnTag` ]).
        """
        raise NotImplementedError()
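
# Usage sketch (illustrative only, not part of the module): the factory picks
# the cheapest concrete reader for what the caller asked for.  ``dimensions``
# and ``caches`` are hypothetical stand-ins for objects built elsewhere by
# the query system.
#
#     reader = DataCoordinateReader.make(dimensions, full=False)  # required only
#     reader = DataCoordinateReader.make(dimensions)  # implied dimensions too
#     reader = DataCoordinateReader.make(
#         dimensions, records=True, record_caches=caches
#     )  # full data IDs with dimension records attached
#
# ``full=False`` with ``records=True`` trips the assertion in ``make``, since
# records can only be attached to full data IDs.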


class _BasicDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=False`` case.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        Dimensions of the `DataCoordinate` instances read.
    """

    def __init__(self, dimensions: DimensionGroup):
        self._dimensions = dimensions
        self._tags = tuple(DimensionKeyColumnTag(name) for name in self._dimensions.required.names)

    __slots__ = ("_dimensions", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        return DataCoordinate.from_required_values(
            self._dimensions,
            tuple(row[tag] for tag in self._tags),
        )

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return frozenset(self._tags)


class _FullDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=True`` case.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        Dimensions of the `DataCoordinate` instances read.
    """

    def __init__(self, dimensions: DimensionGroup):
        self._dimensions = dimensions
        self._tags = tuple(
            DimensionKeyColumnTag(name) for name in self._dimensions.as_group().data_coordinate_keys
        )

    __slots__ = ("_dimensions", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        return DataCoordinate.from_full_values(
            self._dimensions,
            tuple(row[tag] for tag in self._tags),
        )

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return frozenset(self._tags)
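
# Sketch of the row format both private readers consume (hypothetical tags
# and values; real rows are produced by the query system with `ColumnTag`
# keys):
#
#     row = {
#         DimensionKeyColumnTag("instrument"): "HSC",
#         DimensionKeyColumnTag("visit"): 903342,
#         DimensionKeyColumnTag("band"): "r",  # implied dimension, full=True only
#     }
#
# ``_BasicDataCoordinateReader`` looks up only the required-dimension tags
# and calls `DataCoordinate.from_required_values`;
# ``_FullDataCoordinateReader`` also looks up implied dimensions and calls
# `DataCoordinate.from_full_values`.  With a plain `dict` row, a missing tag
# surfaces as a `KeyError`.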


class _ExpandedDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=True``,
    ``records=True`` case.

    Parameters
    ----------
    full_reader : `_FullDataCoordinateReader`
        Reader for full data IDs that don't have records.
    record_caches : `~collections.abc.Mapping`
        Mapping of cached dimension records.
    record_readers : `~collections.abc.Mapping`
        Mapping from `DimensionElement` to `DimensionRecordReader`.  Should
        include all elements in the data coordinate's dimensions that are not
        in ``record_caches``.
    """

    def __init__(
        self,
        full_reader: _FullDataCoordinateReader,
        record_caches: Mapping[str, DimensionRecordSet],
        record_readers: Mapping[DimensionElement, DimensionRecordReader],
    ):
        self._full_reader = full_reader
        self._record_readers = record_readers
        self._record_caches = record_caches

    __slots__ = ("_full_reader", "_record_readers", "_record_caches", "_cached_columns_required")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        full = self._full_reader.read(row)
        records = {}
        for element_name, cache in self._record_caches.items():
            records[element_name] = cache.find(full.subset(cache.element.minimal_group))
        for element, reader in self._record_readers.items():
            records[element.name] = reader.read(row)
        return full.expanded(records)

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        result = set(self._full_reader.columns_required)
        for reader in self._record_readers.values():
            result.update(reader.columns_required)
        return result
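
# Note on the expanded reader (illustrative): each element's record comes
# from exactly one source.  Elements present in ``record_caches`` are looked
# up by the data ID subset that identifies them and contribute no extra row
# columns; every other element is read from the row by its
# `DimensionRecordReader`.  That is why ``columns_required`` above is the
# union of the full reader's key columns and the record columns of only the
# uncached elements.
#
#     # e.g. with "instrument" cached and "visit" uncached (hypothetical):
#     #   records["instrument"] <- record_caches["instrument"].find(...)
#     #   records["visit"]      <- record_readers[visit_element].read(row)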


class DatasetRefReader:
    """Reader class that extracts `DatasetRef` objects from query result rows.

    Parameters
    ----------
    dataset_type : `DatasetType`
        Dataset type for extracted references.
    full : `bool`, optional
        Whether to expect and extract implied dimensions as well as required
        dimensions.
    translate_collection : `~collections.abc.Callable`, optional
        Callable that returns `str` collection names given collection primary
        key values.  Optional only for registries that use names as primary
        keys, or if ``run`` is always passed to `read`.
    records : `bool`, optional
        Whether to attach dimension records to data IDs.
    record_caches : `~collections.abc.Mapping`, optional
        Mapping from dimension element name to cached dimension records.
        Ignored unless ``records=True``.
    """

    def __init__(
        self,
        dataset_type: DatasetType,
        *,
        full: bool = True,
        translate_collection: Callable[[Any], str] | None = None,
        records: bool = False,
        record_caches: Mapping[str, DimensionRecordSet] | None = None,
    ):
        self._data_coordinate_reader = DataCoordinateReader.make(
            dataset_type.dimensions.as_group(), full=full, records=records, record_caches=record_caches
        )
        self._dataset_type = dataset_type
        self._translate_collection = translate_collection
        self._id_tag = DatasetColumnTag(dataset_type.name, "dataset_id")
        self._run_tag = DatasetColumnTag(dataset_type.name, "run")

    __slots__ = (
        "_data_coordinate_reader",
        "_dataset_type",
        "_translate_collection",
        "_id_tag",
        "_run_tag",
        "_cached_columns_required",
    )

    def read(
        self,
        row: Mapping[ColumnTag, Any],
        *,
        run: str | None = None,
        data_id: DataCoordinate | None = None,
    ) -> DatasetRef:
        """Read a `DatasetRef` from a query result row.

        Parameters
        ----------
        row : `~collections.abc.Mapping`
            Mapping with `ColumnTag` keys representing a query result row.
        run : `str`, optional
            Name of the `~CollectionType.RUN` collection; when provided the
            run key does not need to be present in the result row, and
            ``translate_collection`` does not need to be provided at
            construction.
        data_id : `DataCoordinate`, optional
            Data ID; when provided the dimensions do not need to be present
            in the result row.

        Returns
        -------
        ref : `DatasetRef`
            New dataset reference.
        """
        if data_id is None:
            data_id = self._data_coordinate_reader.read(row)
        if run is None:
            run_key = row[self._run_tag]
            if self._translate_collection is not None:
                run = self._translate_collection(run_key)
            else:
                run = run_key
        return DatasetRef(
            self._dataset_type,
            data_id,
            run=run,
            id=row[self._id_tag],
        )

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        result = set(self._data_coordinate_reader.columns_required)
        result.add(self._id_tag)
        result.add(self._run_tag)
        return result
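
# Usage sketch (hypothetical names; ``row`` is a query result row that
# carries the dataset's ``dataset_id`` and ``run`` columns):
#
#     reader = DatasetRefReader(dataset_type, translate_collection=lookup)
#     ref = reader.read(row)
#
# When the RUN collection name and data ID are already known (e.g. a query
# constrained to a single RUN collection), the corresponding row lookups are
# skipped and only the dataset ID is still read from the row:
#
#     ref = reader.read(row, run="HSC/runs/RC2", data_id=data_id)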


class DimensionRecordReader:
    """Read dimension records.

    Parameters
    ----------
    element : `DimensionElement`
        The element to read.
    """

    def __init__(self, element: DimensionElement):
        self._cls = element.RecordClass
        self._tags = element.RecordClass.fields.columns

    __slots__ = ("_cls", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DimensionRecord:
        """Read a `DimensionRecord` from a query result row.

        Parameters
        ----------
        row : `~collections.abc.Mapping`
            Mapping with `ColumnTag` keys representing a query result row.

        Returns
        -------
        record : `DimensionRecord`
            New dimension record.
        """
        return self._cls(**{name: row[tag] for tag, name in self._tags.items()})

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return self._tags.keys()
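
# Usage sketch (hypothetical ``universe`` and ``row``): a reader built for
# one dimension element pulls that element's record columns out of a row and
# constructs the element's record class from them.
#
#     reader = DimensionRecordReader(universe["detector"])
#     record = reader.read(row)  # row must supply reader.columns_required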