Coverage for python/lsst/daf/butler/registry/queries/_readers.py: 46% (109 statements)
coverage.py v7.3.2, created at 2023-12-01 11:00 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "DataCoordinateReader",
    "DatasetRefReader",
    "DimensionRecordReader",
)

from abc import ABC, abstractmethod
from collections.abc import Callable, Mapping, Set
from typing import TYPE_CHECKING, Any

from lsst.utils.classes import cached_getter

from ..._column_tags import DatasetColumnTag, DimensionKeyColumnTag
from ..._dataset_ref import DatasetRef
from ..._dataset_type import DatasetType
from ...dimensions import DataCoordinate, DimensionElement, DimensionGroup, DimensionRecord

if TYPE_CHECKING:
    from lsst.daf.relation import ColumnTag


class DataCoordinateReader(ABC):
    """Base class and factory for reader objects that extract `DataCoordinate`
    instances from query result rows.
    """

    @staticmethod
    def make(
        dimensions: DimensionGroup,
        full: bool = True,
        records: bool = False,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] | None = None,
    ) -> DataCoordinateReader:
        """Construct a concrete reader for a set of dimensions.

        Parameters
        ----------
        dimensions : `DimensionGroup`
            Dimensions of the `DataCoordinate` instances the new reader will
            read.
        full : `bool`, optional
            Whether to expect and extract implied dimensions as well as
            required dimensions.
        records : `bool`, optional
            Whether to attach dimension records.
        record_caches : `~collections.abc.Mapping`, optional
            Nested mapping (outer keys are dimension elements, inner keys are
            data IDs for that element) of cached dimension records.  Ignored
            unless ``records=True``.

        Returns
        -------
        reader : `DataCoordinateReader`
            Concrete reader instance.
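
        Examples
        --------
        A minimal usage sketch, assuming ``dimensions`` is a `DimensionGroup`
        and ``rows`` is an iterable of query result rows (both stand-ins for
        objects provided by the caller)::

            reader = DataCoordinateReader.make(dimensions)
            data_ids = [reader.read(row) for row in rows]

        `columns_required` reports the `ColumnTag` keys each row must
        provide.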
        """
        if full:
            full_reader = _FullDataCoordinateReader(dimensions)
            if records:
                if record_caches is None:
                    record_caches = {}
                else:
                    # Drop cache entries for elements outside these
                    # dimensions.
                    record_caches = {
                        e: cache for e, cache in record_caches.items() if e in dimensions.elements
                    }
                # Make readers for the elements that have no cache; their
                # records must come from the result rows themselves.
                record_readers = {}
                for element_name in dimensions.elements:
                    element = dimensions.universe[element_name]
                    if element_name not in record_caches:
                        record_readers[element] = DimensionRecordReader(element)
                return _ExpandedDataCoordinateReader(full_reader, record_caches, record_readers)
            return full_reader
        else:
            assert not records, "Cannot add records unless full=True."
            return _BasicDataCoordinateReader(dimensions)

    __slots__ = ()

    @abstractmethod
    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        """Read a `DataCoordinate` from a query result row.

        Parameters
        ----------
        row : `~collections.abc.Mapping`
            Mapping with `ColumnTag` keys representing a query result row.

        Returns
        -------
        data_coordinate : `DataCoordinate`
            New data ID.
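
        Examples
        --------
        A sketch of the expected row structure, for a reader over the
        ``instrument`` and ``detector`` dimensions (the values here are
        illustrative)::

            row = {
                DimensionKeyColumnTag("instrument"): "HSC",
                DimensionKeyColumnTag("detector"): 101,
            }
            data_id = reader.read(row)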
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def columns_required(self) -> Set[ColumnTag]:
        """The set of `ColumnTag` keys this reader requires in each row."""
        raise NotImplementedError()


class _BasicDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=False`` case.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        Dimensions of the `DataCoordinate` instances read.
    """

    def __init__(self, dimensions: DimensionGroup):
        self._dimensions = dimensions
        # Only the required dimensions are expected in result rows.
        self._tags = tuple(DimensionKeyColumnTag(name) for name in self._dimensions.required.names)

    __slots__ = ("_dimensions", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        return DataCoordinate.from_required_values(
            self._dimensions,
            tuple(row[tag] for tag in self._tags),
        )

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return frozenset(self._tags)


class _FullDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=True`` case.

    Parameters
    ----------
    dimensions : `DimensionGroup`
        Dimensions of the `DataCoordinate` instances read.
    """

    def __init__(self, dimensions: DimensionGroup):
        self._dimensions = dimensions
        # Both required and implied dimensions are expected in result rows.
        self._tags = tuple(
            DimensionKeyColumnTag(name) for name in self._dimensions.as_group().data_coordinate_keys
        )

    __slots__ = ("_dimensions", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        return DataCoordinate.from_full_values(
            self._dimensions,
            tuple(row[tag] for tag in self._tags),
        )

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return frozenset(self._tags)


class _ExpandedDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=True``,
    ``records=True`` case.

    Parameters
    ----------
    full_reader : `_FullDataCoordinateReader`
        Reader for full data IDs that don't have records.
    record_caches : `~collections.abc.Mapping`
        Nested mapping (outer keys are dimension elements, inner keys are data
        IDs for that element) of cached dimension records.
    record_readers : `~collections.abc.Mapping`
        Mapping from `DimensionElement` to `DimensionRecordReader`.  Should
        include all elements in the data coordinate's dimensions that are not
        in ``record_caches``.
    """

    def __init__(
        self,
        full_reader: _FullDataCoordinateReader,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]],
        record_readers: Mapping[DimensionElement, DimensionRecordReader],
    ):
        self._full_reader = full_reader
        self._record_readers = record_readers
        self._record_caches = record_caches

    __slots__ = ("_full_reader", "_record_readers", "_record_caches", "_cached_columns_required")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        full = self._full_reader.read(row)
        records = {}
        # Use cached records where we have them, and read the rest from the
        # row itself.
        for element, cache in self._record_caches.items():
            records[element.name] = cache[full.subset(element.graph)]
        for element, reader in self._record_readers.items():
            records[element.name] = reader.read(row)
        return full.expanded(records)

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        result = set(self._full_reader.columns_required)
        for reader in self._record_readers.values():
            result.update(reader.columns_required)
        return result


class DatasetRefReader:
    """Reader class that extracts `DatasetRef` objects from query result rows.

    Parameters
    ----------
    dataset_type : `DatasetType`
        Dataset type for extracted references.
    full : `bool`, optional
        Whether to expect and extract implied dimensions as well as required
        dimensions.
    translate_collection : `~collections.abc.Callable`, optional
        Callable that returns `str` collection names given collection primary
        key values.  Optional only for registries that use names as primary
        keys, or if ``run`` is always passed to `read`.
    records : `bool`, optional
        Whether to attach dimension records to data IDs.
    record_caches : `~collections.abc.Mapping`, optional
        Nested mapping (outer keys are dimension elements, inner keys are
        data IDs for that element) of cached dimension records.  Ignored
        unless ``records=True``.
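
    Examples
    --------
    A minimal usage sketch, assuming ``dataset_type`` is a `DatasetType`,
    ``rows`` is an iterable of query result rows, and the registry stores
    collection names directly (all stand-ins for objects provided by the
    caller)::

        reader = DatasetRefReader(dataset_type)
        refs = [reader.read(row) for row in rows]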
    """

    def __init__(
        self,
        dataset_type: DatasetType,
        *,
        full: bool = True,
        translate_collection: Callable[[Any], str] | None = None,
        records: bool = False,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] | None = None,
    ):
        self._data_coordinate_reader = DataCoordinateReader.make(
            dataset_type.dimensions.as_group(), full=full, records=records, record_caches=record_caches
        )
        self._dataset_type = dataset_type
        self._translate_collection = translate_collection
        self._id_tag = DatasetColumnTag(dataset_type.name, "dataset_id")
        self._run_tag = DatasetColumnTag(dataset_type.name, "run")

    __slots__ = (
        "_data_coordinate_reader",
        "_dataset_type",
        "_translate_collection",
        "_id_tag",
        "_run_tag",
        "_cached_columns_required",
    )

    def read(
        self,
        row: Mapping[ColumnTag, Any],
        *,
        run: str | None = None,
        data_id: DataCoordinate | None = None,
    ) -> DatasetRef:
        """Read a `DatasetRef` from a query result row.

        Parameters
        ----------
        row : `~collections.abc.Mapping`
            Mapping with `ColumnTag` keys representing a query result row.
        run : `str`, optional
            Name of the `~CollectionType.RUN` collection; when provided the
            run key does not need to be present in the result row, and
            ``translate_collection`` does not need to be provided at
            construction.
        data_id : `DataCoordinate`, optional
            Data ID; when provided the dimensions do not need to be present
            in the result row.
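
        Returns
        -------
        ref : `DatasetRef`
            New dataset reference.

        Examples
        --------
        Passing ``run`` skips reading (and translating) the run key from the
        row; the collection name here is illustrative::

            ref = reader.read(row, run="HSC/runs/demo")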
        """
        if data_id is None:
            data_id = self._data_coordinate_reader.read(row)
        if run is None:
            run_key = row[self._run_tag]
            # Translate a primary-key value into a collection name if a
            # translator was provided; otherwise the key is already the name.
            if self._translate_collection is not None:
                run = self._translate_collection(run_key)
            else:
                run = run_key
        return DatasetRef(
            self._dataset_type,
            data_id,
            run=run,
            id=row[self._id_tag],
        )

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        result = set(self._data_coordinate_reader.columns_required)
        result.add(self._id_tag)
        result.add(self._run_tag)
        return result


class DimensionRecordReader:
    """Reader class that extracts `DimensionRecord` objects from query result
    rows.

    Parameters
    ----------
    element : `DimensionElement`
        Dimension element whose records this reader extracts.

    """

    def __init__(self, element: DimensionElement):
        self._cls = element.RecordClass
        # Mapping from `ColumnTag` to the corresponding record field name.
        self._tags = element.RecordClass.fields.columns

    __slots__ = ("_cls", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DimensionRecord:
        """Read a `DimensionRecord` from a query result row."""
        return self._cls(**{name: row[tag] for tag, name in self._tags.items()})

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return self._tags.keys()