Coverage for python/lsst/daf/butler/registry/queries/_readers.py: 46%

105 statements  

coverage.py v7.3.1, created at 2023-10-02 08:00 +0000

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This software is dual licensed under the GNU General Public License and also
# under a 3-clause BSD license. Recipients may choose which of these licenses
# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
# respectively. If you choose the GPL option then the following text applies
# (but note that there is still no warranty even if you opt for BSD instead):
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from __future__ import annotations

__all__ = (
    "DataCoordinateReader",
    "DatasetRefReader",
    "DimensionRecordReader",
)

from abc import ABC, abstractmethod
from collections.abc import Callable, Mapping, Set
from typing import TYPE_CHECKING, Any

from lsst.utils.classes import cached_getter

from ...core import (
    DataCoordinate,
    DatasetColumnTag,
    DatasetRef,
    DatasetType,
    DimensionElement,
    DimensionGraph,
    DimensionKeyColumnTag,
    DimensionRecord,
)

if TYPE_CHECKING:
    from lsst.daf.relation import ColumnTag

class DataCoordinateReader(ABC):
    """Base class and factory for reader objects that extract `DataCoordinate`
    instances from query result rows.
    """

    @staticmethod
    def make(
        dimensions: DimensionGraph,
        full: bool = True,
        records: bool = False,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] | None = None,
    ) -> DataCoordinateReader:
        """Construct a concrete reader for a set of dimensions.

        Parameters
        ----------
        dimensions : `DimensionGraph`
            Dimensions of the `DataCoordinate` instances the new reader will
            read.
        full : `bool`, optional
            Whether to expect and extract implied dimensions as well as
            required dimensions.
        records : `bool`, optional
            Whether to attach dimension records.
        record_caches : `~collections.abc.Mapping`, optional
            Nested mapping (outer keys are dimension elements, inner keys are
            data IDs for that element) of cached dimension records. Ignored
            unless ``records=True``.

        Returns
        -------
        reader : `DataCoordinateReader`
            Concrete reader instance.
        """
        if full:
            full_reader = _FullDataCoordinateReader(dimensions)
            if records:
                if record_caches is None:
                    record_caches = {}
                else:
                    record_caches = {
                        e: cache for e, cache in record_caches.items() if e in dimensions.elements
                    }
                record_readers = {}
                for element in dimensions.elements:
                    if element not in record_caches:
                        record_readers[element] = DimensionRecordReader(element)
                return _ExpandedDataCoordinateReader(full_reader, record_caches, record_readers)
            return full_reader
        else:
            assert not records, "Cannot add records unless full=True."
            return _BasicDataCoordinateReader(dimensions)

    __slots__ = ()

    @abstractmethod
    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        """Read a `DataCoordinate` from a query result row.

        Parameters
        ----------
        row : `~collections.abc.Mapping`
            Mapping with `ColumnTag` keys representing a query result row.

        Returns
        -------
        data_coordinate : `DataCoordinate`
            New data ID.
        """
        raise NotImplementedError()

    @property
    @abstractmethod
    def columns_required(self) -> Set[ColumnTag]:
        raise NotImplementedError()
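
# Illustrative sketch (not part of the original module): the intended call
# pattern for the factory above.  ``dimensions`` and ``rows`` here are
# hypothetical caller-supplied inputs; a real query engine would consult
# ``reader.columns_required`` when planning the query so that every needed
# tag is present in each result row.
def _example_read_data_ids(
    dimensions: DimensionGraph,
    rows: list[Mapping[ColumnTag, Any]],
) -> list[DataCoordinate]:
    reader = DataCoordinateReader.make(dimensions, full=True)
    return [reader.read(row) for row in rows]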

class _BasicDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=False`` case.

    Parameters
    ----------
    dimensions : `DimensionGraph`
        Dimensions of the `DataCoordinate` instances read.
    """

    def __init__(self, dimensions: DimensionGraph):
        self._dimensions = dimensions
        self._tags = tuple(DimensionKeyColumnTag(name) for name in self._dimensions.required.names)

    __slots__ = ("_dimensions", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        return DataCoordinate.fromRequiredValues(
            self._dimensions,
            tuple(row[tag] for tag in self._tags),
        )

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return frozenset(self._tags)
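
# Illustrative sketch (not part of the original module): the row shape the
# reader above consumes.  A query result row is just a mapping from column
# tags to Python values; the dimension names and values below are
# hypothetical.
def _example_basic_row() -> dict[DimensionKeyColumnTag, Any]:
    return {
        DimensionKeyColumnTag("instrument"): "HSC",
        DimensionKeyColumnTag("visit"): 903342,
    }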

class _FullDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``full=True`` case.

    Parameters
    ----------
    dimensions : `DimensionGraph`
        Dimensions of the `DataCoordinate` instances read.
    """

    def __init__(self, dimensions: DimensionGraph):
        self._dimensions = dimensions
        self._tags = tuple(DimensionKeyColumnTag(name) for name in self._dimensions._dataCoordinateIndices)

    __slots__ = ("_dimensions", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        return DataCoordinate.fromFullValues(
            self._dimensions,
            tuple(row[tag] for tag in self._tags),
        )

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return frozenset(self._tags)

class _ExpandedDataCoordinateReader(DataCoordinateReader):
    """Private subclass of `DataCoordinateReader` for the ``records=True``
    case, which attaches dimension records to the data IDs it reads.

    Parameters
    ----------
    full_reader : `_FullDataCoordinateReader`
        Reader for full data IDs that don't have records.
    record_caches : `~collections.abc.Mapping`
        Nested mapping (outer keys are dimension elements, inner keys are data
        IDs for that element) of cached dimension records.
    record_readers : `~collections.abc.Mapping`
        Mapping from `DimensionElement` to `DimensionRecordReader`.  Should
        include all elements in the data coordinate's dimensions that are not
        in ``record_caches``.
    """

    def __init__(
        self,
        full_reader: _FullDataCoordinateReader,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]],
        record_readers: Mapping[DimensionElement, DimensionRecordReader],
    ):
        self._full_reader = full_reader
        self._record_readers = record_readers
        self._record_caches = record_caches

    __slots__ = ("_full_reader", "_record_readers", "_record_caches", "_cached_columns_required")

    def read(self, row: Mapping[ColumnTag, Any]) -> DataCoordinate:
        # Docstring inherited.
        full = self._full_reader.read(row)
        records = {}
        for element, cache in self._record_caches.items():
            records[element.name] = cache[full.subset(element.graph)]
        for element, reader in self._record_readers.items():
            records[element.name] = reader.read(row)
        return full.expanded(records)

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        result = set(self._full_reader.columns_required)
        for reader in self._record_readers.values():
            result.update(reader.columns_required)
        return result
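
# Illustrative sketch (not part of the original module): the shape of the
# ``record_caches`` mapping consumed above.  ``element`` and ``records`` are
# hypothetical inputs; a real cache would be populated by the registry from
# its dimension-record storage.
def _example_record_cache(
    element: DimensionElement,
    records: list[DimensionRecord],
) -> Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]]:
    # Outer key: the dimension element; inner keys: each record's own data
    # ID, which is what ``read`` looks up via ``full.subset(element.graph)``.
    return {element: {record.dataId: record for record in records}}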

class DatasetRefReader:
    """Reader class that extracts `DatasetRef` objects from query result rows.

    Parameters
    ----------
    dataset_type : `DatasetType`
        Dataset type for extracted references.
    full : `bool`, optional
        Whether to expect and extract implied dimensions as well as required
        dimensions.
    translate_collection : `~collections.abc.Callable`, optional
        Callable that returns `str` collection names given collection primary
        key values.  Optional only for registries that use names as primary
        keys, or if ``run`` is always passed to `read`.
    records : `bool`, optional
        Whether to attach dimension records to data IDs.
    record_caches : `~collections.abc.Mapping`, optional
        Nested mapping (outer keys are dimension elements, inner keys are
        data IDs for that element) of cached dimension records.  Ignored
        unless ``records=True``.
    """

    def __init__(
        self,
        dataset_type: DatasetType,
        *,
        full: bool = True,
        translate_collection: Callable[[Any], str] | None = None,
        records: bool = False,
        record_caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]] | None = None,
    ):
        self._data_coordinate_reader = DataCoordinateReader.make(
            dataset_type.dimensions, full=full, records=records, record_caches=record_caches
        )
        self._dataset_type = dataset_type
        self._translate_collection = translate_collection
        self._id_tag = DatasetColumnTag(dataset_type.name, "dataset_id")
        self._run_tag = DatasetColumnTag(dataset_type.name, "run")

    __slots__ = (
        "_data_coordinate_reader",
        "_dataset_type",
        "_translate_collection",
        "_id_tag",
        "_run_tag",
        "_cached_columns_required",
    )

    def read(
        self,
        row: Mapping[ColumnTag, Any],
        *,
        run: str | None = None,
        data_id: DataCoordinate | None = None,
    ) -> DatasetRef:
        """Read a `DatasetRef` from a query result row.

        Parameters
        ----------
        row : `~collections.abc.Mapping`
            Mapping with `ColumnTag` keys representing a query result row.
        run : `str`, optional
            Name of the `~CollectionType.RUN` collection; when provided the
            run key does not need to be present in the result row, and
            ``translate_collection`` does not need to be provided at
            construction.
        data_id : `DataCoordinate`, optional
            Data ID; when provided the dimensions do not need to be present
            in the result row.

        Returns
        -------
        ref : `DatasetRef`
            New dataset reference.
        """
        if data_id is None:
            data_id = self._data_coordinate_reader.read(row)
        if run is None:
            run_key = row[self._run_tag]
            if self._translate_collection is not None:
                run = self._translate_collection(run_key)
            else:
                run = run_key
        return DatasetRef(
            self._dataset_type,
            data_id,
            run=run,
            id=row[self._id_tag],
        )

    @property
    @cached_getter
    def columns_required(self) -> Set[ColumnTag]:
        result = set(self._data_coordinate_reader.columns_required)
        result.add(self._id_tag)
        result.add(self._run_tag)
        return result
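
# Illustrative sketch (not part of the original module): reading resolved
# dataset references from query rows.  ``dataset_type`` and ``rows`` are
# hypothetical caller-supplied inputs; the query engine must ensure every
# tag in ``reader.columns_required`` is present in each row.
def _example_read_refs(
    dataset_type: DatasetType,
    rows: list[Mapping[ColumnTag, Any]],
) -> list[DatasetRef]:
    reader = DatasetRefReader(dataset_type, full=True)
    # Passing ``run=...`` to ``read`` would let the rows omit the run column
    # and make ``translate_collection`` unnecessary.
    return [reader.read(row) for row in rows]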

class DimensionRecordReader:
    """Reader class that extracts `DimensionRecord` objects from query result
    rows.

    Parameters
    ----------
    element : `DimensionElement`
        Element whose records the reader extracts.
    """

    def __init__(self, element: DimensionElement):
        self._cls = element.RecordClass
        self._tags = element.RecordClass.fields.columns

    __slots__ = ("_cls", "_tags")

    def read(self, row: Mapping[ColumnTag, Any]) -> DimensionRecord:
        return self._cls(**{name: row[tag] for tag, name in self._tags.items()})

    @property
    def columns_required(self) -> Set[ColumnTag]:
        return self._tags.keys()
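
# Illustrative sketch (not part of the original module): combining the pieces
# above to read expanded data IDs with records attached.  ``dimensions``,
# ``caches``, and ``rows`` are hypothetical caller-supplied inputs.
def _example_read_expanded(
    dimensions: DimensionGraph,
    caches: Mapping[DimensionElement, Mapping[DataCoordinate, DimensionRecord]],
    rows: list[Mapping[ColumnTag, Any]],
) -> list[DataCoordinate]:
    reader = DataCoordinateReader.make(dimensions, records=True, record_caches=caches)
    # Elements covered by ``caches`` need no record columns in the rows; all
    # other elements must appear as dimension-record columns.
    return [reader.read(row) for row in rows]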