Coverage for python/lsst/daf/butler/registry/summaries.py: 30%

102 statements  

« prev     ^ index     » next       coverage.py v6.4.1, created at 2022-06-15 02:06 -0700

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

# Public API of this module: the names exported by ``from ... import *``.
__all__ = ("CollectionSummary", "GovernorDimensionRestriction")

27 

28import itertools 

29from dataclasses import dataclass 

30from typing import ( 

31 AbstractSet, 

32 Any, 

33 ItemsView, 

34 Iterable, 

35 Iterator, 

36 List, 

37 Mapping, 

38 Optional, 

39 Set, 

40 Union, 

41 ValuesView, 

42) 

43 

44from lsst.utils.iteration import ensure_iterable 

45 

46from ..core import ( 

47 DataCoordinate, 

48 DatasetType, 

49 DimensionUniverse, 

50 GovernorDimension, 

51 NamedKeyDict, 

52 NamedKeyMapping, 

53 NamedValueAbstractSet, 

54 NamedValueSet, 

55) 

56 

57 

class GovernorDimensionRestriction(NamedKeyMapping[GovernorDimension, AbstractSet[str]]):
    """Mapping that records which values governor dimensions are allowed to
    take in some context.

    A dimension that has no key in the mapping is not constrained at all; a
    dimension mapped to an empty set permits no values.

    Parameters
    ----------
    mapping : `NamedKeyDict` [ `GovernorDimension`, `Set` [ `str` ]]
        Mapping from governor dimension to the values it may take.  The
        object is stored as-is (not copied) and is modified in place by the
        mutating methods of this class.
    """

    def __init__(self, mapping: NamedKeyDict[GovernorDimension, Set[str]]):
        self._mapping = mapping

    @classmethod
    def makeEmpty(cls, universe: DimensionUniverse) -> GovernorDimensionRestriction:
        """Build a restriction that permits no values for any governor
        dimension known to the given `DimensionUniverse`.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Object that manages all dimensions.

        Returns
        -------
        restriction : `GovernorDimensionRestriction`
            Restriction in which every governor dimension of ``universe`` is
            mapped to its own new, empty set.
        """
        governors = universe.getGovernorDimensions()
        return cls(NamedKeyDict((governor, set()) for governor in governors))

    @classmethod
    def makeFull(cls) -> GovernorDimensionRestriction:
        """Build a restriction that permits any value for any governor
        dimension.

        Returns
        -------
        restriction : `GovernorDimensionRestriction`
            Restriction instance that contains no keys, and hence allows any
            value for any governor dimension.
        """
        unconstrained = NamedKeyDict()
        return cls(unconstrained)

    def __eq__(self, other: Any) -> bool:
        # Only another restriction can compare equal; anything else is
        # simply "not equal" rather than an error.
        if isinstance(other, GovernorDimensionRestriction):
            return self._mapping == other._mapping
        return False

    def __str__(self) -> str:
        entries = [f"{dimension.name}: {values}" for dimension, values in self._mapping.items()]
        body = ", ".join(entries)
        return f"({body})"

    def __repr__(self) -> str:
        entries = [f"{dimension.name}={values}" for dimension, values in self._mapping.items()]
        body = ", ".join(entries)
        return f"GovernorDimensionRestriction({body})"

    def __iter__(self) -> Iterator[GovernorDimension]:
        return iter(self._mapping)

    def __len__(self) -> int:
        return len(self._mapping)

    @property
    def names(self) -> AbstractSet[str]:
        # Docstring inherited.
        return self._mapping.names

    def keys(self) -> NamedValueAbstractSet[GovernorDimension]:  # type: ignore
        return self._mapping.keys()

    def values(self) -> ValuesView[AbstractSet[str]]:
        return self._mapping.values()

    def items(self) -> ItemsView[GovernorDimension, AbstractSet[str]]:
        return self._mapping.items()

    def __getitem__(self, key: Union[str, GovernorDimension]) -> AbstractSet[str]:
        return self._mapping[key]

    def copy(self) -> GovernorDimensionRestriction:
        """Return a deep copy of this object.

        Returns
        -------
        copy : `GovernorDimensionRestriction`
            A new restriction with a new mapping and new value sets, so that
            it can be modified without modifying ``self`` at all.
        """
        duplicated = NamedKeyDict((dimension, set(allowed)) for dimension, allowed in self.items())
        return GovernorDimensionRestriction(duplicated)

    def add(self, dimension: GovernorDimension, value: str) -> None:
        """Allow one more value for a single dimension.

        If ``dimension`` has no entry in this restriction (i.e. it is
        already unconstrained) nothing is changed.

        Parameters
        ----------
        dimension : `GovernorDimension`
            Dimension to update.
        value : `str`
            Value to allow for this dimension.
        """
        allowed = self._mapping.get(dimension)
        if allowed is None:
            # No entry means "anything goes"; there is nothing to widen.
            return
        allowed.add(value)

    def update(self, other: Mapping[GovernorDimension, Union[str, Iterable[str]]]) -> None:
        """Modify ``self`` in place so that it permits every dimension value
        permitted by either ``self`` or ``other``.

        Parameters
        ----------
        other : `Mapping` [ `Dimension`, `str` or `Iterable` [ `str` ] ]
            Mapping to union into ``self``.  This may be another
            `GovernorDimensionRestriction` or any other mapping from dimension
            to `str` or iterable of `str`.
        """
        # A dimension constrained here but absent from 'other' is
        # unconstrained in 'other', so the union must not constrain it.
        only_in_self = self.keys() - other.keys()
        for dimension in only_in_self:
            self._mapping.pop(dimension, None)
        # Dimensions constrained on both sides get the union of the values.
        in_both = self.keys() & other.keys()
        for dimension in in_both:
            self._mapping[dimension].update(ensure_iterable(other[dimension]))
        # Dimensions that are in 'other' but not in 'self' are ignored, because
        # 'self' says they are already unconstrained.

    def union(
        self, *others: Mapping[GovernorDimension, Union[str, Iterable[str]]]
    ) -> GovernorDimensionRestriction:
        """Construct a restriction that permits any values permitted by any of
        the input restrictions.

        Parameters
        ----------
        *others : `Mapping` [ `Dimension`, `str` or `Iterable` [ `str` ] ]
            Mappings to union with ``self``.  These may be other
            `GovernorDimensionRestriction` instances or any other kind of
            mapping from dimension to `str` or iterable of `str`.

        Returns
        -------
        unioned : `GovernorDimensionRestriction`
            New restriction object (a copy of ``self`` updated with each of
            ``others`` in turn); ``self`` is left unmodified.
        """
        combined = self.copy()
        for restriction in others:
            combined.update(restriction)
        return combined

    def intersection_update(self, other: Mapping[GovernorDimension, Union[str, Iterable[str]]]) -> None:
        """Modify ``self`` in place so that it permits only dimension values
        permitted by both ``self`` and ``other``.

        Parameters
        ----------
        other : `Mapping` [ `Dimension`, `str` or `Iterable` [ `str` ] ]
            Mapping to intersect into ``self``.  This may be another
            `GovernorDimensionRestriction` or any other mapping from dimension
            to `str` or iterable of `str`.
        """
        for dimension, values in other.items():
            incoming = set(ensure_iterable(values))
            # When the dimension had no entry yet, setdefault stores and
            # returns 'incoming' itself, so the intersection below is a
            # harmless self-intersection.  That is simpler to read than
            # branching to avoid it.
            existing = self._mapping.setdefault(dimension, incoming)
            existing.intersection_update(incoming)

    def intersection(
        self, *others: Mapping[GovernorDimension, Union[str, Iterable[str]]]
    ) -> GovernorDimensionRestriction:
        """Construct a restriction that permits only values permitted by all of
        the input restrictions.

        Parameters
        ----------
        *others : `Mapping` [ `Dimension`, `str` or `Iterable` [ `str` ] ]
            Mappings to intersect with ``self``.  These may be other
            `GovernorDimensionRestriction` instances or any other kind of
            mapping from dimension to `str` or iterable of `str`.

        Returns
        -------
        intersection : `GovernorDimensionRestriction`
            New restriction object (a copy of ``self`` intersected with each
            of ``others`` in turn); ``self`` is left unmodified.
        """
        narrowed = self.copy()
        for restriction in others:
            narrowed.intersection_update(restriction)
        return narrowed

    def update_extract(self, data_id: DataCoordinate) -> None:
        """Modify ``self`` in place to also permit the governor dimension
        values found in the given data ID.

        Governor dimensions that have no entry in ``self`` (and so are
        already unconstrained) are left alone.

        Parameters
        ----------
        data_id : `DataCoordinate`
            Data ID from which governor dimension values should be extracted.
            Only the dimensions in ``data_id.graph.governors`` are consulted.
        """
        for dimension in data_id.graph.governors:
            allowed = self._mapping.get(dimension)
            if allowed is None:
                continue
            allowed.add(data_id[dimension])

263 

264 

265@dataclass 

266class CollectionSummary: 

267 """A summary of the datasets that can be found in a collection.""" 

268 

269 @classmethod 

270 def makeEmpty(cls, universe: DimensionUniverse) -> CollectionSummary: 

271 """Construct a `CollectionSummary` for a collection with no 

272 datasets. 

273 

274 Parameters 

275 ---------- 

276 universe : `DimensionUniverse` 

277 Object that manages all dimensions. 

278 

279 Returns 

280 ------- 

281 summary : `CollectionSummary` 

282 Summary object with no dataset types and no governor dimension 

283 values. 

284 """ 

285 return cls( 

286 datasetTypes=NamedValueSet(), 

287 dimensions=GovernorDimensionRestriction.makeEmpty(universe), 

288 ) 

289 

290 def copy(self) -> CollectionSummary: 

291 """Return a deep copy of this object. 

292 

293 Returns 

294 ------- 

295 copy : `CollectionSummary` 

296 A copy of ``self`` that can be modified without modifying ``self`` 

297 at all. 

298 """ 

299 return CollectionSummary(datasetTypes=self.datasetTypes.copy(), dimensions=self.dimensions.copy()) 

300 

301 def union(self, *others: CollectionSummary) -> CollectionSummary: 

302 """Construct a summary that contains all dataset types and governor 

303 dimension values in any of the inputs. 

304 

305 Parameters 

306 ---------- 

307 *others : `CollectionSummary` 

308 Restrictions to combine with ``self``. 

309 

310 Returns 

311 ------- 

312 unioned : `CollectionSummary` 

313 New summary object that represents the union of ``self`` with 

314 ``others``. 

315 """ 

316 if not others: 

317 return self 

318 datasetTypes = NamedValueSet(self.datasetTypes) 

319 datasetTypes.update(itertools.chain.from_iterable(o.datasetTypes for o in others)) 

320 dimensions = self.dimensions.union(*[o.dimensions for o in others]) 

321 return CollectionSummary(datasetTypes, dimensions) 

322 

323 def is_compatible_with( 

324 self, 

325 datasetType: DatasetType, 

326 restriction: GovernorDimensionRestriction, 

327 rejections: Optional[List[str]] = None, 

328 name: Optional[str] = None, 

329 ) -> bool: 

330 """Test whether the collection summarized by this object should be 

331 queried for a given dataset type and governor dimension values. 

332 

333 Parameters 

334 ---------- 

335 datasetType : `DatasetType` 

336 Dataset type being queried. If this collection has no instances of 

337 this dataset type (or its parent dataset type, if it is a 

338 component), `False` will always be returned. 

339 restriction : `GovernorDimensionRestriction` 

340 Restriction on the values governor dimensions can take in the 

341 query, usually from a WHERE expression. If this is disjoint with 

342 the data IDs actually present in the collection, `False` will be 

343 returned. 

344 rejections : `list` [ `str` ], optional 

345 If provided, a list that will be populated with a log- or 

346 exception-friendly message explaining why this dataset is 

347 incompatible with this collection when `False` is returned. 

348 name : `str`, optional 

349 Name of the collection this object summarizes, for use in messages 

350 appended to ``rejections``. Ignored if ``rejections`` is `None`. 

351 

352 Returns 

353 ------- 

354 compatible : `bool` 

355 `True` if the dataset query described by this summary and the given 

356 arguments might yield non-empty results; `False` if the result from 

357 such a query is definitely empty. 

358 """ 

359 parent = datasetType if not datasetType.isComponent() else datasetType.makeCompositeDatasetType() 

360 if parent not in self.datasetTypes: 

361 if rejections is not None: 

362 rejections.append(f"No datasets of type {parent.name} in collection {name!r}.") 

363 return False 

364 for governor in datasetType.dimensions.governors: 

365 if (values_in_self := self.dimensions.get(governor)) is not None: 

366 if (values_in_other := restriction.get(governor)) is not None: 

367 if values_in_self.isdisjoint(values_in_other): 

368 assert values_in_other, f"No valid values in restriction for dimension {governor}." 

369 if rejections is not None: 

370 rejections.append( 

371 f"No datasets with {governor.name} in {values_in_other} " 

372 f"in collection {name!r}." 

373 ) 

374 return False 

375 return True 

376 

377 datasetTypes: NamedValueSet[DatasetType] 

378 """Dataset types that may be present in the collection 

379 (`NamedValueSet` [ `DatasetType` ]). 

380 

381 A dataset type not in this set is definitely not in the collection, but 

382 the converse is not necessarily true. 

383 """ 

384 

385 dimensions: GovernorDimensionRestriction 

386 """Governor dimension values that may be present in the collection 

387 (`GovernorDimensionRestriction`). 

388 

389 A dimension value not in this restriction is definitely not in the 

390 collection, but the converse is not necessarily true. 

391 """