Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

21from __future__ import annotations 

22 

23__all__ = ( 

24 "CategorizedWildcard", 

25 "CollectionContentRestriction", 

26 "CollectionQuery", 

27 "CollectionSearch", 

28 "DatasetTypeRestriction", 

29 "GovernorDimensionRestriction", 

30) 

31 

32from collections import defaultdict 

33from dataclasses import dataclass 

34import re 

35from typing import ( 

36 AbstractSet, 

37 Any, 

38 Callable, 

39 ClassVar, 

40 Dict, 

41 FrozenSet, 

42 Iterable, 

43 Iterator, 

44 List, 

45 Optional, 

46 Sequence, 

47 Set, 

48 Tuple, 

49 TYPE_CHECKING, 

50 Union, 

51) 

52 

53import sqlalchemy 

54 

55from ..core import DataCoordinate, DatasetType, DimensionUniverse, GovernorDimension 

56from ..core.named import NamedKeyDict, NamedKeyMapping 

57from ..core.utils import iterable 

58from ._collectionType import CollectionType 

59 

if TYPE_CHECKING:
    from .interfaces import CollectionManager, CollectionRecord

    # Workaround for `...` not having an exposed type in Python, borrowed from
    # https://github.com/python/typing/issues/684#issuecomment-548203158
    # Along with that, we need to use `Ellipsis` instead of `...` for
    # the actual sentinel value internally, and tell MyPy to ignore conversions
    # from `...` to `Ellipsis` at the public-interface boundary.
    #
    # `Ellipsis` and `EllipsisType` should be directly imported from this
    # module by related code that needs them; hopefully that will stay confined
    # to `lsst.daf.butler.registry`.  Putting these in __all__ is bad for
    # Sphinx, and probably more confusing than helpful overall.
    from enum import Enum

    class EllipsisType(Enum):
        Ellipsis = "..."

    Ellipsis = EllipsisType.Ellipsis

else:
    # At runtime these are simply the builtin `...` singleton and its type;
    # the Enum above exists only to give MyPy something to check against.
    EllipsisType = type(Ellipsis)
    Ellipsis = Ellipsis

83 

84 

@dataclass
class CategorizedWildcard:
    """The results of preprocessing a wildcard expression to separate match
    patterns from strings.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).
    """

    @classmethod
    def fromExpression(cls, expression: Any, *,
                       allowAny: bool = True,
                       allowPatterns: bool = True,
                       coerceUnrecognized: Optional[Callable[[Any], Union[Tuple[str, Any], str]]] = None,
                       coerceItemValue: Optional[Callable[[Any], Any]] = None,
                       defaultItemValue: Optional[Any] = None,
                       ) -> Union[CategorizedWildcard, EllipsisType]:
        """Categorize a wildcard expression.

        Parameters
        ----------
        expression
            The expression to categorize.  May be any of:
             - `str`;
             - `re.Pattern` (only if ``allowPatterns`` is `True`);
             - objects recognized by ``coerceUnrecognized`` (if provided);
             - two-element tuples of (`str`, value) where value is recognized
               by ``coerceItemValue`` (if provided);
             - a non-`str`, non-mapping iterable containing any of the above;
             - the special value `...` (only if ``allowAny`` is `True`), which
               matches anything;
             - a mapping from `str` to values recognized by
               ``coerceItemValue`` (if provided);
             - a `CategorizedWildcard` instance (passed through unchanged if
               it meets the requirements specified by keyword arguments).
        allowAny : `bool`, optional
            If `False` (`True` is default) raise `TypeError` if `...` is
            encountered.
        allowPatterns : `bool`, optional
            If `False` (`True` is default) raise `TypeError` if a `re.Pattern`
            is encountered, or if ``expression`` is a `CategorizedWildcard`
            with `patterns` not empty.
        coerceUnrecognized : `Callable`, optional
            A callback that takes a single argument of arbitrary type and
            returns either a `str` - appended to `strings` - or a `tuple` of
            (`str`, `Any`) to be appended to `items`.  This will be called on
            objects of unrecognized type, with the return value added to
            `strings`.  Exceptions will be reraised as `TypeError` (and
            chained).
        coerceItemValue : `Callable`, optional
            If provided, ``expression`` may be a mapping from `str` to any
            type that can be passed to this function; the result of that call
            will be stored instead as the value in ``self.items``.
        defaultItemValue : `Any`, optional
            If provided, combine this value with any string values encountered
            (including any returned by ``coerceUnrecognized``) to form a
            `tuple` and add it to `items`, guaranteeing that `strings` will be
            empty.  Patterns are never added to `items`.

        Returns
        -------
        categorized : `CategorizedWildcard` or ``...``.
            The struct describing the wildcard.  ``...`` is passed through
            unchanged.

        Raises
        ------
        TypeError
            Raised if an unsupported type is found in the expression.
        """
        assert expression is not None
        # See if we were given ...; just return that if we were.
        if expression is Ellipsis:
            if not allowAny:
                raise TypeError("This expression may not be unconstrained.")
            return Ellipsis
        if isinstance(expression, cls):
            # This is already a CategorizedWildcard.  Make sure it meets the
            # requirements implied by the kwargs we got.
            if not allowPatterns and expression.patterns:
                raise TypeError(f"Regular expression(s) {expression.patterns} "
                                f"are not allowed in this context.")
            if defaultItemValue is not None and expression.strings:
                # Caller wants everything as (str, value) items, but this
                # instance has bare strings; we can only convert if it has
                # no pre-existing items (mixing would lose the order).
                if expression.items:
                    raise TypeError("Incompatible preprocessed expression: an ordered sequence of str is "
                                    "needed, but the original order was lost in the preprocessing.")
                return cls(strings=[], patterns=expression.patterns,
                           items=[(k, defaultItemValue) for k in expression.strings])
            elif defaultItemValue is None and expression.items:
                # Caller wants bare strings; convert items back to their
                # keys, again only safe if there were no separate strings.
                if expression.strings:
                    raise TypeError("Incompatible preprocessed expression: an ordered sequence of items is "
                                    "needed, but the original order was lost in the preprocessing.")
                return cls(strings=[k for k, _ in expression.items], patterns=expression.patterns, items=[])
            else:
                # Original expression was created with keyword arguments that
                # were at least as restrictive as what we just got; pass it
                # through.
                return expression

        # If we get here, we know we'll be creating a new instance.
        # Initialize an empty one now.  (``self`` is a plain local name here;
        # this is a classmethod, so no instance existed before this line.)
        self = cls(strings=[], patterns=[], items=[])

        # If mappings are allowed, see if we were given a single mapping by
        # trying to get items.
        if coerceItemValue is not None:
            rawItems = None
            try:
                rawItems = expression.items()
            except AttributeError:
                # Not a mapping; fall through to scalar/iterable handling.
                pass
            if rawItems is not None:
                for k, v in rawItems:
                    try:
                        self.items.append((k, coerceItemValue(v)))
                    except Exception as err:
                        raise TypeError(f"Could not coerce mapping value '{v}' for key '{k}'.") from err
                return self

        # Not ..., a CategorizedWildcard instance, or a mapping.  Just
        # process scalars or an iterable.  We put the body of the loop inside
        # a local function so we can recurse after coercion.

        def process(element: Any, alreadyCoerced: bool = False) -> None:
            # Classify a single element, appending it to the appropriate
            # list (strings, patterns, or items) on ``self``.
            if isinstance(element, str):
                if defaultItemValue is not None:
                    self.items.append((element, defaultItemValue))
                else:
                    self.strings.append(element)
                return
            if allowPatterns and isinstance(element, re.Pattern):
                self.patterns.append(element)
                return
            if coerceItemValue is not None:
                # A two-element unpack is how we detect a (key, value) pair;
                # non-iterables (and wrong arity) raise TypeError.
                try:
                    k, v = element
                except TypeError:
                    pass
                else:
                    if not alreadyCoerced:
                        if not isinstance(k, str):
                            raise TypeError(f"Item key '{k}' is not a string.")
                        try:
                            v = coerceItemValue(v)
                        except Exception as err:
                            raise TypeError(f"Could not coerce tuple item value '{v}' for key '{k}'."
                                            ) from err
                    self.items.append((k, v))
                    return
            if alreadyCoerced:
                # coerceUnrecognized already ran once; don't recurse forever.
                raise TypeError(f"Object '{element}' returned by coercion function is still unrecognized.")
            if coerceUnrecognized is not None:
                try:
                    process(coerceUnrecognized(element), alreadyCoerced=True)
                except Exception as err:
                    raise TypeError(f"Could not coerce expression element '{element}'.") from err
            else:
                raise TypeError(f"Unsupported object in wildcard expression: '{element}'.")

        # ``iterable`` wraps a scalar in a one-element iterable, so a single
        # str or pattern takes the same path as a sequence of them.
        for element in iterable(expression):
            process(element)
        return self

    def makeWhereExpression(self, column: sqlalchemy.sql.ColumnElement
                            ) -> Optional[sqlalchemy.sql.ColumnElement]:
        """Transform the wildcard into a SQLAlchemy boolean expression suitable
        for use in a WHERE clause.

        Parameters
        ----------
        column : `sqlalchemy.sql.ColumnElement`
            A string column in a table or query that should be compared to the
            wildcard expression.

        Returns
        -------
        where : `sqlalchemy.sql.ColumnElement` or `None`
            A boolean SQL expression that evaluates to true if and only if
            the value of ``column`` matches the wildcard.  `None` is returned
            if both `strings` and `patterns` are empty, and hence no match is
            possible.

        Raises
        ------
        NotImplementedError
            Raised if this wildcard contains `items` or `patterns`, which
            cannot (yet) be converted to SQL.
        """
        if self.items:
            raise NotImplementedError("Expressions that are processed into items cannot be transformed "
                                      "automatically into queries.")
        if self.patterns:
            raise NotImplementedError("Regular expression patterns are not yet supported here.")
        terms = []
        if len(self.strings) == 1:
            # A single value compares with ``==`` rather than ``IN``.
            terms.append(column == self.strings[0])
        elif len(self.strings) > 1:
            terms.append(column.in_(self.strings))
        # TODO: append terms for regular expressions
        if not terms:
            return None
        return sqlalchemy.sql.or_(*terms)

    strings: List[str]
    """Explicit string values found in the wildcard (`list` [ `str` ]).
    """

    patterns: List[re.Pattern]
    """Regular expression patterns found in the wildcard
    (`list` [ `re.Pattern` ]).
    """

    items: List[Tuple[str, Any]]
    """Two-item tuples that relate string values to other objects
    (`list` [ `tuple` [ `str`, `Any` ] ]).
    """

296 

297 

class DatasetTypeRestriction:
    """An immutable set-like object describing which dataset types may be
    searched for within a collection.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    names : `frozenset` [`str`] or `...`
        The names of the dataset types included in the restriction, or `...`
        to permit a search for any dataset type.

    Notes
    -----
    This class does not inherit from `collections.abc.Set` (and does not
    implement the full set interface) because it is not always iterable and
    sometimes has no length (i.e. when ``names`` is ``...``).
    """
    def __init__(self, names: Union[FrozenSet[str], EllipsisType]):
        self.names = names

    __slots__ = ("names",)

    @classmethod
    def fromExpression(cls, expression: Any) -> DatasetTypeRestriction:
        """Process a general expression to construct a `DatasetTypeRestriction`
        instance.

        Parameters
        ----------
        expression
            May be:
             - a `DatasetType` instance;
             - a `str` dataset type name;
             - any non-mapping iterable containing either of the above;
             - the special value `...`;
             - another `DatasetTypeRestriction` instance (passed through
               unchanged).

        Returns
        -------
        restriction : `DatasetTypeRestriction`
            A `DatasetTypeRestriction` instance.
        """
        if isinstance(expression, cls):
            # Already a restriction; pass it through unchanged.
            return expression
        categorized = CategorizedWildcard.fromExpression(
            expression,
            allowPatterns=False,
            coerceUnrecognized=lambda d: d.name,
        )
        if categorized is Ellipsis:
            return cls.any
        return cls(frozenset(categorized.strings))

    def __contains__(self, datasetType: DatasetType) -> bool:
        if self.names is Ellipsis:
            return True
        if datasetType.name in self.names:
            return True
        # A component dataset type is permitted whenever its parent
        # (composite) dataset type is.
        if datasetType.isComponent():
            parentName, _ = DatasetType.splitDatasetTypeName(datasetType.name)
            return parentName in self.names
        return False

    def __eq__(self, other: Any) -> bool:
        return self.names == other.names if isinstance(other, DatasetTypeRestriction) else False

    def __str__(self) -> str:
        if self.names is Ellipsis:
            return "..."
        return "{{{}}}".format(", ".join(self.names))

    def __repr__(self) -> str:
        if self.names is Ellipsis:
            return "DatasetTypeRestriction(...)"
        return f"DatasetTypeRestriction({self.names!r})"

    @staticmethod
    def union(*args: DatasetTypeRestriction) -> DatasetTypeRestriction:
        """Merge one or more `DatasetTypeRestriction` instances, returning one
        that allows any of the dataset types included in any of them.

        Parameters
        ----------
        *args
            Positional arguments are `DatasetTypeRestriction` instances.
        """
        combined: Set[str] = set()
        for restriction in args:
            # Anything unioned with "permit everything" permits everything.
            if restriction.names is Ellipsis:
                return DatasetTypeRestriction.any
            combined.update(restriction.names)
        return DatasetTypeRestriction(frozenset(combined))

    names: Union[FrozenSet[str], EllipsisType]
    """The names of the dataset types included (i.e. permitted) by the
    restriction, or the special value ``...`` to permit all dataset types
    (`frozenset` [ `str` ] or ``...``).
    """

    any: ClassVar[DatasetTypeRestriction]
    """A special `DatasetTypeRestriction` instance that permits any dataset
    type.

    This instance should be preferred instead of constructing a new one with
    ``...``, when possible, but it should not be assumed to be the only such
    instance (i.e. don't use ``is`` instead of ``==`` for comparisons).
    """


DatasetTypeRestriction.any = DatasetTypeRestriction(Ellipsis)

411 

412 

class GovernorDimensionRestriction:
    """An object that represents a restriction on some entity to only certain
    values of the governor dimensions.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Object managing all dimensions.
    **kwargs : `str` or `Iterable` [ `str` ]
        Dimension values to restrict to, keyed by governor dimension name.
        Dimensions not named here are unrestricted.

    Raises
    ------
    ValueError
        Raised if a keyword argument is not the name of a governor dimension.
    """
    def __init__(self, universe: DimensionUniverse, **kwargs: Union[str, Iterable[str], EllipsisType]):
        self.universe = universe
        self._dict: NamedKeyDict[GovernorDimension, Set[str]] = NamedKeyDict()
        for dimension in universe.getGovernorDimensions():
            # ``Ellipsis`` is the internal "unrestricted" sentinel; only
            # dimensions with an explicit restriction get an entry in _dict.
            value = kwargs.pop(dimension.name, Ellipsis)
            if value is not Ellipsis:
                self._dict[dimension] = set(iterable(value))
        if kwargs:
            # Anything left over did not match a governor dimension name.
            raise ValueError(
                f"Invalid keyword argument(s): {kwargs.keys()} (must be governor dimension names)."
            )

    @staticmethod
    def union(
        universe: DimensionUniverse,
        *args: GovernorDimensionRestriction
    ) -> GovernorDimensionRestriction:
        """Merge one or more `GovernorDimensionRestriction` instances.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Object managing all known dimensions.
        *args
            Additional positional arguments are `GovernorDimensionRestriction`
            instances.

        Returns
        -------
        merged : `GovernorDimensionRestriction`
            A `GovernorDimensionRestriction` that allows any of the dimension
            values permitted by any of the inputs.
        """
        mapping: Dict[str, Union[Set[str], EllipsisType]] = defaultdict(set)
        for a in args:
            for dimension in universe.getGovernorDimensions():
                new_values = a.mapping.get(dimension, Ellipsis)
                if new_values is Ellipsis:
                    # One input is unrestricted for this dimension, so the
                    # union is unrestricted for it too.
                    mapping[dimension.name] = Ellipsis
                else:
                    accumulated = mapping[dimension.name]
                    if accumulated is not Ellipsis:
                        accumulated.update(new_values)
        # __init__ ignores Ellipsis-valued kwargs, yielding "unrestricted".
        return GovernorDimensionRestriction(universe, **mapping)

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, GovernorDimensionRestriction):
            return False
        return self.mapping == other.mapping

    def __str__(self) -> str:
        return "({})".format(
            ", ".join(f"{dimension.name}: {values}" for dimension, values in self.mapping.items())
        )

    def __repr__(self) -> str:
        return "GovernorDimensionRestriction(<universe>, {})".format(
            ", ".join(f"{dimension.name}={values!r}" for dimension, values in self.mapping.items())
        )

    def isConsistentWith(self, dataId: DataCoordinate) -> bool:
        """Test whether this restriction is consistent with the given data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID to test.

        Returns
        -------
        consistent : `bool`
            `True` if all values in the data ID are either not restricted by
            ``self``, or are included in ``self``.
        """
        # Only dimensions that are both restricted here and present in the
        # data ID can cause an inconsistency.
        for dimension in self._dict.keys() & dataId.graph.dimensions:
            # Bug fix: the data ID is inconsistent when its value is NOT in
            # the allowed set; the previous ``in`` test was inverted and
            # contradicted the documented contract above.
            if dataId[dimension] not in self._dict[dimension]:
                return False
        return True

    @property
    def mapping(self) -> NamedKeyMapping[GovernorDimension, AbstractSet[str]]:
        """A `NamedKeyMapping` view of this restriction, with all restricted
        dimensions as keys and sets of allowed data ID values as dictionary
        values.
        """
        return self._dict

    universe: DimensionUniverse
    """Object that manages all known dimensions (`DimensionUniverse`).
    """

514 

515 

class CollectionContentRestriction:
    """All restrictions that can be applied to the datasets that may be
    included in a collection.

    Parameters
    ----------
    datasetTypes : `DatasetTypeRestriction`, optional
        Restriction on dataset types.
    dimensions : `GovernorDimensionRestriction`, optional
        Restriction on governor dimension values.
    universe : `DimensionUniverse`
        Object managing all known dimensions.  Required only when
        ``dimensions`` is not provided.
    """
    def __init__(
        self,
        datasetTypes: DatasetTypeRestriction = DatasetTypeRestriction.any,
        dimensions: Optional[GovernorDimensionRestriction] = None,
        *,
        universe: Optional[DimensionUniverse] = None,
    ):
        self.datasetTypes = datasetTypes
        if dimensions is not None:
            self.dimensions = dimensions
        elif universe is not None:
            # No explicit dimension restriction; start out unrestricted.
            self.dimensions = GovernorDimensionRestriction(universe)
        else:
            raise TypeError("At least one of 'dimensions' and 'universe' must be provided.")

    @classmethod
    def fromExpression(cls, expression: Any, universe: DimensionUniverse) -> CollectionContentRestriction:
        """Construct a new restriction instance from an expression.

        Parameters
        ----------
        expression
            Either an existing `CollectionContentRestriction` instance (passed
            through unchanged) or any of the objects described in
            `DatasetTypeRestriction.fromExpression`.
        universe : `DimensionUniverse`
            Object managing all known dimensions.
        """
        if isinstance(expression, cls):
            return expression
        return cls(
            datasetTypes=DatasetTypeRestriction.fromExpression(expression),
            universe=universe,
        )

    @staticmethod
    def union(
        universe: DimensionUniverse,
        *args: CollectionContentRestriction
    ) -> CollectionContentRestriction:
        """Merge one or more `CollectionContentRestriction` instances,
        returning one that allows any of the dataset types or governor
        dimension values included in any of them.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Object managing all known dimensions.
        args
            Positional arguments are `CollectionContentRestriction` instances.
        """
        return CollectionContentRestriction(
            DatasetTypeRestriction.union(*(arg.datasetTypes for arg in args)),
            GovernorDimensionRestriction.union(universe, *(arg.dimensions for arg in args)),
        )

    @classmethod
    def fromPairs(
        cls,
        pairs: Iterable[Tuple[str, Optional[str]]],
        universe: DimensionUniverse,
    ) -> CollectionContentRestriction:
        """Construct a restriction from a set of tuples that can be more easily
        mapped to a database representation.

        Parameters
        ----------
        pairs : `Iterable` [ `Tuple` [ `str`, `str` or `None` ] ]
            Pairs to interpret.  The first element of each tuple is either a
            governor dimension name or the special string "dataset_type".  The
            second element is the value of the dimension, the name of the
            dataset type, or `None` to indicate that there is no restriction
            on that dimension or on dataset types.
        universe : `DimensionUniverse`
            Object managing all known dimensions.

        Returns
        -------
        restriction : `CollectionContentRestriction`
            New restriction instance.
        """
        dimensions = defaultdict(set)
        datasetTypeNames: Optional[Set[str]] = set()
        for key, value in pairs:
            if key != "dataset_type":
                dimensions[key].add(value)
            elif value is None:
                # None means "no restriction on dataset types at all".
                datasetTypeNames = None
            elif datasetTypeNames is None:
                raise RuntimeError("Inconsistent collection content restriction.")
            else:
                datasetTypeNames.add(value)
        if datasetTypeNames is None:
            typeRestriction = DatasetTypeRestriction(Ellipsis)
        else:
            typeRestriction = DatasetTypeRestriction(frozenset(datasetTypeNames))
        return cls(
            typeRestriction,
            GovernorDimensionRestriction(universe, **dimensions),
        )

    def toPairs(self) -> Iterator[Tuple[str, Optional[str]]]:
        """Transform the restriction to a set of tuples that can be more easily
        mapped to a database representation.

        Yields
        ------
        key : `str`
            Either a governor dimension name or the special string
            "dataset_type".
        value : `str` or `None`
            The value of the dimension, the name of the dataset type, or `None`
            to indicate that there is no restriction on that dimension or on
            dataset types.
        """
        names = self.datasetTypes.names
        if names is Ellipsis:
            yield ("dataset_type", None)
        else:
            for name in sorted(names):
                yield ("dataset_type", name)
        for dimension, values in self.dimensions.mapping.items():
            dimensionName = dimension.name
            for value in sorted(values):
                yield (dimensionName, value)

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, CollectionContentRestriction):
            return self.datasetTypes == other.datasetTypes and self.dimensions == other.dimensions
        return False

    def __str__(self) -> str:
        terms = [f"datasetTypes: {self.datasetTypes}"]
        terms.extend(
            f"{dimension.name}: {values}" for dimension, values in self.dimensions.mapping.items()
        )
        return "({})".format(", ".join(terms))

    def __repr__(self) -> str:
        return f"CollectionContentRestriction({self.datasetTypes!r}, {self.dimensions!r})"

661 

def _yieldCollectionRecords(
    manager: CollectionManager,
    record: CollectionRecord,
    collectionTypes: AbstractSet[CollectionType] = CollectionType.all(),
    done: Optional[Set[str]] = None,
    flattenChains: bool = True,
    includeChains: Optional[bool] = None,
) -> Iterator[CollectionRecord]:
    """A helper function containing common logic for `CollectionSearch.iter`
    and `CollectionQuery.iter`: recursively yield `CollectionRecord` only if
    they match the criteria given in other arguments.

    Parameters
    ----------
    manager : `CollectionManager`
        Object responsible for managing the collection tables in a `Registry`.
    record : `CollectionRecord`
        Record to conditionally yield.
    collectionTypes : `AbstractSet` [ `CollectionType` ], optional
        If provided, only yield collections of these types.
    done : `set` [ `str` ], optional
        A `set` of already-yielded collection names; if provided, ``record``
        will only be yielded if it is not already in ``done``, and ``done``
        will be updated to include it on return.
    flattenChains : `bool`, optional
        If `True` (default) recursively yield the child collections of
        `~CollectionType.CHAINED` collections.
    includeChains : `bool`, optional
        If `False`, return records for `~CollectionType.CHAINED` collections
        themselves.  The default is the opposite of ``flattenChains``: either
        return records for CHAINED collections or their children, but not both.

    Yields
    ------
    record : `CollectionRecord`
        Matching collection records.
    """
    if done is None:
        done = set()
    if includeChains is None:
        # Default: show either the chains or their children, never both.
        includeChains = not flattenChains
    isChain = record.type is CollectionType.CHAINED
    if record.type in collectionTypes:
        done.add(record.name)
        if includeChains or not isChain:
            yield record
    if isChain and flattenChains:
        done.add(record.name)
        # ``record`` is a ChainedCollectionRecord (guaranteed by the enum
        # value check above), but MyPy cannot see that.
        yield from record.children.iter(  # type: ignore
            manager,
            collectionTypes=collectionTypes,
            done=done,
            flattenChains=flattenChains,
            includeChains=includeChains,
        )

717 

718 

class CollectionSearch(Sequence[str]):
    """An ordered search path of collections.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    collections : `tuple` [ `str` ]
        Tuple of collection names, ordered from the first searched to the last
        searched.

    Notes
    -----
    A `CollectionSearch` is used to find a single dataset (or set of datasets
    with different dataset types or data IDs) according to its dataset type and
    data ID, giving preference to collections in the order in which they are
    specified.  A `CollectionQuery` can be constructed from a broader range of
    expressions but does not order the collections to be searched.

    `CollectionSearch` is an immutable sequence of `str` collection names.

    A `CollectionSearch` instance constructed properly (e.g. via
    `fromExpression`) is a unique representation of a particular search path;
    it is exactly the same internally and compares as equal to any
    `CollectionSearch` constructed from an equivalent expression, regardless of
    how different the original expressions appear.
    """
    def __init__(self, collections: Tuple[str, ...]):
        self._collections = collections

    __slots__ = ("_collections",)

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionSearch:
        """Process a general expression to construct a `CollectionSearch`
        instance.

        Parameters
        ----------
        expression
            May be:
             - a `str` collection name;
             - an iterable of `str` collection names;
             - another `CollectionSearch` instance (passed through
               unchanged).

            Duplicate entries will be removed (preserving the first appearance
            of each collection name).

        Returns
        -------
        collections : `CollectionSearch`
            A `CollectionSearch` instance.
        """
        # Pass an existing CollectionSearch through unchanged.  This lets us
        # standardize expressions (and turn single-pass iterators into
        # multi-pass iterables) in advance and pass them down to other
        # routines that accept arbitrary expressions.
        if isinstance(expression, cls):
            return expression
        wildcard = CategorizedWildcard.fromExpression(
            expression,
            allowAny=False,
            allowPatterns=False,
        )
        # These are narrowing assertions for MyPy: with the keyword arguments
        # above, fromExpression can only produce bare strings.
        assert wildcard is not Ellipsis
        assert not wildcard.patterns
        assert not wildcard.items
        # dict.fromkeys deduplicates while preserving first-seen order.
        return cls(tuple(dict.fromkeys(wildcard.strings)))

    def iter(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: AbstractSet[CollectionType] = CollectionType.all(),
        done: Optional[Set[str]] = None,
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[CollectionRecord]:
        """Iterate over collection records that match this instance and the
        given criteria, in order.

        This method is primarily intended for internal use by `Registry`;
        other callers should generally prefer `Registry.findDatasets` or
        other `Registry` query methods.

        Parameters
        ----------
        manager : `CollectionManager`
            Object responsible for managing the collection tables in a
            `Registry`.
        collectionTypes : `AbstractSet` [ `CollectionType` ], optional
            If provided, only yield collections of these types.
        done : `set`, optional
            A `set` containing the names of all collections already yielded;
            any collections whose names are already present in this set will
            not be yielded again, and those yielded will be added to it while
            iterating.  If not provided, an empty `set` will be created and
            used internally to avoid duplicates.
        flattenChains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `False`, return records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flattenChains``: either return records for CHAINED collections or
            their children, but not both.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        """
        if done is None:
            done = set()
        for name in self:
            if name in done:
                continue
            yield from _yieldCollectionRecords(
                manager,
                manager.find(name),
                collectionTypes=collectionTypes,
                done=done,
                flattenChains=flattenChains,
                includeChains=includeChains,
            )

    def __iter__(self) -> Iterator[str]:
        return iter(self._collections)

    def __len__(self) -> int:
        return len(self._collections)

    def __getitem__(self, index: Any) -> str:
        return self._collections[index]

    def __eq__(self, other: Any) -> bool:
        return self._collections == other._collections if isinstance(other, CollectionSearch) else False

    def __str__(self) -> str:
        return "[{}]".format(", ".join(self))

    def __repr__(self) -> str:
        return f"CollectionSearch({self._collections!r})"

869 

870class CollectionQuery: 

871 """An unordered query for collections and dataset type restrictions. 

872 

873 The `fromExpression` method should almost always be used to construct 

874 instances, as the regular constructor performs no checking of inputs (and 

875 that can lead to confusing error messages downstream). 

876 

877 Parameters 

878 ---------- 

879 search : `CollectionSearch` or `...` 

880 An object representing an ordered search for explicitly-named 

881 collections (to be interpreted here as unordered), or the special 

882 value `...` indicating all collections. `...` must be accompanied 

883 by ``patterns=None``. 

884 patterns : `tuple` of `re.Pattern` 

885 Regular expression patterns to match against collection names. 

886 universe : `DimensionUniverse` 

887 Object managing all dimensions. 

888 

889 Notes 

890 ----- 

891 A `CollectionQuery` is used to find all matching datasets in any number 

892 of collections, or to find collections themselves. 

893 

894 `CollectionQuery` is expected to be rarely used outside of `Registry` 

895 (which uses it to back several of its "query" methods that take general 

expressions for collections), but it may occasionally be useful outside

897 `Registry` as a way to preprocess expressions that contain single-pass 

898 iterators into a form that can be used to call those `Registry` methods 

899 multiple times. 

900 """ 

901 def __init__( 

902 self, 

903 search: Union[CollectionSearch, EllipsisType] = Ellipsis, 

904 patterns: Tuple[re.Pattern, ...] = (), 

905 ): 

906 self._search = search 

907 self._patterns = patterns 

908 

909 __slots__ = ("_search", "_patterns") 

910 

911 @classmethod 

912 def fromExpression(cls, expression: Any) -> CollectionQuery: 

913 """Process a general expression to construct a `CollectionQuery` 

914 instance. 

915 

916 Parameters 

917 ---------- 

918 expression 

919 May be: 

920 - a `str` collection name; 

921 - an `re.Pattern` instance to match (with `re.Pattern.fullmatch`) 

922 against collection names; 

923 - any iterable containing any of the above; 

924 - a `CollectionSearch` instance; 

925 - another `CollectionQuery` instance (passed through unchanged). 

926 

927 Duplicate collection names will be removed (preserving the first 

928 appearance of each collection name). 

929 

930 Returns 

931 ------- 

932 collections : `CollectionQuery` 

933 A `CollectionQuery` instance. 

934 """ 

935 if isinstance(expression, cls): 

936 return expression 

937 if expression is Ellipsis: 

938 return cls() 

939 if isinstance(expression, CollectionSearch): 

940 return cls(search=expression, patterns=()) 

941 wildcard = CategorizedWildcard.fromExpression( 

942 expression, 

943 allowAny=True, 

944 allowPatterns=True, 

945 ) 

946 if wildcard is Ellipsis: 

947 return cls() 

948 assert not wildcard.items, \ 

949 "We should no longer be transforming to (str, DatasetTypeRestriction) tuples." 

950 return cls( 

951 search=CollectionSearch.fromExpression(wildcard.strings), 

952 patterns=tuple(wildcard.patterns), 

953 ) 

954 

955 def iter( 

956 self, manager: CollectionManager, *, 

957 collectionTypes: AbstractSet[CollectionType] = CollectionType.all(), 

958 flattenChains: bool = True, 

959 includeChains: Optional[bool] = None, 

960 ) -> Iterator[CollectionRecord]: 

961 """Iterate over collection records that match this instance and the 

962 given criteria, in an arbitrary order. 

963 

964 This method is primarily intended for internal use by `Registry`; 

965 other callers should generally prefer `Registry.queryDatasets` or 

966 other `Registry` query methods. 

967 

968 Parameters 

969 ---------- 

970 manager : `CollectionManager` 

971 Object responsible for managing the collection tables in a 

972 `Registry`. 

973 collectionTypes : `AbstractSet` [ `CollectionType` ], optional 

974 If provided, only yield collections of these types. 

975 flattenChains : `bool`, optional 

976 If `True` (default) recursively yield the child collections of 

977 `~CollectionType.CHAINED` collections. 

978 includeChains : `bool`, optional 

979 If `False`, return records for `~CollectionType.CHAINED` 

980 collections themselves. The default is the opposite of 

981 ``flattenChains``: either return records for CHAINED collections or 

982 their children, but not both. 

983 

984 Yields 

985 ------ 

986 record : `CollectionRecord` 

987 Matching collection records. 

988 """ 

989 if self._search is Ellipsis: 

990 for record in manager: 

991 yield from _yieldCollectionRecords( 

992 manager, 

993 record, 

994 collectionTypes=collectionTypes, 

995 flattenChains=flattenChains, 

996 includeChains=includeChains, 

997 ) 

998 else: 

999 done: Set[str] = set() 

1000 yield from self._search.iter( 

1001 manager, 

1002 collectionTypes=collectionTypes, 

1003 done=done, 

1004 flattenChains=flattenChains, 

1005 includeChains=includeChains, 

1006 ) 

1007 for record in manager: 

1008 if record.name not in done and any(p.fullmatch(record.name) for p in self._patterns): 

1009 yield from _yieldCollectionRecords( 

1010 manager, 

1011 record, 

1012 collectionTypes=collectionTypes, 

1013 done=done, 

1014 flattenChains=flattenChains, 

1015 includeChains=includeChains, 

1016 ) 

1017 

1018 def __eq__(self, other: Any) -> bool: 

1019 if isinstance(other, CollectionQuery): 

1020 return self._search == other._search and self._patterns == other._patterns 

1021 else: 

1022 return False