# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = (
    "CategorizedWildcard",
    "CollectionQuery",
    "CollectionSearch",
    "DatasetTypeRestriction",
)

from dataclasses import dataclass
import itertools
import operator
import re
from typing import (
    Any,
    Callable,
    ClassVar,
    FrozenSet,
    Iterator,
    List,
    Optional,
    Set,
    Tuple,
    TYPE_CHECKING,
    Union,
)

import sqlalchemy

from ..core import DatasetType
from ..core.utils import iterable
from ._collectionType import CollectionType

if TYPE_CHECKING:
    from .interfaces import CollectionManager, CollectionRecord

    # Workaround for `...` not having an exposed type in Python, borrowed from
    # https://github.com/python/typing/issues/684#issuecomment-548203158
    # Along with that, we need to use `Ellipsis` instead of `...` for the
    # actual sentinel value internally, and tell MyPy to ignore conversions
    # from `...` to `Ellipsis` at the public-interface boundary.
    #
    # `Ellipsis` and `EllipsisType` should be directly imported from this
    # module by related code that needs them; hopefully that will stay
    # confined to `lsst.daf.butler.registry`.  Putting these in __all__ is bad
    # for Sphinx, and probably more confusing than helpful overall.
    from enum import Enum

    class EllipsisType(Enum):
        Ellipsis = "..."

    Ellipsis = EllipsisType.Ellipsis

else:
    EllipsisType = type(Ellipsis)
    Ellipsis = Ellipsis
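
# Illustrative sketch (the function below is hypothetical, not part of this
# module): code elsewhere in the registry can annotate an argument that
# accepts either a concrete value or the "match anything" sentinel like this:
#
#     def select(names: Union[str, EllipsisType] = Ellipsis) -> None:
#         ...
#
# At runtime the sentinel is the ordinary built-in `...`; the Enum defined
# above exists only so that static type checkers have a nameable type for it.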

@dataclass
class CategorizedWildcard:
    """The results of preprocessing a wildcard expression to separate match
    patterns from strings.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).
    """

    @classmethod
    def fromExpression(cls, expression: Any, *,
                       allowAny: bool = True,
                       allowPatterns: bool = True,
                       coerceUnrecognized: Optional[Callable[[Any], Union[Tuple[str, Any], str]]] = None,
                       coerceItemValue: Optional[Callable[[Any], Any]] = None,
                       defaultItemValue: Optional[Any] = None,
                       ) -> Union[CategorizedWildcard, EllipsisType]:
        """Categorize a wildcard expression.

        Parameters
        ----------
        expression
            The expression to categorize.  May be any of:
            - `str`;
            - `re.Pattern` (only if ``allowPatterns`` is `True`);
            - objects recognized by ``coerceUnrecognized`` (if provided);
            - two-element tuples of (`str`, value) where value is recognized
              by ``coerceItemValue`` (if provided);
            - a non-`str`, non-mapping iterable containing any of the above;
            - the special value `...` (only if ``allowAny`` is `True`), which
              matches anything;
            - a mapping from `str` to a value recognized by
              ``coerceItemValue`` (if provided);
            - a `CategorizedWildcard` instance (passed through unchanged if
              it meets the requirements specified by keyword arguments).
        allowAny : `bool`, optional
            If `False` (`True` is default) raise `TypeError` if `...` is
            encountered.
        allowPatterns : `bool`, optional
            If `False` (`True` is default) raise `TypeError` if a `re.Pattern`
            is encountered, or if ``expression`` is a `CategorizedWildcard`
            with `patterns` not empty.
        coerceUnrecognized : `Callable`, optional
            A callback that takes a single argument of arbitrary type and
            returns either a `str` (appended to `strings`) or a `tuple` of
            (`str`, `Any`) (appended to `items`).  This will be called on
            objects of unrecognized type.  Exceptions will be reraised as
            `TypeError` (and chained).
        coerceItemValue : `Callable`, optional
            If provided, ``expression`` may be a mapping from `str` to any
            type that can be passed to this function; the result of that call
            will be stored instead as the value in ``self.items``.
        defaultItemValue : `Any`, optional
            If provided, combine this value with any string values encountered
            (including any returned by ``coerceUnrecognized``) to form a
            `tuple` and add it to `items`, guaranteeing that `strings` will be
            empty.  Patterns are never added to `items`.

        Returns
        -------
        categorized : `CategorizedWildcard` or ``...``
            The struct describing the wildcard.  ``...`` is passed through
            unchanged.

        Raises
        ------
        TypeError
            Raised if an unsupported type is found in the expression.
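
        Examples
        --------
        A minimal illustrative sketch; the dataset type names and the mapped
        value are hypothetical, and the keyword arguments other than
        ``coerceItemValue`` are left at their defaults:

        >>> import re
        >>> w = CategorizedWildcard.fromExpression(
        ...     ["calexp", re.compile(r"coadd_.*"), ("bias", 42)],
        ...     coerceItemValue=int,
        ... )
        >>> w.strings
        ['calexp']
        >>> [p.pattern for p in w.patterns]
        ['coadd_.*']
        >>> w.items
        [('bias', 42)]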

        """
        assert expression is not None
        # See if we were given ...; just return that if we were.
        if expression is Ellipsis:
            if not allowAny:
                raise TypeError("This expression may not be unconstrained.")
            return Ellipsis
        if isinstance(expression, cls):
            # This is already a CategorizedWildcard.  Make sure it meets the
            # reqs. implied by the kwargs we got.
            if not allowPatterns and expression.patterns:
                raise TypeError(f"Regular expression(s) {expression.patterns} "
                                f"are not allowed in this context.")
            if defaultItemValue is not None and expression.strings:
                if expression.items:
                    raise TypeError("Incompatible preprocessed expression: an ordered sequence of str is "
                                    "needed, but the original order was lost in the preprocessing.")
                return cls(strings=[], patterns=expression.patterns,
                           items=[(k, defaultItemValue) for k in expression.strings])
            elif defaultItemValue is None and expression.items:
                if expression.strings:
                    raise TypeError("Incompatible preprocessed expression: an ordered sequence of items is "
                                    "needed, but the original order was lost in the preprocessing.")
                return cls(strings=[k for k, _ in expression.items], patterns=expression.patterns, items=[])
            else:
                # Original expression was created with keyword arguments that
                # were at least as restrictive as what we just got; pass it
                # through.
                return expression

        # If we get here, we know we'll be creating a new instance.
        # Initialize an empty one now.
        self = cls(strings=[], patterns=[], items=[])

        # If mappings are allowed, see if we were given a single mapping by
        # trying to get items.
        if coerceItemValue is not None:
            rawItems = None
            try:
                rawItems = expression.items()
            except AttributeError:
                pass
            if rawItems is not None:
                for k, v in rawItems:
                    try:
                        self.items.append((k, coerceItemValue(v)))
                    except Exception as err:
                        raise TypeError(f"Could not coerce mapping value '{v}' for key '{k}'.") from err
                return self

        # Not ..., a CategorizedWildcard instance, or a mapping.  Just
        # process scalars or an iterable.  We put the body of the loop inside
        # a local function so we can recurse after coercion.

        def process(element: Any, alreadyCoerced: bool = False) -> None:
            if isinstance(element, str):
                if defaultItemValue is not None:
                    self.items.append((element, defaultItemValue))
                else:
                    self.strings.append(element)
                return
            if allowPatterns and isinstance(element, re.Pattern):
                self.patterns.append(element)
                return
            if coerceItemValue is not None:
                try:
                    k, v = element
                except TypeError:
                    pass
                else:
                    if not alreadyCoerced:
                        if not isinstance(k, str):
                            raise TypeError(f"Item key '{k}' is not a string.")
                        try:
                            v = coerceItemValue(v)
                        except Exception as err:
                            raise TypeError(f"Could not coerce tuple item value '{v}' for key '{k}'."
                                            ) from err
                    self.items.append((k, v))
                    return
            if alreadyCoerced:
                raise TypeError(f"Object '{element}' returned by coercion function is still unrecognized.")
            if coerceUnrecognized is not None:
                try:
                    process(coerceUnrecognized(element), alreadyCoerced=True)
                except Exception as err:
                    raise TypeError(f"Could not coerce expression element '{element}'.") from err
            else:
                raise TypeError(f"Unsupported object in wildcard expression: '{element}'.")

        for element in iterable(expression):
            process(element)
        return self

    def makeWhereExpression(self, column: sqlalchemy.sql.ColumnElement
                            ) -> Optional[sqlalchemy.sql.ColumnElement]:
        """Transform the wildcard into a SQLAlchemy boolean expression suitable
        for use in a WHERE clause.

        Parameters
        ----------
        column : `sqlalchemy.sql.ColumnElement`
            A string column in a table or query that should be compared to the
            wildcard expression.

        Returns
        -------
        where : `sqlalchemy.sql.ColumnElement` or `None`
            A boolean SQL expression that evaluates to true if and only if
            the value of ``column`` matches the wildcard.  `None` is returned
            if both `strings` and `patterns` are empty, and hence no match is
            possible.
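
        Examples
        --------
        A minimal sketch using a standalone SQLAlchemy column; the column and
        collection names are hypothetical, and the exact SQL text emitted
        depends on the SQLAlchemy version and dialect:

        >>> wildcard = CategorizedWildcard.fromExpression(["raw", "calib"])
        >>> name = sqlalchemy.sql.column("name")
        >>> where = wildcard.makeWhereExpression(name)  # roughly: name IN ('raw', 'calib')
        >>> where is not None
        True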

        """
        if self.items:
            raise NotImplementedError("Expressions that are processed into items cannot be transformed "
                                      "automatically into queries.")
        if self.patterns:
            raise NotImplementedError("Regular expression patterns are not yet supported here.")
        terms = []
        if len(self.strings) == 1:
            terms.append(column == self.strings[0])
        elif len(self.strings) > 1:
            terms.append(column.in_(self.strings))
        # TODO: append terms for regular expressions
        if not terms:
            return None
        return sqlalchemy.sql.or_(*terms)

    strings: List[str]
    """Explicit string values found in the wildcard (`list` [ `str` ]).
    """

    patterns: List[re.Pattern]
    """Regular expression patterns found in the wildcard
    (`list` [ `re.Pattern` ]).
    """

    items: List[Tuple[str, Any]]
    """Two-item tuples that relate string values to other objects
    (`list` [ `tuple` [ `str`, `Any` ] ]).
    """


class DatasetTypeRestriction:
    """An immutable set-like object that represents a restriction on the
    dataset types to search for within a collection.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    names : `frozenset` [ `str` ] or ``...``
        The names of the dataset types included in the restriction, or ``...``
        to permit a search for any dataset type.

    Notes
    -----
    This class does not inherit from `collections.abc.Set` (and does not
    implement the full set interface) because it is not always iterable and
    sometimes has no length (i.e. when ``names`` is ``...``).
    """
    def __init__(self, names: Union[FrozenSet[str], EllipsisType]):
        self.names = names

    __slots__ = ("names",)

    @classmethod
    def fromExpression(cls, expression: Any) -> DatasetTypeRestriction:
        """Process a general expression to construct a `DatasetTypeRestriction`
        instance.

        Parameters
        ----------
        expression
            May be:
            - a `DatasetType` instance;
            - a `str` dataset type name;
            - any non-mapping iterable containing either of the above;
            - the special value `...`;
            - another `DatasetTypeRestriction` instance (passed through
              unchanged).

        Returns
        -------
        restriction : `DatasetTypeRestriction`
            A `DatasetTypeRestriction` instance.
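
        Examples
        --------
        A minimal illustrative sketch with hypothetical dataset type names:

        >>> restriction = DatasetTypeRestriction.fromExpression(["flat", "bias"])
        >>> sorted(restriction.names)
        ['bias', 'flat']
        >>> DatasetTypeRestriction.fromExpression(...) == DatasetTypeRestriction.any
        True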

        """
        if isinstance(expression, cls):
            return expression
        wildcard = CategorizedWildcard.fromExpression(expression, allowPatterns=False,
                                                      coerceUnrecognized=lambda d: d.name)
        if wildcard is Ellipsis:
            return cls.any
        else:
            return cls(frozenset(wildcard.strings))

    def __contains__(self, datasetType: DatasetType) -> bool:
        return (self.names is Ellipsis or datasetType.name in self.names
                or (datasetType.isComponent()
                    and DatasetType.splitDatasetTypeName(datasetType.name)[0] in self.names))

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, DatasetTypeRestriction):
            return self.names == other.names
        else:
            return False

    def __str__(self) -> str:
        if self.names is Ellipsis:
            return "..."
        else:
            return "{{{}}}".format(", ".join(self.names))

    def __repr__(self) -> str:
        if self.names is Ellipsis:
            return "DatasetTypeRestriction(...)"
        else:
            return f"DatasetTypeRestriction({self.names!r})"

    @staticmethod
    def union(*args: DatasetTypeRestriction) -> DatasetTypeRestriction:
        """Merge one or more `DatasetTypeRestriction` instances, returning one
        that allows any of the dataset types included in any of them.

        Parameters
        ----------
        args
            Positional arguments are `DatasetTypeRestriction` instances.
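
        Examples
        --------
        A minimal illustrative sketch with hypothetical dataset type names:

        >>> a = DatasetTypeRestriction.fromExpression("flat")
        >>> b = DatasetTypeRestriction.fromExpression("bias")
        >>> sorted(DatasetTypeRestriction.union(a, b).names)
        ['bias', 'flat']
        >>> DatasetTypeRestriction.union(a, DatasetTypeRestriction.any) == DatasetTypeRestriction.any
        True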

        """
        result: Set[str] = set()
        for a in args:
            if a.names is Ellipsis:
                return DatasetTypeRestriction.any
            else:
                result.update(a.names)
        return DatasetTypeRestriction(frozenset(result))

    names: Union[FrozenSet[str], EllipsisType]
    """The names of the dataset types included (i.e. permitted) by the
    restriction, or the special value ``...`` to permit all dataset types
    (`frozenset` [ `str` ] or ``...``).
    """

    any: ClassVar[DatasetTypeRestriction]
    """A special `DatasetTypeRestriction` instance that permits any dataset
    type.

    This instance should be preferred instead of constructing a new one with
    ``...``, when possible, but it should not be assumed to be the only such
    instance (i.e. don't use ``is`` instead of ``==`` for comparisons).
    """


DatasetTypeRestriction.any = DatasetTypeRestriction(Ellipsis)


def _yieldCollectionRecords(
    manager: CollectionManager,
    record: CollectionRecord,
    restriction: DatasetTypeRestriction,
    datasetType: Optional[DatasetType] = None,
    collectionType: Optional[CollectionType] = None,
    done: Optional[Set[str]] = None,
    flattenChains: bool = True,
    includeChains: Optional[bool] = None,
) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
    """A helper function containing common logic for `CollectionSearch.iter`
    and `CollectionQuery.iter`: recursively yield `CollectionRecord` objects
    only if they match the criteria given in the other arguments.

    Parameters
    ----------
    manager : `CollectionManager`
        Object responsible for managing the collection tables in a `Registry`.
    record : `CollectionRecord`
        Record to conditionally yield.
    restriction : `DatasetTypeRestriction`
        A restriction that must match ``datasetType`` (if given) in order to
        yield ``record``.
    datasetType : `DatasetType`, optional
        If given, a `DatasetType` instance that must be included in
        ``restriction`` in order to yield ``record``.
    collectionType : `CollectionType`, optional
        If given, a `CollectionType` enumeration value that must match
        ``record.type`` in order for ``record`` to be yielded.
    done : `set` [ `str` ], optional
        A `set` of already-yielded collection names; if provided, ``record``
        will only be yielded if it is not already in ``done``, and ``done``
        will be updated to include it on return.
    flattenChains : `bool`, optional
        If `True` (default) recursively yield the child collections of
        `~CollectionType.CHAINED` collections.
    includeChains : `bool`, optional
        If `True`, yield records for `~CollectionType.CHAINED` collections
        themselves.  The default is the opposite of ``flattenChains``: either
        return records for CHAINED collections or their children, but not
        both.

    Yields
    ------
    record : `CollectionRecord`
        Matching collection records.
    restriction : `DatasetTypeRestriction`
        The given dataset type restriction.
    """
    if done is None:
        done = set()
    includeChains = includeChains if includeChains is not None else not flattenChains
    if collectionType is None or record.type is collectionType:
        done.add(record.name)
        if record.type is not CollectionType.CHAINED or includeChains:
            yield record, restriction
    if flattenChains and record.type is CollectionType.CHAINED:
        done.add(record.name)
        # We know this is a ChainedCollectionRecord because of the enum value,
        # but MyPy doesn't.
        yield from record.children.iterPairs(  # type: ignore
            manager,
            datasetType=datasetType,
            collectionType=collectionType,
            done=done,
            flattenChains=flattenChains,
            includeChains=includeChains,
        )


class CollectionSearch:
    """An ordered search path of collections and dataset type restrictions.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    items : `list` [ `tuple` [ `str`, `DatasetTypeRestriction` ] ]
        Tuples that relate a collection name to the restriction on dataset
        types to search for within it.  This is not a mapping because the
        same collection name may appear multiple times with different
        restrictions.

    Notes
    -----
    A `CollectionSearch` is used to find a single dataset according to its
    dataset type and data ID, giving preference to collections in the order
    in which they are specified.  A `CollectionQuery` can be constructed from
    a broader range of expressions but does not order the collections to be
    searched.

    `CollectionSearch` is iterable, yielding two-element tuples of `str`
    (collection name) and `DatasetTypeRestriction`.

    A `CollectionSearch` instance constructed properly (e.g. via
    `fromExpression`) is a unique representation of a particular search path;
    it is exactly the same internally and compares as equal to any other
    `CollectionSearch` constructed from an equivalent expression, regardless
    of how different the original expressions appear.
    """
    def __init__(self, items: List[Tuple[str, DatasetTypeRestriction]]):
        assert all(isinstance(v, DatasetTypeRestriction) for _, v in items)
        self._items = items

    __slots__ = ("_items",)

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionSearch:
        """Process a general expression to construct a `CollectionSearch`
        instance.

        Parameters
        ----------
        expression
            May be:
            - a `str` collection name;
            - a two-element `tuple` containing a `str` and any expression
              accepted by `DatasetTypeRestriction.fromExpression`;
            - any non-mapping iterable containing either of the above;
            - a mapping from `str` to any expression accepted by
              `DatasetTypeRestriction.fromExpression`;
            - another `CollectionSearch` instance (passed through
              unchanged).

            Multiple consecutive entries for the same collection with
            different restrictions will be merged.  Non-consecutive entries
            will not, because that actually represents a different search
            path.

        Returns
        -------
        collections : `CollectionSearch`
            A `CollectionSearch` instance.
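
        Examples
        --------
        A minimal illustrative sketch with hypothetical collection and
        dataset type names:

        >>> search = CollectionSearch.fromExpression([("calib", "flat"), "raw", "raw"])
        >>> [(name, str(restriction)) for name, restriction in search]
        [('calib', '{flat}'), ('raw', '...')]
        >>> CollectionSearch.fromExpression("raw") == CollectionSearch.fromExpression(["raw"])
        True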

        """
        # First see if this is already a CollectionSearch; just pass that
        # through unchanged.  This lets us standardize expressions (and turn
        # single-pass iterators into multi-pass iterables) in advance and pass
        # them down to other routines that accept arbitrary expressions.
        if isinstance(expression, cls):
            return expression
        wildcard = CategorizedWildcard.fromExpression(expression,
                                                      allowAny=False,
                                                      allowPatterns=False,
                                                      coerceItemValue=DatasetTypeRestriction.fromExpression,
                                                      defaultItemValue=DatasetTypeRestriction.any)
        assert wildcard is not Ellipsis
        assert not wildcard.patterns
        assert not wildcard.strings
        return cls(
            # Consolidate consecutive repetitions of the same collection name.
            [(name, DatasetTypeRestriction.union(*tuple(item[1] for item in items)))
             for name, items in itertools.groupby(wildcard.items, key=operator.itemgetter(0))]
        )

    def iterPairs(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionType: Optional[CollectionType] = None,
        done: Optional[Set[str]] = None,
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
        """Like `iter`, but yield pairs of `CollectionRecord`,
        `DatasetTypeRestriction` instead of just the former.

        See `iter` for all parameter descriptions.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        restriction : `DatasetTypeRestriction`
            The given dataset type restriction.
        """
        if done is None:
            done = set()
        for name, restriction in self._items:
            if name not in done and (datasetType is None or datasetType in restriction):
                yield from _yieldCollectionRecords(
                    manager,
                    manager.find(name),
                    restriction,
                    datasetType=datasetType,
                    collectionType=collectionType,
                    done=done,
                    flattenChains=flattenChains,
                    includeChains=includeChains,
                )

    def iter(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionType: Optional[CollectionType] = None,
        done: Optional[Set[str]] = None,
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[CollectionRecord]:
        """Iterate over collection records that match this instance and the
        given criteria, in order.

        This method is primarily intended for internal use by `Registry`;
        other callers should generally prefer `Registry.findDatasets` or
        other `Registry` query methods.

        Parameters
        ----------
        manager : `CollectionManager`
            Object responsible for managing the collection tables in a
            `Registry`.
        datasetType : `DatasetType`, optional
            If given, only yield collections whose dataset type restrictions
            include this dataset type.
        collectionType : `CollectionType`, optional
            If given, only yield collections of this type.
        done : `set`, optional
            A `set` containing the names of all collections already yielded;
            any collections whose names are already present in this set will
            not be yielded again, and those yielded will be added to it while
            iterating.  If not provided, an empty `set` will be created and
            used internally to avoid duplicates.
        flattenChains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flattenChains``: either return records for CHAINED collections
            or their children, but not both.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        """
        for record, _ in self.iterPairs(manager, datasetType=datasetType, collectionType=collectionType,
                                        done=done, flattenChains=flattenChains,
                                        includeChains=includeChains):
            yield record

    def __iter__(self) -> Iterator[Tuple[str, DatasetTypeRestriction]]:
        yield from self._items

    def __len__(self) -> int:
        return len(self._items)

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, CollectionSearch):
            return self._items == other._items
        else:
            return False

    def __str__(self) -> str:
        return "[{}]".format(", ".join(f"{k}: {v}" for k, v in self._items))

    def __repr__(self) -> str:
        return f"CollectionSearch({self._items!r})"


class CollectionQuery:
    """An unordered query for collections and dataset type restrictions.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    search : `CollectionSearch` or ``...``
        An object representing an ordered search for explicitly-named
        collections (to be interpreted here as unordered), or the special
        value ``...`` indicating all collections.  ``...`` must be
        accompanied by an empty ``patterns`` tuple.
    patterns : `tuple` of `re.Pattern`
        Regular expression patterns to match against collection names.

    Notes
    -----
    A `CollectionQuery` is used to find all matching datasets in any number
    of collections, or to find collections themselves.

    `CollectionQuery` is expected to be rarely used outside of `Registry`
    (which uses it to back several of its "query" methods that take general
    expressions for collections), but it may occasionally be useful outside
    `Registry` as a way to preprocess expressions that contain single-pass
    iterators into a form that can be used to call those `Registry` methods
    multiple times.
    """
    def __init__(self, search: Union[CollectionSearch, EllipsisType], patterns: Tuple[re.Pattern, ...]):
        self._search = search
        self._patterns = patterns

    __slots__ = ("_search", "_patterns")

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionQuery:
        """Process a general expression to construct a `CollectionQuery`
        instance.

        Parameters
        ----------
        expression
            May be:
            - a `str` collection name;
            - a two-element `tuple` containing a `str` and any expression
              accepted by `DatasetTypeRestriction.fromExpression`;
            - an `re.Pattern` instance to match (with `re.Pattern.fullmatch`)
              against collection names;
            - any non-mapping iterable containing any of the above;
            - a mapping from `str` to any expression accepted by
              `DatasetTypeRestriction.fromExpression`;
            - a `CollectionSearch` instance;
            - another `CollectionQuery` instance (passed through unchanged).

            Multiple consecutive entries for the same collection with
            different restrictions will be merged.  Non-consecutive entries
            will not, because that actually represents a different search
            path.

        Returns
        -------
        collections : `CollectionQuery`
            A `CollectionQuery` instance.
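
        Examples
        --------
        A minimal illustrative sketch; the collection names and pattern are
        hypothetical:

        >>> import re
        >>> query = CollectionQuery.fromExpression([re.compile(r"u/.+"), "calib"])
        >>> query is CollectionQuery.fromExpression(query)
        True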

        """
        if isinstance(expression, cls):
            return expression
        if expression is Ellipsis:
            return cls.any
        if isinstance(expression, CollectionSearch):
            return cls(search=expression, patterns=())
        wildcard = CategorizedWildcard.fromExpression(expression,
                                                      allowAny=True,
                                                      allowPatterns=True,
                                                      coerceItemValue=DatasetTypeRestriction.fromExpression,
                                                      defaultItemValue=DatasetTypeRestriction.any)
        if wildcard is Ellipsis:
            return cls.any
        assert not wildcard.strings, \
            "All bare strings should be transformed to (str, DatasetTypeRestriction) tuples."
        return cls(search=CollectionSearch.fromExpression(wildcard.items),
                   patterns=tuple(wildcard.patterns))

    def iterPairs(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionType: Optional[CollectionType] = None,
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
        """Like `iter`, but yield pairs of `CollectionRecord`,
        `DatasetTypeRestriction` instead of just the former.

        See `iter` for all parameter descriptions.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        restriction : `DatasetTypeRestriction`
            The given dataset type restriction.
        """
        if self._search is Ellipsis:
            for record in manager:
                yield from _yieldCollectionRecords(
                    manager,
                    record,
                    DatasetTypeRestriction.any,
                    datasetType=datasetType,
                    collectionType=collectionType,
                    flattenChains=flattenChains,
                    includeChains=includeChains,
                )
        else:
            done: Set[str] = set()
            yield from self._search.iterPairs(
                manager,
                datasetType=datasetType,
                collectionType=collectionType,
                done=done,
                flattenChains=flattenChains,
                includeChains=includeChains,
            )
            for record in manager:
                if record.name not in done and any(p.fullmatch(record.name) for p in self._patterns):
                    yield from _yieldCollectionRecords(
                        manager,
                        record,
                        DatasetTypeRestriction.any,
                        datasetType=datasetType,
                        collectionType=collectionType,
                        done=done,
                        flattenChains=flattenChains,
                        includeChains=includeChains,
                    )

    def iter(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionType: Optional[CollectionType] = None,
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[CollectionRecord]:
        """Iterate over collection records that match this instance and the
        given criteria, in an arbitrary order.

        This method is primarily intended for internal use by `Registry`;
        other callers should generally prefer `Registry.queryDatasets` or
        other `Registry` query methods.

        Parameters
        ----------
        manager : `CollectionManager`
            Object responsible for managing the collection tables in a
            `Registry`.
        datasetType : `DatasetType`, optional
            If given, only yield collections whose dataset type restrictions
            include this dataset type.
        collectionType : `CollectionType`, optional
            If given, only yield collections of this type.
        flattenChains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flattenChains``: either return records for CHAINED collections
            or their children, but not both.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        """
        for record, _ in self.iterPairs(manager, datasetType=datasetType, collectionType=collectionType,
                                        flattenChains=flattenChains, includeChains=includeChains):
            yield record

    any: ClassVar[CollectionQuery]
    """A special `CollectionQuery` instance that matches any collection.

    This instance should be preferred instead of constructing a new one with
    ``...``, when possible, but it should not be assumed to be the only such
    instance.
    """


CollectionQuery.any = CollectionQuery(Ellipsis, ())