# NOTE(extraction): coverage-report viewer chrome ("Hide keyboard shortcuts" /
# hot-key help) leaked into this capture; it is not part of the module source.

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ( 

24 "CategorizedWildcard", 

25 "CollectionQuery", 

26 "CollectionSearch", 

27 "DatasetTypeRestriction", 

28) 

29 

30from dataclasses import dataclass 

31import itertools 

32import operator 

33import re 

34from typing import ( 

35 AbstractSet, 

36 Any, 

37 Callable, 

38 ClassVar, 

39 FrozenSet, 

40 Iterator, 

41 List, 

42 Optional, 

43 Set, 

44 Tuple, 

45 TYPE_CHECKING, 

46 Union, 

47) 

48 

49import sqlalchemy 

50 

51from ..core import DatasetType 

52from ..core.utils import iterable 

53from ._collectionType import CollectionType 

54 

if TYPE_CHECKING:
    from .interfaces import CollectionManager, CollectionRecord

    # Workaround for `...` not having an exposed type in Python, borrowed from
    # https://github.com/python/typing/issues/684#issuecomment-548203158
    # Along with that, we need to either use `Ellipsis` instead of `...` for
    # the actual sentinel value internally, and tell MyPy to ignore conversions
    # from `...` to `Ellipsis` at the public-interface boundary.
    #
    # `Ellipsis` and `EllipsisType` should be directly imported from this
    # module by related code that needs them; hopefully that will stay confined
    # to `lsst.daf.butler.registry`.  Putting these in __all__ is bad for
    # Sphinx, and probably more confusing than helpful overall.
    from enum import Enum

    class EllipsisType(Enum):
        Ellipsis = "..."

    Ellipsis = EllipsisType.Ellipsis

else:
    # At runtime (this branch is never taken by MyPy), `Ellipsis` is simply
    # the builtin singleton and `EllipsisType` its actual type; the rebinding
    # of `Ellipsis` is a no-op kept so both branches define the same names.
    EllipsisType = type(Ellipsis)
    Ellipsis = Ellipsis

78 

79 

@dataclass
class CategorizedWildcard:
    """The results of preprocessing a wildcard expression to separate match
    patterns from strings.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).
    """

    @classmethod
    def fromExpression(cls, expression: Any, *,
                       allowAny: bool = True,
                       allowPatterns: bool = True,
                       coerceUnrecognized: Optional[Callable[[Any], Union[Tuple[str, Any], str]]] = None,
                       coerceItemValue: Optional[Callable[[Any], Any]] = None,
                       defaultItemValue: Optional[Any] = None,
                       ) -> Union[CategorizedWildcard, EllipsisType]:
        """Categorize a wildcard expression.

        Parameters
        ----------
        expression
            The expression to categorize.  May be any of:
             - `str`;
             - `re.Pattern` (only if ``allowPatterns`` is `True`);
             - objects recognized by ``coerceUnrecognized`` (if provided);
             - two-element tuples of (`str`, value) where value is recognized
               by ``coerceItemValue`` (if provided);
             - a non-`str`, non-mapping iterable containing any of the above;
             - the special value `...` (only if ``allowAny`` is `True`), which
               matches anything;
             - a mapping from `str` to values recognized by
               ``coerceItemValue`` (if provided);
             - a `CategorizedWildcard` instance (passed through unchanged if
               it meets the requirements specified by keyword arguments).
        allowAny: `bool`, optional
            If `False` (`True` is default) raise `TypeError` if `...` is
            encountered.
        allowPatterns: `bool`, optional
            If `False` (`True` is default) raise `TypeError` if a `re.Pattern`
            is encountered, or if ``expression`` is a `CategorizedWildcard`
            with `patterns` not empty.
        coerceUnrecognized: `Callable`, optional
            A callback that takes a single argument of arbitrary type and
            returns either a `str` - appended to `strings` - or a `tuple` of
            (`str`, `Any`) to be appended to `items`.  This will be called on
            objects of unrecognized type, with the return value added to
            `strings`.  Exceptions will be reraised as `TypeError` (and
            chained).
        coerceItemValue: `Callable`, optional
            If provided, ``expression`` may be a mapping from `str` to any
            type that can be passed to this function; the result of that call
            will be stored instead as the value in ``self.items``.
        defaultItemValue: `Any`, optional
            If provided, combine this value with any string values encountered
            (including any returned by ``coerceUnrecognized``) to form a
            `tuple` and add it to `items`, guaranteeing that `strings` will be
            empty.  Patterns are never added to `items`.

        Returns
        -------
        categorized : `CategorizedWildcard` or ``...``.
            The struct describing the wildcard.  ``...`` is passed through
            unchanged.

        Raises
        ------
        TypeError
            Raised if an unsupported type is found in the expression.
        """
        assert expression is not None
        # See if we were given ...; just return that if we were.
        if expression is Ellipsis:
            if not allowAny:
                raise TypeError("This expression may not be unconstrained.")
            return Ellipsis
        if isinstance(expression, cls):
            # This is already a CategorizedWildcard.  Make sure it meets the
            # reqs. implied by the kwargs we got.
            if not allowPatterns and expression.patterns:
                raise TypeError(f"Regular expression(s) {expression.patterns} "
                                f"are not allowed in this context.")
            if defaultItemValue is not None and expression.strings:
                # Caller wants (str, value) items, but this instance was built
                # with bare strings; re-pair them with the default value.
                if expression.items:
                    # Having both strings and items means their relative order
                    # is unrecoverable, so we cannot produce an ordered result.
                    raise TypeError("Incompatible preprocessed expression: an ordered sequence of str is "
                                    "needed, but the original order was lost in the preprocessing.")
                return cls(strings=[], patterns=expression.patterns,
                           items=[(k, defaultItemValue) for k in expression.strings])
            elif defaultItemValue is None and expression.items:
                # Caller wants bare strings; drop the item values.
                if expression.strings:
                    raise TypeError("Incompatible preprocessed expression: an ordered sequence of items is "
                                    "needed, but the original order was lost in the preprocessing.")
                return cls(strings=[k for k, _ in expression.items], patterns=expression.patterns, items=[])
            else:
                # Original expression was created with keyword arguments that
                # were at least as restrictive as what we just got; pass it
                # through.
                return expression

        # If we get here, we know we'll be creating a new instance.
        # Initialize an empty one now.
        self = cls(strings=[], patterns=[], items=[])

        # If mappings are allowed, see if we were given a single mapping by
        # trying to get items.
        if coerceItemValue is not None:
            rawItems = None
            try:
                rawItems = expression.items()
            except AttributeError:
                # Not a mapping; fall through to the scalar/iterable path.
                pass
            if rawItems is not None:
                for k, v in rawItems:
                    try:
                        self.items.append((k, coerceItemValue(v)))
                    except Exception as err:
                        raise TypeError(f"Could not coerce mapping value '{v}' for key '{k}'.") from err
                return self

        # Not ..., a CategorizedWildcard instance, or a mapping.  Just
        # process scalars or an iterable.  We put the body of the loop inside
        # a local function so we can recurse after coercion.

        def process(element: Any, alreadyCoerced: bool = False) -> None:
            # Categorize one element of the expression, appending to the
            # enclosing `self`'s lists; recurses (once) after coercion.
            if isinstance(element, str):
                if defaultItemValue is not None:
                    self.items.append((element, defaultItemValue))
                else:
                    self.strings.append(element)
                return
            if allowPatterns and isinstance(element, re.Pattern):
                self.patterns.append(element)
                return
            if coerceItemValue is not None:
                try:
                    k, v = element
                except TypeError:
                    # Not a 2-item iterable; keep looking for other matches.
                    pass
                else:
                    if not alreadyCoerced:
                        if not isinstance(k, str):
                            raise TypeError(f"Item key '{k}' is not a string.")
                        try:
                            v = coerceItemValue(v)
                        except Exception as err:
                            raise TypeError(f"Could not coerce tuple item value '{v}' for key '{k}'."
                                            ) from err
                    self.items.append((k, v))
                    return
            if alreadyCoerced:
                # Coercion already ran once; refusing to recurse again
                # guarantees termination.
                raise TypeError(f"Object '{element}' returned by coercion function is still unrecognized.")
            if coerceUnrecognized is not None:
                try:
                    process(coerceUnrecognized(element), alreadyCoerced=True)
                except Exception as err:
                    raise TypeError(f"Could not coerce expression element '{element}'.") from err
            else:
                raise TypeError(f"Unsupported object in wildcard expression: '{element}'.")

        for element in iterable(expression):
            process(element)
        return self

    def makeWhereExpression(self, column: sqlalchemy.sql.ColumnElement
                            ) -> Optional[sqlalchemy.sql.ColumnElement]:
        """Transform the wildcard into a SQLAlchemy boolean expression suitable
        for use in a WHERE clause.

        Parameters
        ----------
        column : `sqlalchemy.sql.ColumnElement`
            A string column in a table or query that should be compared to the
            wildcard expression.

        Returns
        -------
        where : `sqlalchemy.sql.ColumnElement` or `None`
            A boolean SQL expression that evaluates to true if and only if
            the value of ``column`` matches the wildcard.  `None` is returned
            if both `strings` and `patterns` are empty, and hence no match is
            possible.

        Raises
        ------
        NotImplementedError
            Raised if `items` is not empty, or if `patterns` is not empty
            (regular expressions are not yet translated to SQL here).
        """
        if self.items:
            raise NotImplementedError("Expressions that are processed into items cannot be transformed "
                                      "automatically into queries.")
        if self.patterns:
            raise NotImplementedError("Regular expression patterns are not yet supported here.")
        terms = []
        if len(self.strings) == 1:
            terms.append(column == self.strings[0])
        elif len(self.strings) > 1:
            terms.append(column.in_(self.strings))
        # TODO: append terms for regular expressions
        if not terms:
            return None
        return sqlalchemy.sql.or_(*terms)

    strings: List[str]
    """Explicit string values found in the wildcard (`list` [ `str` ]).
    """

    patterns: List[re.Pattern]
    """Regular expression patterns found in the wildcard
    (`list` [ `re.Pattern` ]).
    """

    items: List[Tuple[str, Any]]
    """Two-item tuples that relate string values to other objects
    (`list` [ `tuple` [ `str`, `Any` ] ]).
    """

291 

292 

class DatasetTypeRestriction:
    """An immutable, set-like restriction on the dataset types to search for
    within a collection.

    Instances should almost always be built via `fromExpression`; the plain
    constructor does not validate its argument, which can lead to confusing
    errors later on.

    Parameters
    ----------
    names : `frozenset` [`str`] or `...`
        The names of the dataset types included in the restriction, or `...`
        to permit a search for any dataset type.

    Notes
    -----
    Although set-like, this class deliberately does not inherit from
    `collections.abc.Set` (and does not implement the full set interface):
    when ``names`` is ``...`` the object is neither iterable nor sized.
    """

    __slots__ = ("names",)

    def __init__(self, names: Union[FrozenSet[str], EllipsisType]):
        self.names = names

    @classmethod
    def fromExpression(cls, expression: Any) -> DatasetTypeRestriction:
        """Process a general expression to construct a `DatasetTypeRestriction`
        instance.

        Parameters
        ----------
        expression
            May be:
             - a `DatasetType` instance;
             - a `str` dataset type name;
             - any non-mapping iterable containing either of the above;
             - the special value `...`;
             - another `DatasetTypeRestriction` instance (passed through
               unchanged).

        Returns
        -------
        restriction : `DatasetTypeRestriction`
            A `DatasetTypeRestriction` instance.
        """
        # Pass preprocessed instances through untouched.
        if isinstance(expression, cls):
            return expression
        # Delegate categorization; DatasetType instances are coerced to their
        # names via the callback.
        categorized = CategorizedWildcard.fromExpression(expression, allowPatterns=False,
                                                         coerceUnrecognized=lambda d: d.name)
        if categorized is Ellipsis:
            return cls.any
        return cls(frozenset(categorized.strings))

    def __contains__(self, datasetType: DatasetType) -> bool:
        # An unrestricted instance contains everything.
        if self.names is Ellipsis:
            return True
        if datasetType.name in self.names:
            return True
        # A component dataset type is also included when its parent composite
        # is named in the restriction.
        return (datasetType.isComponent()
                and DatasetType.splitDatasetTypeName(datasetType.name)[0] in self.names)

    def __eq__(self, other: Any) -> bool:
        return self.names == other.names if isinstance(other, DatasetTypeRestriction) else False

    def __str__(self) -> str:
        return "..." if self.names is Ellipsis else "{{{}}}".format(", ".join(self.names))

    def __repr__(self) -> str:
        if self.names is Ellipsis:
            return "DatasetTypeRestriction(...)"
        return f"DatasetTypeRestriction({self.names!r})"

    @staticmethod
    def union(*args: DatasetTypeRestriction) -> DatasetTypeRestriction:
        """Merge one or more `DatasetTypeRestriction` instances, returning one
        that allows any of the dataset types included in any of them.

        Parameters
        ----------
        args
            Positional arguments are `DatasetTypeRestriction` instances.
        """
        merged: Set[str] = set()
        for restriction in args:
            # Any unrestricted argument makes the whole union unrestricted.
            if restriction.names is Ellipsis:
                return DatasetTypeRestriction.any
            merged.update(restriction.names)
        return DatasetTypeRestriction(frozenset(merged))

    names: Union[FrozenSet[str], EllipsisType]
    """The names of the dataset types included (i.e. permitted) by the
    restriction, or the special value ``...`` to permit all dataset types
    (`frozenset` [ `str` ] or ``...``).
    """

    any: ClassVar[DatasetTypeRestriction]
    """A special `DatasetTypeRestriction` instance that permits any dataset
    type.

    This instance should be preferred instead of constructing a new one with
    ``...``, when possible, but it should not be assumed to be the only such
    instance (i.e. don't use ``is`` instead of ``==`` for comparisons).
    """

403 

404 

# Singleton "permit everything" restriction; assigned after the class
# definition because it is itself an instance of DatasetTypeRestriction.
DatasetTypeRestriction.any = DatasetTypeRestriction(Ellipsis)

406 

407 

def _yieldCollectionRecords(
    manager: CollectionManager,
    record: CollectionRecord,
    restriction: DatasetTypeRestriction,
    datasetType: Optional[DatasetType] = None,
    collectionTypes: AbstractSet[CollectionType] = CollectionType.all(),
    done: Optional[Set[str]] = None,
    flattenChains: bool = True,
    includeChains: Optional[bool] = None,
) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
    """A helper function containing common logic for `CollectionSearch.iter`
    and `CollectionQuery.iter`: recursively yield `CollectionRecord` objects
    only if they match the criteria given in other arguments.

    Parameters
    ----------
    manager : `CollectionManager`
        Object responsible for managing the collection tables in a `Registry`.
    record : `CollectionRecord`
        Record to conditionally yield.
    restriction : `DatasetTypeRestriction`
        A restriction that must match ``datasetType`` (if given) in order to
        yield ``record``.
    datasetType : `DatasetType`, optional
        If given, a `DatasetType` instance that must be included in
        ``restriction`` in order to yield ``record``.
    collectionTypes : `AbstractSet` [ `CollectionType` ], optional
        If provided, only yield collections of these types.
    done : `set` [ `str` ], optional
        A `set` of already-yielded collection names; if provided, ``record``
        will only be yielded if it is not already in ``done``, and ``done``
        will be updated to include it on return.
    flattenChains : `bool`, optional
        If `True` (default) recursively yield the child collections of
        `~CollectionType.CHAINED` collections.
    includeChains : `bool`, optional
        If `False`, return records for `~CollectionType.CHAINED` collections
        themselves.  The default is the opposite of ``flattenChains``: either
        return records for CHAINED collections or their children, but not both.

    Yields
    ------
    record : `CollectionRecord`
        Matching collection records.
    restriction : `DatasetTypeRestriction`
        The given dataset type restriction.
    """
    if done is None:
        done = set()
    # By default a CHAINED record is either reported itself or replaced by its
    # children, never both.
    includeChains = includeChains if includeChains is not None else not flattenChains
    if record.type in collectionTypes:
        done.add(record.name)
        if record.type is not CollectionType.CHAINED or includeChains:
            yield record, restriction
    # Note: chains are flattened even when CHAINED itself is excluded from
    # ``collectionTypes``, so the children can still match on their own types.
    if flattenChains and record.type is CollectionType.CHAINED:
        done.add(record.name)
        # We know this is a ChainedCollectionRecord because of the enum value,
        # but MyPy doesn't.
        yield from record.children.iterPairs(  # type: ignore
            manager,
            datasetType=datasetType,
            collectionTypes=collectionTypes,
            done=done,
            flattenChains=flattenChains,
            includeChains=includeChains,
        )

474 

475 

class CollectionSearch:
    """An ordered search path of collections and dataset type restrictions.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    items : `list` [ `tuple` [ `str`, `DatasetTypeRestriction` ] ]
        Tuples that relate a collection name to the restriction on dataset
        types to search for within it.  This is not a mapping because the
        same collection name may appear multiple times with different
        restrictions.

    Notes
    -----
    A `CollectionSearch` is used to find a single dataset according to its
    dataset type and data ID, giving preference to collections in which the
    order they are specified.  A `CollectionQuery` can be constructed from
    a broader range of expressions but does not order the collections to be
    searched.

    `CollectionSearch` is iterable, yielding two-element tuples of `str`
    (collection name) and `DatasetTypeRestriction`.

    A `CollectionSearch` instance constructed properly (e.g. via
    `fromExpression`) is a unique representation of a particular search path;
    it is exactly the same internally and compares as equal to any
    `CollectionSearch` constructed from an equivalent expression,
    regardless of how different the original expressions appear.
    """
    def __init__(self, items: List[Tuple[str, DatasetTypeRestriction]]):
        assert all(isinstance(v, DatasetTypeRestriction) for _, v in items)
        self._items = items

    # Fix: the original spelling ``("_items")`` was a parenthesized bare
    # string, not a tuple; it only worked because Python accepts a single
    # string as a slot name.  The explicit one-element tuple is the
    # conventional, unambiguous form and behaves identically.
    __slots__ = ("_items",)

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionSearch:
        """Process a general expression to construct a `CollectionSearch`
        instance.

        Parameters
        ----------
        expression
            May be:
             - a `str` collection name;
             - a two-element `tuple` containing a `str` and any expression
               accepted by `DatasetTypeRestriction.fromExpression`;
             - any non-mapping iterable containing either of the above;
             - a mapping from `str` to any expression accepted by
               `DatasetTypeRestriction`.
             - another `CollectionSearch` instance (passed through
               unchanged).

            Multiple consecutive entries for the same collection with different
            restrictions will be merged.  Non-consecutive entries will not,
            because that actually represents a different search path.

        Returns
        -------
        collections : `CollectionSearch`
            A `CollectionSearch` instance.
        """
        # First see if this is already a CollectionSearch; just pass that
        # through unchanged.  This lets us standardize expressions (and turn
        # single-pass iterators into multi-pass iterables) in advance and pass
        # them down to other routines that accept arbitrary expressions.
        if isinstance(expression, cls):
            return expression
        wildcard = CategorizedWildcard.fromExpression(expression,
                                                      allowAny=False,
                                                      allowPatterns=False,
                                                      coerceItemValue=DatasetTypeRestriction.fromExpression,
                                                      defaultItemValue=DatasetTypeRestriction.any)
        # allowAny=False / allowPatterns=False guarantee these; with a
        # defaultItemValue, all bare strings become items as well.
        assert wildcard is not Ellipsis
        assert not wildcard.patterns
        assert not wildcard.strings
        return cls(
            # Consolidate consecutive repetitions of the same collection name
            # by unioning their restrictions (groupby only merges adjacent
            # runs, which is exactly the semantics documented above).
            [(name, DatasetTypeRestriction.union(*tuple(item[1] for item in items)))
             for name, items in itertools.groupby(wildcard.items, key=operator.itemgetter(0))]
        )

    def iterPairs(
            self, manager: CollectionManager, *,
            datasetType: Optional[DatasetType] = None,
            collectionTypes: AbstractSet[CollectionType] = CollectionType.all(),
            done: Optional[Set[str]] = None,
            flattenChains: bool = True,
            includeChains: Optional[bool] = None,
    ) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
        """Like `iter`, but yield pairs of `CollectionRecord`,
        `DatasetTypeRestriction` instead of just the former.

        See `iter` for all parameter descriptions.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        restriction : `DatasetTypeRestriction`
            The given dataset type restriction.
        """
        if done is None:
            done = set()
        for name, restriction in self._items:
            # Skip names already yielded, and whole entries whose restriction
            # excludes the requested dataset type.
            if name not in done and (datasetType is None or datasetType in restriction):
                yield from _yieldCollectionRecords(
                    manager,
                    manager.find(name),
                    restriction,
                    datasetType=datasetType,
                    collectionTypes=collectionTypes,
                    done=done,
                    flattenChains=flattenChains,
                    includeChains=includeChains,
                )

    def iter(
            self, manager: CollectionManager, *,
            datasetType: Optional[DatasetType] = None,
            collectionTypes: AbstractSet[CollectionType] = CollectionType.all(),
            done: Optional[Set[str]] = None,
            flattenChains: bool = True,
            includeChains: Optional[bool] = None,
    ) -> Iterator[CollectionRecord]:
        """Iterate over collection records that match this instance and the
        given criteria, in order.

        This method is primarily intended for internal use by `Registry`;
        other callers should generally prefer `Registry.findDatasets` or
        other `Registry` query methods.

        Parameters
        ----------
        manager : `CollectionManager`
            Object responsible for managing the collection tables in a
            `Registry`.
        datasetType : `DatasetType`, optional
            If given, only yield collections whose dataset type restrictions
            include this dataset type.
        collectionTypes : `AbstractSet` [ `CollectionType` ], optional
            If provided, only yield collections of these types.
        done : `set`, optional
            A `set` containing the names of all collections already yielded;
            any collections whose names are already present in this set will
            not be yielded again, and those yielded will be added to it while
            iterating.  If not provided, an empty `set` will be created and
            used internally to avoid duplicates.
        flattenChains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `False`, return records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flattenChains``: either return records for CHAINED collections or
            their children, but not both.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        """
        for record, _ in self.iterPairs(manager, datasetType=datasetType, collectionTypes=collectionTypes,
                                        done=done, flattenChains=flattenChains, includeChains=includeChains):
            yield record

    def __iter__(self) -> Iterator[Tuple[str, DatasetTypeRestriction]]:
        yield from self._items

    def __len__(self) -> int:
        return len(self._items)

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, CollectionSearch):
            return self._items == other._items
        return False

    def __str__(self) -> str:
        return "[{}]".format(", ".join(f"{k}: {v}" for k, v in self._items))

    def __repr__(self) -> str:
        return f"CollectionSearch({self._items!r})"

661 

662 

class CollectionQuery:
    """An unordered query for collections and dataset type restrictions.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    search : `CollectionSearch` or `...`
        An object representing an ordered search for explicitly-named
        collections (to be interpreted here as unordered), or the special
        value `...` indicating all collections.  `...` must be accompanied
        by ``patterns=None``.
    patterns : `tuple` of `re.Pattern`
        Regular expression patterns to match against collection names.

    Notes
    -----
    A `CollectionQuery` is used to find all matching datasets in any number
    of collections, or to find collections themselves.

    `CollectionQuery` is expected to be rarely used outside of `Registry`
    (which uses it to back several of its "query" methods that take general
    expressions for collections), but it may occasionally be useful outside
    `Registry` as a way to preprocess expressions that contain single-pass
    iterators into a form that can be used to call those `Registry` methods
    multiple times.
    """
    def __init__(self, search: Union[CollectionSearch, EllipsisType], patterns: Tuple[re.Pattern, ...]):
        self._search = search
        self._patterns = patterns

    __slots__ = ("_search", "_patterns")

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionQuery:
        """Process a general expression to construct a `CollectionQuery`
        instance.

        Parameters
        ----------
        expression
            May be:
             - a `str` collection name;
             - a two-element `tuple` containing a `str` and any expression
               accepted by `DatasetTypeRestriction.fromExpression`;
             - an `re.Pattern` instance to match (with `re.Pattern.fullmatch`)
               against collection names;
             - any non-mapping iterable containing any of the above;
             - a mapping from `str` to any expression accepted by
               `DatasetTypeRestriction`.
             - a `CollectionSearch` instance;
             - another `CollectionQuery` instance (passed through unchanged).

            Multiple consecutive entries for the same collection with different
            restrictions will be merged.  Non-consecutive entries will not,
            because that actually represents a different search path.

        Returns
        -------
        collections : `CollectionQuery`
            A `CollectionQuery` instance.
        """
        if isinstance(expression, cls):
            return expression
        if expression is Ellipsis:
            return cls.any
        if isinstance(expression, CollectionSearch):
            return cls(search=expression, patterns=())
        wildcard = CategorizedWildcard.fromExpression(expression,
                                                      allowAny=True,
                                                      allowPatterns=True,
                                                      coerceItemValue=DatasetTypeRestriction.fromExpression,
                                                      defaultItemValue=DatasetTypeRestriction.any)
        if wildcard is Ellipsis:
            return cls.any
        assert not wildcard.strings, \
            "All bare strings should be transformed to (str, DatasetTypeRestriction) tuples."
        # Explicit names become an (unordered-use) CollectionSearch; patterns
        # are kept separately and matched against all known collections.
        return cls(search=CollectionSearch.fromExpression(wildcard.items),
                   patterns=tuple(wildcard.patterns))

    def iterPairs(
            self, manager: CollectionManager, *,
            datasetType: Optional[DatasetType] = None,
            collectionTypes: AbstractSet[CollectionType] = CollectionType.all(),
            flattenChains: bool = True,
            includeChains: Optional[bool] = None,
    ) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
        """Like `iter`, but yield pairs of `CollectionRecord`,
        `DatasetTypeRestriction` instead of just the former.

        See `iter` for all parameter descriptions.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        restriction : `DatasetTypeRestriction`
            The given dataset type restriction.

        """
        if self._search is Ellipsis:
            # Unconstrained query: consider every collection the manager
            # knows about, with no dataset type restriction.
            for record in manager:
                yield from _yieldCollectionRecords(
                    manager,
                    record,
                    DatasetTypeRestriction.any,
                    datasetType=datasetType,
                    collectionTypes=collectionTypes,
                    flattenChains=flattenChains,
                    includeChains=includeChains,
                )
        else:
            # First yield explicitly-named collections (tracking names in
            # ``done`` so pattern matches below never duplicate them)...
            done: Set[str] = set()
            yield from self._search.iterPairs(
                manager,
                datasetType=datasetType,
                collectionTypes=collectionTypes,
                done=done,
                flattenChains=flattenChains,
                includeChains=includeChains,
            )
            # ...then any remaining collections whose names fully match one
            # of the regular expression patterns.
            for record in manager:
                if record.name not in done and any(p.fullmatch(record.name) for p in self._patterns):
                    yield from _yieldCollectionRecords(
                        manager,
                        record,
                        DatasetTypeRestriction.any,
                        datasetType=datasetType,
                        collectionTypes=collectionTypes,
                        done=done,
                        flattenChains=flattenChains,
                        includeChains=includeChains,
                    )

    def iter(
            self, manager: CollectionManager, *,
            datasetType: Optional[DatasetType] = None,
            collectionTypes: AbstractSet[CollectionType] = CollectionType.all(),
            flattenChains: bool = True,
            includeChains: Optional[bool] = None,
    ) -> Iterator[CollectionRecord]:
        """Iterate over collection records that match this instance and the
        given criteria, in an arbitrary order.

        This method is primarily intended for internal use by `Registry`;
        other callers should generally prefer `Registry.queryDatasets` or
        other `Registry` query methods.

        Parameters
        ----------
        manager : `CollectionManager`
            Object responsible for managing the collection tables in a
            `Registry`.
        datasetType : `DatasetType`, optional
            If given, only yield collections whose dataset type restrictions
            include this dataset type.
        collectionTypes : `AbstractSet` [ `CollectionType` ], optional
            If provided, only yield collections of these types.
        flattenChains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `False`, return records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flattenChains``: either return records for CHAINED collections or
            their children, but not both.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        """
        for record, _ in self.iterPairs(manager, datasetType=datasetType, collectionTypes=collectionTypes,
                                        flattenChains=flattenChains, includeChains=includeChains):
            yield record

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, CollectionQuery):
            return self._search == other._search and self._patterns == other._patterns
        else:
            return False

    any: ClassVar[CollectionQuery]
    """A special `CollectionQuery` instance that matches any collection.

    This instance should be preferred instead of constructing a new one with
    ``...``, when possible, but it should not be assumed to be the only such
    instance.
    """

854 

855 

# Shared "match everything" query instance; assigned after the class
# definition because it is itself an instance of CollectionQuery.
CollectionQuery.any = CollectionQuery(Ellipsis, ())