Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1# This file is part of daf_butler. 

2# 

3# Developed for the LSST Data Management System. 

4# This product includes software developed by the LSST Project 

5# (http://www.lsst.org). 

6# See the COPYRIGHT file at the top-level directory of this distribution 

7# for details of code ownership. 

8# 

9# This program is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# This program is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with this program. If not, see <http://www.gnu.org/licenses/>. 

21from __future__ import annotations 

22 

23__all__ = ( 

24 "CategorizedWildcard", 

25 "CollectionQuery", 

26 "CollectionSearch", 

27 "DatasetTypeRestriction", 

28 "Ellipsis", 

29 "EllipsisType", 

30) 

31 

32from dataclasses import dataclass 

33import itertools 

34import operator 

35import re 

36from typing import ( 

37 Any, 

38 Callable, 

39 ClassVar, 

40 FrozenSet, 

41 Iterator, 

42 List, 

43 Optional, 

44 Set, 

45 Tuple, 

46 TYPE_CHECKING, 

47 Union, 

48) 

49 

50import sqlalchemy 

51 

52from ..core import DatasetType 

53from ..core.utils import iterable 

54from ._collectionType import CollectionType 

55 

if TYPE_CHECKING:
    from .interfaces import CollectionManager, CollectionRecord

    # Workaround for `...` not having an exposed type in Python, borrowed from
    # https://github.com/python/typing/issues/684#issuecomment-548203158
    # Along with that, we need to use `Ellipsis` instead of `...` for the
    # actual sentinel value internally, and tell MyPy to ignore conversions
    # from `...` to `Ellipsis` at the public-interface boundary.
    from enum import Enum

    class EllipsisType(Enum):
        Ellipsis = "..."

    Ellipsis = EllipsisType.Ellipsis

else:
    # At runtime the names are simply aliases for the built-in singleton and
    # its type, so `x is Ellipsis` and `x is ...` are equivalent.
    EllipsisType = type(Ellipsis)
    Ellipsis = Ellipsis

74 

75 

@dataclass
class CategorizedWildcard:
    """The results of preprocessing a wildcard expression to separate match
    patterns from strings.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).
    """

    @classmethod
    def fromExpression(cls, expression: Any, *,
                       allowAny: bool = True,
                       allowPatterns: bool = True,
                       coerceUnrecognized: Optional[Callable[[Any], Union[Tuple[str, Any], str]]] = None,
                       coerceItemValue: Optional[Callable[[Any], Any]] = None,
                       defaultItemValue: Optional[Any] = None,
                       ) -> Union[CategorizedWildcard, EllipsisType]:
        """Categorize a wildcard expression.

        Parameters
        ----------
        expression
            The expression to categorize.  May be any of:
             - `str`;
             - `re.Pattern` (only if ``allowPatterns`` is `True`);
             - objects recognized by ``coerceUnrecognized`` (if provided);
             - two-element tuples of (`str`, value) where value is recognized
               by ``coerceItemValue`` (if provided);
             - a non-`str`, non-mapping iterable containing any of the above;
             - the special value `...` (only if ``allowAny`` is `True`), which
               matches anything;
             - a mapping from `str` to a value that is recognized by
               ``coerceItemValue`` (if provided);
             - a `CategorizedWildcard` instance (passed through unchanged if
               it meets the requirements specified by keyword arguments).
        allowAny: `bool`, optional
            If `False` (`True` is default) raise `TypeError` if `...` is
            encountered.
        allowPatterns: `bool`, optional
            If `False` (`True` is default) raise `TypeError` if a `re.Pattern`
            is encountered, or if ``expression`` is a `CategorizedWildcard`
            with `patterns` not empty.
        coerceUnrecognized: `Callable`, optional
            A callback that takes a single argument of arbitrary type and
            returns either a `str` - appended to `strings` - or a `tuple` of
            (`str`, `Any`) to be appended to `items` (tuples are only
            recognized when ``coerceItemValue`` is also provided).  This will
            be called on objects of unrecognized type.  Exceptions will be
            reraised as `TypeError` (and chained).
        coerceItemValue: `Callable`, optional
            If provided, ``expression`` may be a mapping from `str` to any
            type that can be passed to this function; the result of that call
            will be stored instead as the value in ``self.items``.
        defaultItemValue: `Any`, optional
            If provided, combine this value with any string values encountered
            (including any returned by ``coerceUnrecognized``) to form a
            `tuple` and add it to `items`, guaranteeing that `strings` will be
            empty.  Patterns are never added to `items`.

        Returns
        -------
        categorized : `CategorizedWildcard` or ``...``.
            The struct describing the wildcard.  ``...`` is passed through
            unchanged.

        Raises
        ------
        TypeError
            Raised if an unsupported type is found in the expression.
        """
        assert expression is not None
        # See if we were given ...; just return that if we were.
        if expression is Ellipsis:
            if not allowAny:
                raise TypeError("This expression may not be unconstrained.")
            return Ellipsis
        if isinstance(expression, cls):
            # This is already a CategorizedWildcard.  Make sure it meets the
            # reqs. implied by the kwargs we got.
            if not allowPatterns and expression.patterns:
                raise TypeError(f"Regular expression(s) {expression.patterns} "
                                f"are not allowed in this context.")
            if defaultItemValue is not None and expression.strings:
                if expression.items:
                    raise TypeError("Incompatible preprocessed expression: an ordered sequence of str is "
                                    "needed, but the original order was lost in the preprocessing.")
                return cls(strings=[], patterns=expression.patterns,
                           items=[(k, defaultItemValue) for k in expression.strings])
            elif defaultItemValue is None and expression.items:
                if expression.strings:
                    raise TypeError("Incompatible preprocessed expression: an ordered sequence of items is "
                                    "needed, but the original order was lost in the preprocessing.")
                return cls(strings=[k for k, _ in expression.items], patterns=expression.patterns, items=[])
            else:
                # Original expression was created with keyword arguments that
                # were at least as restrictive as what we just got; pass it
                # through.
                return expression

        # If we get here, we know we'll be creating a new instance.
        # Initialize an empty one now.
        self = cls(strings=[], patterns=[], items=[])

        # If mappings are allowed, see if we were given a single mapping by
        # trying to get items.
        if coerceItemValue is not None:
            rawItems = None
            try:
                rawItems = expression.items()
            except AttributeError:
                pass
            if rawItems is not None:
                for k, v in rawItems:
                    try:
                        self.items.append((k, coerceItemValue(v)))
                    except Exception as err:
                        raise TypeError(f"Could not coerce mapping value '{v}' for key '{k}'.") from err
                return self

        # Not ..., a CategorizedWildcard instance, or a mapping.  Just
        # process scalars or an iterable.  We put the body of the loop inside
        # a local function so we can recurse after coercion.

        def process(element: Any, alreadyCoerced: bool = False) -> None:
            if isinstance(element, str):
                if defaultItemValue is not None:
                    self.items.append((element, defaultItemValue))
                else:
                    self.strings.append(element)
                return
            if allowPatterns and isinstance(element, re.Pattern):
                self.patterns.append(element)
                return
            if coerceItemValue is not None:
                try:
                    k, v = element
                except (TypeError, ValueError):
                    # Not a two-element iterable: either not iterable at all
                    # (TypeError) or of the wrong length (ValueError).  Fall
                    # through so the element is coerced or reported as an
                    # unsupported type with the documented TypeError.
                    pass
                else:
                    if not alreadyCoerced:
                        if not isinstance(k, str):
                            raise TypeError(f"Item key '{k}' is not a string.")
                        try:
                            v = coerceItemValue(v)
                        except Exception as err:
                            raise TypeError(f"Could not coerce tuple item value '{v}' for key '{k}'."
                                            ) from err
                    self.items.append((k, v))
                    return
            if alreadyCoerced:
                raise TypeError(f"Object '{element}' returned by coercion function is still unrecognized.")
            if coerceUnrecognized is not None:
                try:
                    process(coerceUnrecognized(element), alreadyCoerced=True)
                except Exception as err:
                    raise TypeError(f"Could not coerce expression element '{element}'.") from err
            else:
                raise TypeError(f"Unsupported object in wildcard expression: '{element}'.")

        for element in iterable(expression):
            process(element)
        return self

    def makeWhereExpression(self, column: sqlalchemy.sql.ColumnElement
                            ) -> Optional[sqlalchemy.sql.ColumnElement]:
        """Transform the wildcard into a SQLAlchemy boolean expression suitable
        for use in a WHERE clause.

        Parameters
        ----------
        column : `sqlalchemy.sql.ColumnElement`
            A string column in a table or query that should be compared to the
            wildcard expression.

        Returns
        -------
        where : `sqlalchemy.sql.ColumnElement` or `None`
            A boolean SQL expression that evaluates to true if and only if
            the value of ``column`` matches the wildcard.  `None` is returned
            if both `strings` and `patterns` are empty, and hence no match is
            possible.

        Raises
        ------
        NotImplementedError
            Raised if `items` or `patterns` is not empty; neither can be
            translated into a query term yet.
        """
        if self.items:
            raise NotImplementedError("Expressions that are processed into items cannot be transformed "
                                      "automatically into queries.")
        if self.patterns:
            raise NotImplementedError("Regular expression patterns are not yet supported here.")
        terms = []
        if len(self.strings) == 1:
            terms.append(column == self.strings[0])
        elif len(self.strings) > 1:
            terms.append(column.in_(self.strings))
        # TODO: append terms for regular expressions
        if not terms:
            return None
        return sqlalchemy.sql.or_(*terms)

    strings: List[str]
    """Explicit string values found in the wildcard (`list` [ `str` ]).
    """

    patterns: List[re.Pattern]
    """Regular expression patterns found in the wildcard
    (`list` [ `re.Pattern` ]).
    """

    items: List[Tuple[str, Any]]
    """Two-item tuples that relate string values to other objects
    (`list` [ `tuple` [ `str`, `Any` ] ]).
    """

287 

288 

class DatasetTypeRestriction:
    """An immutable set-like object that represents a restriction on the
    dataset types to search for within a collection.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    names : `frozenset` [`str`] or `...`
        The names of the dataset types included in the restriction, or `...`
        to permit a search for any dataset type.

    Notes
    -----
    This class does not inherit from `collections.abc.Set` (and does not
    implement the full set interface) because it is not always iterable and
    sometimes has no length (i.e. when ``names`` is ``...``).
    """
    def __init__(self, names: Union[FrozenSet[str], EllipsisType]):
        self.names = names

    __slots__ = ("names",)

    @classmethod
    def fromExpression(cls, expression: Any) -> DatasetTypeRestriction:
        """Process a general expression to construct a `DatasetTypeRestriction`
        instance.

        Parameters
        ----------
        expression
            May be:
             - a `DatasetType` instance;
             - a `str` dataset type name;
             - any non-mapping iterable containing either of the above;
             - the special value `...`;
             - another `DatasetTypeRestriction` instance (passed through
               unchanged).

        Returns
        -------
        restriction : `DatasetTypeRestriction`
            A `DatasetTypeRestriction` instance.
        """
        if isinstance(expression, cls):
            return expression
        # Anything that is not a string is assumed to expose its dataset type
        # name via a ``name`` attribute (failures are reported as TypeError
        # by CategorizedWildcard).
        wildcard = CategorizedWildcard.fromExpression(expression, allowPatterns=False,
                                                      coerceUnrecognized=lambda d: d.name)
        if wildcard is Ellipsis:
            return cls.any
        else:
            return cls(frozenset(wildcard.strings))

    def __contains__(self, datasetType: DatasetType) -> bool:
        # A component dataset type is also included when its parent
        # (composite) dataset type name is in the restriction.
        return (self.names is Ellipsis or datasetType.name in self.names
                or (datasetType.isComponent()
                    and DatasetType.splitDatasetTypeName(datasetType.name)[0] in self.names))

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, DatasetTypeRestriction):
            return self.names == other.names
        else:
            return False

    def __hash__(self) -> int:
        # Defining __eq__ alone would make instances unhashable; hash on the
        # same state that __eq__ compares so equal restrictions hash equally.
        return hash(self.names)

    def __str__(self) -> str:
        if self.names is Ellipsis:
            return "..."
        else:
            # Sort so the string form does not depend on set iteration order.
            return "{{{}}}".format(", ".join(sorted(self.names)))

    def __repr__(self) -> str:
        if self.names is Ellipsis:
            return "DatasetTypeRestriction(...)"
        else:
            return f"DatasetTypeRestriction({self.names!r})"

    @staticmethod
    def union(*args: DatasetTypeRestriction) -> DatasetTypeRestriction:
        """Merge one or more `DatasetTypeRestriction` instances, returning one
        that allows any of the dataset types included in any of them.

        Parameters
        ----------
        args
            Positional arguments are `DatasetTypeRestriction` instances.

        Returns
        -------
        merged : `DatasetTypeRestriction`
            `DatasetTypeRestriction.any` if any argument is unrestricted,
            otherwise a new restriction holding the union of all names.
        """
        result: Set[str] = set()
        for a in args:
            if a.names is Ellipsis:
                return DatasetTypeRestriction.any
            else:
                result.update(a.names)
        return DatasetTypeRestriction(frozenset(result))

    names: Union[FrozenSet[str], EllipsisType]
    """The names of the dataset types included (i.e. permitted) by the
    restriction, or the special value ``...`` to permit all dataset types
    (`frozenset` [ `str` ] or ``...``).
    """

    any: ClassVar[DatasetTypeRestriction]
    """A special `DatasetTypeRestriction` instance that permits any dataset
    type.

    This instance should be preferred instead of constructing a new one with
    ``...``, when possible, but it should not be assumed to be the only such
    instance (i.e. don't use ``is`` instead of ``==`` for comparisons).
    """


DatasetTypeRestriction.any = DatasetTypeRestriction(Ellipsis)

402 

403 

def _yieldCollectionRecords(
    manager: CollectionManager,
    record: CollectionRecord,
    restriction: DatasetTypeRestriction,
    datasetType: Optional[DatasetType] = None,
    collectionType: Optional[CollectionType] = None,
    done: Optional[Set[str]] = None,
    flattenChains: bool = True,
    includeChains: Optional[bool] = None,
) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
    """A helper function containing common logic for `CollectionSearch.iter`
    and `CollectionQuery.iter`: recursively yield `CollectionRecord` only if
    they match the criteria given in other arguments.

    Parameters
    ----------
    manager : `CollectionManager`
        Object responsible for managing the collection tables in a `Registry`.
    record : `CollectionRecord`
        Record to conditionally yield.
    restriction : `DatasetTypeRestriction`
        The restriction to yield alongside ``record``.  It is not tested
        against ``datasetType`` here; callers are expected to have done that
        already.
    datasetType : `DatasetType`, optional
        If given, a `DatasetType` instance that is forwarded to the search
        over the children of a `~CollectionType.CHAINED` collection.
    collectionType : `CollectionType`, optional
        If given, a `CollectionType` enumeration value that must match
        ``record.type`` in order for ``record`` to be yielded.
    done : `set` [ `str` ], optional
        A `set` of already-yielded collection names.  It is updated to
        include ``record.name`` when ``record`` matches ``collectionType`` or
        is a flattened chain, and is passed on to the child search.  This
        function does not itself skip records that are already in ``done``;
        callers are expected to check before calling.
    flattenChains : `bool`, optional
        If `True` (default) recursively yield the child collections of
        `~CollectionType.CHAINED` collections.
    includeChains : `bool`, optional
        If `True`, return records for `~CollectionType.CHAINED` collections
        themselves.  The default is the opposite of ``flattenChains``: either
        return records for CHAINED collections or their children, but not both.

    Yields
    ------
    record : `CollectionRecord`
        Matching collection records.
    restriction : `DatasetTypeRestriction`
        The given dataset type restriction.
    """
    if done is None:
        done = set()
    if includeChains is None:
        includeChains = not flattenChains
    isChain = record.type is CollectionType.CHAINED
    if collectionType is None or record.type is collectionType:
        done.add(record.name)
        if includeChains or not isChain:
            yield record, restriction
    if flattenChains and isChain:
        # Mark the chain itself as visited even when it was filtered out by
        # ``collectionType`` above.
        done.add(record.name)
        # We know this is a ChainedCollectionRecord because of the enum value,
        # but MyPy doesn't.
        yield from record.children.iterPairs(  # type: ignore
            manager,
            datasetType=datasetType,
            collectionType=collectionType,
            done=done,
            flattenChains=flattenChains,
            includeChains=includeChains,
        )

471 

472 

class CollectionSearch:
    """An ordered search path of collections and dataset type restrictions.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    items : `list` [ `tuple` [ `str`, `DatasetTypeRestriction` ] ]
        Tuples that relate a collection name to the restriction on dataset
        types to search for within it.  This is not a mapping because the
        same collection name may appear multiple times with different
        restrictions.

    Notes
    -----
    A `CollectionSearch` is used to find a single dataset according to its
    dataset type and data ID, giving preference to collections in the order
    in which they are specified.  A `CollectionQuery` can be constructed from
    a broader range of expressions but does not order the collections to be
    searched.

    `CollectionSearch` is iterable, yielding two-element tuples of `str`
    (collection name) and `DatasetTypeRestriction`.

    A `CollectionSearch` instance constructed properly (e.g. via
    `fromExpression`) is a unique representation of a particular search path;
    it is exactly the same internally and compares as equal to any
    `CollectionSearch` constructed from an equivalent expression,
    regardless of how different the original expressions appear.
    """
    def __init__(self, items: List[Tuple[str, DatasetTypeRestriction]]):
        assert all(isinstance(v, DatasetTypeRestriction) for _, v in items)
        self._items = items

    # Note the trailing comma: without it this is a plain string rather than
    # a one-element tuple.
    __slots__ = ("_items",)

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionSearch:
        """Process a general expression to construct a `CollectionSearch`
        instance.

        Parameters
        ----------
        expression
            May be:
             - a `str` collection name;
             - a two-element `tuple` containing a `str` and any expression
               accepted by `DatasetTypeRestriction.fromExpression`;
             - any non-mapping iterable containing either of the above;
             - a mapping from `str` to any expression accepted by
               `DatasetTypeRestriction`.
             - another `CollectionSearch` instance (passed through
               unchanged).

            Multiple consecutive entries for the same collection with different
            restrictions will be merged.  Non-consecutive entries will not,
            because that actually represents a different search path.

        Returns
        -------
        collections : `CollectionSearch`
            A `CollectionSearch` instance.
        """
        # First see if this is already a CollectionSearch; just pass that
        # through unchanged.  This lets us standardize expressions (and turn
        # single-pass iterators into multi-pass iterables) in advance and pass
        # them down to other routines that accept arbitrary expressions.
        if isinstance(expression, cls):
            return expression
        wildcard = CategorizedWildcard.fromExpression(expression,
                                                      allowAny=False,
                                                      allowPatterns=False,
                                                      coerceItemValue=DatasetTypeRestriction.fromExpression,
                                                      defaultItemValue=DatasetTypeRestriction.any)
        assert wildcard is not Ellipsis
        assert not wildcard.patterns
        assert not wildcard.strings
        return cls(
            # Consolidate repetitions of the same collection name.
            [(name, DatasetTypeRestriction.union(*tuple(item[1] for item in items)))
             for name, items in itertools.groupby(wildcard.items, key=operator.itemgetter(0))]
        )

    def iterPairs(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionType: Optional[CollectionType] = None,
        done: Optional[Set[str]] = None,
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
        """Like `iter`, but yield pairs of `CollectionRecord`,
        `DatasetTypeRestriction` instead of just the former.

        See `iter` for all parameter descriptions.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        restriction : `DatasetTypeRestriction`
            The given dataset type restriction.
        """
        if done is None:
            done = set()
        for name, restriction in self._items:
            # Skip collections already visited and those whose restriction
            # excludes the requested dataset type.
            if name not in done and (datasetType is None or datasetType in restriction):
                yield from _yieldCollectionRecords(
                    manager,
                    manager.find(name),
                    restriction,
                    datasetType=datasetType,
                    collectionType=collectionType,
                    done=done,
                    flattenChains=flattenChains,
                    includeChains=includeChains,
                )

    def iter(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionType: Optional[CollectionType] = None,
        done: Optional[Set[str]] = None,
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[CollectionRecord]:
        """Iterate over collection records that match this instance and the
        given criteria, in order.

        This method is primarily intended for internal use by `Registry`;
        other callers should generally prefer `Registry.findDatasets` or
        other `Registry` query methods.

        Parameters
        ----------
        manager : `CollectionManager`
            Object responsible for managing the collection tables in a
            `Registry`.
        datasetType : `DatasetType`, optional
            If given, only yield collections whose dataset type restrictions
            include this dataset type.
        collectionType : `CollectionType`, optional
            If given, only yield collections of this type.
        done : `set`, optional
            A `set` containing the names of all collections already yielded;
            any collections whose names are already present in this set will
            not be yielded again, and those yielded will be added to it while
            iterating.  If not provided, an empty `set` will be created and
            used internally to avoid duplicates.
        flattenChains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flattenChains``: either return records for CHAINED collections or
            their children, but not both.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        """
        for record, _ in self.iterPairs(manager, datasetType=datasetType, collectionType=collectionType,
                                        done=done, flattenChains=flattenChains, includeChains=includeChains):
            yield record

    def __iter__(self) -> Iterator[Tuple[str, DatasetTypeRestriction]]:
        yield from self._items

    def __len__(self) -> int:
        return len(self._items)

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, CollectionSearch):
            return self._items == other._items
        else:
            return False

    def __str__(self) -> str:
        return "[{}]".format(", ".join(f"{k}: {v}" for k, v in self._items))

    def __repr__(self) -> str:
        return f"CollectionSearch({self._items!r})"

659 

660 

class CollectionQuery:
    """An unordered query for collections and dataset type restrictions.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    search : `CollectionSearch` or `...`
        An object representing an ordered search for explicitly-named
        collections (to be interpreted here as unordered), or the special
        value `...` indicating all collections.  `...` must be accompanied
        by an empty ``patterns`` tuple.
    patterns : `tuple` of `re.Pattern`
        Regular expression patterns to match against collection names.

    Notes
    -----
    A `CollectionQuery` is used to find all matching datasets in any number
    of collections, or to find collections themselves.

    `CollectionQuery` is expected to be rarely used outside of `Registry`
    (which uses it to back several of its "query" methods that take general
    expressions for collections), but it may occasionally be useful outside
    `Registry` as a way to preprocess expressions that contain single-pass
    iterators into a form that can be used to call those `Registry` methods
    multiple times.
    """
    def __init__(self, search: Union[CollectionSearch, EllipsisType], patterns: Tuple[re.Pattern, ...]):
        self._search = search
        self._patterns = patterns

    __slots__ = ("_search", "_patterns")

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionQuery:
        """Process a general expression to construct a `CollectionQuery`
        instance.

        Parameters
        ----------
        expression
            May be:
             - a `str` collection name;
             - a two-element `tuple` containing a `str` and any expression
               accepted by `DatasetTypeRestriction.fromExpression`;
             - an `re.Pattern` instance to match (with `re.Pattern.fullmatch`)
               against collection names;
             - any non-mapping iterable containing any of the above;
             - a mapping from `str` to any expression accepted by
               `DatasetTypeRestriction`.
             - a `CollectionSearch` instance;
             - another `CollectionQuery` instance (passed through unchanged).

            Multiple consecutive entries for the same collection with different
            restrictions will be merged.  Non-consecutive entries will not,
            because that actually represents a different search path.

        Returns
        -------
        collections : `CollectionQuery`
            A `CollectionQuery` instance.
        """
        if isinstance(expression, cls):
            return expression
        if expression is Ellipsis:
            return cls.any
        if isinstance(expression, CollectionSearch):
            return cls(search=expression, patterns=())
        wildcard = CategorizedWildcard.fromExpression(expression,
                                                      allowAny=True,
                                                      allowPatterns=True,
                                                      coerceItemValue=DatasetTypeRestriction.fromExpression,
                                                      defaultItemValue=DatasetTypeRestriction.any)
        if wildcard is Ellipsis:
            return cls.any
        assert not wildcard.strings
        # Hand only the explicit (name, restriction) items to CollectionSearch.
        # Passing the whole wildcard would make CollectionSearch re-validate
        # it with allowPatterns=False and raise TypeError whenever the
        # expression contained a regular expression; the patterns are kept
        # separately on this object instead.
        return cls(search=CollectionSearch.fromExpression(wildcard.items),
                   patterns=tuple(wildcard.patterns))

    def iterPairs(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionType: Optional[CollectionType] = None,
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
        """Like `iter`, but yield pairs of `CollectionRecord`,
        `DatasetTypeRestriction` instead of just the former.

        See `iter` for all parameter descriptions.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        restriction : `DatasetTypeRestriction`
            The given dataset type restriction.

        """
        if self._search is Ellipsis:
            # Unconstrained query: consider every collection the manager
            # iterates over, with no dataset type restriction.
            for record in manager:
                yield from _yieldCollectionRecords(
                    manager,
                    record,
                    DatasetTypeRestriction.any,
                    datasetType=datasetType,
                    collectionType=collectionType,
                    flattenChains=flattenChains,
                    includeChains=includeChains,
                )
        else:
            # Explicitly named collections first, then any remaining
            # collections whose names fully match one of the patterns.
            done: Set[str] = set()
            yield from self._search.iterPairs(
                manager,
                datasetType=datasetType,
                collectionType=collectionType,
                done=done,
                flattenChains=flattenChains,
                includeChains=includeChains,
            )
            for record in manager:
                if record.name not in done and any(p.fullmatch(record.name) for p in self._patterns):
                    yield from _yieldCollectionRecords(
                        manager,
                        record,
                        DatasetTypeRestriction.any,
                        datasetType=datasetType,
                        collectionType=collectionType,
                        done=done,
                        flattenChains=flattenChains,
                        includeChains=includeChains,
                    )

    def iter(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionType: Optional[CollectionType] = None,
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[CollectionRecord]:
        """Iterate over collection records that match this instance and the
        given criteria, in an arbitrary order.

        This method is primarily intended for internal use by `Registry`;
        other callers should generally prefer `Registry.queryDatasets` or
        other `Registry` query methods.

        Parameters
        ----------
        manager : `CollectionManager`
            Object responsible for managing the collection tables in a
            `Registry`.
        datasetType : `DatasetType`, optional
            If given, only yield collections whose dataset type restrictions
            include this dataset type.
        collectionType : `CollectionType`, optional
            If given, only yield collections of this type.
        flattenChains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, return records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flattenChains``: either return records for CHAINED collections or
            their children, but not both.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        """
        for record, _ in self.iterPairs(manager, datasetType=datasetType, collectionType=collectionType,
                                        flattenChains=flattenChains, includeChains=includeChains):
            yield record

    any: ClassVar[CollectionQuery]
    """A special `CollectionQuery` instance that matches any collection.

    This instance should be preferred instead of constructing a new one with
    ``...``, when possible, but it should not be assumed to be the only such
    instance.
    """


CollectionQuery.any = CollectionQuery(Ellipsis, ())