Coverage for python/lsst/daf/butler/registry/wildcards.py : 18%

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = (
    "CategorizedWildcard",
    "CollectionQuery",
    "CollectionSearch",
    "DatasetTypeRestriction",
    "Ellipsis",
    "EllipsisType",
)

from dataclasses import dataclass
import itertools
import operator
import re
from typing import (
    Any,
    Callable,
    ClassVar,
    FrozenSet,
    Iterator,
    List,
    Optional,
    Set,
    Tuple,
    TYPE_CHECKING,
    Union,
)

import sqlalchemy

from ..core import DatasetType
from ..core.utils import iterable
from ._collectionType import CollectionType

if TYPE_CHECKING:
    from .interfaces import CollectionManager, CollectionRecord

    # Workaround for `...` not having an exposed type in Python, borrowed from
    # https://github.com/python/typing/issues/684#issuecomment-548203158
    # Along with that, we need to use `Ellipsis` instead of `...` for the
    # actual sentinel value internally, and tell MyPy to ignore conversions
    # from `...` to `Ellipsis` at the public-interface boundary.
    from enum import Enum

    class EllipsisType(Enum):
        Ellipsis = "..."

    Ellipsis = EllipsisType.Ellipsis

else:
    EllipsisType = type(Ellipsis)
    Ellipsis = Ellipsis


@dataclass
class CategorizedWildcard:
    """The results of preprocessing a wildcard expression to separate match
    patterns from strings.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).
    """

    @classmethod
    def fromExpression(cls, expression: Any, *,
                       allowAny: bool = True,
                       allowPatterns: bool = True,
                       coerceUnrecognized: Optional[Callable[[Any], Union[Tuple[str, Any], str]]] = None,
                       coerceItemValue: Optional[Callable[[Any], Any]] = None,
                       defaultItemValue: Optional[Any] = None,
                       ) -> Union[CategorizedWildcard, EllipsisType]:
94 """Categorize a wildcard expression.
96 Parameters
97 ----------
98 expression
99 The expression to categorize. May be any of:
100 - `str`;
101 - `re.Pattern` (only if ``allowPatterns`` is `True`);
102 - objects recognized by ``coerceUnrecognized`` (if provided);
103 - two-element tuples of (`str`, value) where value is recognized
104 by ``coerceItemValue`` (if provided);
105 - a non-`str`, non-mapping iterable containing any of the above;
106 - the special value `...` (only if ``allowAny`` is `True`), which
107 matches anything;
108 - a mapping from `str` to a value are recognized by
109 ``coerceItemValue`` (if provided);
110 - a `CategorizedWildcard` instance (passed through unchanged if
111 it meets the requirements specified by keyword arguments).
112 allowAny: `bool`, optional
113 If `False` (`True` is default) raise `TypeError` if `...` is
114 encountered.
115 allowPatterns: `bool`, optional
116 If `False` (`True` is default) raise `TypeError` if a `re.Pattern`
117 is encountered, or if ``expression`` is a `CategorizedWildcard`
118 with `patterns` not empty.
119 coerceUnrecognized: `Callable`, optional
120 A callback that takes a single argument of arbitrary type and
121 returns either a `str` - appended to `strings` - or a `tuple` of
122 (`str`, `Any`) to be appended to `items`. This will be called on
123 objects of unrecognized type, with the return value added to
124 `strings`. Exceptions will be reraised as `TypeError` (and
125 chained).
126 coerceItemValue: `Callable`, optional
127 If provided, ``expression`` may be a mapping from `str` to any
128 type that can be passed to this function; the result of that call
129 will be stored instead as the value in ``self.items``.
130 defaultItemValue: `Any`, optional
131 If provided, combine this value with any string values encountered
132 (including any returned by ``coerceUnrecognized``) to form a
133 `tuple` and add it to `items`, guaranteeing that `strings` will be
134 empty. Patterns are never added to `items`.
136 Returns
137 -------
138 categorized : `CategorizedWildcard` or ``...``.
139 The struct describing the wildcard. ``...`` is passed through
140 unchanged.
142 Raises
143 ------
144 TypeError
145 Raised if an unsupported type is found in the expression.
146 """
        assert expression is not None
        # See if we were given ...; just return that if we were.
        if expression is Ellipsis:
            if not allowAny:
                raise TypeError("This expression may not be unconstrained.")
            return Ellipsis
        if isinstance(expression, cls):
            # This is already a CategorizedWildcard. Make sure it meets the
            # reqs. implied by the kwargs we got.
            if not allowPatterns and expression.patterns:
                raise TypeError(f"Regular expression(s) {expression.patterns} "
                                f"are not allowed in this context.")
            if defaultItemValue is not None and expression.strings:
                if expression.items:
                    raise TypeError("Incompatible preprocessed expression: an ordered sequence of str is "
                                    "needed, but the original order was lost in the preprocessing.")
                return cls(strings=[], patterns=expression.patterns,
                           items=[(k, defaultItemValue) for k in expression.strings])
            elif defaultItemValue is None and expression.items:
                if expression.strings:
                    raise TypeError("Incompatible preprocessed expression: an ordered sequence of items is "
                                    "needed, but the original order was lost in the preprocessing.")
                return cls(strings=[k for k, _ in expression.items], patterns=expression.patterns, items=[])
            else:
                # Original expression was created with keyword arguments that
                # were at least as restrictive as what we just got; pass it
                # through.
                return expression

        # If we get here, we know we'll be creating a new instance.
        # Initialize an empty one now.
        self = cls(strings=[], patterns=[], items=[])

        # If mappings are allowed, see if we were given a single mapping by
        # trying to get items.
        if coerceItemValue is not None:
            rawItems = None
            try:
                rawItems = expression.items()
            except AttributeError:
                pass
            if rawItems is not None:
                for k, v in rawItems:
                    try:
                        self.items.append((k, coerceItemValue(v)))
                    except Exception as err:
                        raise TypeError(f"Could not coerce mapping value '{v}' for key '{k}'.") from err
                return self

        # Not ..., a CategorizedWildcard instance, or a mapping. Just
        # process scalars or an iterable. We put the body of the loop inside
        # a local function so we can recurse after coercion.

        def process(element: Any, alreadyCoerced: bool = False) -> None:
            if isinstance(element, str):
                if defaultItemValue is not None:
                    self.items.append((element, defaultItemValue))
                else:
                    self.strings.append(element)
                return
            if allowPatterns and isinstance(element, re.Pattern):
                self.patterns.append(element)
                return
            if coerceItemValue is not None:
                try:
                    k, v = element
                except TypeError:
                    pass
                else:
                    if not alreadyCoerced:
                        if not isinstance(k, str):
                            raise TypeError(f"Item key '{k}' is not a string.")
                        try:
                            v = coerceItemValue(v)
                        except Exception as err:
                            raise TypeError(f"Could not coerce tuple item value '{v}' for key '{k}'."
                                            ) from err
                    self.items.append((k, v))
                    return
            if alreadyCoerced:
                raise TypeError(f"Object '{element}' returned by coercion function is still unrecognized.")
            if coerceUnrecognized is not None:
                try:
                    process(coerceUnrecognized(element), alreadyCoerced=True)
                except Exception as err:
                    raise TypeError(f"Could not coerce expression element '{element}'.") from err
            else:
                raise TypeError(f"Unsupported object in wildcard expression: '{element}'.")

        for element in iterable(expression):
            process(element)
        return self
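
    # Illustrative sketch of how a caller might use `fromExpression`; the
    # collection name and pattern below are hypothetical, not part of the
    # original module.
    #
    #     import re
    #     w = CategorizedWildcard.fromExpression(["calib", re.compile(r"u/.+/run\d+")])
    #     w.strings                                  # ['calib']
    #     len(w.patterns)                            # 1
    #     CategorizedWildcard.fromExpression(...)    # the `...` sentinel, passed through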

    def makeWhereExpression(self, column: sqlalchemy.sql.ColumnElement
                            ) -> Optional[sqlalchemy.sql.ColumnElement]:
        """Transform the wildcard into a SQLAlchemy boolean expression suitable
        for use in a WHERE clause.

        Parameters
        ----------
        column : `sqlalchemy.sql.ColumnElement`
            A string column in a table or query that should be compared to the
            wildcard expression.

        Returns
        -------
        where : `sqlalchemy.sql.ColumnElement` or `None`
            A boolean SQL expression that evaluates to true if and only if
            the value of ``column`` matches the wildcard. `None` is returned
            if both `strings` and `patterns` are empty, and hence no match is
            possible.
        """
        if self.items:
            raise NotImplementedError("Expressions that are processed into items cannot be transformed "
                                      "automatically into queries.")
        if self.patterns:
            raise NotImplementedError("Regular expression patterns are not yet supported here.")
        terms = []
        if len(self.strings) == 1:
            terms.append(column == self.strings[0])
        elif len(self.strings) > 1:
            terms.append(column.in_(self.strings))
        # TODO: append terms for regular expressions
        if not terms:
            return None
        return sqlalchemy.sql.or_(*terms)
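
    # Illustrative sketch of `makeWhereExpression`, assuming a plain SQLAlchemy
    # string column; the column and collection names are hypothetical.
    #
    #     import sqlalchemy
    #     w = CategorizedWildcard.fromExpression(["raw", "calexp"])
    #     clause = w.makeWhereExpression(sqlalchemy.sql.column("name"))
    #     # ``clause`` renders roughly as ``name IN ('raw', 'calexp')`` and can
    #     # be used directly as a WHERE criterion in a SELECT.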

    strings: List[str]
    """Explicit string values found in the wildcard (`list` [ `str` ]).
    """

    patterns: List[re.Pattern]
    """Regular expression patterns found in the wildcard
    (`list` [ `re.Pattern` ]).
    """

    items: List[Tuple[str, Any]]
    """Two-item tuples that relate string values to other objects
    (`list` [ `tuple` [ `str`, `Any` ] ]).
    """


class DatasetTypeRestriction:
    """An immutable set-like object that represents a restriction on the
    dataset types to search for within a collection.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    names : `frozenset` [`str`] or `...`
        The names of the dataset types included in the restriction, or `...`
        to permit a search for any dataset type.

    Notes
    -----
    This class does not inherit from `collections.abc.Set` (and does not
    implement the full set interface) because it is not always iterable and
    sometimes has no length (i.e. when ``names`` is ``...``).
    """
    def __init__(self, names: Union[FrozenSet[str], EllipsisType]):
        self.names = names

    __slots__ = ("names",)

    @classmethod
    def fromExpression(cls, expression: Any) -> DatasetTypeRestriction:
        """Process a general expression to construct a `DatasetTypeRestriction`
        instance.

        Parameters
        ----------
        expression
            May be:

            - a `DatasetType` instance;
            - a `str` dataset type name;
            - any non-mapping iterable containing either of the above;
            - the special value `...`;
            - another `DatasetTypeRestriction` instance (passed through
              unchanged).

        Returns
        -------
        restriction : `DatasetTypeRestriction`
            A `DatasetTypeRestriction` instance.
        """
        if isinstance(expression, cls):
            return expression
        wildcard = CategorizedWildcard.fromExpression(expression, allowPatterns=False,
                                                      coerceUnrecognized=lambda d: d.name)
        if wildcard is Ellipsis:
            return cls.any
        else:
            return cls(frozenset(wildcard.strings))
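
    # Illustrative sketch (hypothetical dataset type names): explicit names
    # become a frozenset, while `...` maps to the shared `any` instance.
    #
    #     r = DatasetTypeRestriction.fromExpression(["bias", "flat"])
    #     r.names == frozenset({"bias", "flat"})                                    # True
    #     DatasetTypeRestriction.fromExpression(...) is DatasetTypeRestriction.any  # True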

    def __contains__(self, datasetType: DatasetType) -> bool:
        return (self.names is Ellipsis or datasetType.name in self.names
                or (datasetType.isComponent()
                    and DatasetType.splitDatasetTypeName(datasetType.name)[0] in self.names))

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, DatasetTypeRestriction):
            return self.names == other.names
        else:
            return False

    def __str__(self) -> str:
        if self.names is Ellipsis:
            return "..."
        else:
            return "{{{}}}".format(", ".join(self.names))

    def __repr__(self) -> str:
        if self.names is Ellipsis:
            return "DatasetTypeRestriction(...)"
        else:
            return f"DatasetTypeRestriction({self.names!r})"

    @staticmethod
    def union(*args: DatasetTypeRestriction) -> DatasetTypeRestriction:
        """Merge one or more `DatasetTypeRestriction` instances, returning one
        that allows any of the dataset types included in any of them.

        Parameters
        ----------
        args
            Positional arguments are `DatasetTypeRestriction` instances.
        """
        result: Set[str] = set()
        for a in args:
            if a.names is Ellipsis:
                return DatasetTypeRestriction.any
            else:
                result.update(a.names)
        return DatasetTypeRestriction(frozenset(result))
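
    # Illustrative sketch of `union` (hypothetical dataset type names): the
    # result permits everything permitted by any argument, and an unrestricted
    # argument short-circuits to the shared `any` instance.
    #
    #     a = DatasetTypeRestriction.fromExpression("bias")
    #     b = DatasetTypeRestriction.fromExpression("flat")
    #     DatasetTypeRestriction.union(a, b).names == frozenset({"bias", "flat"})              # True
    #     DatasetTypeRestriction.union(a, DatasetTypeRestriction.any) is DatasetTypeRestriction.any  # True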

    names: Union[FrozenSet[str], EllipsisType]
    """The names of the dataset types included (i.e. permitted) by the
    restriction, or the special value ``...`` to permit all dataset types
    (`frozenset` [ `str` ] or ``...``).
    """

    any: ClassVar[DatasetTypeRestriction]
    """A special `DatasetTypeRestriction` instance that permits any dataset
    type.

    This instance should be preferred instead of constructing a new one with
    ``...``, when possible, but it should not be assumed to be the only such
    instance (i.e. don't use ``is`` instead of ``==`` for comparisons).
    """


DatasetTypeRestriction.any = DatasetTypeRestriction(Ellipsis)


def _yieldCollectionRecords(
    manager: CollectionManager,
    record: CollectionRecord,
    restriction: DatasetTypeRestriction,
    datasetType: Optional[DatasetType] = None,
    collectionType: Optional[CollectionType] = None,
    done: Optional[Set[str]] = None,
    flattenChains: bool = True,
    includeChains: Optional[bool] = None,
) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
414 """A helper function containing common logic for `CollectionSearch.iter`
415 and `CollectionQuery.iter`: recursively yield `CollectionRecord` only they
416 match the criteria given in other arguments.
418 Parameters
419 ----------
420 manager : `CollectionManager`
421 Object responsible for managing the collection tables in a `Registry`.
422 record : `CollectionRecord`
423 Record to conditionally yield.
424 restriction : `DatasetTypeRestriction`
425 A restriction that must match ``datasetType`` (if given) in order to
426 yield ``record``.
427 datasetType : `DatasetType`, optional
428 If given, a `DatasetType` instance that must be included in
429 ``restriction`` in order to yield ``record``.
430 collectionType : `CollectionType`, optional
431 If given, a `CollectionType` enumeration value that must match
432 ``record.type`` in order for ``record`` to be yielded.
433 done : `set` [ `str` ], optional
434 A `set` of already-yielded collection names; if provided, ``record``
435 will only be yielded if it is not already in ``done``, and ``done``
436 will be updated to include it on return.
437 flattenChains : `bool`, optional
438 If `True` (default) recursively yield the child collections of
439 `~CollectionType.CHAINED` collections.
440 includeChains : `bool`, optional
441 If `False`, return records for `~CollectionType.CHAINED` collections
442 themselves. The default is the opposite of ``flattenChains``: either
443 return records for CHAINED collections or their children, but not both.
445 Yields
446 ------
447 record : `CollectionRecord`
448 Matching collection records.
449 restriction : `DatasetTypeRestriction`
450 The given dataset type restriction.
451 """
    if done is None:
        done = set()
    includeChains = includeChains if includeChains is not None else not flattenChains
    if collectionType is None or record.type is collectionType:
        done.add(record.name)
        if record.type is not CollectionType.CHAINED or includeChains:
            yield record, restriction
    if flattenChains and record.type is CollectionType.CHAINED:
        done.add(record.name)
        # We know this is a ChainedCollectionRecord because of the enum value,
        # but MyPy doesn't.
        yield from record.children.iterPairs(  # type: ignore
            manager,
            datasetType=datasetType,
            collectionType=collectionType,
            done=done,
            flattenChains=flattenChains,
            includeChains=includeChains,
        )


class CollectionSearch:
    """An ordered search path of collections and dataset type restrictions.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    items : `list` [ `tuple` [ `str`, `DatasetTypeRestriction` ] ]
        Tuples that relate a collection name to the restriction on dataset
        types to search for within it. This is not a mapping because the
        same collection name may appear multiple times with different
        restrictions.

    Notes
    -----
    A `CollectionSearch` is used to find a single dataset according to its
    dataset type and data ID, giving preference to collections in the order
    in which they are specified. A `CollectionQuery` can be constructed from
    a broader range of expressions but does not order the collections to be
    searched.

    `CollectionSearch` is iterable, yielding two-element tuples of `str`
    (collection name) and `DatasetTypeRestriction`.

    A `CollectionSearch` instance constructed properly (e.g. via
    `fromExpression`) is a unique representation of a particular search path;
    it is exactly the same internally and compares as equal to any other
    `CollectionSearch` constructed from an equivalent expression,
    regardless of how different the original expressions appear.
    """
    def __init__(self, items: List[Tuple[str, DatasetTypeRestriction]]):
        assert all(isinstance(v, DatasetTypeRestriction) for _, v in items)
        self._items = items

    __slots__ = ("_items",)

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionSearch:
        """Process a general expression to construct a `CollectionSearch`
        instance.

        Parameters
        ----------
        expression
            May be:

            - a `str` collection name;
            - a two-element `tuple` containing a `str` and any expression
              accepted by `DatasetTypeRestriction.fromExpression`;
            - any non-mapping iterable containing either of the above;
            - a mapping from `str` to any expression accepted by
              `DatasetTypeRestriction.fromExpression`;
            - another `CollectionSearch` instance (passed through
              unchanged).

            Multiple consecutive entries for the same collection with
            different restrictions will be merged. Non-consecutive entries
            will not, because that actually represents a different search
            path.

        Returns
        -------
        collections : `CollectionSearch`
            A `CollectionSearch` instance.
        """
        # First see if this is already a CollectionSearch; just pass that
        # through unchanged. This lets us standardize expressions (and turn
        # single-pass iterators into multi-pass iterables) in advance and pass
        # them down to other routines that accept arbitrary expressions.
        if isinstance(expression, cls):
            return expression
        wildcard = CategorizedWildcard.fromExpression(expression,
                                                      allowAny=False,
                                                      allowPatterns=False,
                                                      coerceItemValue=DatasetTypeRestriction.fromExpression,
                                                      defaultItemValue=DatasetTypeRestriction.any)
        assert wildcard is not Ellipsis
        assert not wildcard.patterns
        assert not wildcard.strings
        return cls(
            # Consolidate repetitions of the same collection name.
            [(name, DatasetTypeRestriction.union(*tuple(item[1] for item in items)))
             for name, items in itertools.groupby(wildcard.items, key=operator.itemgetter(0))]
        )
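
    # Illustrative sketch (hypothetical collection and dataset type names):
    # consecutive entries for the same collection are merged, and bare
    # collection names get an unrestricted restriction.
    #
    #     search = CollectionSearch.fromExpression(
    #         [("calib", "bias"), ("calib", "flat"), "refcats"]
    #     )
    #     [name for name, _ in search]                                 # ['calib', 'refcats']
    #     dict(search)["calib"].names == frozenset({"bias", "flat"})   # True
    #     dict(search)["refcats"] == DatasetTypeRestriction.any        # True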

    def iterPairs(
            self, manager: CollectionManager, *,
            datasetType: Optional[DatasetType] = None,
            collectionType: Optional[CollectionType] = None,
            done: Optional[Set[str]] = None,
            flattenChains: bool = True,
            includeChains: Optional[bool] = None,
    ) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
        """Like `iter`, but yield pairs of `CollectionRecord`,
        `DatasetTypeRestriction` instead of just the former.

        See `iter` for all parameter descriptions.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        restriction : `DatasetTypeRestriction`
            The given dataset type restriction.
        """
        if done is None:
            done = set()
        for name, restriction in self._items:
            if name not in done and (datasetType is None or datasetType in restriction):
                yield from _yieldCollectionRecords(
                    manager,
                    manager.find(name),
                    restriction,
                    datasetType=datasetType,
                    collectionType=collectionType,
                    done=done,
                    flattenChains=flattenChains,
                    includeChains=includeChains,
                )

    def iter(
            self, manager: CollectionManager, *,
            datasetType: Optional[DatasetType] = None,
            collectionType: Optional[CollectionType] = None,
            done: Optional[Set[str]] = None,
            flattenChains: bool = True,
            includeChains: Optional[bool] = None,
    ) -> Iterator[CollectionRecord]:
        """Iterate over collection records that match this instance and the
        given criteria, in order.

        This method is primarily intended for internal use by `Registry`;
        other callers should generally prefer `Registry.findDatasets` or
        other `Registry` query methods.

        Parameters
        ----------
        manager : `CollectionManager`
            Object responsible for managing the collection tables in a
            `Registry`.
        datasetType : `DatasetType`, optional
            If given, only yield collections whose dataset type restrictions
            include this dataset type.
        collectionType : `CollectionType`, optional
            If given, only yield collections of this type.
        done : `set`, optional
            A `set` containing the names of all collections already yielded;
            any collections whose names are already present in this set will
            not be yielded again, and those yielded will be added to it while
            iterating. If not provided, an empty `set` will be created and
            used internally to avoid duplicates.
        flattenChains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for `~CollectionType.CHAINED`
            collections themselves. The default is the opposite of
            ``flattenChains``: either return records for CHAINED collections
            or their children, but not both.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        """
        for record, _ in self.iterPairs(manager, datasetType=datasetType, collectionType=collectionType,
                                        done=done, flattenChains=flattenChains, includeChains=includeChains):
            yield record
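
    # Hedged sketch of how `Registry` internals might drive `iter`; the
    # ``manager`` object below stands in for a concrete `CollectionManager`
    # implementation and is purely hypothetical, as are the collection names.
    #
    #     search = CollectionSearch.fromExpression(["calib", "refcats"])
    #     run_names = [record.name
    #                  for record in search.iter(manager, collectionType=CollectionType.RUN)]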

    def __iter__(self) -> Iterator[Tuple[str, DatasetTypeRestriction]]:
        yield from self._items

    def __len__(self) -> int:
        return len(self._items)

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, CollectionSearch):
            return self._items == other._items
        else:
            return False

    def __str__(self) -> str:
        return "[{}]".format(", ".join(f"{k}: {v}" for k, v in self._items))

    def __repr__(self) -> str:
        return f"CollectionSearch({self._items!r})"


class CollectionQuery:
    """An unordered query for collections and dataset type restrictions.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    search : `CollectionSearch` or `...`
        An object representing an ordered search for explicitly-named
        collections (to be interpreted here as unordered), or the special
        value `...` indicating all collections. `...` should be accompanied
        by an empty ``patterns`` tuple.
    patterns : `tuple` of `re.Pattern`
        Regular expression patterns to match against collection names.

    Notes
    -----
    A `CollectionQuery` is used to find all matching datasets in any number
    of collections, or to find collections themselves.

    `CollectionQuery` is expected to be rarely used outside of `Registry`
    (which uses it to back several of its "query" methods that take general
    expressions for collections), but it may occasionally be useful outside
    `Registry` as a way to preprocess expressions that contain single-pass
    iterators into a form that can be used to call those `Registry` methods
    multiple times.
    """
    def __init__(self, search: Union[CollectionSearch, EllipsisType], patterns: Tuple[re.Pattern, ...]):
        self._search = search
        self._patterns = patterns

    __slots__ = ("_search", "_patterns")

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionQuery:
        """Process a general expression to construct a `CollectionQuery`
        instance.

        Parameters
        ----------
        expression
            May be:

            - a `str` collection name;
            - a two-element `tuple` containing a `str` and any expression
              accepted by `DatasetTypeRestriction.fromExpression`;
            - an `re.Pattern` instance to match (with `re.Pattern.fullmatch`)
              against collection names;
            - any non-mapping iterable containing any of the above;
            - a mapping from `str` to any expression accepted by
              `DatasetTypeRestriction.fromExpression`;
            - a `CollectionSearch` instance;
            - another `CollectionQuery` instance (passed through unchanged).

            Multiple consecutive entries for the same collection with
            different restrictions will be merged. Non-consecutive entries
            will not, because that actually represents a different search
            path.

        Returns
        -------
        collections : `CollectionQuery`
            A `CollectionQuery` instance.
        """
        if isinstance(expression, cls):
            return expression
        if expression is Ellipsis:
            return cls.any
        if isinstance(expression, CollectionSearch):
            return cls(search=expression, patterns=())
        wildcard = CategorizedWildcard.fromExpression(expression,
                                                      allowAny=True,
                                                      allowPatterns=True,
                                                      coerceItemValue=DatasetTypeRestriction.fromExpression,
                                                      defaultItemValue=DatasetTypeRestriction.any)
        if wildcard is Ellipsis:
            return cls.any
        assert not wildcard.strings
        return cls(search=CollectionSearch.fromExpression(wildcard),
                   patterns=tuple(wildcard.patterns))
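
    # Illustrative sketch (hypothetical collection names): `...` maps to the
    # shared `any` instance, while a list of explicit names is wrapped in an
    # unordered query with no regular-expression patterns.
    #
    #     CollectionQuery.fromExpression(...) is CollectionQuery.any   # True
    #     q = CollectionQuery.fromExpression(["calib", "refcats"])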

    def iterPairs(
            self, manager: CollectionManager, *,
            datasetType: Optional[DatasetType] = None,
            collectionType: Optional[CollectionType] = None,
            flattenChains: bool = True,
            includeChains: Optional[bool] = None,
    ) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
        """Like `iter`, but yield pairs of `CollectionRecord`,
        `DatasetTypeRestriction` instead of just the former.

        See `iter` for all parameter descriptions.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        restriction : `DatasetTypeRestriction`
            The given dataset type restriction.
        """
        if self._search is Ellipsis:
            for record in manager:
                yield from _yieldCollectionRecords(
                    manager,
                    record,
                    DatasetTypeRestriction.any,
                    datasetType=datasetType,
                    collectionType=collectionType,
                    flattenChains=flattenChains,
                    includeChains=includeChains,
                )
        else:
            done: Set[str] = set()
            yield from self._search.iterPairs(
                manager,
                datasetType=datasetType,
                collectionType=collectionType,
                done=done,
                flattenChains=flattenChains,
                includeChains=includeChains,
            )
            for record in manager:
                if record.name not in done and any(p.fullmatch(record.name) for p in self._patterns):
                    yield from _yieldCollectionRecords(
                        manager,
                        record,
                        DatasetTypeRestriction.any,
                        datasetType=datasetType,
                        collectionType=collectionType,
                        done=done,
                        flattenChains=flattenChains,
                        includeChains=includeChains,
                    )

    def iter(
            self, manager: CollectionManager, *,
            datasetType: Optional[DatasetType] = None,
            collectionType: Optional[CollectionType] = None,
            flattenChains: bool = True,
            includeChains: Optional[bool] = None,
    ) -> Iterator[CollectionRecord]:
        """Iterate over collection records that match this instance and the
        given criteria, in an arbitrary order.

        This method is primarily intended for internal use by `Registry`;
        other callers should generally prefer `Registry.queryDatasets` or
        other `Registry` query methods.

        Parameters
        ----------
        manager : `CollectionManager`
            Object responsible for managing the collection tables in a
            `Registry`.
        datasetType : `DatasetType`, optional
            If given, only yield collections whose dataset type restrictions
            include this dataset type.
        collectionType : `CollectionType`, optional
            If given, only yield collections of this type.
        flattenChains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for `~CollectionType.CHAINED`
            collections themselves. The default is the opposite of
            ``flattenChains``: either return records for CHAINED collections
            or their children, but not both.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        """
        for record, _ in self.iterPairs(manager, datasetType=datasetType, collectionType=collectionType,
                                        flattenChains=flattenChains, includeChains=includeChains):
            yield record

    any: ClassVar[CollectionQuery]
    """A special `CollectionQuery` instance that matches any collection.

    This instance should be preferred instead of constructing a new one with
    ``...``, when possible, but it should not be assumed to be the only such
    instance.
    """


CollectionQuery.any = CollectionQuery(Ellipsis, ())