Coverage for python/lsst/daf/butler/registry/wildcards.py : 18%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = (
24 "CategorizedWildcard",
25 "CollectionQuery",
26 "CollectionSearch",
27 "DatasetTypeRestriction",
28)
30from dataclasses import dataclass
31import itertools
32import operator
33import re
34from typing import (
35 AbstractSet,
36 Any,
37 Callable,
38 ClassVar,
39 FrozenSet,
40 Iterator,
41 List,
42 Optional,
43 Set,
44 Tuple,
45 TYPE_CHECKING,
46 Union,
47)
49import sqlalchemy
51from ..core import DatasetType
52from ..core.utils import iterable
53from ._collectionType import CollectionType
if TYPE_CHECKING:
56 from .interfaces import CollectionManager, CollectionRecord
58 # Workaround for `...` not having an exposed type in Python, borrowed from
59 # https://github.com/python/typing/issues/684#issuecomment-548203158
60 # Along with that, we need to either use `Ellipsis` instead of `...` for
# the actual sentinel value internally, and tell MyPy to ignore conversions
62 # from `...` to `Ellipsis` at the public-interface boundary.
63 #
64 # `Ellipsis` and `EllipsisType` should be directly imported from this
65 # module by related code that needs them; hopefully that will stay confined
66 # to `lsst.daf.butler.registry`. Putting these in __all__ is bad for
67 # Sphinx, and probably more confusing than helpful overall.
68 from enum import Enum
70 class EllipsisType(Enum):
71 Ellipsis = "..."
73 Ellipsis = EllipsisType.Ellipsis
75else:
76 EllipsisType = type(Ellipsis)
77 Ellipsis = Ellipsis
@dataclass
class CategorizedWildcard:
    """The results of preprocessing a wildcard expression to separate match
    patterns from strings.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).
    """

    @classmethod
    def fromExpression(cls, expression: Any, *,
                       allowAny: bool = True,
                       allowPatterns: bool = True,
                       coerceUnrecognized: Optional[Callable[[Any], Union[Tuple[str, Any], str]]] = None,
                       coerceItemValue: Optional[Callable[[Any], Any]] = None,
                       defaultItemValue: Optional[Any] = None,
                       ) -> Union[CategorizedWildcard, EllipsisType]:
        """Categorize a wildcard expression.

        Parameters
        ----------
        expression
            The expression to categorize.  May be any of:

            - `str`;
            - `re.Pattern` (only if ``allowPatterns`` is `True`);
            - objects recognized by ``coerceUnrecognized`` (if provided);
            - two-element tuples of (`str`, value) where value is recognized
              by ``coerceItemValue`` (if provided);
            - a non-`str`, non-mapping iterable containing any of the above;
            - the special value `...` (only if ``allowAny`` is `True`), which
              matches anything;
            - a mapping from `str` to a value recognized by
              ``coerceItemValue`` (if provided);
            - a `CategorizedWildcard` instance (passed through unchanged if
              it meets the requirements specified by keyword arguments).
        allowAny : `bool`, optional
            If `False` (`True` is default) raise `TypeError` if `...` is
            encountered.
        allowPatterns : `bool`, optional
            If `False` (`True` is default) raise `TypeError` if a `re.Pattern`
            is encountered, or if ``expression`` is a `CategorizedWildcard`
            with `patterns` not empty.
        coerceUnrecognized : `Callable`, optional
            A callback that takes a single argument of arbitrary type and
            returns either a `str` - appended to `strings` - or a `tuple` of
            (`str`, `Any`) to be appended to `items`.  This will be called on
            objects of unrecognized type, with the return value added to
            `strings`.  Exceptions will be reraised as `TypeError` (and
            chained).
        coerceItemValue : `Callable`, optional
            If provided, ``expression`` may be a mapping from `str` to any
            type that can be passed to this function; the result of that call
            will be stored instead as the value in ``self.items``.
        defaultItemValue : `Any`, optional
            If provided, combine this value with any string values encountered
            (including any returned by ``coerceUnrecognized``) to form a
            `tuple` and add it to `items`, guaranteeing that `strings` will be
            empty.  Patterns are never added to `items`.

        Returns
        -------
        categorized : `CategorizedWildcard` or ``...``.
            The struct describing the wildcard.  ``...`` is passed through
            unchanged.

        Raises
        ------
        TypeError
            Raised if an unsupported type is found in the expression.
        """
        assert expression is not None
        # See if we were given ...; just return that if we were.
        if expression is Ellipsis:
            if not allowAny:
                raise TypeError("This expression may not be unconstrained.")
            return Ellipsis
        if isinstance(expression, cls):
            # This is already a CategorizedWildcard.  Make sure it meets the
            # reqs. implied by the kwargs we got.
            if not allowPatterns and expression.patterns:
                raise TypeError(f"Regular expression(s) {expression.patterns} "
                                f"are not allowed in this context.")
            if defaultItemValue is not None and expression.strings:
                if expression.items:
                    raise TypeError("Incompatible preprocessed expression: an ordered sequence of str is "
                                    "needed, but the original order was lost in the preprocessing.")
                # Caller wants items, but this instance holds bare strings:
                # pair each string with the default value.
                return cls(strings=[], patterns=expression.patterns,
                           items=[(k, defaultItemValue) for k in expression.strings])
            elif defaultItemValue is None and expression.items:
                if expression.strings:
                    raise TypeError("Incompatible preprocessed expression: an ordered sequence of items is "
                                    "needed, but the original order was lost in the preprocessing.")
                # Caller wants bare strings, but this instance holds items:
                # drop the item values.
                return cls(strings=[k for k, _ in expression.items], patterns=expression.patterns, items=[])
            else:
                # Original expression was created with keyword arguments that
                # were at least as restrictive as what we just got; pass it
                # through.
                return expression

        # If we get here, we know we'll be creating a new instance.
        # Initialize an empty one now.
        self = cls(strings=[], patterns=[], items=[])

        # If mappings are allowed, see if we were given a single mapping by
        # trying to get items.
        if coerceItemValue is not None:
            rawItems = None
            try:
                rawItems = expression.items()
            except AttributeError:
                pass
            if rawItems is not None:
                for k, v in rawItems:
                    try:
                        self.items.append((k, coerceItemValue(v)))
                    except Exception as err:
                        raise TypeError(f"Could not coerce mapping value '{v}' for key '{k}'.") from err
                return self

        # Not ..., a CategorizedWildcard instance, or a mapping.  Just
        # process scalars or an iterable.  We put the body of the loop inside
        # a local function so we can recurse after coercion.

        def process(element: Any, alreadyCoerced: bool = False) -> None:
            # Categorize one element of the expression, appending to
            # self.strings, self.patterns, or self.items as appropriate.
            if isinstance(element, str):
                if defaultItemValue is not None:
                    self.items.append((element, defaultItemValue))
                else:
                    self.strings.append(element)
                return
            if allowPatterns and isinstance(element, re.Pattern):
                self.patterns.append(element)
                return
            if coerceItemValue is not None:
                try:
                    k, v = element
                except TypeError:
                    # Not a two-element tuple; fall through to the coercion
                    # and error cases below.
                    pass
                else:
                    if not alreadyCoerced:
                        if not isinstance(k, str):
                            raise TypeError(f"Item key '{k}' is not a string.")
                        try:
                            v = coerceItemValue(v)
                        except Exception as err:
                            raise TypeError(f"Could not coerce tuple item value '{v}' for key '{k}'."
                                            ) from err
                    self.items.append((k, v))
                    return
            if alreadyCoerced:
                # coerceUnrecognized returned something we still cannot
                # categorize; give up rather than recurse again.
                raise TypeError(f"Object '{element}' returned by coercion function is still unrecognized.")
            if coerceUnrecognized is not None:
                try:
                    process(coerceUnrecognized(element), alreadyCoerced=True)
                except Exception as err:
                    raise TypeError(f"Could not coerce expression element '{element}'.") from err
            else:
                raise TypeError(f"Unsupported object in wildcard expression: '{element}'.")

        # `iterable` wraps scalars in a one-element iterable, so this handles
        # both a single element and a sequence of them.
        for element in iterable(expression):
            process(element)
        return self

    def makeWhereExpression(self, column: sqlalchemy.sql.ColumnElement
                            ) -> Optional[sqlalchemy.sql.ColumnElement]:
        """Transform the wildcard into a SQLAlchemy boolean expression suitable
        for use in a WHERE clause.

        Parameters
        ----------
        column : `sqlalchemy.sql.ColumnElement`
            A string column in a table or query that should be compared to the
            wildcard expression.

        Returns
        -------
        where : `sqlalchemy.sql.ColumnElement` or `None`
            A boolean SQL expression that evaluates to true if and only if
            the value of ``column`` matches the wildcard.  `None` is returned
            if both `strings` and `patterns` are empty, and hence no match is
            possible.

        Raises
        ------
        NotImplementedError
            Raised if `items` is not empty, or if `patterns` is not empty
            (regular-expression terms are not implemented yet; see TODO
            below).
        """
        if self.items:
            raise NotImplementedError("Expressions that are processed into items cannot be transformed "
                                      "automatically into queries.")
        if self.patterns:
            raise NotImplementedError("Regular expression patterns are not yet supported here.")
        terms = []
        if len(self.strings) == 1:
            # A single value compares more cheaply with == than with IN.
            terms.append(column == self.strings[0])
        elif len(self.strings) > 1:
            terms.append(column.in_(self.strings))
        # TODO: append terms for regular expressions
        if not terms:
            return None
        return sqlalchemy.sql.or_(*terms)

    strings: List[str]
    """Explicit string values found in the wildcard (`list` [ `str` ]).
    """

    patterns: List[re.Pattern]
    """Regular expression patterns found in the wildcard
    (`list` [ `re.Pattern` ]).
    """

    items: List[Tuple[str, Any]]
    """Two-item tuples that relate string values to other objects
    (`list` [ `tuple` [ `str`, `Any` ] ]).
    """
class DatasetTypeRestriction:
    """An immutable set-like object describing which dataset types may be
    searched for within a collection.

    Use `fromExpression` rather than the constructor in almost all cases;
    the constructor performs no validation of its input, which can produce
    confusing errors later on.

    Parameters
    ----------
    names : `frozenset` [`str`] or `...`
        The names of the dataset types included in the restriction, or `...`
        to permit a search for any dataset type.

    Notes
    -----
    This class deliberately does not inherit from `collections.abc.Set`
    (and does not implement the full set interface): when ``names`` is
    ``...`` the instance is not iterable and has no length.
    """

    __slots__ = ("names",)

    def __init__(self, names: Union[FrozenSet[str], EllipsisType]):
        self.names = names

    @classmethod
    def fromExpression(cls, expression: Any) -> DatasetTypeRestriction:
        """Construct a `DatasetTypeRestriction` from a general expression.

        Parameters
        ----------
        expression
            May be:

            - a `DatasetType` instance;
            - a `str` dataset type name;
            - any non-mapping iterable containing either of the above;
            - the special value `...`;
            - another `DatasetTypeRestriction` instance (passed through
              unchanged).

        Returns
        -------
        restriction : `DatasetTypeRestriction`
            A `DatasetTypeRestriction` instance.
        """
        if isinstance(expression, cls):
            return expression
        categorized = CategorizedWildcard.fromExpression(expression, allowPatterns=False,
                                                         coerceUnrecognized=lambda d: d.name)
        return cls.any if categorized is Ellipsis else cls(frozenset(categorized.strings))

    def __contains__(self, datasetType: DatasetType) -> bool:
        # Unrestricted instances contain every dataset type.
        if self.names is Ellipsis:
            return True
        if datasetType.name in self.names:
            return True
        # A component dataset type is included whenever its parent is.
        if datasetType.isComponent():
            parentName, _ = DatasetType.splitDatasetTypeName(datasetType.name)
            return parentName in self.names
        return False

    def __eq__(self, other: Any) -> bool:
        return self.names == other.names if isinstance(other, DatasetTypeRestriction) else False

    def __str__(self) -> str:
        return "..." if self.names is Ellipsis else "{{{}}}".format(", ".join(self.names))

    def __repr__(self) -> str:
        if self.names is Ellipsis:
            return "DatasetTypeRestriction(...)"
        return f"DatasetTypeRestriction({self.names!r})"

    @staticmethod
    def union(*args: DatasetTypeRestriction) -> DatasetTypeRestriction:
        """Return a `DatasetTypeRestriction` that permits any dataset type
        permitted by at least one of the given restrictions.

        Parameters
        ----------
        args
            Positional arguments are `DatasetTypeRestriction` instances.
        """
        merged: Set[str] = set()
        for restriction in args:
            # Anything unioned with "everything" is "everything".
            if restriction.names is Ellipsis:
                return DatasetTypeRestriction.any
            merged.update(restriction.names)
        return DatasetTypeRestriction(frozenset(merged))

    names: Union[FrozenSet[str], EllipsisType]
    """The names of the dataset types included (i.e. permitted) by the
    restriction, or the special value ``...`` to permit all dataset types
    (`frozenset` [ `str` ] or ``...``).
    """

    any: ClassVar[DatasetTypeRestriction]
    """A special `DatasetTypeRestriction` instance that permits any dataset
    type.

    This instance should be preferred instead of constructing a new one with
    ``...``, when possible, but it should not be assumed to be the only such
    instance (i.e. don't use ``is`` instead of ``==`` for comparisons).
    """


DatasetTypeRestriction.any = DatasetTypeRestriction(Ellipsis)
def _yieldCollectionRecords(
    manager: CollectionManager,
    record: CollectionRecord,
    restriction: DatasetTypeRestriction,
    datasetType: Optional[DatasetType] = None,
    collectionTypes: AbstractSet[CollectionType] = CollectionType.all(),
    done: Optional[Set[str]] = None,
    flattenChains: bool = True,
    includeChains: Optional[bool] = None,
) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
    """A helper function containing common logic for `CollectionSearch.iter`
    and `CollectionQuery.iter`: recursively yield `CollectionRecord` only if
    they match the criteria given in other arguments.

    Parameters
    ----------
    manager : `CollectionManager`
        Object responsible for managing the collection tables in a `Registry`.
    record : `CollectionRecord`
        Record to conditionally yield.
    restriction : `DatasetTypeRestriction`
        A restriction that must match ``datasetType`` (if given) in order to
        yield ``record``.
    datasetType : `DatasetType`, optional
        If given, a `DatasetType` instance that must be included in
        ``restriction`` in order to yield ``record``.
    collectionTypes : `AbstractSet` [ `CollectionType` ], optional
        If provided, only yield collections of these types.
    done : `set` [ `str` ], optional
        A `set` of already-yielded collection names; if provided, ``record``
        will only be yielded if it is not already in ``done``, and ``done``
        will be updated to include it on return.
    flattenChains : `bool`, optional
        If `True` (default) recursively yield the child collections of
        `~CollectionType.CHAINED` collections.
    includeChains : `bool`, optional
        If `True`, yield records for `~CollectionType.CHAINED` collections
        themselves.  The default is the opposite of ``flattenChains``: either
        return records for CHAINED collections or their children, but not
        both.

    Yields
    ------
    record : `CollectionRecord`
        Matching collection records.
    restriction : `DatasetTypeRestriction`
        The given dataset type restriction.
    """
    if done is None:
        done = set()
    # Default: yield either the chain itself or its children, never both.
    includeChains = includeChains if includeChains is not None else not flattenChains
    if record.type in collectionTypes:
        done.add(record.name)
        # CHAINED records are only yielded directly when includeChains is set.
        if record.type is not CollectionType.CHAINED or includeChains:
            yield record, restriction
    if flattenChains and record.type is CollectionType.CHAINED:
        done.add(record.name)
        # We know this is a ChainedCollectionRecord because of the enum value,
        # but MyPy doesn't.
        # NOTE(review): the parent ``restriction`` is not forwarded here; the
        # children are yielded with the restrictions stored in the chain
        # itself (``record.children`` is a `CollectionSearch`) — presumably
        # intentional, but worth confirming.
        yield from record.children.iterPairs(  # type: ignore
            manager,
            datasetType=datasetType,
            collectionTypes=collectionTypes,
            done=done,
            flattenChains=flattenChains,
            includeChains=includeChains,
        )
class CollectionSearch:
    """An ordered search path of collections and dataset type restrictions.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    items : `list` [ `tuple` [ `str`, `DatasetTypeRestriction` ] ]
        Tuples that relate a collection name to the restriction on dataset
        types to search for within it.  This is not a mapping because the
        same collection name may appear multiple times with different
        restrictions.

    Notes
    -----
    A `CollectionSearch` is used to find a single dataset according to its
    dataset type and data ID, giving preference to collections in the order
    they are specified.  A `CollectionQuery` can be constructed from a
    broader range of expressions but does not order the collections to be
    searched.

    `CollectionSearch` is iterable, yielding two-element tuples of `str`
    (collection name) and `DatasetTypeRestriction`.

    A `CollectionSearch` instance constructed properly (e.g. via
    `fromExpression`) is a unique representation of a particular search path;
    it is exactly the same internally and compares as equal to any
    `CollectionSearch` constructed from an equivalent expression,
    regardless of how different the original expressions appear.
    """
    def __init__(self, items: List[Tuple[str, DatasetTypeRestriction]]):
        assert all(isinstance(v, DatasetTypeRestriction) for _, v in items)
        self._items = items

    # Fixed: this was ``("_items")``, which is just the string "_items".
    # Python happens to accept a lone string as a single slot name, so it
    # worked by accident, but the one-element tuple is what was intended.
    __slots__ = ("_items",)

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionSearch:
        """Process a general expression to construct a `CollectionSearch`
        instance.

        Parameters
        ----------
        expression
            May be:

            - a `str` collection name;
            - a two-element `tuple` containing a `str` and any expression
              accepted by `DatasetTypeRestriction.fromExpression`;
            - any non-mapping iterable containing either of the above;
            - a mapping from `str` to any expression accepted by
              `DatasetTypeRestriction.fromExpression`;
            - another `CollectionSearch` instance (passed through
              unchanged).

            Multiple consecutive entries for the same collection with
            different restrictions will be merged.  Non-consecutive entries
            will not, because that actually represents a different search
            path.

        Returns
        -------
        collections : `CollectionSearch`
            A `CollectionSearch` instance.
        """
        # First see if this is already a CollectionSearch; just pass that
        # through unchanged.  This lets us standardize expressions (and turn
        # single-pass iterators into multi-pass iterables) in advance and pass
        # them down to other routines that accept arbitrary expressions.
        if isinstance(expression, cls):
            return expression
        wildcard = CategorizedWildcard.fromExpression(expression,
                                                      allowAny=False,
                                                      allowPatterns=False,
                                                      coerceItemValue=DatasetTypeRestriction.fromExpression,
                                                      defaultItemValue=DatasetTypeRestriction.any)
        # allowAny=False and allowPatterns=False guarantee a real
        # CategorizedWildcard with no patterns; defaultItemValue guarantees
        # everything landed in items rather than strings.
        assert wildcard is not Ellipsis
        assert not wildcard.patterns
        assert not wildcard.strings
        return cls(
            # Consolidate consecutive repetitions of the same collection name
            # by taking the union of their restrictions (groupby only merges
            # adjacent runs, which is exactly the semantics documented above).
            [(name, DatasetTypeRestriction.union(*(item[1] for item in items)))
             for name, items in itertools.groupby(wildcard.items, key=operator.itemgetter(0))]
        )

    def iterPairs(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: AbstractSet[CollectionType] = CollectionType.all(),
        done: Optional[Set[str]] = None,
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
        """Like `iter`, but yield pairs of `CollectionRecord`,
        `DatasetTypeRestriction` instead of just the former.

        See `iter` for all parameter descriptions.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        restriction : `DatasetTypeRestriction`
            The given dataset type restriction.
        """
        if done is None:
            done = set()
        for name, restriction in self._items:
            # Skip names already yielded, and names whose restriction
            # excludes the requested dataset type.
            if name not in done and (datasetType is None or datasetType in restriction):
                yield from _yieldCollectionRecords(
                    manager,
                    manager.find(name),
                    restriction,
                    datasetType=datasetType,
                    collectionTypes=collectionTypes,
                    done=done,
                    flattenChains=flattenChains,
                    includeChains=includeChains,
                )

    def iter(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: AbstractSet[CollectionType] = CollectionType.all(),
        done: Optional[Set[str]] = None,
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[CollectionRecord]:
        """Iterate over collection records that match this instance and the
        given criteria, in order.

        This method is primarily intended for internal use by `Registry`;
        other callers should generally prefer `Registry.findDatasets` or
        other `Registry` query methods.

        Parameters
        ----------
        manager : `CollectionManager`
            Object responsible for managing the collection tables in a
            `Registry`.
        datasetType : `DatasetType`, optional
            If given, only yield collections whose dataset type restrictions
            include this dataset type.
        collectionTypes : `AbstractSet` [ `CollectionType` ], optional
            If provided, only yield collections of these types.
        done : `set`, optional
            A `set` containing the names of all collections already yielded;
            any collections whose names are already present in this set will
            not be yielded again, and those yielded will be added to it while
            iterating.  If not provided, an empty `set` will be created and
            used internally to avoid duplicates.
        flattenChains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flattenChains``: either return records for CHAINED collections
            or their children, but not both.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        """
        for record, _ in self.iterPairs(manager, datasetType=datasetType, collectionTypes=collectionTypes,
                                        done=done, flattenChains=flattenChains, includeChains=includeChains):
            yield record

    def __iter__(self) -> Iterator[Tuple[str, DatasetTypeRestriction]]:
        yield from self._items

    def __len__(self) -> int:
        return len(self._items)

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, CollectionSearch):
            return self._items == other._items
        return False

    def __str__(self) -> str:
        return "[{}]".format(", ".join(f"{k}: {v}" for k, v in self._items))

    def __repr__(self) -> str:
        return f"CollectionSearch({self._items!r})"
class CollectionQuery:
    """An unordered query for collections and dataset type restrictions.

    Use `fromExpression` rather than the constructor in almost all cases;
    the constructor performs no validation of its inputs, which can produce
    confusing errors later on.

    Parameters
    ----------
    search : `CollectionSearch` or `...`
        An object representing an ordered search for explicitly-named
        collections (interpreted here as unordered), or the special value
        `...` indicating all collections.  `...` must be accompanied by
        empty ``patterns``.
    patterns : `tuple` of `re.Pattern`
        Regular expression patterns to match against collection names.

    Notes
    -----
    A `CollectionQuery` is used to find all matching datasets in any number
    of collections, or to find collections themselves.

    `CollectionQuery` is expected to be rarely used outside of `Registry`
    (which uses it to back several of its "query" methods that take general
    expressions for collections), but it may occasionally be useful outside
    `Registry` as a way to preprocess expressions that contain single-pass
    iterators into a form that can be used to call those `Registry` methods
    multiple times.
    """
    def __init__(self, search: Union[CollectionSearch, EllipsisType], patterns: Tuple[re.Pattern, ...]):
        self._search = search
        self._patterns = patterns

    __slots__ = ("_search", "_patterns")

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionQuery:
        """Construct a `CollectionQuery` from a general expression.

        Parameters
        ----------
        expression
            May be:

            - a `str` collection name;
            - a two-element `tuple` containing a `str` and any expression
              accepted by `DatasetTypeRestriction.fromExpression`;
            - an `re.Pattern` instance to match (with `re.Pattern.fullmatch`)
              against collection names;
            - any non-mapping iterable containing any of the above;
            - a mapping from `str` to any expression accepted by
              `DatasetTypeRestriction.fromExpression`;
            - a `CollectionSearch` instance;
            - another `CollectionQuery` instance (passed through unchanged).

            Multiple consecutive entries for the same collection with
            different restrictions will be merged.  Non-consecutive entries
            will not, because that actually represents a different search
            path.

        Returns
        -------
        collections : `CollectionQuery`
            A `CollectionQuery` instance.
        """
        # Pass-through and trivial cases first; order matters, because the
        # general wildcard machinery below does not handle these.
        if isinstance(expression, cls):
            return expression
        if expression is Ellipsis:
            return cls.any
        if isinstance(expression, CollectionSearch):
            return cls(search=expression, patterns=())
        categorized = CategorizedWildcard.fromExpression(
            expression,
            allowAny=True,
            allowPatterns=True,
            coerceItemValue=DatasetTypeRestriction.fromExpression,
            defaultItemValue=DatasetTypeRestriction.any,
        )
        if categorized is Ellipsis:
            return cls.any
        assert not categorized.strings, \
            "All bare strings should be transformed to (str, DatasetTypeRestriction) tuples."
        return cls(
            search=CollectionSearch.fromExpression(categorized.items),
            patterns=tuple(categorized.patterns),
        )

    def iterPairs(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: AbstractSet[CollectionType] = CollectionType.all(),
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
        """Like `iter`, but yield pairs of `CollectionRecord`,
        `DatasetTypeRestriction` instead of just the former.

        See `iter` for all parameter descriptions.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        restriction : `DatasetTypeRestriction`
            The given dataset type restriction.
        """
        if self._search is Ellipsis:
            # Unconstrained query: every collection the manager knows about
            # is a candidate, with no dataset type restriction.
            for candidate in manager:
                yield from _yieldCollectionRecords(
                    manager,
                    candidate,
                    DatasetTypeRestriction.any,
                    datasetType=datasetType,
                    collectionTypes=collectionTypes,
                    flattenChains=flattenChains,
                    includeChains=includeChains,
                )
        else:
            # First yield the explicitly-named collections, tracking what we
            # have already produced...
            alreadyYielded: Set[str] = set()
            yield from self._search.iterPairs(
                manager,
                datasetType=datasetType,
                collectionTypes=collectionTypes,
                done=alreadyYielded,
                flattenChains=flattenChains,
                includeChains=includeChains,
            )
            # ...then scan all collections for names matching any of the
            # regular expression patterns, skipping duplicates.
            for candidate in manager:
                if candidate.name in alreadyYielded:
                    continue
                if not any(p.fullmatch(candidate.name) for p in self._patterns):
                    continue
                yield from _yieldCollectionRecords(
                    manager,
                    candidate,
                    DatasetTypeRestriction.any,
                    datasetType=datasetType,
                    collectionTypes=collectionTypes,
                    done=alreadyYielded,
                    flattenChains=flattenChains,
                    includeChains=includeChains,
                )

    def iter(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: AbstractSet[CollectionType] = CollectionType.all(),
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[CollectionRecord]:
        """Iterate over collection records that match this instance and the
        given criteria, in an arbitrary order.

        This method is primarily intended for internal use by `Registry`;
        other callers should generally prefer `Registry.queryDatasets` or
        other `Registry` query methods.

        Parameters
        ----------
        manager : `CollectionManager`
            Object responsible for managing the collection tables in a
            `Registry`.
        datasetType : `DatasetType`, optional
            If given, only yield collections whose dataset type restrictions
            include this dataset type.
        collectionTypes : `AbstractSet` [ `CollectionType` ], optional
            If provided, only yield collections of these types.
        flattenChains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flattenChains``: either return records for CHAINED collections
            or their children, but not both.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        """
        for record, _ in self.iterPairs(manager, datasetType=datasetType, collectionTypes=collectionTypes,
                                        flattenChains=flattenChains, includeChains=includeChains):
            yield record

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, CollectionQuery):
            return False
        return self._search == other._search and self._patterns == other._patterns

    any: ClassVar[CollectionQuery]
    """A special `CollectionQuery` instance that matches any collection.

    This instance should be preferred instead of constructing a new one with
    ``...``, when possible, but it should not be assumed to be the only such
    instance.
    """


CollectionQuery.any = CollectionQuery(Ellipsis, ())