# python/lsst/daf/butler/registry/wildcards.py

# This file is part of daf_butler.
#
# Developed for the LSST Data Management System.
# This product includes software developed by the LSST Project
# (http://www.lsst.org).
# See the COPYRIGHT file at the top-level directory of this distribution
# for details of code ownership.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import annotations

__all__ = (
    "CategorizedWildcard",
    "CollectionQuery",
    "CollectionSearch",
    "DatasetTypeRestriction",
)

from dataclasses import dataclass
import itertools
import operator
import re
from typing import (
    Any,
    Callable,
    ClassVar,
    FrozenSet,
    Iterator,
    List,
    Optional,
    Set,
    Tuple,
    TYPE_CHECKING,
    Union,
)

import sqlalchemy

from ..core import DatasetType
from ..core.utils import iterable
from ._collectionType import CollectionType

if TYPE_CHECKING:
    from .interfaces import CollectionManager, CollectionRecord

    # Workaround for `...` not having an exposed type in Python, borrowed from
    # https://github.com/python/typing/issues/684#issuecomment-548203158
    # Along with that, we need to use `Ellipsis` instead of `...` for the
    # actual sentinel value internally, and tell MyPy to ignore conversions
    # from `...` to `Ellipsis` at the public-interface boundary.
    #
    # `Ellipsis` and `EllipsisType` should be directly imported from this
    # module by related code that needs them; hopefully that will stay confined
    # to `lsst.daf.butler.registry`. Putting these in __all__ is bad for
    # Sphinx, and probably more confusing than helpful overall.
    from enum import Enum

    class EllipsisType(Enum):
        Ellipsis = "..."

    Ellipsis = EllipsisType.Ellipsis

else:
    EllipsisType = type(Ellipsis)
    Ellipsis = Ellipsis
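

# Illustrative example (an addition to this listing, not part of the original
# module): the sentinel and its type are typically used together in signatures
# that accept either an explicit value or "anything". The function name below
# is hypothetical.
def _exampleUnconstrained(names: Union[FrozenSet[str], EllipsisType] = Ellipsis) -> bool:
    # True when the caller asked for the unconstrained ("match anything") case.
    return names is Ellipsis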


@dataclass
class CategorizedWildcard:
    """The results of preprocessing a wildcard expression to separate match
    patterns from strings.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).
    """

    @classmethod
    def fromExpression(cls, expression: Any, *,
                       allowAny: bool = True,
                       allowPatterns: bool = True,
                       coerceUnrecognized: Optional[Callable[[Any], Union[Tuple[str, Any], str]]] = None,
                       coerceItemValue: Optional[Callable[[Any], Any]] = None,
                       defaultItemValue: Optional[Any] = None,
                       ) -> Union[CategorizedWildcard, EllipsisType]:
        """Categorize a wildcard expression.

        Parameters
        ----------
        expression
            The expression to categorize. May be any of:
            - `str`;
            - `re.Pattern` (only if ``allowPatterns`` is `True`);
            - objects recognized by ``coerceUnrecognized`` (if provided);
            - two-element tuples of (`str`, value) where value is recognized
              by ``coerceItemValue`` (if provided);
            - a non-`str`, non-mapping iterable containing any of the above;
            - the special value `...` (only if ``allowAny`` is `True`), which
              matches anything;
            - a mapping from `str` to a value recognized by
              ``coerceItemValue`` (if provided);
            - a `CategorizedWildcard` instance (passed through unchanged if
              it meets the requirements specified by keyword arguments).
        allowAny : `bool`, optional
            If `False` (`True` is default), raise `TypeError` if `...` is
            encountered.
        allowPatterns : `bool`, optional
            If `False` (`True` is default), raise `TypeError` if a `re.Pattern`
            is encountered, or if ``expression`` is a `CategorizedWildcard`
            with `patterns` not empty.
        coerceUnrecognized : `Callable`, optional
            A callback that takes a single argument of arbitrary type and
            returns either a `str` (appended to `strings`) or a `tuple` of
            (`str`, `Any`) (appended to `items`). This will be called on
            objects of unrecognized type. Exceptions will be re-raised as
            `TypeError` (and chained).
        coerceItemValue : `Callable`, optional
            If provided, ``expression`` may be a mapping from `str` to any
            type that can be passed to this function; the result of that call
            will be stored instead as the value in ``self.items``.
        defaultItemValue : `Any`, optional
            If provided, combine this value with any string values encountered
            (including any returned by ``coerceUnrecognized``) to form a
            `tuple` and add it to `items`, guaranteeing that `strings` will be
            empty. Patterns are never added to `items`.

        Returns
        -------
        categorized : `CategorizedWildcard` or ``...``
            The struct describing the wildcard. ``...`` is passed through
            unchanged.

        Raises
        ------
        TypeError
            Raised if an unsupported type is found in the expression.
        """
        assert expression is not None
        # See if we were given ...; just return that if we were.
        if expression is Ellipsis:
            if not allowAny:
                raise TypeError("This expression may not be unconstrained.")
            return Ellipsis
        if isinstance(expression, cls):
            # This is already a CategorizedWildcard. Make sure it meets the
            # reqs. implied by the kwargs we got.
            if not allowPatterns and expression.patterns:
                raise TypeError(f"Regular expression(s) {expression.patterns} "
                                f"are not allowed in this context.")
            if defaultItemValue is not None and expression.strings:
                if expression.items:
                    raise TypeError("Incompatible preprocessed expression: an ordered sequence of str is "
                                    "needed, but the original order was lost in the preprocessing.")
                return cls(strings=[], patterns=expression.patterns,
                           items=[(k, defaultItemValue) for k in expression.strings])
            elif defaultItemValue is None and expression.items:
                if expression.strings:
                    raise TypeError("Incompatible preprocessed expression: an ordered sequence of items is "
                                    "needed, but the original order was lost in the preprocessing.")
                return cls(strings=[k for k, _ in expression.items], patterns=expression.patterns, items=[])
            else:
                # Original expression was created with keyword arguments that
                # were at least as restrictive as what we just got; pass it
                # through.
                return expression

        # If we get here, we know we'll be creating a new instance.
        # Initialize an empty one now.
        self = cls(strings=[], patterns=[], items=[])

        # If mappings are allowed, see if we were given a single mapping by
        # trying to get items.
        if coerceItemValue is not None:
            rawItems = None
            try:
                rawItems = expression.items()
            except AttributeError:
                pass
            if rawItems is not None:
                for k, v in rawItems:
                    try:
                        self.items.append((k, coerceItemValue(v)))
                    except Exception as err:
                        raise TypeError(f"Could not coerce mapping value '{v}' for key '{k}'.") from err
                return self

        # Not ..., a CategorizedWildcard instance, or a mapping. Just
        # process scalars or an iterable. We put the body of the loop inside
        # a local function so we can recurse after coercion.

        def process(element: Any, alreadyCoerced: bool = False) -> None:
            if isinstance(element, str):
                if defaultItemValue is not None:
                    self.items.append((element, defaultItemValue))
                else:
                    self.strings.append(element)
                return
            if allowPatterns and isinstance(element, re.Pattern):
                self.patterns.append(element)
                return
            if coerceItemValue is not None:
                try:
                    k, v = element
                except TypeError:
                    pass
                else:
                    if not alreadyCoerced:
                        if not isinstance(k, str):
                            raise TypeError(f"Item key '{k}' is not a string.")
                        try:
                            v = coerceItemValue(v)
                        except Exception as err:
                            raise TypeError(f"Could not coerce tuple item value '{v}' for key '{k}'."
                                            ) from err
                    self.items.append((k, v))
                    return
            if alreadyCoerced:
                raise TypeError(f"Object '{element}' returned by coercion function is still unrecognized.")
            if coerceUnrecognized is not None:
                try:
                    process(coerceUnrecognized(element), alreadyCoerced=True)
                except Exception as err:
                    raise TypeError(f"Could not coerce expression element '{element}'.") from err
            else:
                raise TypeError(f"Unsupported object in wildcard expression: '{element}'.")

        for element in iterable(expression):
            process(element)
        return self

    def makeWhereExpression(self, column: sqlalchemy.sql.ColumnElement
                            ) -> Optional[sqlalchemy.sql.ColumnElement]:
        """Transform the wildcard into a SQLAlchemy boolean expression suitable
        for use in a WHERE clause.

        Parameters
        ----------
        column : `sqlalchemy.sql.ColumnElement`
            A string column in a table or query that should be compared to the
            wildcard expression.

        Returns
        -------
        where : `sqlalchemy.sql.ColumnElement` or `None`
            A boolean SQL expression that evaluates to true if and only if
            the value of ``column`` matches the wildcard. `None` is returned
            if both `strings` and `patterns` are empty, and hence no match is
            possible.
        """
        if self.items:
            raise NotImplementedError("Expressions that are processed into items cannot be transformed "
                                      "automatically into queries.")
        if self.patterns:
            raise NotImplementedError("Regular expression patterns are not yet supported here.")
        terms = []
        if len(self.strings) == 1:
            terms.append(column == self.strings[0])
        elif len(self.strings) > 1:
            terms.append(column.in_(self.strings))
        # TODO: append terms for regular expressions
        if not terms:
            return None
        return sqlalchemy.sql.or_(*terms)

    strings: List[str]
    """Explicit string values found in the wildcard (`list` [ `str` ]).
    """

    patterns: List[re.Pattern]
    """Regular expression patterns found in the wildcard
    (`list` [ `re.Pattern` ]).
    """

    items: List[Tuple[str, Any]]
    """Two-item tuples that relate string values to other objects
    (`list` [ `tuple` [ `str`, `Any` ] ]).
    """
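

# Illustrative example (an addition to this listing, not part of the original
# module): `fromExpression` separates literal names from regular-expression
# patterns, and `makeWhereExpression` turns literal names into a SQLAlchemy
# clause. The function name and the string values are hypothetical.
def _demoCategorizedWildcard() -> None:
    wildcard = CategorizedWildcard.fromExpression(["calexp", re.compile(r"coadd_.+")])
    assert wildcard is not Ellipsis
    assert wildcard.strings == ["calexp"]
    assert [p.pattern for p in wildcard.patterns] == ["coadd_.+"]
    # Only literal strings can be turned into a WHERE clause; a wildcard with
    # patterns or items raises NotImplementedError in makeWhereExpression.
    column = sqlalchemy.sql.column("name")
    where = CategorizedWildcard(strings=["calexp", "src"], patterns=[],
                                items=[]).makeWhereExpression(column)
    assert where is not None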


class DatasetTypeRestriction:
    """An immutable set-like object that represents a restriction on the
    dataset types to search for within a collection.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    names : `frozenset` [`str`] or `...`
        The names of the dataset types included in the restriction, or `...`
        to permit a search for any dataset type.

    Notes
    -----
    This class does not inherit from `collections.abc.Set` (and does not
    implement the full set interface) because it is not always iterable and
    sometimes has no length (i.e. when ``names`` is ``...``).
    """
    def __init__(self, names: Union[FrozenSet[str], EllipsisType]):
        self.names = names

    __slots__ = ("names",)

    @classmethod
    def fromExpression(cls, expression: Any) -> DatasetTypeRestriction:
        """Process a general expression to construct a `DatasetTypeRestriction`
        instance.

        Parameters
        ----------
        expression
            May be:
            - a `DatasetType` instance;
            - a `str` dataset type name;
            - any non-mapping iterable containing either of the above;
            - the special value `...`;
            - another `DatasetTypeRestriction` instance (passed through
              unchanged).

        Returns
        -------
        restriction : `DatasetTypeRestriction`
            A `DatasetTypeRestriction` instance.
        """
        if isinstance(expression, cls):
            return expression
        wildcard = CategorizedWildcard.fromExpression(expression, allowPatterns=False,
                                                      coerceUnrecognized=lambda d: d.name)
        if wildcard is Ellipsis:
            return cls.any
        else:
            return cls(frozenset(wildcard.strings))

    def __contains__(self, datasetType: DatasetType) -> bool:
        return (self.names is Ellipsis or datasetType.name in self.names
                or (datasetType.isComponent()
                    and DatasetType.splitDatasetTypeName(datasetType.name)[0] in self.names))

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, DatasetTypeRestriction):
            return self.names == other.names
        else:
            return False

    def __str__(self) -> str:
        if self.names is Ellipsis:
            return "..."
        else:
            return "{{{}}}".format(", ".join(self.names))

    def __repr__(self) -> str:
        if self.names is Ellipsis:
            return "DatasetTypeRestriction(...)"
        else:
            return f"DatasetTypeRestriction({self.names!r})"

    @staticmethod
    def union(*args: DatasetTypeRestriction) -> DatasetTypeRestriction:
        """Merge one or more `DatasetTypeRestriction` instances, returning one
        that allows any of the dataset types included in any of them.

        Parameters
        ----------
        args
            Positional arguments are `DatasetTypeRestriction` instances.
        """
        result: Set[str] = set()
        for a in args:
            if a.names is Ellipsis:
                return DatasetTypeRestriction.any
            else:
                result.update(a.names)
        return DatasetTypeRestriction(frozenset(result))

    names: Union[FrozenSet[str], EllipsisType]
    """The names of the dataset types included (i.e. permitted) by the
    restriction, or the special value ``...`` to permit all dataset types
    (`frozenset` [ `str` ] or ``...``).
    """

    any: ClassVar[DatasetTypeRestriction]
    """A special `DatasetTypeRestriction` instance that permits any dataset
    type.

    This instance should be preferred instead of constructing a new one with
    ``...``, when possible, but it should not be assumed to be the only such
    instance (i.e. don't use ``is`` instead of ``==`` for comparisons).
    """


DatasetTypeRestriction.any = DatasetTypeRestriction(Ellipsis)
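

# Illustrative example (an addition to this listing, not part of the original
# module): basic behavior of DatasetTypeRestriction, including the
# "match anything" sentinel and union(). The function name is hypothetical and
# the dataset type names are arbitrary strings.
def _demoDatasetTypeRestriction() -> None:
    restriction = DatasetTypeRestriction.fromExpression(["calexp", "src"])
    assert restriction.names == frozenset({"calexp", "src"})
    # The unconstrained expression maps to the shared sentinel instance.
    assert DatasetTypeRestriction.fromExpression(...) == DatasetTypeRestriction.any
    # union() collapses to .any as soon as any argument is unrestricted.
    assert (DatasetTypeRestriction.union(restriction, DatasetTypeRestriction.any)
            == DatasetTypeRestriction.any)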


def _yieldCollectionRecords(
    manager: CollectionManager,
    record: CollectionRecord,
    restriction: DatasetTypeRestriction,
    datasetType: Optional[DatasetType] = None,
    collectionType: Optional[CollectionType] = None,
    done: Optional[Set[str]] = None,
    flattenChains: bool = True,
    includeChains: Optional[bool] = None,
) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
    """A helper function containing common logic for `CollectionSearch.iter`
    and `CollectionQuery.iter`: recursively yield `CollectionRecord` objects
    only if they match the criteria given in the other arguments.

    Parameters
    ----------
    manager : `CollectionManager`
        Object responsible for managing the collection tables in a `Registry`.
    record : `CollectionRecord`
        Record to conditionally yield.
    restriction : `DatasetTypeRestriction`
        A restriction that must match ``datasetType`` (if given) in order to
        yield ``record``.
    datasetType : `DatasetType`, optional
        If given, a `DatasetType` instance that must be included in
        ``restriction`` in order to yield ``record``.
    collectionType : `CollectionType`, optional
        If given, a `CollectionType` enumeration value that must match
        ``record.type`` in order for ``record`` to be yielded.
    done : `set` [ `str` ], optional
        A `set` of already-yielded collection names; if provided, ``record``
        will only be yielded if it is not already in ``done``, and ``done``
        will be updated to include it on return.
    flattenChains : `bool`, optional
        If `True` (default), recursively yield the child collections of
        `~CollectionType.CHAINED` collections.
    includeChains : `bool`, optional
        If `True`, yield records for `~CollectionType.CHAINED` collections
        themselves. The default is the opposite of ``flattenChains``: either
        return records for CHAINED collections or their children, but not
        both.

    Yields
    ------
    record : `CollectionRecord`
        Matching collection records.
    restriction : `DatasetTypeRestriction`
        The given dataset type restriction.
    """
    if done is None:
        done = set()
    includeChains = includeChains if includeChains is not None else not flattenChains
    if collectionType is None or record.type is collectionType:
        done.add(record.name)
        if record.type is not CollectionType.CHAINED or includeChains:
            yield record, restriction
    if flattenChains and record.type is CollectionType.CHAINED:
        done.add(record.name)
        # We know this is a ChainedCollectionRecord because of the enum value,
        # but MyPy doesn't.
        yield from record.children.iterPairs(  # type: ignore
            manager,
            datasetType=datasetType,
            collectionType=collectionType,
            done=done,
            flattenChains=flattenChains,
            includeChains=includeChains,
        )
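

# Illustrative example (an addition to this listing, not part of the original
# module): how the flattenChains/includeChains defaults interact. The _Fake*
# classes are hypothetical stand-ins exposing only the attributes this helper
# actually uses, not the real CollectionManager/CollectionRecord interfaces.
def _demoYieldCollectionRecords() -> None:
    class _FakeRecord:
        def __init__(self, name: str, type: CollectionType):
            self.name = name
            self.type = type

    class _FakeManager:
        pass

    manager = _FakeManager()
    run = _FakeRecord("run1", CollectionType.RUN)
    chain = _FakeRecord("chain", CollectionType.CHAINED)
    # A non-CHAINED record is always yielded along with its restriction.
    assert [r.name for r, _ in _yieldCollectionRecords(
        manager, run, DatasetTypeRestriction.any)] == ["run1"]
    # With flattenChains=False the default for includeChains flips to True,
    # so the CHAINED record itself is yielded rather than its children.
    assert [r.name for r, _ in _yieldCollectionRecords(
        manager, chain, DatasetTypeRestriction.any, flattenChains=False)] == ["chain"]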


class CollectionSearch:
    """An ordered search path of collections and dataset type restrictions.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    items : `list` [ `tuple` [ `str`, `DatasetTypeRestriction` ] ]
        Tuples that relate a collection name to the restriction on dataset
        types to search for within it. This is not a mapping because the
        same collection name may appear multiple times with different
        restrictions.

    Notes
    -----
    A `CollectionSearch` is used to find a single dataset according to its
    dataset type and data ID, giving preference to collections in the order
    in which they are specified. A `CollectionQuery` can be constructed from
    a broader range of expressions but does not order the collections to be
    searched.

    `CollectionSearch` is iterable, yielding two-element tuples of `str`
    (collection name) and `DatasetTypeRestriction`.

    A `CollectionSearch` instance constructed properly (e.g. via
    `fromExpression`) is a unique representation of a particular search path;
    it is exactly the same internally and compares as equal to any other
    `CollectionSearch` constructed from an equivalent expression,
    regardless of how different the original expressions appear.
    """
    def __init__(self, items: List[Tuple[str, DatasetTypeRestriction]]):
        assert all(isinstance(v, DatasetTypeRestriction) for _, v in items)
        self._items = items

    __slots__ = ("_items",)

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionSearch:
        """Process a general expression to construct a `CollectionSearch`
        instance.

        Parameters
        ----------
        expression
            May be:
            - a `str` collection name;
            - a two-element `tuple` containing a `str` and any expression
              accepted by `DatasetTypeRestriction.fromExpression`;
            - any non-mapping iterable containing either of the above;
            - a mapping from `str` to any expression accepted by
              `DatasetTypeRestriction.fromExpression`;
            - another `CollectionSearch` instance (passed through
              unchanged).

            Multiple consecutive entries for the same collection with different
            restrictions will be merged. Non-consecutive entries will not,
            because that actually represents a different search path.

        Returns
        -------
        collections : `CollectionSearch`
            A `CollectionSearch` instance.
        """
        # First see if this is already a CollectionSearch; just pass that
        # through unchanged. This lets us standardize expressions (and turn
        # single-pass iterators into multi-pass iterables) in advance and pass
        # them down to other routines that accept arbitrary expressions.
        if isinstance(expression, cls):
            return expression
        wildcard = CategorizedWildcard.fromExpression(expression,
                                                      allowAny=False,
                                                      allowPatterns=False,
                                                      coerceItemValue=DatasetTypeRestriction.fromExpression,
                                                      defaultItemValue=DatasetTypeRestriction.any)
        assert wildcard is not Ellipsis
        assert not wildcard.patterns
        assert not wildcard.strings
        return cls(
            # Consolidate repetitions of the same collection name.
            [(name, DatasetTypeRestriction.union(*tuple(item[1] for item in items)))
             for name, items in itertools.groupby(wildcard.items, key=operator.itemgetter(0))]
        )

    def iterPairs(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionType: Optional[CollectionType] = None,
        done: Optional[Set[str]] = None,
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
        """Like `iter`, but yield pairs of `CollectionRecord`,
        `DatasetTypeRestriction` instead of just the former.

        See `iter` for all parameter descriptions.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        restriction : `DatasetTypeRestriction`
            The given dataset type restriction.
        """
        if done is None:
            done = set()
        for name, restriction in self._items:
            if name not in done and (datasetType is None or datasetType in restriction):
                yield from _yieldCollectionRecords(
                    manager,
                    manager.find(name),
                    restriction,
                    datasetType=datasetType,
                    collectionType=collectionType,
                    done=done,
                    flattenChains=flattenChains,
                    includeChains=includeChains,
                )

    def iter(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionType: Optional[CollectionType] = None,
        done: Optional[Set[str]] = None,
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[CollectionRecord]:
        """Iterate over collection records that match this instance and the
        given criteria, in order.

        This method is primarily intended for internal use by `Registry`;
        other callers should generally prefer `Registry.findDatasets` or
        other `Registry` query methods.

        Parameters
        ----------
        manager : `CollectionManager`
            Object responsible for managing the collection tables in a
            `Registry`.
        datasetType : `DatasetType`, optional
            If given, only yield collections whose dataset type restrictions
            include this dataset type.
        collectionType : `CollectionType`, optional
            If given, only yield collections of this type.
        done : `set`, optional
            A `set` containing the names of all collections already yielded;
            any collections whose names are already present in this set will
            not be yielded again, and those yielded will be added to it while
            iterating. If not provided, an empty `set` will be created and
            used internally to avoid duplicates.
        flattenChains : `bool`, optional
            If `True` (default), recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for `~CollectionType.CHAINED`
            collections themselves. The default is the opposite of
            ``flattenChains``: either return records for CHAINED collections
            or their children, but not both.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        """
        for record, _ in self.iterPairs(manager, datasetType=datasetType, collectionType=collectionType,
                                        done=done, flattenChains=flattenChains, includeChains=includeChains):
            yield record

    def __iter__(self) -> Iterator[Tuple[str, DatasetTypeRestriction]]:
        yield from self._items

    def __len__(self) -> int:
        return len(self._items)

    def __eq__(self, other: Any) -> bool:
        if isinstance(other, CollectionSearch):
            return self._items == other._items
        else:
            return False

    def __str__(self) -> str:
        return "[{}]".format(", ".join(f"{k}: {v}" for k, v in self._items))

    def __repr__(self) -> str:
        return f"CollectionSearch({self._items!r})"
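

# Illustrative example (an addition to this listing, not part of the original
# module): equivalent expressions normalize to equal CollectionSearch
# instances, and iter() flattens CHAINED collections through the
# _yieldCollectionRecords helper above. The _Fake* classes are hypothetical
# stand-ins for the real CollectionManager/CollectionRecord interfaces.
def _demoCollectionSearch() -> None:
    assert (CollectionSearch.fromExpression(["run1", "run2"])
            == CollectionSearch.fromExpression({"run1": ..., "run2": ...}))

    class _FakeRecord:
        def __init__(self, name: str, type: CollectionType, children: Any = None):
            self.name = name
            self.type = type
            self.children = children

    class _FakeManager:
        def __init__(self, *records: Any):
            self._records = {record.name: record for record in records}

        def find(self, name: str) -> Any:
            return self._records[name]

    a = _FakeRecord("a", CollectionType.RUN)
    b = _FakeRecord("b", CollectionType.RUN)
    chain = _FakeRecord("chain", CollectionType.CHAINED,
                        children=CollectionSearch.fromExpression(["a", "b"]))
    manager = _FakeManager(a, b, chain)
    # Searching the CHAINED collection yields its children, in order.
    search = CollectionSearch.fromExpression(["chain"])
    assert [record.name for record in search.iter(manager)] == ["a", "b"]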


class CollectionQuery:
    """An unordered query for collections and dataset type restrictions.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    search : `CollectionSearch` or `...`
        An object representing an ordered search for explicitly-named
        collections (to be interpreted here as unordered), or the special
        value `...` indicating all collections. `...` must be accompanied
        by ``patterns=None``.
    patterns : `tuple` of `re.Pattern`
        Regular expression patterns to match against collection names.

    Notes
    -----
    A `CollectionQuery` is used to find all matching datasets in any number
    of collections, or to find collections themselves.

    `CollectionQuery` is expected to be rarely used outside of `Registry`
    (which uses it to back several of its "query" methods that take general
    expressions for collections), but it may occasionally be useful outside
    `Registry` as a way to preprocess expressions that contain single-pass
    iterators into a form that can be used to call those `Registry` methods
    multiple times.
    """
    def __init__(self, search: Union[CollectionSearch, EllipsisType], patterns: Tuple[re.Pattern, ...]):
        self._search = search
        self._patterns = patterns

    __slots__ = ("_search", "_patterns")

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionQuery:
        """Process a general expression to construct a `CollectionQuery`
        instance.

        Parameters
        ----------
        expression
            May be:
            - a `str` collection name;
            - a two-element `tuple` containing a `str` and any expression
              accepted by `DatasetTypeRestriction.fromExpression`;
            - an `re.Pattern` instance to match (with `re.Pattern.fullmatch`)
              against collection names;
            - any non-mapping iterable containing any of the above;
            - a mapping from `str` to any expression accepted by
              `DatasetTypeRestriction.fromExpression`;
            - a `CollectionSearch` instance;
            - another `CollectionQuery` instance (passed through unchanged).

            Multiple consecutive entries for the same collection with different
            restrictions will be merged. Non-consecutive entries will not,
            because that actually represents a different search path.

        Returns
        -------
        collections : `CollectionQuery`
            A `CollectionQuery` instance.
        """
        if isinstance(expression, cls):
            return expression
        if expression is Ellipsis:
            return cls.any
        if isinstance(expression, CollectionSearch):
            return cls(search=expression, patterns=())
        wildcard = CategorizedWildcard.fromExpression(expression,
                                                      allowAny=True,
                                                      allowPatterns=True,
                                                      coerceItemValue=DatasetTypeRestriction.fromExpression,
                                                      defaultItemValue=DatasetTypeRestriction.any)
        if wildcard is Ellipsis:
            return cls.any
        assert not wildcard.strings, \
            "All bare strings should be transformed to (str, DatasetTypeRestriction) tuples."
        return cls(search=CollectionSearch.fromExpression(wildcard.items),
                   patterns=tuple(wildcard.patterns))

    def iterPairs(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionType: Optional[CollectionType] = None,
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[Tuple[CollectionRecord, DatasetTypeRestriction]]:
        """Like `iter`, but yield pairs of `CollectionRecord`,
        `DatasetTypeRestriction` instead of just the former.

        See `iter` for all parameter descriptions.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        restriction : `DatasetTypeRestriction`
            The given dataset type restriction.
        """
        if self._search is Ellipsis:
            for record in manager:
                yield from _yieldCollectionRecords(
                    manager,
                    record,
                    DatasetTypeRestriction.any,
                    datasetType=datasetType,
                    collectionType=collectionType,
                    flattenChains=flattenChains,
                    includeChains=includeChains,
                )
        else:
            done: Set[str] = set()
            yield from self._search.iterPairs(
                manager,
                datasetType=datasetType,
                collectionType=collectionType,
                done=done,
                flattenChains=flattenChains,
                includeChains=includeChains,
            )
            for record in manager:
                if record.name not in done and any(p.fullmatch(record.name) for p in self._patterns):
                    yield from _yieldCollectionRecords(
                        manager,
                        record,
                        DatasetTypeRestriction.any,
                        datasetType=datasetType,
                        collectionType=collectionType,
                        done=done,
                        flattenChains=flattenChains,
                        includeChains=includeChains,
                    )

    def iter(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionType: Optional[CollectionType] = None,
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[CollectionRecord]:
        """Iterate over collection records that match this instance and the
        given criteria, in an arbitrary order.

        This method is primarily intended for internal use by `Registry`;
        other callers should generally prefer `Registry.queryDatasets` or
        other `Registry` query methods.

        Parameters
        ----------
        manager : `CollectionManager`
            Object responsible for managing the collection tables in a
            `Registry`.
        datasetType : `DatasetType`, optional
            If given, only yield collections whose dataset type restrictions
            include this dataset type.
        collectionType : `CollectionType`, optional
            If given, only yield collections of this type.
        flattenChains : `bool`, optional
            If `True` (default), recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `True`, yield records for `~CollectionType.CHAINED`
            collections themselves. The default is the opposite of
            ``flattenChains``: either return records for CHAINED collections
            or their children, but not both.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        """
        for record, _ in self.iterPairs(manager, datasetType=datasetType, collectionType=collectionType,
                                        flattenChains=flattenChains, includeChains=includeChains):
            yield record

    any: ClassVar[CollectionQuery]
    """A special `CollectionQuery` instance that matches any collection.

    This instance should be preferred instead of constructing a new one with
    ``...``, when possible, but it should not be assumed to be the only such
    instance.
    """


CollectionQuery.any = CollectionQuery(Ellipsis, ())
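

# Illustrative example (an addition to this listing, not part of the original
# module): unlike CollectionSearch, a CollectionQuery accepts regular
# expressions and the "match everything" sentinel. The _Fake* classes are
# hypothetical stand-ins for the real CollectionManager/CollectionRecord
# interfaces.
def _demoCollectionQuery() -> None:
    assert CollectionQuery.fromExpression(...) is CollectionQuery.any

    class _FakeRecord:
        def __init__(self, name: str, type: CollectionType):
            self.name = name
            self.type = type

    class _FakeManager:
        def __init__(self, *records: Any):
            self._records = {record.name: record for record in records}

        def find(self, name: str) -> Any:
            return self._records[name]

        def __iter__(self) -> Iterator[Any]:
            return iter(self._records.values())

    manager = _FakeManager(_FakeRecord("run1", CollectionType.RUN),
                           _FakeRecord("u/someone/run2", CollectionType.RUN))
    # Explicit names and patterns can be mixed in one expression; patterns are
    # matched with re.Pattern.fullmatch against the collection names known to
    # the manager.
    query = CollectionQuery.fromExpression(["run1", re.compile(r"u/.+")])
    assert {record.name for record in query.iter(manager)} == {"run1", "u/someone/run2"}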