Coverage for python/lsst/daf/butler/registry/wildcards.py : 16%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# This file is part of daf_butler.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
21from __future__ import annotations
23__all__ = (
24 "CategorizedWildcard",
25 "CollectionContentRestriction",
26 "CollectionQuery",
27 "CollectionSearch",
28 "DatasetTypeRestriction",
29 "GovernorDimensionRestriction",
30)
32from collections import defaultdict
33from dataclasses import dataclass
34import re
35from typing import (
36 AbstractSet,
37 Any,
38 Callable,
39 ClassVar,
40 Dict,
41 FrozenSet,
42 Iterable,
43 Iterator,
44 List,
45 Optional,
46 Sequence,
47 Set,
48 Tuple,
49 TYPE_CHECKING,
50 Union,
51)
53import sqlalchemy
55from ..core import DataCoordinate, DatasetType, DimensionUniverse, GovernorDimension
56from ..core.named import NamedKeyDict, NamedKeyMapping
57from ..core.utils import iterable
58from ._collectionType import CollectionType
if TYPE_CHECKING:
    from .interfaces import CollectionManager, CollectionRecord

    # Workaround for `...` not having an exposed type in Python, borrowed from
    # https://github.com/python/typing/issues/684#issuecomment-548203158
    # Along with that, we need to either use `Ellipsis` instead of `...` for
    # the actual sentinel value internally, and tell MyPy to ignore conversions
    # from `...` to `Ellipsis` at the public-interface boundary.
    #
    # `Ellipsis` and `EllipsisType` should be directly imported from this
    # module by related code that needs them; hopefully that will stay confined
    # to `lsst.daf.butler.registry`.  Putting these in __all__ is bad for
    # Sphinx, and probably more confusing than helpful overall.
    from enum import Enum

    # A fake single-member enum standing in for the (unnameable) type of
    # the ``...`` builtin, visible only to static type checkers.
    class EllipsisType(Enum):
        Ellipsis = "..."

    Ellipsis = EllipsisType.Ellipsis

else:
    # At runtime these are simply the real builtin and its actual type.
    EllipsisType = type(Ellipsis)
    Ellipsis = Ellipsis
@dataclass
class CategorizedWildcard:
    """The results of preprocessing a wildcard expression to separate match
    patterns from strings.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).
    """

    @classmethod
    def fromExpression(cls, expression: Any, *,
                       allowAny: bool = True,
                       allowPatterns: bool = True,
                       coerceUnrecognized: Optional[Callable[[Any], Union[Tuple[str, Any], str]]] = None,
                       coerceItemValue: Optional[Callable[[Any], Any]] = None,
                       defaultItemValue: Optional[Any] = None,
                       ) -> Union[CategorizedWildcard, EllipsisType]:
        """Categorize a wildcard expression.

        Parameters
        ----------
        expression
            The expression to categorize.  May be any of:
             - `str`;
             - `re.Pattern` (only if ``allowPatterns`` is `True`);
             - objects recognized by ``coerceUnrecognized`` (if provided);
             - two-element tuples of (`str`, value) where value is recognized
               by ``coerceItemValue`` (if provided);
             - a non-`str`, non-mapping iterable containing any of the above;
             - the special value `...` (only if ``allowAny`` is `True`), which
               matches anything;
             - a mapping from `str` to a value are recognized by
               ``coerceItemValue`` (if provided);
             - a `CategorizedWildcard` instance (passed through unchanged if
               it meets the requirements specified by keyword arguments).
        allowAny: `bool`, optional
            If `False` (`True` is default) raise `TypeError` if `...` is
            encountered.
        allowPatterns: `bool`, optional
            If `False` (`True` is default) raise `TypeError` if a `re.Pattern`
            is encountered, or if ``expression`` is a `CategorizedWildcard`
            with `patterns` not empty.
        coerceUnrecognized: `Callable`, optional
            A callback that takes a single argument of arbitrary type and
            returns either a `str` - appended to `strings` - or a `tuple` of
            (`str`, `Any`) to be appended to `items`.  This will be called on
            objects of unrecognized type, with the return value added to
            `strings`.  Exceptions will be reraised as `TypeError` (and
            chained).
        coerceItemValue: `Callable`, optional
            If provided, ``expression`` may be a mapping from `str` to any
            type that can be passed to this function; the result of that call
            will be stored instead as the value in ``self.items``.
        defaultItemValue: `Any`, optional
            If provided, combine this value with any string values encountered
            (including any returned by ``coerceUnrecognized``) to form a
            `tuple` and add it to `items`, guaranteeing that `strings` will be
            empty.  Patterns are never added to `items`.

        Returns
        -------
        categorized : `CategorizedWildcard` or ``...``.
            The struct describing the wildcard.  ``...`` is passed through
            unchanged.

        Raises
        ------
        TypeError
            Raised if an unsupported type is found in the expression.
        """
        assert expression is not None
        # See if we were given ...; just return that if we were.
        if expression is Ellipsis:
            if not allowAny:
                raise TypeError("This expression may not be unconstrained.")
            return Ellipsis
        if isinstance(expression, cls):
            # This is already a CategorizedWildcard.  Make sure it meets the
            # reqs. implied by the kwargs we got.
            if not allowPatterns and expression.patterns:
                raise TypeError(f"Regular expression(s) {expression.patterns} "
                                f"are not allowed in this context.")
            if defaultItemValue is not None and expression.strings:
                if expression.items:
                    raise TypeError("Incompatible preprocessed expression: an ordered sequence of str is "
                                    "needed, but the original order was lost in the preprocessing.")
                # Caller wants items; convert each plain string into a
                # (string, defaultItemValue) pair.
                return cls(strings=[], patterns=expression.patterns,
                           items=[(k, defaultItemValue) for k in expression.strings])
            elif defaultItemValue is None and expression.items:
                if expression.strings:
                    raise TypeError("Incompatible preprocessed expression: an ordered sequence of items is "
                                    "needed, but the original order was lost in the preprocessing.")
                # Caller wants plain strings; drop the item values and keep
                # only the string keys.
                return cls(strings=[k for k, _ in expression.items], patterns=expression.patterns, items=[])
            else:
                # Original expression was created with keyword arguments that
                # were at least as restrictive as what we just got; pass it
                # through.
                return expression

        # If we get here, we know we'll be creating a new instance.
        # Initialize an empty one now.
        self = cls(strings=[], patterns=[], items=[])

        # If mappings are allowed, see if we were given a single mapping by
        # trying to get items.
        if coerceItemValue is not None:
            rawItems = None
            try:
                rawItems = expression.items()
            except AttributeError:
                # Not a mapping; fall through to scalar/iterable handling.
                pass
            if rawItems is not None:
                for k, v in rawItems:
                    try:
                        self.items.append((k, coerceItemValue(v)))
                    except Exception as err:
                        raise TypeError(f"Could not coerce mapping value '{v}' for key '{k}'.") from err
                return self

        # Not ..., a CategorizedWildcard instance, or a mapping.  Just
        # process scalars or an iterable.  We put the body of the loop inside
        # a local function so we can recurse after coercion.

        def process(element: Any, alreadyCoerced: bool = False) -> None:
            if isinstance(element, str):
                if defaultItemValue is not None:
                    self.items.append((element, defaultItemValue))
                else:
                    self.strings.append(element)
                return
            if allowPatterns and isinstance(element, re.Pattern):
                self.patterns.append(element)
                return
            if coerceItemValue is not None:
                try:
                    k, v = element
                except TypeError:
                    # Not a 2-element unpackable; try the other coercions
                    # below.
                    pass
                else:
                    if not alreadyCoerced:
                        if not isinstance(k, str):
                            raise TypeError(f"Item key '{k}' is not a string.")
                        try:
                            v = coerceItemValue(v)
                        except Exception as err:
                            raise TypeError(f"Could not coerce tuple item value '{v}' for key '{k}'."
                                            ) from err
                    self.items.append((k, v))
                    return
            if alreadyCoerced:
                # A coercion callback already ran once; refuse to recurse
                # forever on output we still cannot categorize.
                raise TypeError(f"Object '{element}' returned by coercion function is still unrecognized.")
            if coerceUnrecognized is not None:
                try:
                    process(coerceUnrecognized(element), alreadyCoerced=True)
                except Exception as err:
                    raise TypeError(f"Could not coerce expression element '{element}'.") from err
            else:
                raise TypeError(f"Unsupported object in wildcard expression: '{element}'.")

        for element in iterable(expression):
            process(element)
        return self

    def makeWhereExpression(self, column: sqlalchemy.sql.ColumnElement
                            ) -> Optional[sqlalchemy.sql.ColumnElement]:
        """Transform the wildcard into a SQLAlchemy boolean expression suitable
        for use in a WHERE clause.

        Parameters
        ----------
        column : `sqlalchemy.sql.ColumnElement`
            A string column in a table or query that should be compared to the
            wildcard expression.

        Returns
        -------
        where : `sqlalchemy.sql.ColumnElement` or `None`
            A boolean SQL expression that evaluates to true if and only if
            the value of ``column`` matches the wildcard.  `None` is returned
            if both `strings` and `patterns` are empty, and hence no match is
            possible.
        """
        if self.items:
            raise NotImplementedError("Expressions that are processed into items cannot be transformed "
                                      "automatically into queries.")
        if self.patterns:
            raise NotImplementedError("Regular expression patterns are not yet supported here.")
        terms = []
        if len(self.strings) == 1:
            # A single string compares more efficiently with == than IN.
            terms.append(column == self.strings[0])
        elif len(self.strings) > 1:
            terms.append(column.in_(self.strings))
        # TODO: append terms for regular expressions
        if not terms:
            return None
        return sqlalchemy.sql.or_(*terms)

    strings: List[str]
    """Explicit string values found in the wildcard (`list` [ `str` ]).
    """

    patterns: List[re.Pattern]
    """Regular expression patterns found in the wildcard
    (`list` [ `re.Pattern` ]).
    """

    items: List[Tuple[str, Any]]
    """Two-item tuples that relate string values to other objects
    (`list` [ `tuple` [ `str`, `Any` ] ]).
    """
class DatasetTypeRestriction:
    """An immutable set-like restriction on the dataset types to search for
    within a collection.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    names : `frozenset` [`str`] or `...`
        The names of the dataset types included in the restriction, or `...`
        to permit a search for any dataset type.

    Notes
    -----
    This class does not inherit from `collections.abc.Set` (and does not
    implement the full set interface) because it is not always iterable and
    sometimes has no length (i.e. when ``names`` is ``...``).
    """

    __slots__ = ("names",)

    def __init__(self, names: Union[FrozenSet[str], EllipsisType]):
        self.names = names

    @classmethod
    def fromExpression(cls, expression: Any) -> DatasetTypeRestriction:
        """Process a general expression to construct a `DatasetTypeRestriction`
        instance.

        Parameters
        ----------
        expression
            May be:
             - a `DatasetType` instance;
             - a `str` dataset type name;
             - any non-mapping iterable containing either of the above;
             - the special value `...`;
             - another `DatasetTypeRestriction` instance (passed through
               unchanged).

        Returns
        -------
        restriction : `DatasetTypeRestriction`
            A `DatasetTypeRestriction` instance.
        """
        if isinstance(expression, cls):
            return expression
        # Let the generic categorizer normalize the expression; DatasetType
        # instances are coerced to their names.
        categorized = CategorizedWildcard.fromExpression(
            expression,
            allowPatterns=False,
            coerceUnrecognized=lambda d: d.name,
        )
        return cls.any if categorized is Ellipsis else cls(frozenset(categorized.strings))

    def __contains__(self, datasetType: DatasetType) -> bool:
        # Unrestricted instances contain everything; otherwise match the full
        # name, or — for component dataset types — the parent's name.
        if self.names is Ellipsis:
            return True
        if datasetType.name in self.names:
            return True
        return (datasetType.isComponent()
                and DatasetType.splitDatasetTypeName(datasetType.name)[0] in self.names)

    def __eq__(self, other: Any) -> bool:
        return self.names == other.names if isinstance(other, DatasetTypeRestriction) else False

    def __str__(self) -> str:
        return "..." if self.names is Ellipsis else "{{{}}}".format(", ".join(self.names))

    def __repr__(self) -> str:
        if self.names is Ellipsis:
            return "DatasetTypeRestriction(...)"
        return f"DatasetTypeRestriction({self.names!r})"

    @staticmethod
    def union(*args: DatasetTypeRestriction) -> DatasetTypeRestriction:
        """Merge one or more `DatasetTypeRestriction` instances, returning one
        that allows any of the dataset types included in any of them.

        Parameters
        ----------
        *args
            Positional arguments are `DatasetTypeRestriction` instances.
        """
        combined: Set[str] = set()
        for restriction in args:
            if restriction.names is Ellipsis:
                # One unrestricted input makes the whole union unrestricted.
                return DatasetTypeRestriction.any
            combined.update(restriction.names)
        return DatasetTypeRestriction(frozenset(combined))

    names: Union[FrozenSet[str], EllipsisType]
    """The names of the dataset types included (i.e. permitted) by the
    restriction, or the special value ``...`` to permit all dataset types
    (`frozenset` [ `str` ] or ``...``).
    """

    any: ClassVar[DatasetTypeRestriction]
    """A special `DatasetTypeRestriction` instance that permits any dataset
    type.

    This instance should be preferred instead of constructing a new one with
    ``...``, when possible, but it should not be assumed to be the only such
    instance (i.e. don't use ``is`` instead of ``==`` for comparisons).
    """


DatasetTypeRestriction.any = DatasetTypeRestriction(Ellipsis)
class GovernorDimensionRestriction:
    """An object that represents a restriction on some entity to only certain
    values of the governor dimensions.

    Parameters
    ----------
    universe : `DimensionUniverse`
        Object managing all dimensions.
    kwargs : `str` or `Iterable` [ `str` ]
        Dimension values to restrict to, keyed by dimension name.  Governor
        dimensions not named here are unrestricted.

    Raises
    ------
    ValueError
        Raised if a keyword argument is not a governor dimension name.
    """
    def __init__(self, universe: DimensionUniverse, **kwargs: Union[str, Iterable[str], EllipsisType]):
        self.universe = universe
        self._dict: NamedKeyDict[GovernorDimension, Set[str]] = NamedKeyDict()
        for dimension in universe.getGovernorDimensions():
            value = kwargs.pop(dimension.name, Ellipsis)
            # ``...`` (or not mentioning the dimension at all) means
            # "unrestricted"; we represent that by omitting the key from
            # ``_dict`` entirely.
            if value is not Ellipsis:
                self._dict[dimension] = set(iterable(value))
        if kwargs:
            # Anything left over did not match a governor dimension name.
            raise ValueError(
                f"Invalid keyword argument(s): {kwargs.keys()} (must be governor dimension names)."
            )

    @staticmethod
    def union(
        universe: DimensionUniverse,
        *args: GovernorDimensionRestriction
    ) -> GovernorDimensionRestriction:
        """Merge one or more `GovernorDimensionRestriction` instances.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Object managing all known dimensions.
        *args
            Additional positional arguments are `GovernorDimensionRestriction`
            instances.

        Returns
        -------
        merged : `GovernorDimensionRestriction`
            A `GovernorDimensionRestriction` that allows any of the dimension
            values permitted by any of the inputs.
        """
        mapping: Dict[str, Union[Set[str], EllipsisType]] = defaultdict(set)
        for a in args:
            for dimension in universe.getGovernorDimensions():
                new_values = a.mapping.get(dimension, Ellipsis)
                if new_values is Ellipsis:
                    # One input is unrestricted for this dimension, so the
                    # union is unrestricted for it as well.
                    mapping[dimension.name] = Ellipsis
                else:
                    accumulated = mapping[dimension.name]
                    if accumulated is not Ellipsis:
                        accumulated.update(new_values)
        return GovernorDimensionRestriction(universe, **mapping)

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, GovernorDimensionRestriction):
            return False
        return self.mapping == other.mapping

    def __str__(self) -> str:
        return "({})".format(
            ", ".join(f"{dimension.name}: {values}" for dimension, values in self.mapping.items())
        )

    def __repr__(self) -> str:
        return "GovernorDimensionRestriction(<universe>, {})".format(
            ", ".join(f"{dimension.name}={values!r}" for dimension, values in self.mapping.items())
        )

    def isConsistentWith(self, dataId: DataCoordinate) -> bool:
        """Test whether this restriction is consistent with the given data ID.

        Parameters
        ----------
        dataId : `DataCoordinate`
            Data ID to test.

        Returns
        -------
        consistent : `bool`
            `True` if all values the data ID are either not restricted by
            ``self``, or are included in ``self``.
        """
        for dimension in self._dict.keys() & dataId.graph.dimensions:
            # The data ID is inconsistent only if its value for a restricted
            # dimension is *absent* from the allowed set.  (The previous
            # ``in`` test inverted the documented contract.)
            if dataId[dimension] not in self._dict[dimension]:
                return False
        return True

    @property
    def mapping(self) -> NamedKeyMapping[GovernorDimension, AbstractSet[str]]:
        """A `NamedKeyMapping` view of this restriction, with all restricted
        dimensions as keys and sets of allowed data ID values as dictionary
        values.
        """
        return self._dict

    universe: DimensionUniverse
    """Object that manages all known dimensions (`DimensionUniverse`).
    """
class CollectionContentRestriction:
    """All restrictions that can be applied to what datasets can be included
    in a collection.

    Parameters
    ----------
    datasetTypes : `DatasetTypeRestriction`, optional
        Restriction on dataset types.
    dimensions : `GovernorDimensionRestriction`, optional
        Restriction on governor dimension values.
    universe : `DimensionUniverse`
        Object managing all known dimensions; required (to build an
        unrestricted `GovernorDimensionRestriction`) when ``dimensions`` is
        not given.
    """
    def __init__(
        self,
        datasetTypes: DatasetTypeRestriction = DatasetTypeRestriction.any,
        dimensions: Optional[GovernorDimensionRestriction] = None,
        *,
        universe: Optional[DimensionUniverse] = None,
    ):
        self.datasetTypes = datasetTypes
        if dimensions is not None:
            self.dimensions = dimensions
        else:
            if universe is None:
                raise TypeError("At least one of 'dimensions' and 'universe' must be provided.")
            # No dimension restriction given; start unrestricted.
            self.dimensions = GovernorDimensionRestriction(universe)

    @classmethod
    def fromExpression(cls, expression: Any, universe: DimensionUniverse) -> CollectionContentRestriction:
        """Construct a new restriction instance from an expression.

        Parameters
        ----------
        expression
            Either an existing `CollectionContentRestriction` instance (passed
            through unchanged) or any of the objects described in
            `DatasetTypeRestriction.fromExpression`.
        universe : `DimensionUniverse`
            Object managing all known dimensions.
        """
        if isinstance(expression, cls):
            return expression
        restriction = DatasetTypeRestriction.fromExpression(expression)
        return cls(datasetTypes=restriction, universe=universe)

    @staticmethod
    def union(
        universe: DimensionUniverse,
        *args: CollectionContentRestriction
    ) -> CollectionContentRestriction:
        """Merge one or more `CollectionContentRestriction` instances,
        returning one that allows any of the dataset types or governor
        dimension values included in any of them.

        Parameters
        ----------
        universe : `DimensionUniverse`
            Object managing all known dimensions.
        args
            Positional arguments are `CollectionContentRestriction` instances.
        """
        mergedTypes = DatasetTypeRestriction.union(*[arg.datasetTypes for arg in args])
        mergedDimensions = GovernorDimensionRestriction.union(universe, *[arg.dimensions for arg in args])
        return CollectionContentRestriction(mergedTypes, mergedDimensions)

    @classmethod
    def fromPairs(
        cls,
        pairs: Iterable[Tuple[str, Optional[str]]],
        universe: DimensionUniverse,
    ) -> CollectionContentRestriction:
        """Construct a restriction from a set of tuples that can be more
        easily mapped to a database representation.

        Parameters
        ----------
        pairs : `Iterable` [ `Tuple` [ `str`, `str` or `None` ] ]
            Pairs to interpret.  The first element of each tuple is either a
            governor dimension name or the special string "dataset_type".  The
            second element is the value of the dimension, the name of the
            dataset type, or `None` to indicate that there is no restriction
            on that dimension or on dataset types.
        universe : `DimensionUniverse`
            Object managing all known dimensions.

        Returns
        -------
        restriction : `CollectionContentRestriction`
            New restriction instance.
        """
        byDimension: Dict[str, Set[str]] = defaultdict(set)
        # None (instead of a set) records "no dataset-type restriction".
        typeNames: Optional[Set[str]] = set()
        for key, value in pairs:
            if key != "dataset_type":
                byDimension[key].add(value)
            elif value is None:
                typeNames = None
            elif typeNames is None:
                # A concrete name after a None sentinel cannot happen in a
                # well-formed encoding.
                raise RuntimeError("Inconsistent collection content restriction.")
            else:
                typeNames.add(value)
        return cls(
            DatasetTypeRestriction(frozenset(typeNames) if typeNames is not None else Ellipsis),
            GovernorDimensionRestriction(universe, **byDimension),
        )

    def toPairs(self) -> Iterator[Tuple[str, Optional[str]]]:
        """Transform the restriction to a set of tuples that can be more
        easily mapped to a database representation.

        Yields
        ------
        key : `str`
            Either a governor dimension name or the special string
            "dataset_type".
        value : `str` or `None`
            The value of the dimension, the name of the dataset type, or
            `None` to indicate that there is no restriction on that dimension
            or on dataset types.
        """
        names = self.datasetTypes.names
        if names is Ellipsis:
            yield ("dataset_type", None)
        else:
            for name in sorted(names):
                yield ("dataset_type", name)
        for dimension, values in self.dimensions.mapping.items():
            for value in sorted(values):
                yield (dimension.name, value)

    def __eq__(self, other: Any) -> bool:
        if not isinstance(other, CollectionContentRestriction):
            return False
        return self.datasetTypes == other.datasetTypes and self.dimensions == other.dimensions

    def __str__(self) -> str:
        terms = [f"datasetTypes: {self.datasetTypes}"]
        terms.extend(f"{dimension.name}: {values}"
                     for dimension, values in self.dimensions.mapping.items())
        return "({})".format(", ".join(terms))

    def __repr__(self) -> str:
        return f"CollectionContentRestriction({self.datasetTypes!r}, {self.dimensions!r})"
def _yieldCollectionRecords(
    manager: CollectionManager,
    record: CollectionRecord,
    collectionTypes: AbstractSet[CollectionType] = CollectionType.all(),
    done: Optional[Set[str]] = None,
    flattenChains: bool = True,
    includeChains: Optional[bool] = None,
) -> Iterator[CollectionRecord]:
    """Recursively yield `CollectionRecord` objects that match the given
    criteria; shared implementation for `CollectionSearch.iter` and
    `CollectionQuery.iter`.

    Parameters
    ----------
    manager : `CollectionManager`
        Object responsible for managing the collection tables in a `Registry`.
    record : `CollectionRecord`
        Record to conditionally yield.
    collectionTypes : `AbstractSet` [ `CollectionType` ], optional
        If provided, only yield collections of these types.
    done : `set` [ `str` ], optional
        A `set` of already-yielded collection names; if provided, ``record``
        will only be yielded if it is not already in ``done``, and ``done``
        will be updated to include it on return.
    flattenChains : `bool`, optional
        If `True` (default) recursively yield the child collections of
        `~CollectionType.CHAINED` collections.
    includeChains : `bool`, optional
        If `False`, return records for `~CollectionType.CHAINED` collections
        themselves.  The default is the opposite of ``flattenChains``: either
        return records for CHAINED collections or their children, but not
        both.

    Yields
    ------
    record : `CollectionRecord`
        Matching collection records.
    """
    done = set() if done is None else done
    if includeChains is None:
        # By default, yield either the chain or its children, never both.
        includeChains = not flattenChains
    if record.type in collectionTypes:
        done.add(record.name)
        if includeChains or record.type is not CollectionType.CHAINED:
            yield record
    if record.type is CollectionType.CHAINED and flattenChains:
        done.add(record.name)
        # The enum value guarantees this is a ChainedCollectionRecord, but
        # MyPy cannot see that.
        yield from record.children.iter(  # type: ignore
            manager,
            collectionTypes=collectionTypes,
            done=done,
            flattenChains=flattenChains,
            includeChains=includeChains,
        )
class CollectionSearch(Sequence[str]):
    """An ordered search path of collections.

    The `fromExpression` method should almost always be used to construct
    instances, as the regular constructor performs no checking of inputs (and
    that can lead to confusing error messages downstream).

    Parameters
    ----------
    collections : `tuple` [ `str` ]
        Tuple of collection names, ordered from the first searched to the last
        searched.

    Notes
    -----
    A `CollectionSearch` is used to find a single dataset (or set of datasets
    with different dataset types or data IDs) according to its dataset type
    and data ID, giving preference to collections in the order in which they
    are specified.  A `CollectionQuery` can be constructed from a broader
    range of expressions but does not order the collections to be searched.

    `CollectionSearch` is an immutable sequence of `str` collection names.

    A `CollectionSearch` instance constructed properly (e.g. via
    `fromExpression`) is a unique representation of a particular search path;
    it is exactly the same internally and compares as equal to any
    `CollectionSearch` constructed from an equivalent expression, regardless
    of how different the original expressions appear.
    """

    __slots__ = ("_collections",)

    def __init__(self, collections: Tuple[str, ...]):
        self._collections = collections

    @classmethod
    def fromExpression(cls, expression: Any) -> CollectionSearch:
        """Process a general expression to construct a `CollectionSearch`
        instance.

        Parameters
        ----------
        expression
            May be:
             - a `str` collection name;
             - an iterable of `str` collection names;
             - another `CollectionSearch` instance (passed through
               unchanged).

            Duplicate entries will be removed (preserving the first appearance
            of each collection name).

        Returns
        -------
        collections : `CollectionSearch`
            A `CollectionSearch` instance.
        """
        # An existing CollectionSearch passes through unchanged.  This lets us
        # standardize expressions (and turn single-pass iterators into
        # multi-pass iterables) in advance and pass them down to other
        # routines that accept arbitrary expressions.
        if isinstance(expression, cls):
            return expression
        categorized = CategorizedWildcard.fromExpression(
            expression,
            allowAny=False,
            allowPatterns=False,
        )
        assert categorized is not Ellipsis
        assert not categorized.patterns
        assert not categorized.items
        # dict.fromkeys deduplicates while preserving first-appearance order.
        return cls(tuple(dict.fromkeys(categorized.strings)))

    def iter(
        self, manager: CollectionManager, *,
        datasetType: Optional[DatasetType] = None,
        collectionTypes: AbstractSet[CollectionType] = CollectionType.all(),
        done: Optional[Set[str]] = None,
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[CollectionRecord]:
        """Iterate over collection records that match this instance and the
        given criteria, in order.

        This method is primarily intended for internal use by `Registry`;
        other callers should generally prefer `Registry.findDatasets` or
        other `Registry` query methods.

        Parameters
        ----------
        manager : `CollectionManager`
            Object responsible for managing the collection tables in a
            `Registry`.
        datasetType : `DatasetType`, optional
            Currently unused by this method.
        collectionTypes : `AbstractSet` [ `CollectionType` ], optional
            If provided, only yield collections of these types.
        done : `set`, optional
            A `set` containing the names of all collections already yielded;
            any collections whose names are already present in this set will
            not be yielded again, and those yielded will be added to it while
            iterating.  If not provided, an empty `set` will be created and
            used internally to avoid duplicates.
        flattenChains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `False`, return records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flattenChains``: either return records for CHAINED collections
            or their children, but not both.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        """
        if done is None:
            done = set()
        for name in self._collections:
            if name in done:
                continue
            yield from _yieldCollectionRecords(
                manager,
                manager.find(name),
                collectionTypes=collectionTypes,
                done=done,
                flattenChains=flattenChains,
                includeChains=includeChains,
            )

    def __iter__(self) -> Iterator[str]:
        return iter(self._collections)

    def __len__(self) -> int:
        return len(self._collections)

    def __getitem__(self, index: Any) -> str:
        return self._collections[index]

    def __eq__(self, other: Any) -> bool:
        return self._collections == other._collections if isinstance(other, CollectionSearch) else False

    def __str__(self) -> str:
        return "[{}]".format(", ".join(self._collections))

    def __repr__(self) -> str:
        return f"CollectionSearch({self._collections!r})"
870class CollectionQuery:
871 """An unordered query for collections and dataset type restrictions.
873 The `fromExpression` method should almost always be used to construct
874 instances, as the regular constructor performs no checking of inputs (and
875 that can lead to confusing error messages downstream).
877 Parameters
878 ----------
879 search : `CollectionSearch` or `...`
880 An object representing an ordered search for explicitly-named
881 collections (to be interpreted here as unordered), or the special
882 value `...` indicating all collections. `...` must be accompanied
883 by ``patterns=None``.
884 patterns : `tuple` of `re.Pattern`
885 Regular expression patterns to match against collection names.
886 universe : `DimensionUniverse`
887 Object managing all dimensions.
889 Notes
890 -----
891 A `CollectionQuery` is used to find all matching datasets in any number
892 of collections, or to find collections themselves.
894 `CollectionQuery` is expected to be rarely used outside of `Registry`
895 (which uses it to back several of its "query" methods that take general
 896 expressions for collections), but it may occasionally be useful outside
897 `Registry` as a way to preprocess expressions that contain single-pass
898 iterators into a form that can be used to call those `Registry` methods
899 multiple times.
900 """
    def __init__(
            self,
            search: Union[CollectionSearch, EllipsisType] = Ellipsis,
            patterns: Tuple[re.Pattern, ...] = (),
    ):
        # ``...`` (Ellipsis) stands for "all collections"; otherwise
        # ``search`` holds the explicitly-named collections and ``patterns``
        # any regular expressions to match against collection names.
        self._search = search
        self._patterns = patterns

    __slots__ = ("_search", "_patterns")
911 @classmethod
912 def fromExpression(cls, expression: Any) -> CollectionQuery:
913 """Process a general expression to construct a `CollectionQuery`
914 instance.
916 Parameters
917 ----------
918 expression
919 May be:
920 - a `str` collection name;
921 - an `re.Pattern` instance to match (with `re.Pattern.fullmatch`)
922 against collection names;
923 - any iterable containing any of the above;
924 - a `CollectionSearch` instance;
925 - another `CollectionQuery` instance (passed through unchanged).
927 Duplicate collection names will be removed (preserving the first
928 appearance of each collection name).
930 Returns
931 -------
932 collections : `CollectionQuery`
933 A `CollectionQuery` instance.
934 """
935 if isinstance(expression, cls):
936 return expression
937 if expression is Ellipsis:
938 return cls()
939 if isinstance(expression, CollectionSearch):
940 return cls(search=expression, patterns=())
941 wildcard = CategorizedWildcard.fromExpression(
942 expression,
943 allowAny=True,
944 allowPatterns=True,
945 )
946 if wildcard is Ellipsis:
947 return cls()
948 assert not wildcard.items, \
949 "We should no longer be transforming to (str, DatasetTypeRestriction) tuples."
950 return cls(
951 search=CollectionSearch.fromExpression(wildcard.strings),
952 patterns=tuple(wildcard.patterns),
953 )
    def iter(
        self, manager: CollectionManager, *,
        collectionTypes: AbstractSet[CollectionType] = CollectionType.all(),
        flattenChains: bool = True,
        includeChains: Optional[bool] = None,
    ) -> Iterator[CollectionRecord]:
        """Iterate over collection records that match this instance and the
        given criteria, in an arbitrary order.

        This method is primarily intended for internal use by `Registry`;
        other callers should generally prefer `Registry.queryDatasets` or
        other `Registry` query methods.

        Parameters
        ----------
        manager : `CollectionManager`
            Object responsible for managing the collection tables in a
            `Registry`.
        collectionTypes : `AbstractSet` [ `CollectionType` ], optional
            If provided, only yield collections of these types.
        flattenChains : `bool`, optional
            If `True` (default) recursively yield the child collections of
            `~CollectionType.CHAINED` collections.
        includeChains : `bool`, optional
            If `False`, return records for `~CollectionType.CHAINED`
            collections themselves.  The default is the opposite of
            ``flattenChains``: either return records for CHAINED collections
            or their children, but not both.

        Yields
        ------
        record : `CollectionRecord`
            Matching collection records.
        """
        if self._search is Ellipsis:
            # Unconstrained query: consider every collection the manager
            # knows about.
            for record in manager:
                yield from _yieldCollectionRecords(
                    manager,
                    record,
                    collectionTypes=collectionTypes,
                    flattenChains=flattenChains,
                    includeChains=includeChains,
                )
        else:
            done: Set[str] = set()
            # Yield the explicitly-named collections first, recording their
            # names in ``done``...
            yield from self._search.iter(
                manager,
                collectionTypes=collectionTypes,
                done=done,
                flattenChains=flattenChains,
                includeChains=includeChains,
            )
            # ...then any pattern-matched collections that were not already
            # yielded above.
            for record in manager:
                if record.name not in done and any(p.fullmatch(record.name) for p in self._patterns):
                    yield from _yieldCollectionRecords(
                        manager,
                        record,
                        collectionTypes=collectionTypes,
                        done=done,
                        flattenChains=flattenChains,
                        includeChains=includeChains,
                    )
1018 def __eq__(self, other: Any) -> bool:
1019 if isinstance(other, CollectionQuery):
1020 return self._search == other._search and self._patterns == other._patterns
1021 else:
1022 return False