Coverage for python / lsst / pipe / base / pipeline_graph / expressions.py: 73%
98 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-22 08:57 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-22 08:57 +0000
1# This file is part of pipe_base.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This software is dual licensed under the GNU General Public License and also
10# under a 3-clause BSD license. Recipients may choose which of these licenses
11# to use; please see the files gpl-3.0.txt and/or bsd_license.txt,
12# respectively. If you choose the GPL option then the following text applies
13# (but note that there is still no warranty even if you opt for BSD instead):
14#
15# This program is free software: you can redistribute it and/or modify
16# it under the terms of the GNU General Public License as published by
17# the Free Software Foundation, either version 3 of the License, or
18# (at your option) any later version.
19#
20# This program is distributed in the hope that it will be useful,
21# but WITHOUT ANY WARRANTY; without even the implied warranty of
22# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23# GNU General Public License for more details.
24#
25# You should have received a copy of the GNU General Public License
26# along with this program. If not, see <http://www.gnu.org/licenses/>.
27"""Expressions that resolve to subsets of pipelines.
29See :ref:`pipeline-graph-subset-expressions`.
30"""
32from __future__ import annotations
# Public API of this module: the expression-tree node types, the `Node`
# union alias, and the `parse` entry point.
__all__ = (
    "DirectionNode",
    "IdentifierNode",
    "IntersectionNode",
    "Node",
    "NotNode",
    "UnionNode",
    "parse",
)
44import dataclasses
45import functools
46from typing import TYPE_CHECKING, Any, Literal
48from lsst.daf.butler.queries.expressions.parser.ply import lex, yacc
50from ._exceptions import InvalidExpressionError
52if TYPE_CHECKING:
53 from lsst.daf.butler.queries.expressions.parser.parserLex import LexToken
54 from lsst.daf.butler.queries.expressions.parser.parserYacc import YaccProduction
class _ParserLex:
    """Token definitions for the pipeline-subset expression lexer.

    An instance of this class is handed to ``lex.lex(object=...)`` by
    `make_lexer`; the ``tokens`` tuple and the ``t_``-prefixed attributes
    are the lexer specification.
    """

    @classmethod
    def make_lexer(cls) -> Any:  # unspecified PLY type.
        """Build a new lexer from a fresh instance of this class.

        Returns
        -------
        lexer
            Whatever ``lex.lex`` returns (an unspecified PLY type).
        """
        return lex.lex(object=cls())

    tokens = (
        "IDENTIFIER",
        "LPAREN",
        "RPAREN",
        "NOT",
        "UNION",
        "INTERSECTION",
        "LT",
        "LE",
        "GT",
        "GE",
    )

    # Regular expressions for the fixed operator tokens; characters that are
    # special in regular expressions are escaped.
    t_LPAREN = r"\("
    t_RPAREN = r"\)"
    t_NOT = "~"
    t_UNION = r"\|"
    t_INTERSECTION = "&"
    t_LT = "<"
    t_LE = "<="
    t_GT = ">"
    t_GE = ">="

    # Identifiers are runs of word characters (letters, digits, underscore)
    # and hyphens, and may have a T:, D:, or S: prefix.
    def t_IDENTIFIER(self, t: LexToken) -> LexToken:
        r"""([TDS]:)?[\w-]+"""
        t.type = "IDENTIFIER"
        return t

    # Ignore spaces and tabs.
    t_ignore = " \t"

    def t_error(self, t: LexToken) -> LexToken:
        # Never returns: any character no token rule matches is reported as
        # an invalid expression, quoting the first offending character.
        raise InvalidExpressionError(
            f"invalid token in expression near character {t.lexer.lexpos}: {t.value[0]!r}"
        )
100class _ParserYacc:
101 def __init__(self) -> None:
102 self.parser = self._parser_factory()
104 @staticmethod
105 @functools.cache
106 def _parser_factory() -> Any: # unspecified PLY type.
107 return yacc.yacc(module=_ParserYacc, write_tables=False, debug=False)
109 def parse(self, input: str) -> Node:
110 """Parse input expression and return the parsed tree object.
112 Parameters
113 ----------
114 input : `str`
115 Expression to parse.
117 Returns
118 -------
119 node : `Node`
120 Root of the parsed expression tree.
121 """
122 lexer = _ParserLex.make_lexer()
123 tree = self.parser.parse(input=input, lexer=lexer)
124 return tree
126 tokens = _ParserLex.tokens[:]
128 start = "expr"
130 precedence = (
131 ("left", "UNION"),
132 ("left", "INTERSECTION"),
133 ("right", "NOT", "LT", "LE", "GT", "GE"),
134 )
136 # Ruff wants 'noqa' on the doc line, pydocstyle wants it on the function.
138 @classmethod
139 def p_expr_union(cls, p: YaccProduction) -> None: # noqa: D403
140 """expr : expr UNION expr""" # noqa: D403
141 p[0] = UnionNode(lhs=p[1], rhs=p[3])
143 @classmethod
144 def p_expr_intersection(cls, p: YaccProduction) -> None: # noqa: D403
145 """expr : expr INTERSECTION expr""" # noqa: D403
146 p[0] = IntersectionNode(lhs=p[1], rhs=p[3])
148 @classmethod
149 def p_expr_not(cls, p: YaccProduction) -> None: # noqa: D403
150 """expr : NOT expr""" # noqa: D403
151 p[0] = NotNode(operand=p[2])
153 @classmethod
154 def p_expr_parens(cls, p: YaccProduction) -> None: # noqa: D403
155 """expr : LPAREN expr RPAREN""" # noqa: D403
156 p[0] = p[2]
158 @classmethod
159 def p_expr_inequality(cls, p: YaccProduction) -> None: # noqa: D403
160 """expr : LT identifier
161 | LE identifier
162 | GT identifier
163 | GE identifier
164 """ # noqa: D403
165 p[0] = DirectionNode(operator=p[1], start=p[2])
167 @classmethod
168 def p_expr_identifier(cls, p: YaccProduction) -> None: # noqa: D403
169 """expr : identifier""" # noqa: D403
170 p[0] = p[1]
172 @classmethod
173 def p_identifier_qualified(cls, p: YaccProduction) -> None: # noqa: D403, D401
174 """identifier : IDENTIFIER""" # noqa: D403, D401
175 match p[1].split(":"):
176 case [qualifier, label]:
177 p[0] = IdentifierNode(qualifier=qualifier, label=label)
178 case [label]:
179 p[0] = IdentifierNode(qualifier=None, label=label)
180 case _: # pragma: no cover
181 raise AssertionError("Unexpected identifier form.")
183 @classmethod
184 def p_error(cls, p: YaccProduction | None) -> None:
185 if p is None:
186 raise InvalidExpressionError("Expression ended unexpectedly.")
187 else:
188 raise InvalidExpressionError(f"Syntax error near character {p.lexpos}: {p.value!r}")
@dataclasses.dataclass
class IdentifierNode:
    """A node that corresponds to a task label, dataset type name, or labeled
    subset.
    """

    qualifier: Literal["T", "D", "S"] | None
    """Qualifier that indicates whether this is a task (T), dataset type (D),
    or labeled subset (S).

    Unqualified identifiers (`None`) must resolve unambiguously.
    """

    label: str
    """Task label, dataset type name, or subset label."""
@dataclasses.dataclass
class DirectionNode:
    """A node that represents the ancestors or descendants of a task label or
    dataset type.
    """

    operator: Literal["<", ">", "<=", ">="]
    """Which direction to traverse the graph ('>' for descendants, '<' for
    ancestors), and whether to include the operand ('=') or not.
    """

    start: IdentifierNode
    """Node at which to start the DAG traversal."""
@dataclasses.dataclass
class NotNode:
    """A node that represents set inversion (including all elements not in the
    operand).
    """

    operand: Node
    """Node representing the set to invert."""
@dataclasses.dataclass
class UnionNode:
    """Node representing a set union."""

    lhs: Node
    """Left-hand operand of the union."""

    rhs: Node
    """Right-hand operand of the union."""
@dataclasses.dataclass
class IntersectionNode:
    """Node representing a set intersection."""

    lhs: Node
    """Left-hand operand of the intersection."""

    rhs: Node
    """Right-hand operand of the intersection."""
def parse(expression: str) -> Node:
    """Parse an expression into a `Node` tree.

    Parameters
    ----------
    expression : `str`
        String expression to parse.  See
        :ref:`pipeline-graph-subset-expressions`.

    Returns
    -------
    node : `Node`
        Root node of the parsed expression tree.

    Raises
    ------
    InvalidExpressionError
        Raised if the expression could not be parsed.
    """
    return _ParserYacc().parse(expression)
271type Node = IdentifierNode | DirectionNode | NotNode | UnionNode | IntersectionNode