# Coverage-report residue (coverage.py HTML header), kept as comments:
# Coverage for python/lsst/ctrl/mpexec/preExecInit.py : 11%
1# This file is part of ctrl_mpexec.
2#
3# Developed for the LSST Data Management System.
4# This product includes software developed by the LSST Project
5# (http://www.lsst.org).
6# See the COPYRIGHT file at the top-level directory of this distribution
7# for details of code ownership.
8#
9# This program is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program. If not, see <http://www.gnu.org/licenses/>.
__all__ = ['PreExecInit']

# -------------------------------
#  Imports of standard modules --
# -------------------------------
import itertools
import logging

# -----------------------------
#  Imports for other modules --
# -----------------------------
from lsst.base import Packages
from lsst.daf.butler import DatasetType
from lsst.pipe.base import PipelineDatasetTypes

# Logger named after this module with the top-level package component
# ("lsst") stripped off, per LSST logging convention.
_LOG = logging.getLogger(__name__.partition(".")[2])
class PreExecInit:
    """Initialization of registry for QuantumGraph execution.

    This class encapsulates all necessary operations that have to be performed
    on butler and registry to prepare them for QuantumGraph execution.

    Parameters
    ----------
    butler : `~lsst.daf.butler.Butler`
        Data butler instance.
    taskFactory : `~lsst.pipe.base.TaskFactory`
        Task factory.
    skipExisting : `bool`, optional
        If `True` then do not try to overwrite any datasets that might exist
        in the butler. If `False` then any existing conflicting dataset will
        cause butler exception.
    """
    def __init__(self, butler, taskFactory, skipExisting=False):
        self.butler = butler
        self.taskFactory = taskFactory
        self.skipExisting = skipExisting

    def initialize(self, graph, saveInitOutputs=True, registerDatasetTypes=False, saveVersions=True):
        """Perform all initialization steps.

        Convenience method to execute all initialization steps. Instead of
        calling this method and providing all options it is also possible to
        call methods individually.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.
        saveInitOutputs : `bool`, optional
            If ``True`` (default) then save "init outputs", configurations,
            and package versions to butler.
        registerDatasetTypes : `bool`, optional
            If ``True`` then register dataset types in registry, otherwise
            they must be already registered.
        saveVersions : `bool`, optional
            If ``False`` then do not save package versions even if
            ``saveInitOutputs`` is set to ``True``.
        """
        # register dataset types or check consistency
        self.initializeDatasetTypes(graph, registerDatasetTypes)

        # Save task initialization data or check that saved data
        # is consistent with what tasks would save
        if saveInitOutputs:
            self.saveInitOutputs(graph)
            self.saveConfigs(graph)
            if saveVersions:
                self.savePackageVersions(graph)

    def initializeDatasetTypes(self, graph, registerDatasetTypes=False):
        """Save or check DatasetTypes output by the tasks in a graph.

        Iterates over all DatasetTypes for all tasks in a graph and either
        tries to add them to registry or compares them to existing ones.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.
        registerDatasetTypes : `bool`, optional
            If ``True`` then register dataset types in registry, otherwise
            they must be already registered.

        Raises
        ------
        ValueError
            Raised if existing DatasetType is different from DatasetType
            in a graph.
        KeyError
            Raised if ``registerDatasetTypes`` is ``False`` and DatasetType
            does not exist in registry.
        """
        pipeline = list(nodes.taskDef for nodes in graph)

        # Make dataset types for configurations; config datasets have empty
        # dimensions (one per task per run collection).
        configDatasetTypes = [DatasetType(taskDef.configDatasetName, {},
                                          storageClass="Config",
                                          universe=self.butler.registry.dimensions)
                              for taskDef in pipeline]

        # And one dataset type for package versions
        packagesDatasetType = DatasetType("packages", {},
                                          storageClass="Packages",
                                          universe=self.butler.registry.dimensions)

        datasetTypes = PipelineDatasetTypes.fromPipeline(pipeline, registry=self.butler.registry)
        for datasetType in itertools.chain(datasetTypes.initIntermediates, datasetTypes.initOutputs,
                                           datasetTypes.intermediates, datasetTypes.outputs,
                                           configDatasetTypes, [packagesDatasetType]):
            if registerDatasetTypes:
                _LOG.debug("Registering DatasetType %s with registry", datasetType)
                # this is a no-op if it already exists and is consistent,
                # and it raises if it is inconsistent.
                self.butler.registry.registerDatasetType(datasetType)
            else:
                _LOG.debug("Checking DatasetType %s against registry", datasetType)
                expected = self.butler.registry.getDatasetType(datasetType.name)
                if expected != datasetType:
                    raise ValueError(f"DatasetType configuration does not match Registry: "
                                     f"{datasetType} != {expected}")

    def saveInitOutputs(self, graph):
        """Write any datasets produced by initializing tasks in a graph.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.

        Raises
        ------
        Exception
            Raised if ``skipExisting`` is `False` and datasets already
            exists. Content of a butler collection may be changed if
            exception is raised.

        Notes
        -----
        If ``skipExisting`` is `True` then existing datasets are not
        overwritten, instead we should check that their stored object is
        exactly the same as what we would save at this time. Comparing
        arbitrary types of object is, of course, non-trivial. Current
        implementation only checks the existence of the datasets and their
        types against the types of objects produced by tasks. Ideally we
        would like to check that object data is identical too but presently
        there is no generic way to compare objects. In the future we can
        potentially introduce some extensible mechanism for that.
        """
        _LOG.debug("Will save InitOutputs for all tasks")
        for taskNodes in graph:
            taskDef = taskNodes.taskDef
            task = self.taskFactory.makeTask(taskDef.taskClass, taskDef.config, None, self.butler)
            for name in taskDef.connections.initOutputs:
                attribute = getattr(taskDef.connections, name)
                initOutputVar = getattr(task, name)
                objFromStore = None
                if self.skipExisting:
                    # check if it is there already
                    # NOTE(review): this assumes butler.get returns None
                    # (rather than raising) when the dataset is absent --
                    # confirm against the Butler API in use.
                    _LOG.debug("Retrieving InitOutputs for task=%s key=%s dsTypeName=%s",
                               task, name, attribute.name)
                    objFromStore = self.butler.get(attribute.name, {})
                    if objFromStore is not None:
                        # Types are supposed to be identical.
                        # TODO: Check that object contents is identical too.
                        if type(objFromStore) is not type(initOutputVar):
                            raise TypeError(f"Stored initOutput object type {type(objFromStore)} "
                                            f"is different from task-generated type "
                                            f"{type(initOutputVar)} for task {taskDef}")
                if objFromStore is None:
                    # butler will raise exception if dataset is already there
                    _LOG.debug("Saving InitOutputs for task=%s key=%s", task, name)
                    self.butler.put(initOutputVar, attribute.name, {})

    def saveConfigs(self, graph):
        """Write configurations for pipeline tasks to butler or check that
        existing configurations are equal to the new ones.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.

        Raises
        ------
        Exception
            Raised if ``skipExisting`` is `False` and datasets already exists.
            Content of a butler collection should not be changed if exception
            is raised.
        """
        def logConfigMismatch(msg):
            """Log messages about configuration mismatch.
            """
            _LOG.fatal("Comparing configuration: %s", msg)

        _LOG.debug("Will save Configs for all tasks")
        # start transaction to rollback any changes on exceptions
        with self.butler.transaction():
            for taskNodes in graph:
                taskDef = taskNodes.taskDef
                configName = taskDef.configDatasetName

                oldConfig = None
                if self.skipExisting:
                    oldConfig = self.butler.get(configName, {})
                    if oldConfig is not None:
                        if not taskDef.config.compare(oldConfig, shortcut=False, output=logConfigMismatch):
                            raise TypeError(
                                f"Config does not match existing task config {configName!r} in butler; "
                                "tasks configurations must be consistent within the same run collection")
                if oldConfig is None:
                    # butler will raise exception if dataset is already there
                    _LOG.debug("Saving Config for task=%s dataset type=%s", taskDef.label, configName)
                    self.butler.put(taskDef.config, configName, {})

    def savePackageVersions(self, graph):
        """Write versions of software packages to butler.

        Parameters
        ----------
        graph : `~lsst.pipe.base.QuantumGraph`
            Execution graph.

        Raises
        ------
        Exception
            Raised if ``checkExisting`` is ``True`` but versions are not
            compatible.
        """
        packages = Packages.fromSystem()
        datasetType = "packages"
        oldPackages = self.butler.get(datasetType, {}) if self.skipExisting else None
        if oldPackages is not None:
            # Note that because we can only detect python modules that have been imported, the stored
            # list of products may be more or less complete than what we have now. What's important is
            # that the products that are in common have the same version.
            diff = packages.difference(oldPackages)
            if diff:
                versions_str = "; ".join(f"{pkg}: {diff[pkg][1]} vs {diff[pkg][0]}" for pkg in diff)
                raise TypeError(f"Package versions mismatch: ({versions_str})")
            # Update the old set of packages in case we have more packages that haven't been persisted.
            extra = packages.extra(oldPackages)
            if extra:
                oldPackages.update(packages)
                self.butler.put(oldPackages, datasetType, {})
        else:
            self.butler.put(packages, datasetType, {})