lsst.daf.persistence  14.0-15-gd412284
butlerSubset.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #
4 # LSST Data Management System
5 # Copyright 2008, 2009, 2010 LSST Corporation.
6 #
7 # This product includes software developed by the
8 # LSST Project (http://www.lsst.org/).
9 #
10 # This program is free software: you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation, either version 3 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
19 #
20 # You should have received a copy of the LSST License Statement and
21 # the GNU General Public License along with this program. If not,
22 # see <http://www.lsstcorp.org/LegalNotices/>.
23 #
24 
25 # -*- python -*-
26 
27 """This module defines the ButlerSubset class and the ButlerDataRefs contained
28 within it as well as an iterator over the subset."""
29 from builtins import next
30 from builtins import range
31 from builtins import object
32 
33 from . import DataId
34 
35 
class ButlerSubset(object):
    """Container of data ids from which ButlerDataRefs are produced.

    A ButlerSubset represents a collection of data ids that can be used to
    obtain datasets of the type used when creating the collection or a
    compatible dataset type. It can be thought of as the result of a query
    for datasets matching a partial data id.

    The ButlerDataRefs are generated at a specified level of the data id
    hierarchy. If that is not the level at which datasets are specified, the
    ButlerDataRef.subItems() method may be used to dive further into the
    ButlerDataRefs.

    ButlerSubsets should generally be created using Butler.subset().

    This mechanism replaces the creation of butlers using partial dataIds.

    Public methods:

    __init__(self, butler, datasetType, level, dataId)

    __len__(self)

    __iter__(self)

    """

    def __init__(self, butler, datasetType, level, dataId):
        """
        Create a ButlerSubset by querying a butler for data ids matching a
        given partial data id for a given dataset type at a given hierarchy
        level.

        The matching data ids are stored in ``self.cache``; nothing is
        queried when the butler reports no keys for the level (cache stays
        empty) or when ``dataId`` already contains every key (cache holds
        ``dataId`` itself).

        @param butler (Butler)  butler that is being queried.
        @param datasetType (str)  the type of dataset to query.
        @param level (str)  the hierarchy level to descend to. if empty string will look up the default
                            level.
        @param dataId (dict)  the (partial or complete) data id.
        """
        self.butler = butler
        self.datasetType = datasetType
        self.dataId = DataId(dataId)
        self.cache = []
        self.level = level

        # Take the tag from the normalized DataId rather than the raw
        # argument: the raw argument may be a plain dict, which has no
        # ``tag`` attribute. ButlerDataRef.subLevels() reads the tag from
        # this same attribute.
        keys = self.butler.getKeys(datasetType, level, tag=self.dataId.tag)
        if keys is None:
            return
        fmt = list(keys.keys())

        # Don't query if we already have a complete dataId
        if all(key in dataId for key in fmt):
            self.cache.append(dataId)
            return

        idTuples = butler.queryMetadata(self.datasetType, fmt, self.dataId)
        for idTuple in idTuples:
            tempId = dict(self.dataId)
            if len(fmt) == 1:
                # With a single key the query result is used as the value
                # itself, not as a sequence to index into.
                tempId[fmt[0]] = idTuple
            else:
                for i, key in enumerate(fmt):
                    tempId[key] = idTuple[i]
            self.cache.append(tempId)

    def __repr__(self):
        return "ButlerSubset(butler=%s, datasetType=%s, dataId=%s, cache=%s, level=%s)" % (
            self.butler, self.datasetType, self.dataId, self.cache, self.level)

    def __len__(self):
        """
        Number of ButlerDataRefs in the ButlerSubset.

        @returns (int)
        """

        return len(self.cache)

    def __iter__(self):
        """
        Iterator over the ButlerDataRefs in the ButlerSubset.

        @returns (ButlerSubsetIterator)
        """

        return ButlerSubsetIterator(self)
127 
128 
class ButlerSubsetIterator(object):
    """
    Iterator that walks the cached data ids of a ButlerSubset and wraps
    each one in a ButlerDataRef bound to that subset.
    """

    def __init__(self, butlerSubset):
        # Keep the owning subset so each produced ButlerDataRef can refer
        # back to it.
        self.butlerSubset = butlerSubset
        self.iter = iter(butlerSubset.cache)

    def __iter__(self):
        # An iterator is its own iterable.
        return self

    def __next__(self):
        # StopIteration from the underlying iterator ends the iteration.
        nextDataId = next(self.iter)
        return ButlerDataRef(self.butlerSubset, nextDataId)
143 
144 
class ButlerDataRef(object):
    """
    A ButlerDataRef is a reference to a potential dataset or group of datasets
    that is portable between compatible dataset types. As such, it can be
    used to create or retrieve datasets.

    ButlerDataRefs are (conceptually) created as elements of a ButlerSubset by
    Butler.subset(). They are initially specific to the dataset type passed
    to that call, but they may be used with any other compatible dataset type.
    Dataset type compatibility must be determined externally (or by trial and
    error).

    ButlerDataRefs may be created at any level of a data identifier hierarchy.
    If the level is not one at which datasets exist, a ButlerSubset
    with lower-level ButlerDataRefs can be created using
    ButlerDataRef.subItems().

    Public methods:

    get(self, datasetType=None, **rest)

    put(self, obj, datasetType=None, doBackup=False, **rest)

    getUri(self, datasetType=None, write=False, **rest)

    subLevels(self)

    subItems(self, level=None)

    datasetExists(self, datasetType=None, write=False, **rest)

    getButler(self)
    """

    def __init__(self, butlerSubset, dataId):
        """
        For internal use only. ButlerDataRefs should only be created by
        ButlerSubset and ButlerSubsetIterator.
        """

        self.butlerSubset = butlerSubset
        self.dataId = dataId

    def __repr__(self):
        return 'ButlerDataRef(butlerSubset=%s, dataId=%s)' % (self.butlerSubset, self.dataId)

    def _resolveDatasetType(self, datasetType):
        """Return datasetType, or the owning subset's dataset type if None."""
        if datasetType is None:
            return self.butlerSubset.datasetType
        return datasetType

    def get(self, datasetType=None, **rest):
        """
        Retrieve a dataset of the given type (or the type used when creating
        the ButlerSubset, if None) as specified by the ButlerDataRef.

        @param datasetType (str)  dataset type to retrieve.
        @param **rest             keyword arguments with data identifiers
        @returns object corresponding to the given dataset type.
        """
        datasetType = self._resolveDatasetType(datasetType)
        return self.butlerSubset.butler.get(datasetType, self.dataId, **rest)

    def put(self, obj, datasetType=None, doBackup=False, **rest):
        """
        Persist a dataset of the given type (or the type used when creating
        the ButlerSubset, if None) as specified by the ButlerDataRef.

        @param obj                object to persist.
        @param datasetType (str)  dataset type to persist.
        @param doBackup           if True, rename existing instead of overwriting
        @param **rest             keyword arguments with data identifiers

        WARNING: Setting doBackup=True is not safe for parallel processing, as it
        may be subject to race conditions.
        """

        datasetType = self._resolveDatasetType(datasetType)
        self.butlerSubset.butler.put(obj, datasetType, self.dataId, doBackup=doBackup, **rest)

    def getUri(self, datasetType=None, write=False, **rest):
        """Return the URI for a dataset

        .. warning:: This is intended only for debugging. The URI should
        never be used for anything other than printing.

        .. note:: In the event there are multiple URIs, we return only
        the first.

        .. note:: getUri() does not currently support composite datasets.

        Parameters
        ----------
        datasetType : `str`, optional
            The dataset type of interest.
        write : `bool`, optional
            Return the URI for writing?
        rest : `dict`, optional
            Keyword arguments for the data id.

        Returns
        -------
        uri : `str`
            URI for dataset
        """

        datasetType = self._resolveDatasetType(datasetType)
        return self.butlerSubset.butler.getUri(datasetType, self.dataId, write=write, **rest)

    def subLevels(self):
        """
        Return the keys of the hierarchy levels below this ButlerDataRef.

        Computed as the keys the butler reports for the dataset type with no
        level given, minus the keys it reports at the subset's own level.

        @returns (set) level keys."""

        subset = self.butlerSubset
        allKeys = subset.butler.getKeys(
            subset.datasetType,
            tag=subset.dataId.tag).keys()
        keysAtLevel = subset.butler.getKeys(
            subset.datasetType,
            subset.level,
            tag=subset.dataId.tag).keys()
        return set(allKeys) - set(keysAtLevel)

    def subItems(self, level=None):
        """
        Generate a ButlerSubset at a lower level of the hierarchy than this
        ButlerDataRef, using it as a partial data id. If level is None, a
        default lower level for the original ButlerSubset level and dataset
        type is used.

        As currently implemented, the default sublevels for all the
        repositories used by this Butler instance must match for the Butler to
        be able to select a default sublevel to get the subset.

        @param level (str)   the hierarchy level to descend to.
        @returns (ButlerSubset) resulting from the lower-level query or () if
                                there is no lower level.
        @throws RuntimeError if the repositories report more than one
                             distinct default sublevel.
        """

        subset = self.butlerSubset
        if level is None:
            levelSet = {
                repoData.repo._mapper.getDefaultSubLevel(subset.level)
                for repoData in subset.butler._repos.all()
            }
            if len(levelSet) > 1:
                raise RuntimeError(
                    "Support for multiple levels not implemented.")
            # With no repositories the set is empty; treat that as "no lower
            # level" instead of letting set.pop() raise KeyError.
            level = levelSet.pop() if levelSet else None
            if level is None:
                return ()
        return subset.butler.subset(subset.datasetType, level, self.dataId)

    def datasetExists(self, datasetType=None, write=False, **rest):
        """
        Determine if a dataset exists of the given type (or the type used when
        creating the ButlerSubset, if None) as specified by the ButlerDataRef.

        @param datasetType (str) dataset type to check.
        @param write (bool) if True, search only in output repositories
        @param **rest           keywords arguments with data identifiers
        @returns bool
        """
        datasetType = self._resolveDatasetType(datasetType)
        return self.butlerSubset.butler.datasetExists(
            datasetType, self.dataId, write=write, **rest)

    def getButler(self):
        """
        Return the butler associated with this data reference.
        """
        return self.butlerSubset.butler
def __init__(self, butlerSubset, dataId)
def __init__(self, butler, datasetType, level, dataId)
Definition: butlerSubset.py:63
def put(self, obj, datasetType=None, doBackup=False, **rest)
def datasetExists(self, datasetType=None, write=False, **rest)
def getUri(self, datasetType=None, write=False, **rest)
def get(self, datasetType=None, **rest)