lsst.daf.persistence  14.0-14-g87d16e8+6
butlerSubset.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #
4 # LSST Data Management System
5 # Copyright 2008, 2009, 2010 LSST Corporation.
6 #
7 # This product includes software developed by the
8 # LSST Project (http://www.lsst.org/).
9 #
10 # This program is free software: you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation, either version 3 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
19 #
20 # You should have received a copy of the LSST License Statement and
21 # the GNU General Public License along with this program. If not,
22 # see <http://www.lsstcorp.org/LegalNotices/>.
23 #
24 
25 # -*- python -*-
26 
27 """This module defines the ButlerSubset class and the ButlerDataRefs contained
28 within it as well as an iterator over the subset."""
29 from builtins import next
30 from builtins import range
31 from builtins import object
32 
33 from . import DataId
34 
35 
class ButlerSubset(object):
    """ButlerSubset is a container for ButlerDataRefs. It represents a
    collection of data ids that can be used to obtain datasets of the type
    used when creating the collection or a compatible dataset type. It can be
    thought of as the result of a query for datasets matching a partial data
    id.

    The ButlerDataRefs are generated at a specified level of the data id
    hierarchy. If that is not the level at which datasets are specified, the
    ButlerDataRef.subItems() method may be used to dive further into the
    ButlerDataRefs.

    ButlerSubsets should generally be created using Butler.subset().

    This mechanism replaces the creation of butlers using partial dataIds.

    Public methods:

    __init__(self, butler, datasetType, level, dataId)

    __len__(self)

    __iter__(self)

    Attributes set by __init__: butler, datasetType, dataId (wrapped in a
    DataId), level, and cache (the list of data ids that iteration walks).
    """

    def __init__(self, butler, datasetType, level, dataId):
        """
        Create a ButlerSubset by querying a butler for data ids matching a
        given partial data id for a given dataset type at a given hierarchy
        level.

        If butler.getKeys() returns None the subset is left empty. If dataId
        already contains every key reported for the level, it becomes the
        only cached entry and the butler is not queried.

        @param butler (Butler) butler that is being queried.
        @param datasetType (str) the type of dataset to query.
        @param level (str) the hierarchy level to descend to. If empty string
                     will look up the default level.
        @param dataId (dict or DataId) the (partial or complete) data id.
        """
        self.butler = butler
        self.datasetType = datasetType
        self.dataId = DataId(dataId)
        self.cache = []
        self.level = level

        # Take the tag from the normalized DataId rather than the raw
        # argument, so a plain dict (which has no 'tag' attribute) is
        # accepted as documented.
        # NOTE(review): assumes DataId(dataId) carries over the tag of a
        # DataId argument - confirm against the DataId implementation.
        keys = self.butler.getKeys(datasetType, level, tag=self.dataId.tag)
        if keys is None:
            return
        fmt = list(keys.keys())

        # Don't query if we already have a complete dataId
        if all(key in dataId for key in fmt):
            self.cache.append(dataId)
            return

        idTuples = butler.queryMetadata(self.datasetType, fmt, self.dataId)
        for idTuple in idTuples:
            tempId = dict(self.dataId)
            if len(fmt) == 1:
                # With a single key the result is stored as the value itself
                # (presumably queryMetadata returns bare values, not
                # 1-tuples, in that case).
                tempId[fmt[0]] = idTuple
            else:
                for i, key in enumerate(fmt):
                    tempId[key] = idTuple[i]
            self.cache.append(tempId)

    def __repr__(self):
        return "ButlerSubset(butler=%s, datasetType=%s, dataId=%s, cache=%s, level=%s)" % (
            self.butler, self.datasetType, self.dataId, self.cache, self.level)

    def __len__(self):
        """
        Number of ButlerDataRefs in the ButlerSubset.

        @returns (int)
        """
        return len(self.cache)

    def __iter__(self):
        """
        Iterator over the ButlerDataRefs in the ButlerSubset.

        @returns (ButlerSubsetIterator)
        """
        return ButlerSubsetIterator(self)
127 
128 
class ButlerSubsetIterator(object):
    """
    Iterator over a ButlerSubset that hands back one ButlerDataRef for each
    data id held in the subset's cache.
    """

    def __init__(self, butlerSubset):
        # Keep the owning subset so every ButlerDataRef can refer back to it,
        # and walk its cached data ids with a plain iterator.
        self.butlerSubset = butlerSubset
        self.iter = iter(butlerSubset.cache)

    def __iter__(self):
        return self

    def __next__(self):
        # StopIteration raised by the underlying iterator propagates and
        # ends the iteration.
        nextDataId = next(self.iter)
        return ButlerDataRef(self.butlerSubset, nextDataId)
143 
144 
class ButlerDataRef(object):
    """
    A ButlerDataRef is a reference to a potential dataset or group of datasets
    that is portable between compatible dataset types. As such, it can be
    used to create or retrieve datasets.

    ButlerDataRefs are (conceptually) created as elements of a ButlerSubset by
    Butler.subset(). They are initially specific to the dataset type passed
    to that call, but they may be used with any other compatible dataset type.
    Dataset type compatibility must be determined externally (or by trial and
    error).

    ButlerDataRefs may be created at any level of a data identifier hierarchy.
    If the level is not one at which datasets exist, a ButlerSubset
    with lower-level ButlerDataRefs can be created using
    ButlerDataRef.subItems().

    Public methods:

    get(self, datasetType=None, **rest)

    put(self, obj, datasetType=None, doBackup=False, **rest)

    subLevels(self)

    subItems(self, level=None)

    datasetExists(self, datasetType=None, write=False, **rest)

    getButler(self)
    """

    def __init__(self, butlerSubset, dataId):
        """
        For internal use only. ButlerDataRefs should only be created by
        ButlerSubset and ButlerSubsetIterator.
        """
        self.butlerSubset = butlerSubset
        self.dataId = dataId

    def __repr__(self):
        return 'ButlerDataRef(butlerSubset=%s, dataId=%s)' % (self.butlerSubset, self.dataId)

    def get(self, datasetType=None, **rest):
        """
        Retrieve a dataset of the given type (or the type used when creating
        the ButlerSubset, if None) as specified by the ButlerDataRef.

        @param datasetType (str) dataset type to retrieve.
        @param **rest keyword arguments with data identifiers
        @returns object corresponding to the given dataset type.
        """
        if datasetType is None:
            datasetType = self.butlerSubset.datasetType
        return self.butlerSubset.butler.get(datasetType, self.dataId, **rest)

    def put(self, obj, datasetType=None, doBackup=False, **rest):
        """
        Persist a dataset of the given type (or the type used when creating
        the ButlerSubset, if None) as specified by the ButlerDataRef.

        @param obj object to persist.
        @param datasetType (str) dataset type to persist.
        @param doBackup if True, rename existing instead of overwriting
        @param **rest keyword arguments with data identifiers

        WARNING: Setting doBackup=True is not safe for parallel processing, as it
        may be subject to race conditions.
        """
        if datasetType is None:
            datasetType = self.butlerSubset.datasetType
        self.butlerSubset.butler.put(obj, datasetType, self.dataId, doBackup=doBackup, **rest)

    def subLevels(self):
        """
        Return the keys of the hierarchy levels lower than this
        ButlerDataRef: the keys the butler reports for the dataset type as a
        whole, minus the keys it reports for the subset's level.

        @returns (set) set of strings with level keys.
        """
        subset = self.butlerSubset
        tag = subset.dataId.tag
        allKeys = subset.butler.getKeys(subset.datasetType, tag=tag)
        levelKeys = subset.butler.getKeys(subset.datasetType, subset.level, tag=tag)
        # getKeys() may return None (ButlerSubset.__init__ guards against
        # it); treat that as "no keys" instead of failing on None.keys().
        allKeySet = set(allKeys.keys()) if allKeys is not None else set()
        levelKeySet = set(levelKeys.keys()) if levelKeys is not None else set()
        return allKeySet - levelKeySet

    def subItems(self, level=None):
        """
        Generate a ButlerSubset at a lower level of the hierarchy than this
        ButlerDataRef, using it as a partial data id. If level is None, a
        default lower level for the original ButlerSubset level and dataset
        type is used.

        As currently implemented, the default sublevels for all the
        repositories used by this Butler instance must match for the Butler to
        be able to select a default sublevel to get the subset.

        @param level (str) the hierarchy level to descend to.
        @returns (ButlerSubset) resulting from the lower-level query or () if
        there is no lower level.
        @throws RuntimeError if the repositories disagree on the default
        sublevel.
        """
        if level is None:
            butler = self.butlerSubset.butler
            levelSet = {
                repoData.repo._mapper.getDefaultSubLevel(self.butlerSubset.level)
                for repoData in butler._repos.all()
            }
            if len(levelSet) > 1:
                raise RuntimeError(
                    "Support for multiple levels not implemented.")
            if not levelSet:
                # No repository supplied a default sublevel; report "no lower
                # level" instead of raising KeyError from pop() on an empty
                # set.
                return ()
            level = levelSet.pop()
            if level is None:
                return ()
        return self.butlerSubset.butler.subset(self.butlerSubset.datasetType,
                                               level, self.dataId)

    def datasetExists(self, datasetType=None, write=False, **rest):
        """
        Determine if a dataset exists of the given type (or the type used when
        creating the ButlerSubset, if None) as specified by the ButlerDataRef.

        @param datasetType (str) dataset type to check.
        @param write (bool) if True, search only in output repositories
        @param **rest keywords arguments with data identifiers
        @returns bool
        """
        if datasetType is None:
            datasetType = self.butlerSubset.datasetType
        return self.butlerSubset.butler.datasetExists(
            datasetType, self.dataId, write=write, **rest)

    def getButler(self):
        """
        Return the butler associated with this data reference.
        """
        return self.butlerSubset.butler
def __init__(self, butlerSubset, dataId)
def __init__(self, butler, datasetType, level, dataId)
Definition: butlerSubset.py:63
def put(self, obj, datasetType=None, doBackup=False, **rest)
def datasetExists(self, datasetType=None, write=False, **rest)
def get(self, datasetType=None, **rest)