lsst.daf.persistence  14.0-11-g0362164
repository.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #
4 # LSST Data Management System
5 # Copyright 2016 LSST Corporation.
6 #
7 # This product includes software developed by the
8 # LSST Project (http://www.lsst.org/).
9 #
10 # This program is free software: you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation, either version 3 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
19 #
20 # You should have received a copy of the LSST License Statement and
21 # the GNU General Public License along with this program. If not,
22 # see <http://www.lsstcorp.org/LegalNotices/>.
23 #
24 from past.builtins import basestring
25 from builtins import object
26 
27 import copy
28 import inspect
29 import os
30 
31 from lsst.daf.persistence import Storage, listify, doImport, Policy
32 
33 
class RepositoryArgs(object):
    """Arguments passed into a Butler that are used to instantiate a repository.

    This includes arguments that can be used to create a new repository (cfgRoot, root, mapper, mapperArgs,
    policy) and are persisted along with the new repository's configuration file. These arguments can also
    describe how a new or existing repository is to be used (cfgRoot or root, tags, mode). When indicating an
    existing repository it is better to not specify unnecessary arguments, as if they conflict with the
    persisted repository configuration then a RuntimeError will be raised during Butler init.

    A RepositoryArgs class can be initialized from a dict, if the first argument to the initializer is a dict.

    Parameters
    ----------
    cfgRoot : URI or dict, optional
        If dict, the initializer is re-called with the expanded dict.
        If URI, this is the location where the RepositoryCfg should be found (existing repo) or put (new repo)
    root : URI, optional
        If different than cfgRoot then this is the location where the repository should exist. A RepositoryCfg
        will be put at cfgRoot and its root will be a path to root.
    mapper : string or class object, optional
        The mapper to use with this repository. If string, should refer an importable object. If class object,
        should be a mapper to be instantiated by the Butler during Butler init.
    mapperArgs : dict
        Arguments & values to pass to the mapper when initializing it.
    tags : list or object, optional
        One or more unique identifiers to uniquely identify this repository and its parents when performing
        Butler.get.
    mode : string, optional
        should be one of 'r', 'w', or 'rw', for 'read', 'write', or 'read-write'. Can be omitted; input
        repositories will default to 'r', output repositories will default to 'w'. 'w' on an input repository
        will raise a RuntimeError during Butler init, although 'rw' works and is equivalent to 'r'. Output
        repositories may be 'w' or 'rw'; 'r' for an output repository will raise a RuntimeError during Butler
        init.
    policy : dict
        Policy associated with this repository, overrides all other policy data (which may be loaded from
        policies in derived packages).

    Raises
    ------
    TypeError
        If cfgRoot is a dict containing a key that is not an initializer argument.
    """

    def __init__(self, cfgRoot=None, root=None, mapper=None, mapperArgs=None, tags=None,
                 mode=None, policy=None):
        # Dict-style init: anything that supports '**' expansion (has keys() and __getitem__) is expanded
        # into keyword arguments. Testing for the mapping protocol explicitly, rather than catching the
        # TypeError that '**' raises on a non-mapping, lets a genuine TypeError from the nested call (for
        # example an unknown key) reach the caller instead of being swallowed.
        if hasattr(cfgRoot, 'keys') and hasattr(cfgRoot, '__getitem__'):
            self.__init__(**cfgRoot)
            return
        cwd = os.getcwd()
        # Falsy locations (None or '') are stored unchanged; anything else has trailing path separators
        # stripped and is made absolute relative to the current working directory.
        self._root = Storage.absolutePath(cwd, root.rstrip(os.sep)) if root else root
        self._cfgRoot = Storage.absolutePath(cwd, cfgRoot.rstrip(os.sep)) if cfgRoot else cfgRoot
        self._mapper = mapper
        self.mapperArgs = mapperArgs
        self.tags = set(listify(tags))
        self.mode = mode
        self.policy = Policy(policy) if policy is not None else None

    def __repr__(self):
        return "%s(root=%r, cfgRoot=%r, mapper=%r, mapperArgs=%r, tags=%s, mode=%r, policy=%s)" % (
            self.__class__.__name__, self.root, self._cfgRoot, self._mapper, self.mapperArgs, self.tags,
            self.mode, self.policy)

    @property
    def mapper(self):
        """The mapper (string, class object, or instance) for this repository, or None."""
        return self._mapper

    @mapper.setter
    def mapper(self, mapper):
        # An already-set (truthy) mapper may only be replaced after it has been explicitly reset to None.
        if mapper is not None and self._mapper:
            raise RuntimeError("Explicity clear mapper (set to None) before changing its value.")
        self._mapper = mapper

    @property
    def cfgRoot(self):
        """Location of the RepositoryCfg; falls back to root when no cfgRoot was given."""
        return self._cfgRoot if self._cfgRoot is not None else self._root

    @property
    def root(self):
        """Location of the repository; falls back to cfgRoot when no root was given."""
        return self._root if self._root is not None else self._cfgRoot

    @staticmethod
    def inputRepo(storage, tags=None):
        """Create a RepositoryArgs whose cfgRoot is `storage`, with optional tags.

        Parameters
        ----------
        storage : URI
            Passed to the initializer as cfgRoot.
        tags : list or object, optional
            Passed to the initializer as tags.

        Returns
        -------
        RepositoryArgs
        """
        # Keyword arguments are required here: the second positional parameter of __init__ is root.
        return RepositoryArgs(cfgRoot=storage, tags=tags)

    @staticmethod
    def outputRepo(storage, mapper=None, mapperArgs=None, tags=None, mode=None):
        """Create a RepositoryArgs whose cfgRoot is `storage`, with optional mapper, tags and mode.

        Parameters
        ----------
        storage : URI
            Passed to the initializer as cfgRoot.
        mapper : string or class object, optional
            Passed to the initializer as mapper.
        mapperArgs : dict, optional
            Passed to the initializer as mapperArgs.
        tags : list or object, optional
            Passed to the initializer as tags.
        mode : string, optional
            Passed to the initializer as mode.

        Returns
        -------
        RepositoryArgs
        """
        # Keyword arguments keep each value aligned with the initializer parameter of the same name.
        return RepositoryArgs(cfgRoot=storage, mapper=mapper, mapperArgs=mapperArgs, tags=tags, mode=mode)

    def tag(self, tag):
        """Add a tag, or each item of an iterable of tags, to the repository's tag set.

        Parameters
        ----------
        tag : string, iterable, or object
            A string is added as a single tag. Any other iterable contributes each of its items. A
            non-iterable object is added as a single tag.
        """
        if isinstance(tag, basestring):
            # Strings are iterable; add as one tag instead of one tag per character.
            self.tags.add(tag)
        else:
            try:
                self.tags.update(tag)
            except TypeError:
                # Not iterable: treat it as a single tag.
                self.tags.add(tag)
125 
126 
class Repository(object):
    """Represents a repository of persisted data and has methods to access that data.
    """

    def __init__(self, repoData):
        """Initialize a Repository with parameters input via RepoData.

        Parameters
        ----------
        repoData : RepoData
            Object that contains the parameters with which to init the Repository.
        """
        self._storage = Storage.makeFromURI(repoData.cfg.root)
        # Persist the cfg only when it is marked dirty, the repository is not a V1 repository, and the cfg
        # did not originate as a 'nested' cfg.
        if repoData.cfg.dirty and not repoData.isV1Repository and repoData.cfgOrigin != 'nested':
            self._storage.putRepositoryCfg(repoData.cfg, repoData.cfgRoot)
        self._mapperArgs = repoData.cfg.mapperArgs  # keep for reference in matchesArgs
        self._initMapper(repoData)

    def _initMapper(self, repoData):
        """Initialize and keep the mapper in a member var.

        Parameters
        ----------
        repoData : RepoData
            The RepoData with the properties of this Repository.
        """
        # rule: If mapper is:
        # - an object: use it as the mapper.
        # - a string: import it and instantiate it with mapperArgs
        # - a class object: instantiate it with mapperArgs
        mapper = repoData.cfg.mapper

        # if mapper is a string, import it:
        if isinstance(mapper, basestring):
            mapper = doImport(mapper)
        # now if mapper is a class type (not instance), instantiate it:
        if inspect.isclass(mapper):
            # Work on a shallow copy so adding 'root' below does not modify the cfg's mapperArgs.
            mapperArgs = copy.copy(repoData.cfg.mapperArgs)
            if mapperArgs is None:
                mapperArgs = {}
            if 'root' not in mapperArgs:
                mapperArgs['root'] = repoData.cfg.root
            mapper = mapper(parentRegistry=repoData.parentRegistry,
                            repositoryCfg=repoData.cfg,
                            **mapperArgs)
        self._mapper = mapper

    def __repr__(self):
        # Report only attributes that this class actually sets in __init__/_initMapper.
        return '%s(storage=%r, mapper=%r, mapperArgs=%r)' % (
            self.__class__.__name__, self._storage, self._mapper, self._mapperArgs)

    # todo want a way to make a repository read-only
    def write(self, butlerLocation, obj):
        """Write a dataset to Storage.

        The storage returned by butlerLocation.getStorage() is used when it is truthy; otherwise this
        repository's own storage is used.

        Parameters
        ----------
        butlerLocation : ButlerLocation
            Contains the details needed to find the desired dataset.
        obj : object
            The dataset to be written.

        Returns
        -------
        object
            Whatever the storage's write method returns.
        """
        butlerLocationStorage = butlerLocation.getStorage()
        if butlerLocationStorage:
            return butlerLocationStorage.write(butlerLocation, obj)
        else:
            return self._storage.write(butlerLocation, obj)

    def read(self, butlerLocation):
        """Read a dataset from Storage.

        The storage returned by butlerLocation.getStorage() is used when it is truthy; otherwise this
        repository's own storage is used.

        Parameters
        ----------
        butlerLocation : ButlerLocation
            Contains the details needed to find the desired dataset.

        Returns
        -------
        object
            An instance of the dataset requested by butlerLocation.
        """
        butlerLocationStorage = butlerLocation.getStorage()
        if butlerLocationStorage:
            return butlerLocationStorage.read(butlerLocation)
        else:
            return self._storage.read(butlerLocation)

    #################
    # Mapper Access #

    def mappers(self):
        """Get the mapper of this repository as a one-element tuple."""
        return (self._mapper, )

    def getRegistry(self):
        """Get the registry from the mapper

        Returns
        -------
        Registry or None
            The registry from the mapper or None if the mapper does not have one.
        """
        if self._mapper is None:
            return None
        return self._mapper.getRegistry()

    def getKeys(self, *args, **kwargs):
        """Get the keys available in the repository/repositories.

        Parameters
        ----------
        *args, **kwargs
            Passed on to mapper.getKeys.

        Returns
        -------
        dict or None
            A dict of {key:valueType}, or None if this repository has no mapper.
        """
        # todo: getKeys is not in the mapper API
        if self._mapper is None:
            return None
        keys = self._mapper.getKeys(*args, **kwargs)
        return keys

    def map(self, *args, **kwargs):
        """Find a butler location for the given arguments.

        See mapper.map for more information about args and kwargs.

        Parameters
        ----------
        *args, **kwargs
            Passed on to mapper.map.

        Returns
        -------
        object or None
            The type of item is dependent on the mapper being used but is typically a ButlerLocation. None
            if the mapper returned None.

        Raises
        ------
        RuntimeError
            If this repository has no mapper.
        """
        if self._mapper is None:
            raise RuntimeError("No mapper assigned to Repository")
        loc = self._mapper.map(*args, **kwargs)
        if loc is None:
            return None
        # Record this repository on the location before handing it back.
        loc.setRepository(self)
        return loc

    def queryMetadata(self, *args, **kwargs):
        """Gets possible values for keys given a partial data id.

        See mapper documentation for more explanation about queryMetadata.

        Parameters
        ----------
        *args, **kwargs
            Passed on to mapper.queryMetadata.

        Returns
        -------
        object or None
            The type of item is dependent on the mapper being used but is typically a set that contains
            available values for the keys in the format input argument. None if this repository has no
            mapper.
        """
        if self._mapper is None:
            return None
        ret = self._mapper.queryMetadata(*args, **kwargs)
        return ret

    def backup(self, *args, **kwargs):
        """Perform mapper.backup.

        See mapper.backup for more information about args and kwargs. Does nothing if this repository has
        no mapper.

        Parameters
        ----------
        *args, **kwargs
            Passed on to mapper.backup.

        Returns
        -------
        None
        """
        if self._mapper is None:
            return None
        self._mapper.backup(*args, **kwargs)

    def getMapperDefaultLevel(self):
        """Get the default level of the mapper.

        This is typically used if no level is passed into butler methods that call repository.getKeys and/or
        repository.queryMetadata. There is a bug in that code because it gets the default level from this
        repository but then uses that value when searching all repositories. If this and other repositories
        have dissimilar data, the default level value will be nonsensical. A good example of this issue is in
        Butler.subset; it needs refactoring.

        Returns
        -------
        object or None
            The value of mapper.getDefaultLevel(), or None if this repository has no mapper.
        """
        if self._mapper is None:
            return None
        return self._mapper.getDefaultLevel()

    def exists(self, location):
        """Check if location exists in storage.

        The storage returned by location.getStorage() is used when it is truthy; otherwise this
        repository's own storage is used.

        Parameters
        ----------
        location : ButlerLocation
            Describes a location in storage to look for.

        Returns
        -------
        bool
            True if location exists, False if not.
        """
        butlerLocationStorage = location.getStorage()
        if butlerLocationStorage:
            return butlerLocationStorage.exists(location)
        else:
            return self._storage.exists(location)