lsst.daf.persistence  13.0-28-gf70af18
 All Classes Namespaces Files Functions Variables Typedefs Friends Macros
repository.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #
4 # LSST Data Management System
5 # Copyright 2016 LSST Corporation.
6 #
7 # This product includes software developed by the
8 # LSST Project (http://www.lsst.org/).
9 #
10 # This program is free software: you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation, either version 3 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
19 #
20 # You should have received a copy of the LSST License Statement and
21 # the GNU General Public License along with this program. If not,
22 # see <http://www.lsstcorp.org/LegalNotices/>.
23 #
24 from past.builtins import basestring
25 from builtins import object
26 
27 import copy
28 import inspect
29 import os
30 
31 from lsst.daf.persistence import Storage, listify, doImport, Policy
32 
33 
class RepositoryArgs(object):

    """Arguments passed into a Butler that are used to instantiate a repository. This includes arguments that
    can be used to create a new repository (cfgRoot, root, mapper, mapperArgs, policy) and are persisted along
    with the new repository's configuration file. These arguments can also describe how a new or existing
    repository are to be used (cfgRoot or root, tags, mode). When indicating an existing repository it is
    better to not specify unnecessary arguments, as if they conflict with the persisted repository
    configuration then a RuntimeError will be raised during Butler init.

    A RepositoryArgs class can be initialized from a dict, if the first argument to the initializer is a dict.

    Parameters
    ----------
    cfgRoot : URI or dict, optional
        If dict, the initializer is re-called with the expanded dict (all other arguments are ignored).
        If URI, this is the location where the RepositoryCfg should be found (existing repo) or put (new repo)
    root : URI, optional
        If different than cfgRoot then this is the location where the repository should exist. A RepositoryCfg
        will be put at cfgRoot and its root will be a path to root.
    mapper : string or class object, optional
        The mapper to use with this repository. If string, should refer an importable object. If class object,
        should be a mapper to be instantiated by the Butler during Butler init.
    mapperArgs : dict, optional
        Stored unchanged on the instance; presumably the keyword arguments used to instantiate the mapper.
    tags : list or object, optional
        One or more unique identifiers to uniquely identify this repository and its parents when performing
        Butler.get.
    mode : string, optional
        should be one of 'r', 'w', or 'rw', for 'read', 'write', or 'read-write'. Can be omitted; input
        repositories will default to 'r', output repositories will default to 'w'. 'w' on an input repository
        will raise a RuntimeError during Butler init, although 'rw' works and is equivalent to 'r'. Output
        repositories may be 'w' or 'rw'; 'r' for an output repository will raise a RuntimeError during Butler
        init.
    policy : optional
        If not None, it is wrapped in a Policy and stored; otherwise None is stored.
    """

    def __init__(self, cfgRoot=None, root=None, mapper=None, mapperArgs=None, tags=None,
                 mode=None, policy=None):
        # Anything that can be expanded with ** exposes keys(); strings and None do not. Testing for it
        # explicitly (instead of catching TypeError around the recursive call) lets a genuine TypeError
        # from a malformed dict, e.g. an unknown key, reach the caller instead of being masked.
        if hasattr(cfgRoot, 'keys'):
            self.__init__(**cfgRoot)
            return
        # Trailing path separators are stripped before the path is made absolute relative to the cwd.
        self._root = Storage.absolutePath(os.getcwd(), root.rstrip(os.sep)) if root else root
        self._cfgRoot = Storage.absolutePath(os.getcwd(), cfgRoot.rstrip(os.sep)) if cfgRoot else cfgRoot
        self._mapper = mapper
        self.mapperArgs = mapperArgs
        self.tags = set(listify(tags))
        self.mode = mode
        self.policy = Policy(policy) if policy is not None else None

    def __repr__(self):
        return "%s(root=%r, cfgRoot=%r, mapper=%r, mapperArgs=%r, tags=%s, mode=%r, policy=%s)" % (
            self.__class__.__name__, self.root, self._cfgRoot, self._mapper, self.mapperArgs, self.tags,
            self.mode, self.policy)

    @property
    def mapper(self):
        """The mapper (string, class object, or None) assigned to these args."""
        return self._mapper

    @mapper.setter
    def mapper(self, mapper):
        # An already-set (truthy) mapper may only be replaced after it has been cleared with None.
        if mapper is not None and self._mapper:
            raise RuntimeError("Explicity clear mapper (set to None) before changing its value.")
        self._mapper = mapper

    @property
    def cfgRoot(self):
        """The cfgRoot, falling back to root when no cfgRoot was given."""
        return self._cfgRoot if self._cfgRoot is not None else self._root

    @property
    def root(self):
        """The root, falling back to cfgRoot when no root was given."""
        return self._root if self._root is not None else self._cfgRoot

    @staticmethod
    def inputRepo(storage, tags=None):
        """Create RepositoryArgs with `storage` as the cfgRoot and the given tags."""
        # Keywords are required: the second positional parameter of __init__ is root, not tags.
        return RepositoryArgs(cfgRoot=storage, tags=tags)

    @staticmethod
    def outputRepo(storage, mapper=None, mapperArgs=None, tags=None, mode=None):
        """Create RepositoryArgs with `storage` as the cfgRoot and the given mapper, mapperArgs, tags, mode."""
        # Keywords are required: this signature does not line up positionally with __init__ (which has
        # root as its second parameter).
        return RepositoryArgs(cfgRoot=storage, mapper=mapper, mapperArgs=mapperArgs, tags=tags, mode=mode)

    def tag(self, tag):
        """add a tag to the repository cfg

        Parameters
        ----------
        tag : string, iterable, or object
            A string is added as a single tag. For anything else each item is added as a tag if it is
            iterable, otherwise the object itself is added.
        """
        if isinstance(tag, basestring):
            self.tags.add(tag)
        else:
            try:
                self.tags.update(tag)
            except TypeError:
                # not iterable: treat it as a single tag
                self.tags.add(tag)
120 
121 
class Repository(object):
    """Represents a repository of persisted data and has methods to access that data.
    """

    def __init__(self, repoData):
        """Initialize a Repository with parameters input via RepoData.

        Parameters
        ----------
        repoData : RepoData
            Object that contains the parameters with which to init the Repository.
        """
        self._storage = Storage.makeFromURI(repoData.cfg.root)
        # Only write the cfg when it is dirty, the repository is not a V1 repository and the cfg origin
        # is not 'nested'.
        if repoData.cfg.dirty and not repoData.isV1Repository and repoData.cfgOrigin != 'nested':
            self._storage.putRepositoryCfg(repoData.cfg, repoData.cfgRoot)
        self._mapperArgs = repoData.cfg.mapperArgs  # keep for reference in matchesArgs
        self._initMapper(repoData)

    def _initMapper(self, repoData):
        '''Initialize and keep the mapper in a member var.

        Parameters
        ----------
        repoData : RepoData
            The RepoData with the properties of this Repository.
        '''

        # rule: If mapper is:
        # - an object: use it as the mapper.
        # - a string: import it and instantiate it with mapperArgs
        # - a class object: instantiate it with mapperArgs
        mapper = repoData.cfg.mapper

        # if mapper is a string, import it:
        if isinstance(mapper, basestring):
            mapper = doImport(mapper)
        # now if mapper is a class type (not instance), instantiate it:
        if inspect.isclass(mapper):
            # shallow copy so that adding 'root' below does not modify the cfg's mapperArgs
            mapperArgs = copy.copy(repoData.cfg.mapperArgs)
            if mapperArgs is None:
                mapperArgs = {}
            if 'root' not in mapperArgs:
                mapperArgs['root'] = repoData.cfg.root
            mapper = mapper(parentRegistry=repoData.parentRegistry,
                            repositoryCfg=repoData.cfg,
                            **mapperArgs)
        self._mapper = mapper

    def _storageFor(self, butlerLocation):
        """Return the storage of butlerLocation if it has a (truthy) one, else this repository's storage."""
        return butlerLocation.getStorage() or self._storage

    def __repr__(self):
        # Report only attributes that this class actually sets (in __init__ and _initMapper).
        return '%s(storage=%s, mapper=%s, mapperArgs=%s)' % (
            self.__class__.__name__, self._storage, self._mapper, self._mapperArgs)

    # todo want a way to make a repository read-only
    def write(self, butlerLocation, obj):
        """Write a dataset to Storage.

        :param butlerLocation: Contains the details needed to find the desired dataset.
        :param obj: The dataset to be written.
        :return: Whatever the storage's write method returns.
        """
        return self._storageFor(butlerLocation).write(butlerLocation, obj)

    def read(self, butlerLocation):
        """Read a dataset from Storage.

        :param butlerLocation: Contains the details needed to find the desired dataset.
        :return: An instance of the dataset requested by butlerLocation.
        """
        return self._storageFor(butlerLocation).read(butlerLocation)

    #################
    # Mapper Access #

    def mappers(self):
        """Get the mapper of this repository, as a one-item tuple."""
        return (self._mapper, )

    def getRegistry(self):
        """Get the registry from the mapper

        Returns
        -------
        Registry or None
            The registry from the mapper or None if the mapper does not have one.
        """
        if self._mapper is None:
            return None
        return self._mapper.getRegistry()

    def getKeys(self, *args, **kwargs):
        """
        Get the keys available in the repository/repositories.
        :param args: arguments to be passed on to mapper.getKeys
        :param kwargs: keyword arguments to be passed on to mapper.getKeys
        :return: A dict of {key:valueType}, or None if there is no mapper.
        """
        # todo: getKeys is not in the mapper API
        if self._mapper is None:
            return None
        return self._mapper.getKeys(*args, **kwargs)

    def map(self, *args, **kwargs):
        """Find a butler location for the given arguments.
        See mapper.map for more information about args and kwargs.

        :param args: arguments to be passed on to mapper.map
        :param kwargs: keyword arguments to be passed on to mapper.map
        :return: The type of item is dependent on the mapper being used but is typically a ButlerLocation.
                 None if the mapper returns None.
        :raises RuntimeError: if no mapper is assigned to this Repository.
        """
        if self._mapper is None:
            raise RuntimeError("No mapper assigned to Repository")
        loc = self._mapper.map(*args, **kwargs)
        if loc is None:
            return None
        # let the location know which repository it was mapped by
        loc.setRepository(self)
        return loc

    def queryMetadata(self, *args, **kwargs):
        """Gets possible values for keys given a partial data id.

        See mapper documentation for more explanation about queryMetadata.

        :param args: arguments to be passed on to mapper.queryMetadata
        :param kwargs: keyword arguments to be passed on to mapper.queryMetadata
        :return: The type of item is dependent on the mapper being used but is typically a set that contains
                 available values for the keys in the format input argument. None if there is no mapper.
        """
        if self._mapper is None:
            return None
        return self._mapper.queryMetadata(*args, **kwargs)

    def backup(self, *args, **kwargs):
        """Perform mapper.backup.

        See mapper.backup for more information about args and kwargs.

        :param args: arguments to be passed on to mapper.backup
        :param kwargs: keyword arguments to be passed on to mapper.backup
        :return: None
        """
        if self._mapper is None:
            return None
        self._mapper.backup(*args, **kwargs)

    def getMapperDefaultLevel(self):
        """Get the default level of the mapper.

        This is typically used if no level is passed into butler methods that call repository.getKeys and/or
        repository.queryMetadata. There is a bug in that code because it gets the default level from this
        repository but then uses that value when searching all repositories. If this and other repositories
        have dissimilar data, the default level value will be nonsensical. A good example of this issue is in
        Butler.subset; it needs refactoring.

        :return: The mapper's default level, or None if there is no mapper.
        """
        if self._mapper is None:
            return None
        return self._mapper.getDefaultLevel()

    def exists(self, location):
        """Check if location exists in storage.

        Parameters
        ----------
        location : ButlerLocation
            Describes a location in storage to look for.

        Returns
        -------
        bool
            True if location exists, False if not.
        """
        return self._storageFor(location).exists(location)