lsst.daf.persistence  13.0-32-g7b14ddd
 All Classes Namespaces Files Functions Variables Typedefs Friends Macros
posixStorage.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #
4 # LSST Data Management System
5 # Copyright 2016 LSST Corporation.
6 #
7 # This product includes software developed by the
8 # LSST Project (http://www.lsst.org/).
9 #
10 # This program is free software: you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation, either version 3 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
19 #
20 # You should have received a copy of the LSST License Statement and
21 # the GNU General Public License along with this program. If not,
22 # see <http://www.lsstcorp.org/LegalNotices/>.
23 #
import errno
import glob
import importlib
import os
import pickle
import shutil
import sys
import urllib.parse

from past.builtins import basestring

from . import (LogicalLocation, Persistence, Policy, StorageList,
               StorageInterface, Storage, ButlerLocation,
               NoRepositroyAtRoot, RepositoryCfg)
from lsst.log import Log
import lsst.pex.policy as pexPolicy
from .safeFileIo import SafeFilename, safeMakeDir
39 
40 
class PosixStorage(StorageInterface):
    """Defines the interface for a storage location on the local filesystem.

    Parameters
    ----------
    uri : string
        URI or path that is used as the storage location.
    create : bool
        If True a new repository will be created at the root location if it
        does not exist. If False then a new repository will not be created.

    Raises
    ------
    NoRepositroyAtRoot
        If create is False and a repository does not exist at the root
        specified by uri then NoRepositroyAtRoot is raised.
    """

    def __init__(self, uri, create):
        self.log = Log.getLogger("daf.persistence.butler")
        self.root = self._pathFromURI(uri)
        # An empty root (e.g. an empty URI path) is accepted as-is; only a
        # non-empty root that is missing triggers creation or an error.
        if self.root and not os.path.exists(self.root):
            if not create:
                raise NoRepositroyAtRoot("No repository at {}".format(uri))
            safeMakeDir(self.root)

        # Always use an empty Persistence policy until we can get rid of it
        persistencePolicy = pexPolicy.Policy()
        self.persistence = Persistence.getPersistence(persistencePolicy)

    def __repr__(self):
        return 'PosixStorage(root=%s)' % self.root

    @staticmethod
    def _pathFromURI(uri):
        """Get the path part of the URI"""
        return urllib.parse.urlparse(uri).path

    @staticmethod
    def _importPythonType(pythonType):
        """Resolve a python type that may be given as a dotted name.

        Parameters
        ----------
        pythonType : class object, string or None
            A type, or the fully qualified name ("package.module.Class") of
            a type.

        Returns
        -------
        class object or None
            pythonType itself when it is not a string (including None),
            otherwise the attribute named by the last dotted component,
            looked up on the imported package/module named by the rest.
        """
        if not isinstance(pythonType, basestring):
            return pythonType
        # import this pythonType dynamically
        pythonTypeTokenList = pythonType.split('.')
        importClassString = pythonTypeTokenList.pop().strip()
        importPackage = ".".join(pythonTypeTokenList)
        # __import__ with a non-empty fromlist returns the leaf module
        # rather than the top-level package.
        importType = __import__(importPackage, globals(), locals(), [importClassString], 0)
        return getattr(importType, importClassString)

    @staticmethod
    def relativePath(fromPath, toPath):
        """Get a relative path from a location to a location.

        Parameters
        ----------
        fromPath : string
            A path at which to start. It can be a relative path or an
            absolute path.
        toPath : string
            A target location. It can be a relative path or an absolute path.

        Returns
        -------
        string
            A relative path that describes the path from fromPath to toPath.
        """
        # Only fromPath has its symlinks resolved; toPath is used as given.
        fromPath = os.path.realpath(fromPath)
        return os.path.relpath(toPath, fromPath)

    @staticmethod
    def absolutePath(fromPath, relativePath):
        """Get an absolute path for relativePath, interpreted from fromPath.

        Parameters
        ----------
        fromPath : string
            A location at which to start. It can be a relative path or an
            absolute path.
        relativePath : string
            A path relative to fromPath.

        Returns
        -------
        string
            The normalized join of the symlink-resolved fromPath and
            relativePath. If relativePath is already absolute it is returned
            unchanged.
        """
        if os.path.isabs(relativePath):
            return relativePath
        fromPath = os.path.realpath(fromPath)
        return os.path.normpath(os.path.join(fromPath, relativePath))

    @staticmethod
    def getRepositoryCfg(uri):
        """Get a persisted RepositoryCfg

        Parameters
        ----------
        uri : URI or path to a RepositoryCfg
            The location of the storage that holds 'repositoryCfg.yaml'.

        Returns
        -------
        A RepositoryCfg instance or None
        """
        storage = Storage.makeFromURI(uri)
        formatter = storage._getFormatter(RepositoryCfg)
        return formatter.read(ButlerLocation(pythonType=None,
                                             cppType=None,
                                             storageName=None,
                                             locationList='repositoryCfg.yaml',
                                             dataId={},
                                             mapper=None,
                                             storage=storage,
                                             usedDataId=None,
                                             datasetType=None))

    @staticmethod
    def putRepositoryCfg(cfg, loc=None):
        """Persist a RepositoryCfg as 'repositoryCfg.yaml'.

        Parameters
        ----------
        cfg : RepositoryCfg
            The configuration to write. Its type selects the formatter.
        loc : string, optional
            URI of the storage to write into. If None, cfg.root is used. The
            storage is created if it does not exist.
        """
        storage = Storage.makeFromURI(cfg.root if loc is None else loc, create=True)
        formatter = storage._getFormatter(type(cfg))
        formatter.write(cfg, ButlerLocation(pythonType=None,
                                            cppType=None,
                                            storageName=None,
                                            locationList='repositoryCfg.yaml',
                                            dataId={},
                                            mapper=None,
                                            storage=storage,
                                            usedDataId=None,
                                            datasetType=None))

    @staticmethod
    def getMapperClass(root):
        """Get the mapper class associated with a repository root.

        Supports the legacy _parent symlink search (which was only ever
        posix-only). This should not be used by new code and repositories;
        they should use the Repository parentCfg mechanism.

        Parameters
        ----------
        root : string
            The location of a persisted RepositoryCfg (new style repos), or
            the location where a _mapper file is (old style repos).

        Returns
        -------
        A class object or a class instance, depending on the state of the
        mapper when the repository was created. None if root is empty or no
        mapper could be found.

        Raises
        ------
        RuntimeError
            If the name read from the _mapper file is not a dotted
            (package-qualified) name.
        """
        if not root:
            return None

        cfg = PosixStorage.getRepositoryCfg(root)
        if cfg is not None:
            return cfg.mapper

        # Find a "_mapper" file containing the mapper class name
        basePath = root
        mapperFile = "_mapper"
        while not os.path.exists(os.path.join(basePath, mapperFile)):
            # Break abstraction by following _parent links from CameraMapper
            if os.path.exists(os.path.join(basePath, "_parent")):
                basePath = os.path.join(basePath, "_parent")
            else:
                mapperFile = None
                break

        if mapperFile is not None:
            mapperFile = os.path.join(basePath, mapperFile)

            # Read the name of the mapper class and look it up
            with open(mapperFile, "r") as f:
                mapperName = f.readline().strip()
            components = mapperName.split(".")
            if len(components) <= 1:
                raise RuntimeError("Unqualified mapper name %s in %s" %
                                   (mapperName, mapperFile))
            pkg = importlib.import_module(".".join(components[:-1]))
            return getattr(pkg, components[-1])

        return None

    @staticmethod
    def getParentSymlinkPath(root):
        """For Butler V1 Repositories only, if a _parent symlink exists, get
        the location pointed to by the symlink.

        Parameters
        ----------
        root : string
            A path to the folder on the local filesystem.

        Returns
        -------
        string or None
            A path to the parent folder indicated by the _parent symlink, or
            None if there is no _parent symlink at root.
        """
        linkpath = os.path.join(root, '_parent')
        if os.path.exists(linkpath):
            try:
                return os.readlink(linkpath)
            except OSError:
                # some of the unit tests rely on a folder called _parent
                # instead of a symlink to another location. Allow that;
                # return the path of that folder.
                return linkpath
        return None

    def write(self, butlerLocation, obj):
        """Writes an object to a location and persistence format specified by
        ButlerLocation

        Parameters
        ----------
        butlerLocation : ButlerLocation
            The location & formatting for the object to be written.
        obj : object instance
            The object to be written.
        """
        self.log.debug("Put location=%s obj=%s", butlerLocation, obj)

        additionalData = butlerLocation.getAdditionalData()
        storageName = butlerLocation.getStorageName()
        locations = butlerLocation.getLocations()

        pythonType = self._importPythonType(butlerLocation.getPythonType())
        # todo this effectively defines the butler posix "do serialize"
        # command to be named "butlerWrite". This has implications; any
        # python type that can be written to disk and has a method called
        # 'butlerWrite' will be called here (even if it's e.g. destined for
        # FitsStorage). We might want a somewhat more specific API.
        if hasattr(pythonType, 'butlerWrite'):
            pythonType.butlerWrite(obj, butlerLocation=butlerLocation)
            return

        # Only the first location is written. Every write below happens
        # inside the SafeFilename context.
        # NOTE(review): presumably SafeFilename yields a temporary name that
        # is moved into place on exit - confirm in safeFileIo.
        with SafeFilename(os.path.join(self.root, locations[0])) as locationString:
            logLoc = LogicalLocation(locationString, additionalData)

            if storageName == "PickleStorage":
                with open(logLoc.locString(), "wb") as outfile:
                    pickle.dump(obj, outfile, pickle.HIGHEST_PROTOCOL)
                return

            if storageName == "ConfigStorage":
                obj.save(logLoc.locString())
                return

            if storageName == "FitsCatalogStorage":
                if additionalData.exists("flags"):
                    kwds = dict(flags=additionalData.getInt("flags"))
                else:
                    kwds = {}
                obj.writeFits(logLoc.locString(), **kwds)
                return

            # Create a list of Storages for the item.
            storageList = StorageList()
            storage = self.persistence.getPersistStorage(storageName, logLoc)
            storageList.append(storage)

            if storageName == 'FitsStorage':
                self.persistence.persist(obj, storageList, additionalData)
                return

            # Persist the item.
            if hasattr(obj, '__deref__'):
                # We have a smart pointer, so dereference it.
                self.persistence.persist(obj.__deref__(), storageList, additionalData)
            else:
                self.persistence.persist(obj, storageList, additionalData)

    def read(self, butlerLocation):
        """Read from a butlerLocation.

        Parameters
        ----------
        butlerLocation : ButlerLocation
            The location & formatting for the object(s) to be read.

        Returns
        -------
        A list of objects as described by the butler location. One item for
        each location in butlerLocation.getLocations(). If the python type
        provides 'butlerRead', whatever that returns is returned instead.

        Raises
        ------
        RuntimeError
            If a pickle, FITS catalog or config file to be read does not
            exist.
        """
        additionalData = butlerLocation.getAdditionalData()
        storageName = butlerLocation.getStorageName()
        results = []
        locations = butlerLocation.getLocations()
        pythonType = self._importPythonType(butlerLocation.getPythonType())

        # The naming concern noted for 'butlerWrite' in the write method
        # applies equally to 'butlerRead'.
        if hasattr(pythonType, 'butlerRead'):
            results = pythonType.butlerRead(butlerLocation=butlerLocation)
            return results

        for locationString in locations:
            locationString = os.path.join(self.root, locationString)

            logLoc = LogicalLocation(locationString, additionalData)

            if storageName == "PafStorage":
                finalItem = pexPolicy.Policy.createPolicy(logLoc.locString())
            elif storageName == "YamlStorage":
                finalItem = Policy(filePath=logLoc.locString())
            elif storageName == "PickleStorage":
                if not os.path.exists(logLoc.locString()):
                    raise RuntimeError("No such pickle file: " + logLoc.locString())
                # NOTE(security): unpickling executes arbitrary code; only
                # read pickle files from repositories you trust.
                with open(logLoc.locString(), "rb") as infile:
                    # py3: We have to specify encoding since some files were written
                    # by python2, and 'latin1' manages that conversion safely. See:
                    # http://stackoverflow.com/questions/28218466/unpickling-a-python-2-object-with-python-3/28218598#28218598
                    if sys.version_info.major >= 3:
                        finalItem = pickle.load(infile, encoding="latin1")
                    else:
                        finalItem = pickle.load(infile)
            elif storageName == "FitsCatalogStorage":
                if not os.path.exists(logLoc.locString()):
                    raise RuntimeError("No such FITS catalog file: " + logLoc.locString())
                kwds = {}
                if additionalData.exists("hdu"):
                    kwds["hdu"] = additionalData.getInt("hdu")
                if additionalData.exists("flags"):
                    kwds["flags"] = additionalData.getInt("flags")
                finalItem = pythonType.readFits(logLoc.locString(), **kwds)
            elif storageName == "ConfigStorage":
                if not os.path.exists(logLoc.locString()):
                    raise RuntimeError("No such config file: " + logLoc.locString())
                finalItem = pythonType()
                finalItem.load(logLoc.locString())
            else:
                # Create a list of Storages for the item.
                storageList = StorageList()
                storage = self.persistence.getRetrieveStorage(storageName, logLoc)
                storageList.append(storage)
                finalItem = self.persistence.unsafeRetrieve(
                    butlerLocation.getCppType(), storageList, additionalData)
            results.append(finalItem)

        return results

    def butlerLocationExists(self, location):
        """Implementation of PosixStorage.exists for ButlerLocation objects.

        Parameters
        ----------
        location : ButlerLocation
            The location to look for.

        Returns
        -------
        bool
            True if any of the locations is found in this storage. False if
            none is found or if the storage name is not one of the supported
            names (a warning is logged in that case).
        """
        storageName = location.getStorageName()
        if storageName not in ('BoostStorage', 'FitsStorage', 'PafStorage',
                               'PickleStorage', 'ConfigStorage', 'FitsCatalogStorage'):
            self.log.warn("butlerLocationExists for non-supported storage %s", location)
            return False
        for locationString in location.getLocations():
            logLoc = LogicalLocation(locationString, location.getAdditionalData()).locString()
            obj = self.instanceSearch(path=logLoc)
            if obj:
                return True
        return False

    def exists(self, location):
        """Check if location exists.

        Parameters
        ----------
        location : ButlerLocation or string
            A string or a ButlerLocation that describes the location of an
            object in this storage.

        Returns
        -------
        bool
            True if exists, else False.
        """
        if isinstance(location, ButlerLocation):
            return self.butlerLocationExists(location)

        obj = self.instanceSearch(path=location)
        return bool(obj)

    def locationWithRoot(self, location):
        """Get the full path to the location.

        Parameters
        ----------
        location : string
            A path relative to this storage's root.

        Returns
        -------
        string
            location joined onto this storage's root.
        """
        return os.path.join(self.root, location)

    @staticmethod
    def v1RepoExists(root):
        """Test if a Version 1 Repository exists.

        Version 1 Repositories only exist in posix storages, do not have a
        RepositoryCfg file, and contain either a registry.sqlite3 file, a
        _mapper file, or a _parent link.

        Parameters
        ----------
        root : string
            A path to a folder on the local filesystem.

        Returns
        -------
        bool
            True if the repository at root exists, else False.
        """
        return os.path.exists(root) and (
            os.path.exists(os.path.join(root, "registry.sqlite3")) or
            os.path.exists(os.path.join(root, "_mapper")) or
            os.path.exists(os.path.join(root, "_parent"))
        )

    def copyFile(self, fromLocation, toLocation):
        """Copy a file from one location to another on the local filesystem.

        Both locations are interpreted relative to this storage's root.

        Parameters
        ----------
        fromLocation : path
            Path and name of existing file.
        toLocation : path
            Path and name of new file.

        Returns
        -------
        None
        """
        shutil.copy(os.path.join(self.root, fromLocation), os.path.join(self.root, toLocation))

    def getLocalFile(self, path):
        """Get a handle to a local copy of the file, downloading it to a
        temporary if needed.

        Parameters
        ----------
        path : string
            A path to the file in storage, relative to root.

        Returns
        -------
        A handle to a local copy of the file, or None if the file does not
        exist. For this storage the handle is the original file opened for
        reading. The file name can be gotten via the 'name' property of the
        returned object. The caller is responsible for closing the handle.
        """
        p = os.path.join(self.root, path)
        try:
            return open(p)
        except IOError as e:
            if e.errno == errno.ENOENT:  # 'No such file or directory'
                return None
            # Any other failure (permissions, is-a-directory, ...) is a real
            # error; re-raise it with its original traceback.
            raise

    def instanceSearch(self, path):
        """Search for the given path in this storage instance.

        If the path contains an HDU indicator (a number in brackets before the
        dot, e.g. 'foo.fits[1]', this will be stripped when searching and so
        will match filenames without the HDU indicator, e.g. 'foo.fits'. The
        path returned WILL contain the indicator though, e.g. ['foo.fits[1]'].

        Parameters
        ----------
        path : string
            A filename (and optionally prefix path) to search for within root.

        Returns
        -------
        list of string or None
            The locations that were found, or None if no location was found.
        """
        return self.search(self.root, path)

    @staticmethod
    def search(root, path, searchParents=False):
        """Look for the given path in the current root.

        Also supports searching for the path in Butler v1 repositories by
        following the Butler v1 _parent symlink

        If the path contains an HDU indicator (a number in brackets, e.g.
        'foo.fits[1]', this will be stripped when searching and so
        will match filenames without the HDU indicator, e.g. 'foo.fits'. The
        path returned WILL contain the indicator though, e.g. ['foo.fits[1]'].

        Parameters
        ----------
        root : string
            The path to the root directory.
        path : string
            The path to the file within the root directory. It is used as a
            glob pattern.
        searchParents : bool, optional
            For Butler v1 repositories only, if true and a _parent symlink
            exists, then the directory at _parent will be searched if the file
            is not found in the root repository. Will continue searching the
            parent of the parent until the file is found or no additional
            parent exists.

        Returns
        -------
        list of string or None
            The locations that were found, or None if no location was found.
        """
        # Separate path into a root-equivalent prefix (in pathPrefix) and the
        # rest (left in path)
        rootDir = root
        # First remove trailing slashes (#2527)
        while len(rootDir) > 1 and rootDir[-1] == '/':
            rootDir = rootDir[:-1]

        if path.startswith(rootDir + "/"):
            # Common case; we have the same root prefix string
            path = path[len(rootDir + '/'):]
            pathPrefix = rootDir
        elif rootDir == "/" and path.startswith("/"):
            path = path[1:]
            pathPrefix = None
        else:
            # Search for prefix that is the same as root
            pathPrefix = os.path.dirname(path)
            while pathPrefix != "" and pathPrefix != "/":
                if os.path.realpath(pathPrefix) == os.path.realpath(root):
                    break
                pathPrefix = os.path.dirname(pathPrefix)
            if pathPrefix == "/":
                path = path[1:]
            elif pathPrefix != "":
                path = path[len(pathPrefix)+1:]

        # Now search for the path in the root or its parents
        # Strip off any cfitsio bracketed extension if present
        strippedPath = path
        pathStripped = None
        firstBracket = path.find("[")
        if firstBracket != -1:
            strippedPath = path[:firstBracket]
            pathStripped = path[firstBracket:]

        searchDir = rootDir
        while True:
            paths = glob.glob(os.path.join(searchDir, strippedPath))
            if paths:
                # Unless the caller's path carried rootDir as its literal
                # prefix, report matches relative to rootDir.
                if pathPrefix != rootDir:
                    paths = [p[len(rootDir+'/'):] for p in paths]
                # Re-attach the bracketed extension removed before globbing.
                if pathStripped is not None:
                    paths = [p + pathStripped for p in paths]
                return paths
            if searchParents:
                searchDir = os.path.join(searchDir, "_parent")
                if not os.path.exists(searchDir):
                    return None
            else:
                return None

    @staticmethod
    def storageExists(uri):
        """Ask if a storage at the location described by uri exists

        Parameters
        ----------
        uri : string
            URI to the root location of the storage

        Returns
        -------
        bool
            True if the storage exists, false if not
        """
        return os.path.exists(PosixStorage._pathFromURI(uri))
613 
# Register PosixStorage as the handler for URIs with no scheme (plain paths)
# and for URIs with the 'file' scheme.
for _scheme in ('', 'file'):
    Storage.registerStorageClass(scheme=_scheme, cls=PosixStorage)
del _scheme
Class for logical location of a persisted Persistable instance.