lsst.daf.persistence  13.0-28-gf70af18
 All Classes Namespaces Files Functions Variables Typedefs Friends Macros
posixStorage.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #
4 # LSST Data Management System
5 # Copyright 2016 LSST Corporation.
6 #
7 # This product includes software developed by the
8 # LSST Project (http://www.lsst.org/).
9 #
10 # This program is free software: you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation, either version 3 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
19 #
20 # You should have received a copy of the LSST License Statement and
21 # the GNU General Public License along with this program. If not,
22 # see <http://www.lsstcorp.org/LegalNotices/>.
23 #
24 from past.builtins import basestring
25 import sys
26 import copy
27 import pickle
28 import importlib
29 import os
30 import urllib.parse
31 import glob
32 import shutil
33 import fcntl
34 
35 import yaml
36 
37 from . import (LogicalLocation, Persistence, Policy, StorageList,
38  StorageInterface, Storage, ButlerLocation,
39  NoRepositroyAtRoot, RepositoryCfg)
40 from lsst.log import Log
41 import lsst.pex.policy as pexPolicy
42 from .safeFileIo import SafeFilename, safeMakeDir
43 from future import standard_library
44 standard_library.install_aliases()
45 
class PosixStorage(StorageInterface):
    """Storage interface backed by a directory on the local filesystem.

    Parameters
    ----------
    uri : string
        URI or path that is used as the storage location.
    create : bool
        If True a new repository will be created at the root location if it
        does not exist. If False then a new repository will not be created.

    Raises
    ------
    NoRepositroyAtRoot
        If create is False and a repository does not exist at the root
        specified by uri then NoRepositroyAtRoot is raised.
    """

    def __init__(self, uri, create):
        self.log = Log.getLogger("daf.persistence.butler")
        self.root = self._pathFromURI(uri)
        # An empty root (no path component in the URI) is never created or
        # checked for existence.
        if self.root and not os.path.exists(self.root):
            if not create:
                raise NoRepositroyAtRoot("No repository at {}".format(uri))
            safeMakeDir(self.root)

        # Always use an empty Persistence policy until we can get rid of it
        persistencePolicy = pexPolicy.Policy()
        self.persistence = Persistence.getPersistence(persistencePolicy)

    def __repr__(self):
        return 'PosixStorage(root=%s)' % self.root

    @staticmethod
    def _pathFromURI(uri):
        """Get the path part of the URI"""
        return urllib.parse.urlparse(uri).path

    @staticmethod
    def _resolvePythonType(pythonType):
        """Turn a dotted type name into the object it names.

        Parameters
        ----------
        pythonType : string, object or None
            Either a fully qualified name ('package.module.ClassName') or
            something that is not a string, which is passed through.

        Returns
        -------
        object or None
            The attribute named by the last dotted component, imported from
            the module named by the preceding components; or pythonType
            itself if it is not a string.
        """
        if not isinstance(pythonType, basestring):
            return pythonType
        tokens = pythonType.split('.')
        className = tokens.pop().strip()
        module = __import__(".".join(tokens), globals(), locals(), [className], 0)
        return getattr(module, className)

    @staticmethod
    def relativePath(fromPath, toPath):
        """Get a relative path from a location to a location.

        Parameters
        ----------
        fromPath : string
            A path at which to start. It can be a relative path or an
            absolute path.
        toPath : string
            A target location. It can be a relative path or an absolute path.

        Returns
        -------
        string
            A relative path that describes the path from fromPath to toPath.
        """
        # Only the starting point has its symlinks resolved; toPath is used
        # as given.
        return os.path.relpath(toPath, os.path.realpath(fromPath))

    @staticmethod
    def absolutePath(fromPath, relativePath):
        """Get an absolute path for relativePath, interpreted from fromPath.

        Parameters
        ----------
        fromPath : string
            A location at which to start. It can be a relative path or an
            absolute path.
        relativePath : string
            A path relative to fromPath.

        Returns
        -------
        string
            The normalized join of the symlink-resolved fromPath and
            relativePath. If relativePath is already absolute it is returned
            unchanged.
        """
        if os.path.isabs(relativePath):
            return relativePath
        base = os.path.realpath(fromPath)
        return os.path.normpath(os.path.join(base, relativePath))

    @staticmethod
    def getRepositoryCfg(uri):
        """Get a persisted RepositoryCfg

        Parameters
        ----------
        uri : URI or path to a RepositoryCfg
            Location of the storage that holds 'repositoryCfg.yaml'.

        Returns
        -------
        A RepositoryCfg instance or None
        """
        storage = Storage.makeFromURI(uri)
        formatter = storage._getFormatter(RepositoryCfg)
        location = ButlerLocation(pythonType=None,
                                  cppType=None,
                                  storageName=None,
                                  locationList='repositoryCfg.yaml',
                                  dataId={},
                                  mapper=None,
                                  storage=storage,
                                  usedDataId=None,
                                  datasetType=None)
        return formatter.read(location)

    @staticmethod
    def putRepositoryCfg(cfg, loc=None):
        """Persist a RepositoryCfg as 'repositoryCfg.yaml'.

        Parameters
        ----------
        cfg : RepositoryCfg
            The configuration to write.
        loc : string, optional
            URI of the storage to write to. If None, cfg.root is used. The
            storage is created if it does not exist.
        """
        storage = Storage.makeFromURI(cfg.root if loc is None else loc, create=True)
        formatter = storage._getFormatter(type(cfg))
        location = ButlerLocation(pythonType=None,
                                  cppType=None,
                                  storageName=None,
                                  locationList='repositoryCfg.yaml',
                                  dataId={},
                                  mapper=None,
                                  storage=storage,
                                  usedDataId=None,
                                  datasetType=None)
        formatter.write(cfg, location)

    @staticmethod
    def getMapperClass(root):
        """Get the mapper class associated with a repository root.

        Supports the legacy _parent symlink search (which was only ever
        posix-only). This should not be used by new code and repositories;
        they should use the Repository parentCfg mechanism.

        Parameters
        ----------
        root : string
            The location of a persisted RepositoryCfg (new style repos), or
            the location where a _mapper file is (old style repos).

        Returns
        -------
        A class object or a class instance, depending on the state of the
        mapper when the repository was created. None if root is empty or no
        mapper could be found.

        Raises
        ------
        RuntimeError
            If the name in the _mapper file has no module qualification.
        """
        if not root:
            return None

        cfg = PosixStorage.getRepositoryCfg(root)
        if cfg is not None:
            return cfg.mapper

        # Find a "_mapper" file containing the mapper class name
        basePath = root
        mapperFile = "_mapper"
        while not os.path.exists(os.path.join(basePath, mapperFile)):
            # Break abstraction by following _parent links from CameraMapper
            if os.path.exists(os.path.join(basePath, "_parent")):
                basePath = os.path.join(basePath, "_parent")
            else:
                mapperFile = None
                break

        if mapperFile is None:
            return None

        mapperFile = os.path.join(basePath, mapperFile)

        # Read the name of the mapper class and import it
        with open(mapperFile, "r") as f:
            mapperName = f.readline().strip()
        components = mapperName.split(".")
        if len(components) <= 1:
            raise RuntimeError("Unqualified mapper name %s in %s" %
                               (mapperName, mapperFile))
        pkg = importlib.import_module(".".join(components[:-1]))
        return getattr(pkg, components[-1])

    @staticmethod
    def getParentSymlinkPath(root):
        """For Butler V1 Repositories only, if a _parent symlink exists, get
        the location pointed to by the symlink.

        Parameters
        ----------
        root : string
            A path to the folder on the local filesystem.

        Returns
        -------
        string or None
            A path to the parent folder indicated by the _parent symlink, or
            None if there is no _parent symlink at root.
        """
        # NOTE(review): this method's name was restored from the upstream
        # lsst.daf.persistence sources -- confirm against callers.
        linkpath = os.path.join(root, '_parent')
        if os.path.exists(linkpath):
            try:
                return os.readlink(linkpath)
            except OSError:
                # Some of the unit tests rely on a folder called _parent
                # instead of a symlink to another location. Allow that;
                # return the path of that folder.
                return linkpath
        return None

    def write(self, butlerLocation, obj):
        """Writes an object to a location and persistence format specified by
        ButlerLocation

        Only the first entry of butlerLocation.getLocations() is written.

        Parameters
        ----------
        butlerLocation : ButlerLocation
            The location & formatting for the object to be written.
        obj : object instance
            The object to be written.
        """
        self.log.debug("Put location=%s obj=%s", butlerLocation, obj)

        additionalData = butlerLocation.getAdditionalData()
        storageName = butlerLocation.getStorageName()
        locations = butlerLocation.getLocations()

        pythonType = self._resolvePythonType(butlerLocation.getPythonType())
        # TODO: this effectively defines the butler posix "do serialize" hook
        # to be a method named 'butlerWrite'. Right now the worry is that any
        # python type that has a method of that name will be dispatched here
        # (even if it's e.g. destined for FitsStorage). We might want a
        # somewhat more specific API.
        if hasattr(pythonType, 'butlerWrite'):
            pythonType.butlerWrite(obj, butlerLocation=butlerLocation)
            return

        # All writing happens inside the SafeFilename context, which hands
        # out the file name to write to.
        with SafeFilename(os.path.join(self.root, locations[0])) as locationString:
            logLoc = LogicalLocation(locationString, additionalData)

            if storageName == "PickleStorage":
                with open(logLoc.locString(), "wb") as outfile:
                    pickle.dump(obj, outfile, pickle.HIGHEST_PROTOCOL)
                return

            if storageName == "ConfigStorage":
                obj.save(logLoc.locString())
                return

            if storageName == "FitsCatalogStorage":
                kwds = {}
                if additionalData.exists("flags"):
                    kwds["flags"] = additionalData.getInt("flags")
                obj.writeFits(logLoc.locString(), **kwds)
                return

            # Create a list of Storages for the item.
            storageList = StorageList()
            storage = self.persistence.getPersistStorage(storageName, logLoc)
            storageList.append(storage)

            if storageName == 'FitsStorage':
                self.persistence.persist(obj, storageList, additionalData)
                return

            # Persist the item.
            if hasattr(obj, '__deref__'):
                # We have a smart pointer, so dereference it.
                self.persistence.persist(obj.__deref__(), storageList, additionalData)
            else:
                self.persistence.persist(obj, storageList, additionalData)

    def read(self, butlerLocation):
        """Read from a butlerLocation.

        Parameters
        ----------
        butlerLocation : ButlerLocation
            The location & formatting for the object(s) to be read.

        Returns
        -------
        A list of objects as described by the butler location. One item for
        each location in butlerLocation.getLocations(). If the python type
        provides a 'butlerRead' method, whatever that method returns is
        returned instead.

        Raises
        ------
        RuntimeError
            If a pickle, FITS catalog or config file does not exist.
        """
        additionalData = butlerLocation.getAdditionalData()
        storageName = butlerLocation.getStorageName()
        results = []
        locations = butlerLocation.getLocations()

        pythonType = self._resolvePythonType(butlerLocation.getPythonType())
        # See the note in write() about the 'butlerWrite' hook name; the same
        # concern applies to 'butlerRead'.
        if hasattr(pythonType, 'butlerRead'):
            return pythonType.butlerRead(butlerLocation=butlerLocation)

        for locationString in locations:
            locationString = os.path.join(self.root, locationString)
            logLoc = LogicalLocation(locationString, additionalData)

            if storageName == "PafStorage":
                finalItem = pexPolicy.Policy.createPolicy(logLoc.locString())
            elif storageName == "YamlStorage":
                finalItem = Policy(filePath=logLoc.locString())
            elif storageName == "PickleStorage":
                if not os.path.exists(logLoc.locString()):
                    raise RuntimeError("No such pickle file: " + logLoc.locString())
                # SECURITY: unpickling executes arbitrary code; only read
                # pickle files from trusted repositories.
                with open(logLoc.locString(), "rb") as infile:
                    # py3: We have to specify encoding since some files were
                    # written by python2, and 'latin1' manages that
                    # conversion safely. See:
                    # http://stackoverflow.com/questions/28218466/unpickling-a-python-2-object-with-python-3/28218598#28218598
                    if sys.version_info.major >= 3:
                        finalItem = pickle.load(infile, encoding="latin1")
                    else:
                        finalItem = pickle.load(infile)
            elif storageName == "FitsCatalogStorage":
                if not os.path.exists(logLoc.locString()):
                    raise RuntimeError("No such FITS catalog file: " + logLoc.locString())
                kwds = {}
                if additionalData.exists("hdu"):
                    kwds["hdu"] = additionalData.getInt("hdu")
                if additionalData.exists("flags"):
                    kwds["flags"] = additionalData.getInt("flags")
                finalItem = pythonType.readFits(logLoc.locString(), **kwds)
            elif storageName == "ConfigStorage":
                if not os.path.exists(logLoc.locString()):
                    raise RuntimeError("No such config file: " + logLoc.locString())
                finalItem = pythonType()
                finalItem.load(logLoc.locString())
            else:
                # Everything else goes through the Persistence framework.
                storageList = StorageList()
                storage = self.persistence.getRetrieveStorage(storageName, logLoc)
                storageList.append(storage)
                finalItem = self.persistence.unsafeRetrieve(
                    butlerLocation.getCppType(), storageList, additionalData)
            results.append(finalItem)

        return results

    def butlerLocationExists(self, location):
        """Implementation of PosixStorage.exists for ButlerLocation objects.

        Parameters
        ----------
        location : ButlerLocation
            The location to look for.

        Returns
        -------
        bool
            True if any of the location's paths is found in this storage.
            False if none is found, or if the storage name is not one of the
            supported ones (a warning is logged in that case).
        """
        storageName = location.getStorageName()
        # NOTE(review): 'YamlStorage' is handled by read() but is not listed
        # here -- confirm whether that is intentional.
        if storageName not in ('BoostStorage', 'FitsStorage', 'PafStorage',
                               'PickleStorage', 'ConfigStorage', 'FitsCatalogStorage'):
            self.log.warn("butlerLocationExists for non-supported storage %s", location)
            return False
        for locationString in location.getLocations():
            logLoc = LogicalLocation(locationString, location.getAdditionalData()).locString()
            if self.instanceSearch(path=logLoc):
                return True
        return False

    def exists(self, location):
        """Check if location exists.

        Parameters
        ----------
        location : ButlerLocation or string
            A string or a ButlerLocation that describes the location of an
            object in this storage.

        Returns
        -------
        bool
            True if exists, else False.
        """
        if isinstance(location, ButlerLocation):
            return self.butlerLocationExists(location)
        return bool(self.instanceSearch(path=location))

    def locationWithRoot(self, location):
        """Get the full path to the location.

        Parameters
        ----------
        location : string
            A path relative to this storage's root.

        Returns
        -------
        string
            location joined onto this storage's root.
        """
        return os.path.join(self.root, location)

    @staticmethod
    def v1RepoExists(root):
        """Test if a Version 1 Repository exists.

        Version 1 Repositories only exist in posix storages, do not have a
        RepositoryCfg file, and contain either a registry.sqlite3 file, a
        _mapper file, or a _parent link.

        Parameters
        ----------
        root : string
            A path to a folder on the local filesystem.

        Returns
        -------
        bool
            True if the repository at root exists, else False.
        """
        if not os.path.exists(root):
            return False
        markers = ("registry.sqlite3", "_mapper", "_parent")
        return any(os.path.exists(os.path.join(root, name)) for name in markers)

    def copyFile(self, fromLocation, toLocation):
        """Copy a file from one location to another on the local filesystem.

        Both locations are interpreted relative to this storage's root.

        Parameters
        ----------
        fromLocation : path
            Path and name of existing file.
        toLocation : path
            Path and name of new file.

        Returns
        -------
        None
        """
        shutil.copy(os.path.join(self.root, fromLocation), os.path.join(self.root, toLocation))

    def getLocalFile(self, path):
        """Get a handle to a local copy of the file.

        Parameters
        ----------
        path : string
            A path to the file in storage, relative to root.

        Returns
        -------
        file object or None
            The file at root/path, opened with open()'s default mode, or None
            if no such file exists. The file name can be gotten via the
            'name' property of the returned object.

        Raises
        ------
        IOError
            If opening fails for any reason other than the file not existing.
        """
        import errno  # local import: used only here

        try:
            return open(os.path.join(self.root, path))
        except IOError as e:
            if e.errno == errno.ENOENT:  # 'No such file or directory'
                return None
            raise  # bare raise keeps the original traceback

    def instanceSearch(self, path):
        """Search for the given path in this storage instance.

        If the path contains an HDU indicator (a number in brackets before the
        dot, e.g. 'foo.fits[1]'), this will be stripped when searching and so
        will match filenames without the HDU indicator, e.g. 'foo.fits'. The
        path returned WILL contain the indicator though, e.g. ['foo.fits[1]'].

        Parameters
        ----------
        path : string
            A filename (and optionally prefix path) to search for within root.

        Returns
        -------
        list of string, or None
            The locations that were found, or None if nothing was found.
        """
        return self.search(self.root, path)

    @staticmethod
    def search(root, path, searchParents=False):
        """Look for the given path in the current root.

        Also supports searching for the path in Butler v1 repositories by
        following the Butler v1 _parent symlink

        If the path contains an HDU indicator (a number in brackets, e.g.
        'foo.fits[1]'), this will be stripped when searching and so
        will match filenames without the HDU indicator, e.g. 'foo.fits'. The
        path returned WILL contain the indicator though, e.g. ['foo.fits[1]'].

        Parameters
        ----------
        root : string
            The path to the root directory.
        path : string
            The path to the file within the root directory. It is passed to
            glob, so it may contain glob wildcards.
        searchParents : bool, optional
            For Butler v1 repositories only, if true and a _parent symlink
            exists, then the directory at _parent will be searched if the file
            is not found in the root repository. Will continue searching the
            parent of the parent until the file is found or no additional
            parent exists.

        Returns
        -------
        list of string, or None
            The locations that were found, or None if nothing was found.
        """
        # Separate path into a root-equivalent prefix (pathPrefix) and the
        # rest (left in path).
        rootDir = root
        # First remove trailing slashes (#2527)
        while len(rootDir) > 1 and rootDir[-1] == '/':
            rootDir = rootDir[:-1]

        if path.startswith(rootDir + "/"):
            # Common case; we have the same root prefix string
            path = path[len(rootDir + '/'):]
            pathPrefix = rootDir
        elif rootDir == "/" and path.startswith("/"):
            path = path[1:]
            pathPrefix = None
        else:
            # Walk up path's directories looking for one that resolves to the
            # same place as root.
            realRoot = os.path.realpath(root)
            pathPrefix = os.path.dirname(path)
            while pathPrefix != "" and pathPrefix != "/":
                if os.path.realpath(pathPrefix) == realRoot:
                    break
                pathPrefix = os.path.dirname(pathPrefix)
            if pathPrefix == "/":
                path = path[1:]
            elif pathPrefix != "":
                path = path[len(pathPrefix)+1:]

        # Now search for the path in the root or its parents
        # Strip off any cfitsio bracketed extension if present
        strippedPath = path
        pathStripped = None
        firstBracket = path.find("[")
        if firstBracket != -1:
            strippedPath = path[:firstBracket]
            pathStripped = path[firstBracket:]

        searchDir = rootDir
        while True:
            paths = glob.glob(os.path.join(searchDir, strippedPath))
            if paths:
                if pathPrefix != rootDir:
                    # The caller did not spell the path with the root prefix,
                    # so hand back results without it as well.
                    paths = [p[len(rootDir+'/'):] for p in paths]
                if pathStripped is not None:
                    # Put the bracketed extension back on every match.
                    paths = [p + pathStripped for p in paths]
                return paths
            if not searchParents:
                return None
            searchDir = os.path.join(searchDir, "_parent")
            if not os.path.exists(searchDir):
                return None

    @staticmethod
    def storageExists(uri):
        """Ask if a storage at the location described by uri exists

        Parameters
        ----------
        uri : string
            URI to the root location of the storage

        Returns
        -------
        bool
            True if the storage exists, false if not
        """
        return os.path.exists(PosixStorage._pathFromURI(uri))
617 
# Register PosixStorage for scheme-less paths ('') and for 'file' URIs.
for _scheme in ('', 'file'):
    Storage.registerStorageClass(scheme=_scheme, cls=PosixStorage)
del _scheme  # do not leave the loop variable behind as a module attribute
Class for logical location of a persisted Persistable instance.