lsst.daf.persistence  13.0-17-gd5d205a+2
 All Classes Namespaces Files Functions Variables Typedefs Friends Macros
posixStorage.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #
4 # LSST Data Management System
5 # Copyright 2016 LSST Corporation.
6 #
7 # This product includes software developed by the
8 # LSST Project (http://www.lsst.org/).
9 #
10 # This program is free software: you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation, either version 3 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
19 #
20 # You should have received a copy of the LSST License Statement and
21 # the GNU General Public License along with this program. If not,
22 # see <http://www.lsstcorp.org/LegalNotices/>.
23 #
24 from past.builtins import basestring
25 import sys
26 import copy
27 import pickle
28 import importlib
29 import os
30 import urllib.parse
31 import glob
32 import shutil
33 
34 import yaml
35 
36 from . import (LogicalLocation, Persistence, Policy, StorageList,
37  StorageInterface, Storage, safeFileIo, ButlerLocation,
38  NoRepositroyAtRoot)
39 from lsst.log import Log
40 import lsst.pex.policy as pexPolicy
41 from .safeFileIo import SafeFilename, safeMakeDir
42 from future import standard_library
43 standard_library.install_aliases()
44 
45 
class PosixStorage(StorageInterface):
    """Defines the interface for a storage location on the local filesystem.

    Parameters
    ----------
    uri : string
        URI or path that is used as the storage location.
    create : bool
        If True a new repository will be created at the root location if it
        does not exist. If False then a new repository will not be created.

    Raises
    ------
    NoRepositroyAtRoot
        If create is False and a repository does not exist at the root
        specified by uri then NoRepositroyAtRoot is raised.
    """

    def __init__(self, uri, create):
        self.log = Log.getLogger("daf.persistence.butler")
        self.root = self._pathFromURI(uri)
        # An empty root (e.g. uri == '') is accepted as-is; nothing is checked
        # or created in that case.
        if self.root and not os.path.exists(self.root):
            if not create:
                raise NoRepositroyAtRoot("No repository at {}".format(uri))
            safeMakeDir(self.root)

        # Always use an empty Persistence policy until we can get rid of it
        persistencePolicy = pexPolicy.Policy()
        self.persistence = Persistence.getPersistence(persistencePolicy)

    def __repr__(self):
        return 'PosixStorage(root=%s)' % self.root

    @staticmethod
    def _pathFromURI(uri):
        """Get the path part of the URI"""
        return urllib.parse.urlparse(uri).path

    @staticmethod
    def _resolvePythonType(pythonType):
        """Turn a python type specification into the object it names.

        Parameters
        ----------
        pythonType : string, object or None
            Either a fully qualified dotted name ("package.module.Class") or
            an already-resolved object (or None).

        Returns
        -------
        object or None
            If pythonType is a string, the attribute named by its last dotted
            component, looked up on the module named by the preceding
            components. Anything else is returned unchanged.
        """
        if pythonType is None or not isinstance(pythonType, basestring):
            return pythonType
        # import this pythonType dynamically
        pythonTypeTokenList = pythonType.split('.')
        importClassString = pythonTypeTokenList.pop().strip()
        importPackage = ".".join(pythonTypeTokenList)
        importType = __import__(importPackage, globals(), locals(), [importClassString], 0)
        return getattr(importType, importClassString)

    @staticmethod
    def relativePath(fromPath, toPath):
        """Get a relative path from a location to a location.

        Parameters
        ----------
        fromPath : string
            A path at which to start. It can be a relative path or an
            absolute path.
        toPath : string
            A target location. It can be a relative path or an absolute path.

        Returns
        -------
        string
            A relative path that describes the path from fromPath to toPath.
        """
        # Symlinks in fromPath are resolved first; toPath is used as given.
        fromPath = os.path.realpath(fromPath)
        return os.path.relpath(toPath, fromPath)

    @staticmethod
    def absolutePath(fromPath, relativePath):
        """Get an absolute path for relativePath, interpreted relative to
        fromPath.

        Parameters
        ----------
        fromPath : the starting location
            A location at which to start. It can be a relative path or an
            absolute path.
        relativePath : the location relative to fromPath
            A relative path.

        Returns
        -------
        string
            Path that is an absolute path representation of fromPath +
            relativePath, if one exists. If relativePath is absolute or if
            fromPath is not related to relativePath then relativePath will be
            returned.
        """
        if os.path.isabs(relativePath):
            return relativePath
        fromPath = os.path.realpath(fromPath)
        return os.path.normpath(os.path.join(fromPath, relativePath))

    @staticmethod
    def _getRepositoryCfg(uri):
        """Get a persisted RepositoryCfg

        Parameters
        ----------
        uri : URI or path to a RepositoryCfg
            Location of the folder that contains 'repositoryCfg.yaml'.

        Returns
        -------
        A RepositoryCfg instance or None
            None is returned when no 'repositoryCfg.yaml' exists at uri.
        """
        repositoryCfg = None
        parseRes = urllib.parse.urlparse(uri)
        loc = os.path.join(parseRes.path, 'repositoryCfg.yaml')
        if os.path.exists(loc):
            with open(loc, 'r') as f:
                # NOTE(security): yaml.Loader can construct arbitrary python
                # objects, so this must only be used on trusted repository
                # files. The Loader is passed explicitly because recent PyYAML
                # releases no longer accept yaml.load() without one;
                # yaml.Loader is the loader older releases used by default.
                repositoryCfg = yaml.load(f, Loader=yaml.Loader)
            # The root is not written when the cfg lives at the repository
            # root (see putRepositoryCfg); restore it from where it was found.
            if repositoryCfg.root is None:
                repositoryCfg.root = uri
        return repositoryCfg

    @staticmethod
    def getRepositoryCfg(uri):
        """Get a persisted RepositoryCfg

        Parameters
        ----------
        uri : URI or path to a RepositoryCfg
            Location of the folder that contains 'repositoryCfg.yaml'.

        Returns
        -------
        A RepositoryCfg instance or None
        """
        return PosixStorage._getRepositoryCfg(uri)

    @staticmethod
    def putRepositoryCfg(cfg, loc=None):
        """Serialize a RepositoryCfg to a location.

        When loc == cfg.root, the RepositoryCfg is to be written at the root
        location of the repository. In that case, root is not written, it is
        implicit in the location of the cfg. This allows the cfg to move from
        machine to machine without modification.

        Parameters
        ----------
        cfg : RepositoryCfg instance
            The RepositoryCfg to be serialized.
        loc : None, optional
            The location to write the RepositoryCfg. If loc is None, the
            location will be read from the root parameter of cfg.

        Returns
        -------
        None
        """
        if loc is None or cfg.root == loc:
            # Work on a shallow copy so the caller's cfg keeps its root.
            cfg = copy.copy(cfg)
            loc = cfg.root
            cfg.root = None
        # This class supports schema 'file' and also treats no schema as 'file'.
        # Split the URI and take only the path; remove the schema from loc if it's there.
        parseRes = urllib.parse.urlparse(loc)
        loc = parseRes.path
        if not os.path.exists(loc):
            # Same directory-creation helper that __init__ uses for the root.
            safeMakeDir(loc)
        loc = os.path.join(loc, 'repositoryCfg.yaml')
        with safeFileIo.FileForWriteOnceCompareSame(loc) as f:
            yaml.dump(cfg, f)

    @staticmethod
    def getMapperClass(root):
        """Get the mapper class associated with a repository root.

        Supports the legacy _parent symlink search (which was only ever
        posix-only). This should not be used by new code and repositories;
        they should use the Repository parentCfg mechanism.

        Parameters
        ----------
        root : string
            The location of a persisted RepositoryCfg (new style repos), or
            the location where a _mapper file is (old style repos).

        Returns
        -------
        A class object or a class instance, depending on the state of the
        mapper when the repository was created. None if root is empty or no
        mapper can be found.

        Raises
        ------
        RuntimeError
            If the name read from a _mapper file is not package-qualified.
        """
        if not (root):
            return None

        cfg = PosixStorage._getRepositoryCfg(root)
        if cfg is not None:
            return cfg.mapper

        # Find a "_mapper" file containing the mapper class name
        basePath = root
        mapperFile = "_mapper"
        while not os.path.exists(os.path.join(basePath, mapperFile)):
            # Break abstraction by following _parent links from CameraMapper
            if os.path.exists(os.path.join(basePath, "_parent")):
                basePath = os.path.join(basePath, "_parent")
            else:
                mapperFile = None
                break

        if mapperFile is not None:
            mapperFile = os.path.join(basePath, mapperFile)

            # Read the name of the mapper class and look it up
            with open(mapperFile, "r") as f:
                mapperName = f.readline().strip()
            components = mapperName.split(".")
            if len(components) <= 1:
                raise RuntimeError("Unqualified mapper name %s in %s" %
                                   (mapperName, mapperFile))
            pkg = importlib.import_module(".".join(components[:-1]))
            return getattr(pkg, components[-1])

        return None

    @staticmethod
    def getParentSymlinkPath(root):
        """For Butler V1 Repositories only, if a _parent symlink exists, get the location pointed to by the
        symlink.

        Parameters
        ----------
        root : string
            A path to the folder on the local filesystem.

        Returns
        -------
        string or None
            A path to the parent folder indicated by the _parent symlink, or None if there is no _parent
            symlink at root.
        """
        linkpath = os.path.join(root, '_parent')
        if os.path.exists(linkpath):
            try:
                return os.readlink(linkpath)
            except OSError:
                # some of the unit tests rely on a folder called _parent instead of a symlink to another
                # location. Allow that; return the path of that folder.
                return linkpath
        return None

    def write(self, butlerLocation, obj):
        """Writes an object to a location and persistence format specified by
        ButlerLocation

        Parameters
        ----------
        butlerLocation : ButlerLocation
            The location & formatting for the object to be written.
        obj : object instance
            The object to be written.
        """
        self.log.debug("Put location=%s obj=%s", butlerLocation, obj)

        additionalData = butlerLocation.getAdditionalData()
        storageName = butlerLocation.getStorageName()
        locations = butlerLocation.getLocations()

        pythonType = self._resolvePythonType(butlerLocation.getPythonType())
        # todo this effectively defines the butler posix "do serialize" command to be named "butlerWrite".
        # This has implications; right now I'm worried that any python type that can be written to disk and
        # has a method with that name will be called here (even if it's e.g. destined for FitsStorage).
        # We might want a somewhat more specific API.
        if hasattr(pythonType, 'butlerWrite'):
            pythonType.butlerWrite(obj, butlerLocation=butlerLocation)
            return

        # Only the first location is written. All writes happen inside the
        # SafeFilename context so that they go through the name it provides.
        with SafeFilename(os.path.join(self.root, locations[0])) as locationString:
            logLoc = LogicalLocation(locationString, additionalData)

            if storageName == "PickleStorage":
                with open(logLoc.locString(), "wb") as outfile:
                    pickle.dump(obj, outfile, pickle.HIGHEST_PROTOCOL)
                return

            if storageName == "ConfigStorage":
                obj.save(logLoc.locString())
                return

            if storageName == "FitsCatalogStorage":
                flags = additionalData.getInt("flags", 0)
                obj.writeFits(logLoc.locString(), flags=flags)
                return

            # Create a list of Storages for the item.
            storageList = StorageList()
            storage = self.persistence.getPersistStorage(storageName, logLoc)
            storageList.append(storage)

            if storageName == 'FitsStorage':
                self.persistence.persist(obj, storageList, additionalData)
                return

            # Persist the item.
            if hasattr(obj, '__deref__'):
                # We have a smart pointer, so dereference it.
                self.persistence.persist(obj.__deref__(), storageList, additionalData)
            else:
                self.persistence.persist(obj, storageList, additionalData)

    def read(self, butlerLocation):
        """Read from a butlerLocation.

        Parameters
        ----------
        butlerLocation : ButlerLocation
            The location & formatting for the object(s) to be read.

        Returns
        -------
        A list of objects as described by the butler location. One item for
        each location in butlerLocation.getLocations(). If the python type
        provides a butlerRead method, whatever that method returns is
        returned instead.

        Raises
        ------
        RuntimeError
            If a pickle, FITS catalog or config file to be read does not
            exist.
        """
        additionalData = butlerLocation.getAdditionalData()
        storageName = butlerLocation.getStorageName()
        results = []
        locations = butlerLocation.getLocations()
        pythonType = self._resolvePythonType(butlerLocation.getPythonType())

        # see note re. discomfort with the name 'butlerWrite' in the write method.
        # Same applies to butlerRead.
        if hasattr(pythonType, 'butlerRead'):
            results = pythonType.butlerRead(butlerLocation=butlerLocation)
            return results

        for locationString in locations:
            locationString = os.path.join(self.root, locationString)

            logLoc = LogicalLocation(locationString, additionalData)

            if storageName == "PafStorage":
                finalItem = pexPolicy.Policy.createPolicy(logLoc.locString())
            elif storageName == "YamlStorage":
                finalItem = Policy(filePath=logLoc.locString())
            elif storageName == "PickleStorage":
                if not os.path.exists(logLoc.locString()):
                    raise RuntimeError("No such pickle file: " + logLoc.locString())
                # NOTE(security): unpickling executes code chosen by the file's
                # author; only read pickles from trusted repositories.
                with open(logLoc.locString(), "rb") as infile:
                    # py3: We have to specify encoding since some files were written
                    # by python2, and 'latin1' manages that conversion safely. See:
                    # http://stackoverflow.com/questions/28218466/unpickling-a-python-2-object-with-python-3/28218598#28218598
                    if sys.version_info.major >= 3:
                        finalItem = pickle.load(infile, encoding="latin1")
                    else:
                        finalItem = pickle.load(infile)
            elif storageName == "FitsCatalogStorage":
                if not os.path.exists(logLoc.locString()):
                    raise RuntimeError("No such FITS catalog file: " + logLoc.locString())
                # Default passed for "hdu" when additionalData does not set one.
                INT_MIN = -(1 << 31)
                hdu = additionalData.getInt("hdu", INT_MIN)
                flags = additionalData.getInt("flags", 0)
                finalItem = pythonType.readFits(logLoc.locString(), hdu, flags)
            elif storageName == "ConfigStorage":
                if not os.path.exists(logLoc.locString()):
                    raise RuntimeError("No such config file: " + logLoc.locString())
                finalItem = pythonType()
                finalItem.load(logLoc.locString())
            else:
                # Create a list of Storages for the item.
                storageList = StorageList()
                storage = self.persistence.getRetrieveStorage(storageName, logLoc)
                storageList.append(storage)
                finalItem = self.persistence.unsafeRetrieve(
                    butlerLocation.getCppType(), storageList, additionalData)
            results.append(finalItem)

        return results

    def butlerLocationExists(self, location):
        """Implementation of PosixStorage.exists for ButlerLocation objects.

        Parameters
        ----------
        location : ButlerLocation
            The location to look for.

        Returns
        -------
        bool
            True if any of the locations named by the ButlerLocation is found
            in this storage. False if none is found or if the storage name of
            the location is not one of the supported ones.
        """
        storageName = location.getStorageName()
        if storageName not in ('BoostStorage', 'FitsStorage', 'PafStorage',
                               'PickleStorage', 'ConfigStorage', 'FitsCatalogStorage'):
            self.log.warn("butlerLocationExists for non-supported storage %s", location)
            return False
        for locationString in location.getLocations():
            logLoc = LogicalLocation(locationString, location.getAdditionalData()).locString()
            obj = self.instanceSearch(path=logLoc)
            if obj:
                return True
        return False

    def exists(self, location):
        """Check if location exists.

        Parameters
        ----------
        location : ButlerLocation or string
            A string or a ButlerLocation that describes the location of an
            object in this storage.

        Returns
        -------
        bool
            True if exists, else False.
        """
        if isinstance(location, ButlerLocation):
            return self.butlerLocationExists(location)

        obj = self.instanceSearch(path=location)
        return bool(obj)

    def locationWithRoot(self, location):
        """Get the full path to the location.

        Parameters
        ----------
        location : string
            A path relative to the root of this storage.

        Returns
        -------
        string
            The root of this storage joined with location.
        """
        return os.path.join(self.root, location)

    @staticmethod
    def v1RepoExists(root):
        """Test if a Version 1 Repository exists.

        Version 1 Repositories only exist in posix storages and do not have a RepositoryCfg file.
        To "exist" the folder at root must exist and contain files or folders.

        Parameters
        ----------
        root : string
            A path to a folder on the local filesystem.

        Returns
        -------
        bool
            True if the repository at root exists, else False.
        """
        # isdir (rather than exists) so that a root that names a plain file is
        # reported as "no repository" instead of making os.listdir raise.
        return os.path.isdir(root) and bool(os.listdir(root))

    def copyFile(self, fromLocation, toLocation):
        """Copy a file from one location to another on the local filesystem.

        Parameters
        ----------
        fromLocation : path
            Path and name of existing file, relative to root.
        toLocation : path
            Path and name of new file, relative to root.

        Returns
        -------
        None
        """
        shutil.copy(os.path.join(self.root, fromLocation), os.path.join(self.root, toLocation))

    def getLocalFile(self, path):
        """Get a handle to a local copy of the file, downloading it to a
        temporary if needed.

        Parameters
        ----------
        path : string
            A path to the file in storage, relative to root.

        Returns
        -------
        A handle to a local copy of the file. If storage is remote it will be
        a temporary file. If storage is local it may be the original file or
        a temporary file. The file name can be gotten via the 'name' property
        of the returned object. None is returned if the file does not exist.
        """
        import errno

        p = os.path.join(self.root, path)
        try:
            return open(p)
        except IOError as e:
            if e.errno == errno.ENOENT:  # 'No such file or directory'
                return None
            # Any other failure (permissions, is-a-directory, ...) propagates
            # with its original traceback.
            raise

    def instanceSearch(self, path):
        """Search for the given path in this storage instance.

        If the path contains an HDU indicator (a number in brackets before the
        dot, e.g. 'foo.fits[1]', this will be stripped when searching and so
        will match filenames without the HDU indicator, e.g. 'foo.fits'. The
        path returned WILL contain the indicator though, e.g. ['foo.fits[1]'].

        Parameters
        ----------
        path : string
            A filename (and optionally prefix path) to search for within root.

        Returns
        -------
        list of string, or None
            The locations that were found, or None if no location was found.
        """
        return self.search(self.root, path)

    @staticmethod
    def search(root, path, searchParents=False):
        """Look for the given path in the current root.

        Also supports searching for the path in Butler v1 repositories by
        following the Butler v1 _parent symlink

        If the path contains an HDU indicator (a number in brackets, e.g.
        'foo.fits[1]', this will be stripped when searching and so
        will match filenames without the HDU indicator, e.g. 'foo.fits'. The
        path returned WILL contain the indicator though, e.g. ['foo.fits[1]'].

        Parameters
        ----------
        root : string
            The path to the root directory.
        path : string
            The path to the file within the root directory. It is used as a
            glob pattern.
        searchParents : bool, optional
            For Butler v1 repositories only, if true and a _parent symlink
            exists, then the directory at _parent will be searched if the file
            is not found in the root repository. Will continue searching the
            parent of the parent until the file is found or no additional
            parent exists.

        Returns
        -------
        list of string, or None
            The locations that were found, or None if no location was found.
            Locations include the root when path was given with exactly that
            root string as its prefix; otherwise they are relative to root.
        """
        # Separate path into a root-equivalent prefix (in pathPrefix) and the
        # rest (left in path)
        rootDir = root
        # First remove trailing slashes (#2527)
        while len(rootDir) > 1 and rootDir[-1] == '/':
            rootDir = rootDir[:-1]

        if path.startswith(rootDir + "/"):
            # Common case; we have the same root prefix string
            path = path[len(rootDir + '/'):]
            pathPrefix = rootDir
        elif rootDir == "/" and path.startswith("/"):
            path = path[1:]
            pathPrefix = None
        else:
            # Search for prefix that is the same as root
            pathPrefix = os.path.dirname(path)
            while pathPrefix != "" and pathPrefix != "/":
                if os.path.realpath(pathPrefix) == os.path.realpath(root):
                    break
                pathPrefix = os.path.dirname(pathPrefix)
            if pathPrefix == "/":
                path = path[1:]
            elif pathPrefix != "":
                path = path[len(pathPrefix)+1:]

        # Now search for the path in the root or its parents
        # Strip off any cfitsio bracketed extension if present
        strippedPath = path
        pathStripped = None
        firstBracket = path.find("[")
        if firstBracket != -1:
            strippedPath = path[:firstBracket]
            pathStripped = path[firstBracket:]

        # Number of leading characters that os.path.join(rootDir, ...) puts in
        # front of a result: len(rootDir) + 1 in general, but only 1 when
        # rootDir is "/" and 0 when it is empty. Using len(rootDir + '/')
        # unconditionally chopped one character too many in those two cases.
        rootPrefixLen = len(os.path.join(rootDir, ''))

        searchDir = rootDir
        while True:
            paths = glob.glob(os.path.join(searchDir, strippedPath))
            if paths:
                if pathPrefix != rootDir:
                    # The caller did not spell the path with the root string,
                    # so hand back paths relative to root.
                    paths = [p[rootPrefixLen:] for p in paths]
                if pathStripped is not None:
                    paths = [p + pathStripped for p in paths]
                return paths
            if searchParents:
                searchDir = os.path.join(searchDir, "_parent")
                if not os.path.exists(searchDir):
                    return None
            else:
                return None

    @staticmethod
    def storageExists(uri):
        """Ask if a storage at the location described by uri exists

        Parameters
        ----------
        uri : string
            URI to the root location of the storage

        Returns
        -------
        bool
            True if the storage exists, false if not
        """
        return os.path.exists(PosixStorage._pathFromURI(uri))
645 
646 
# Register PosixStorage as the storage class for URIs that have no scheme
# (plain paths) and for URIs that use the 'file' scheme.
Storage.registerStorageClass(cls=PosixStorage, scheme='')
Storage.registerStorageClass(cls=PosixStorage, scheme='file')
Class for logical location of a persisted Persistable instance.