lsst.daf.persistence  13.0-11-gfc17871
 All Classes Namespaces Files Functions Variables Typedefs Friends Macros
posixStorage.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 #
4 # LSST Data Management System
5 # Copyright 2016 LSST Corporation.
6 #
7 # This product includes software developed by the
8 # LSST Project (http://www.lsst.org/).
9 #
10 # This program is free software: you can redistribute it and/or modify
11 # it under the terms of the GNU General Public License as published by
12 # the Free Software Foundation, either version 3 of the License, or
13 # (at your option) any later version.
14 #
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
19 #
20 # You should have received a copy of the LSST License Statement and
21 # the GNU General Public License along with this program. If not,
22 # see <http://www.lsstcorp.org/LegalNotices/>.
23 #
24 from future import standard_library
25 standard_library.install_aliases()
26 from past.builtins import basestring
27 import sys
28 import copy
29 import pickle
30 import importlib
31 import os
32 import urllib.parse
33 import glob
34 import shutil
35 
36 import yaml
37 
38 from . import (LogicalLocation, Persistence, Policy, StorageList, Registry,
39  Storage, RepositoryCfg, safeFileIo, ButlerLocation)
40 from lsst.log import Log
41 import lsst.pex.policy as pexPolicy
42 from .safeFileIo import SafeFilename, safeMakeDir
43 
44 
46 
def __init__(self, uri):
    """Construct a storage rooted at the path component of ``uri``.

    Parameters
    ----------
    uri : string
        URI or plain path of the storage root. When the resulting path is
        non-empty and does not exist yet, it is created with safeMakeDir.
    """
    self.log = Log.getLogger("daf.persistence.butler")

    rootPath = self._pathFromURI(uri)
    self.root = rootPath
    if rootPath:
        if not os.path.exists(rootPath):
            safeMakeDir(rootPath)

    # The Persistence framework is always handed an empty policy until it
    # can be removed altogether.
    emptyPolicy = pexPolicy.Policy()
    self.persistence = Persistence.getPersistence(emptyPolicy)
60 
61  def __repr__(self):
62  return 'PosixStorage(root=%s)' % self.root
63 
64  @staticmethod
65  def _pathFromURI(uri):
66  """Get the path part of the URI"""
67  return urllib.parse.urlparse(uri).path
68 
@staticmethod
def relativePath(fromPath, toPath):
    """Compute the relative path that leads from one location to another.

    Parameters
    ----------
    fromPath : string
        The starting location; relative or absolute. Symlinks in it are
        resolved (os.path.realpath) before the computation.
    toPath : string
        The target location; relative or absolute.

    Returns
    -------
    string
        A relative path describing how to get from fromPath to toPath.
    """
    resolvedStart = os.path.realpath(fromPath)
    return os.path.relpath(toPath, start=resolvedStart)
88 
@staticmethod
def absolutePath(fromPath, relativePath):
    """Resolve a path that is relative to fromPath into an absolute path.

    Parameters
    ----------
    fromPath : string
        The location that relativePath is relative to. It can be a
        relative path or an absolute path; symlinks in it are resolved.
    relativePath : string
        A path relative to fromPath.

    Returns
    -------
    string
        The normalized join of the resolved fromPath and relativePath. If
        relativePath is already absolute it is returned unchanged.
    """
    if not os.path.isabs(relativePath):
        base = os.path.realpath(fromPath)
        joined = os.path.join(base, relativePath)
        return os.path.normpath(joined)
    return relativePath
113 
114  @staticmethod
115  def _getRepositoryCfg(uri):
116  """Get a persisted RepositoryCfg
117 
118  Parameters
119  ----------
120  uri : URI or path to a RepositoryCfg
121  Description
122 
123  Returns
124  -------
125  A RepositoryCfg instance or None
126  """
127  repositoryCfg = None
128  parseRes = urllib.parse.urlparse(uri)
129  loc = os.path.join(parseRes.path, 'repositoryCfg.yaml')
130  if os.path.exists(loc):
131  with open(loc, 'r') as f:
132  repositoryCfg = yaml.load(f)
133  if repositoryCfg.root is None:
134  repositoryCfg.root = uri
135  return repositoryCfg
136 
137  @staticmethod
139  """Get a persisted RepositoryCfg
140 
141  Parameters
142  ----------
143  uri : URI or path to a RepositoryCfg
144  Description
145 
146  Returns
147  -------
148  A RepositoryCfg instance or None
149  """
150  repositoryCfg = PosixStorage._getRepositoryCfg(uri)
151  if repositoryCfg is not None:
152  return repositoryCfg
153 
154  return repositoryCfg
155 
@staticmethod
def putRepositoryCfg(cfg, loc=None):
    """Serialize a RepositoryCfg to a location.

    When loc == cfg.root, the RepositoryCfg is to be written at the root
    location of the repository. In that case, root is not written, it is
    implicit in the location of the cfg. This allows the cfg to move from
    machine to machine without modification.

    Parameters
    ----------
    cfg : RepositoryCfg instance
        The RepositoryCfg to be serialized. It is not modified; a shallow
        copy is made when root has to be blanked out.
    loc : string, optional
        The location to write the RepositoryCfg. If loc is None, the
        location will be read from the root parameter of cfg.

    Returns
    -------
    None
    """
    if loc is None or cfg.root == loc:
        # Work on a copy so the caller's cfg keeps its root.
        cfg = copy.copy(cfg)
        loc = cfg.root
        cfg.root = None
    # This class supports schema 'file' and also treats no schema as 'file'.
    # Split the URI and take only the path; remove the schema from loc if it's there.
    parseRes = urllib.parse.urlparse(loc)
    loc = parseRes.path
    # Create the directory without a separate existence check: another
    # process may create it between a check and makedirs. An OSError is only
    # propagated when the directory really is not there afterwards.
    try:
        os.makedirs(loc)
    except OSError:
        if not os.path.isdir(loc):
            raise
    loc = os.path.join(loc, 'repositoryCfg.yaml')
    with safeFileIo.FileForWriteOnceCompareSame(loc) as f:
        yaml.dump(cfg, f)
190 
@staticmethod
def getMapperClass(root):
    """Find the mapper associated with a repository root.

    A persisted RepositoryCfg at root takes precedence. Otherwise the
    legacy (posix-only) lookup is used: a '_mapper' file is searched for in
    root and then by following '_parent' entries. New code and repositories
    should use the Repository parentCfg mechanism instead.

    Parameters
    ----------
    root : string
        The location of a persisted RepositoryCfg (new style repos), or
        the location where a _mapper file is (old style repos).

    Returns
    -------
    A class object or a class instance, depending on the state of the
    mapper when the repository was created; None if root is empty or no
    mapper could be found.

    Raises
    ------
    RuntimeError
        If the name stored in the _mapper file has no package qualifier.
    """
    if not root:
        return None

    repoCfg = PosixStorage._getRepositoryCfg(root)
    if repoCfg is not None:
        return repoCfg.mapper

    # Walk from root through successive _parent entries until a directory
    # containing a "_mapper" file turns up. This breaks abstraction by
    # following the _parent links created by CameraMapper.
    searchDir = root
    while True:
        mapperFile = os.path.join(searchDir, "_mapper")
        if os.path.exists(mapperFile):
            break
        parentLink = os.path.join(searchDir, "_parent")
        if not os.path.exists(parentLink):
            return None
        searchDir = parentLink

    # The first line of the file is the fully qualified mapper class name.
    with open(mapperFile, "r") as f:
        mapperName = f.readline().strip()
    nameParts = mapperName.split(".")
    if len(nameParts) <= 1:
        raise RuntimeError("Unqualified mapper name %s in %s" %
                           (mapperName, mapperFile))
    moduleName = ".".join(nameParts[:-1])
    className = nameParts[-1]
    return getattr(importlib.import_module(moduleName), className)
241 
242  @staticmethod
244  """For Butler V1 Repositories only, if a _parent symlink exists, get the location pointed to by the
245  symlink.
246 
247  Parameters
248  ----------
249  root : string
250  A path to the folder on the local filesystem.
251 
252  Returns
253  -------
254  string or None
255  A path to the parent folder indicated by the _parent symlink, or None if there is no _parent
256  symlink at root.
257  """
258  linkpath = os.path.join(root, '_parent')
259  if os.path.exists(linkpath):
260  try:
261  return os.readlink(os.path.join(root, '_parent'))
262  except OSError:
263  # some of the unit tests rely on a folder called _parent instead of a symlink to aother
264  # location. Allow that; return the path of that folder.
265  return os.path.join(root, '_parent')
266  return None
267 
def write(self, butlerLocation, obj):
    """Write an object to the location, and in the persistence format,
    described by a ButlerLocation.

    Only the first entry of butlerLocation.getLocations() is written to.

    Parameters
    ----------
    butlerLocation : ButlerLocation
        The location & formatting for the object to be written.
    obj : object instance
        The object to be written.
    """
    self.log.debug("Put location=%s obj=%s", butlerLocation, obj)

    additionalData = butlerLocation.getAdditionalData()
    storageName = butlerLocation.getStorageName()
    locations = butlerLocation.getLocations()

    pythonType = butlerLocation.getPythonType()
    if pythonType is not None:
        if isinstance(pythonType, basestring):
            # The type was given by dotted name; import it dynamically.
            moduleTokens = pythonType.split('.')
            className = moduleTokens.pop().strip()
            moduleName = ".".join(moduleTokens)
            module = __import__(moduleName, globals(), locals(), [className], 0)
            pythonType = getattr(module, className)
        # NOTE: any python type that exposes an attribute named 'butlerWrite'
        # takes over serialization completely, whatever storageName is. A
        # more specific API may be desirable.
        if hasattr(pythonType, 'butlerWrite'):
            pythonType.butlerWrite(obj, butlerLocation=butlerLocation)
            return

    destination = os.path.join(self.root, locations[0])
    with SafeFilename(destination) as locationString:
        logLoc = LogicalLocation(locationString, additionalData)

        if storageName == "PickleStorage":
            with open(logLoc.locString(), "wb") as outfile:
                pickle.dump(obj, outfile, pickle.HIGHEST_PROTOCOL)
        elif storageName == "ConfigStorage":
            obj.save(logLoc.locString())
        elif storageName == "FitsCatalogStorage":
            flags = additionalData.getInt("flags", 0)
            obj.writeFits(logLoc.locString(), flags=flags)
        else:
            # Everything else goes through the Persistence framework, which
            # wants a list of Storages for the item.
            storageList = StorageList()
            storageList.append(self.persistence.getPersistStorage(storageName, logLoc))

            # FitsStorage persists obj as given; for any other storage a
            # smart pointer (something with __deref__) is dereferenced first.
            payload = obj
            if storageName != 'FitsStorage' and hasattr(obj, '__deref__'):
                payload = obj.__deref__()
            self.persistence.persist(payload, storageList, additionalData)
335 
def read(self, butlerLocation):
    """Read the object(s) described by a ButlerLocation.

    Parameters
    ----------
    butlerLocation : ButlerLocation
        The location & formatting for the object(s) to be read.

    Returns
    -------
    A list of objects as described by the butler location, one item for
    each location in butlerLocation.getLocations(). If the python type has
    a 'butlerRead' attribute, whatever it returns is returned instead.

    Raises
    ------
    RuntimeError
        If a pickle, FITS catalog or config file to be read does not exist.
    """
    additionalData = butlerLocation.getAdditionalData()
    storageName = butlerLocation.getStorageName()
    results = []
    locations = butlerLocation.getLocations()

    pythonType = butlerLocation.getPythonType()
    if pythonType is not None:
        if isinstance(pythonType, basestring):
            # The type was given by dotted name; import it dynamically.
            moduleTokens = pythonType.split('.')
            className = moduleTokens.pop().strip()
            moduleName = ".".join(moduleTokens)
            module = __import__(moduleName, globals(), locals(), [className], 0)
            pythonType = getattr(module, className)

        # A python type that exposes 'butlerRead' takes over deserialization
        # completely, whatever storageName is.
        if hasattr(pythonType, 'butlerRead'):
            return pythonType.butlerRead(butlerLocation=butlerLocation)

    for relativeLocation in locations:
        logLoc = LogicalLocation(os.path.join(self.root, relativeLocation), additionalData)

        if storageName == "PafStorage":
            finalItem = pexPolicy.Policy.createPolicy(logLoc.locString())
        elif storageName == "YamlStorage":
            finalItem = Policy(filePath=logLoc.locString())
        elif storageName == "PickleStorage":
            if not os.path.exists(logLoc.locString()):
                raise RuntimeError("No such pickle file: " + logLoc.locString())
            with open(logLoc.locString(), "rb") as infile:
                # py3: some files were pickled by python2, so an encoding has
                # to be given; 'latin1' handles that conversion safely. See:
                # http://stackoverflow.com/questions/28218466/unpickling-a-python-2-object-with-python-3/28218598#28218598
                # NOTE: unpickling executes code chosen by the file's author;
                # only read pickles from trusted repositories.
                if sys.version_info.major < 3:
                    finalItem = pickle.load(infile)
                else:
                    finalItem = pickle.load(infile, encoding="latin1")
        elif storageName == "FitsCatalogStorage":
            if not os.path.exists(logLoc.locString()):
                raise RuntimeError("No such FITS catalog file: " + logLoc.locString())
            # Default passed for "hdu" when additionalData does not set one.
            INT_MIN = -(1 << 31)
            hdu = additionalData.getInt("hdu", INT_MIN)
            flags = additionalData.getInt("flags", 0)
            finalItem = pythonType.readFits(logLoc.locString(), hdu, flags)
        elif storageName == "ConfigStorage":
            if not os.path.exists(logLoc.locString()):
                raise RuntimeError("No such config file: " + logLoc.locString())
            finalItem = pythonType()
            finalItem.load(logLoc.locString())
        else:
            # Everything else is retrieved through the Persistence framework.
            storageList = StorageList()
            storageList.append(self.persistence.getRetrieveStorage(storageName, logLoc))
            finalItem = self.persistence.unsafeRetrieve(
                butlerLocation.getCppType(), storageList, additionalData)
        results.append(finalItem)

    return results
412 
def butlerLocationExists(self, location):
    """Implementation of PosixStorage.exists for ButlerLocation objects.

    Parameters
    ----------
    location : ButlerLocation
        Describes the storage type and the location(s) to look for.

    Returns
    -------
    bool
        True if at least one of the locations is found by instanceSearch.
        False if none is found, or if the storage type is not supported
        (in which case a warning is logged).
    """
    storageName = location.getStorageName()
    # 'YamlStorage' is included because read() handles it like the other
    # file-backed storages; without it, existence checks for yaml datasets
    # always reported False.
    if storageName not in ('BoostStorage', 'FitsStorage', 'PafStorage', 'YamlStorage',
                           'PickleStorage', 'ConfigStorage', 'FitsCatalogStorage'):
        self.log.warn("butlerLocationExists for non-supported storage %s" % location)
        return False
    for locationString in location.getLocations():
        logLoc = LogicalLocation(locationString, location.getAdditionalData()).locString()
        if self.instanceSearch(path=logLoc):
            return True
    return False
426 
def exists(self, location):
    """Report whether a location exists in this storage.

    Parameters
    ----------
    location : ButlerLocation or string
        A string or a ButlerLocation that describes the location of an
        object in this storage.

    Returns
    -------
    bool
        True if exists, else False.
    """
    if not isinstance(location, ButlerLocation):
        # Plain path: it exists if the search finds anything at all.
        return bool(self.instanceSearch(path=location))
    return self.butlerLocationExists(location)
446 
def locationWithRoot(self, location):
    """Prefix a location with this storage's root.

    Parameters
    ----------
    location : string
        A path relative to the root of this storage.

    Returns
    -------
    string
        os.path.join of the storage root and location.
    """
    fullPath = os.path.join(self.root, location)
    return fullPath
454 
@staticmethod
def v1RepoExists(root):
    """Test if a Version 1 Repository exists.

    Version 1 Repositories only exist in posix storages and do not have a RepositoryCfg file.
    To "exist" the folder at root must exist and contain files or folders.

    Parameters
    ----------
    root : string
        A path to a folder on the local filesystem.

    Returns
    -------
    bool
        True if the repository at root exists, else False. A root that is
        missing, is not a directory, or is an empty directory yields False.
    """
    # isdir (rather than exists) keeps os.listdir from raising when root
    # names an existing regular file.
    return os.path.isdir(root) and bool(os.listdir(root))
473 
def copyFile(self, fromLocation, toLocation):
    """Copy a file to another place within this storage.

    Both locations are joined onto the storage root before copying.

    Parameters
    ----------
    fromLocation : path
        Path and name of existing file.
    toLocation : path
        Path and name of new file.

    Returns
    -------
    None
    """
    source = os.path.join(self.root, fromLocation)
    destination = os.path.join(self.root, toLocation)
    shutil.copy(source, destination)
489 
def getLocalFile(self, path):
    """Get the path to a local copy of a file in this storage.

    This storage is local, so no copy is made: the result is simply the
    location of the file under root.

    Parameters
    ----------
    path : string
        A path to the file in storage, relative to root.

    Returns
    -------
    string or None
        root joined with path if something exists there, otherwise None.
    """
    localPath = os.path.join(self.root, path)
    return localPath if os.path.exists(localPath) else None
508 
def instanceSearch(self, path):
    """Search for the given path under the root of this storage instance.

    This hands the instance root and path to search(); see that method for
    the treatment of HDU indicators such as 'foo.fits[1]' (stripped while
    searching, present again in what is returned).

    Parameters
    ----------
    path : string
        A filename (and optionally prefix path) to search for within root.

    Returns
    -------
    list of string, or None
        Whatever search() returns for (self.root, path): the locations that
        were found, or None if no location was found.
    """
    found = self.search(self.root, path)
    return found
528 
@staticmethod
def search(root, path, searchParents=False):
    """Look for the given path in the current root.

    Also supports searching for the path in Butler v1 repositories by
    following the Butler v1 _parent symlink.

    If the path contains an HDU indicator (a number in brackets, e.g.
    'foo.fits[1]'), this will be stripped when searching and so
    will match filenames without the HDU indicator, e.g. 'foo.fits'. The
    paths returned WILL contain the indicator though, e.g. ['foo.fits[1]'].

    Parameters
    ----------
    root : string
        The path to the root directory.
    path : string
        The path to the file within the root directory. It is used as a
        glob pattern, so it may match more than one file.
    searchParents : bool, optional
        For Butler v1 repositories only, if true and a _parent symlink
        exists, then the directory at _parent will be searched if the file
        is not found in the root repository. Will continue searching the
        parent of the parent until the file is found or no additional
        parent exists.

    Returns
    -------
    list of string, or None
        The locations that were found, or None if no location was found.
        When path was given with root as its literal prefix the results
        keep that prefix; otherwise they are relative to root.
    """
    # Remove trailing slashes from root (#2527) but never reduce a
    # non-empty root to nothing: "///" becomes "/".
    rootDir = root.rstrip('/') or root[:1]

    # Number of leading characters that make up "root plus separator" in a
    # globbed result. When root is "/" the separator is root itself, so only
    # one character may be dropped (dropping two ate the first character of
    # every result).
    if rootDir.endswith('/'):
        rootPrefixLen = len(rootDir)
    else:
        rootPrefixLen = len(rootDir) + 1

    # Separate path into a root-equivalent prefix (pathPrefix) and the rest
    # (left in path).
    if path.startswith(rootDir + "/"):
        # Common case; we have the same root prefix string
        path = path[len(rootDir) + 1:]
        pathPrefix = rootDir
    elif rootDir == "/" and path.startswith("/"):
        path = path[1:]
        pathPrefix = None
    else:
        # Search for a leading part of path that resolves to the same
        # directory as root (e.g. reached through a symlink).
        realRoot = os.path.realpath(root)
        pathPrefix = os.path.dirname(path)
        while pathPrefix not in ("", "/"):
            if os.path.realpath(pathPrefix) == realRoot:
                break
            pathPrefix = os.path.dirname(pathPrefix)
        if pathPrefix == "/":
            path = path[1:]
        elif pathPrefix != "":
            path = path[len(pathPrefix) + 1:]

    # Strip off any cfitsio bracketed extension; it is put back on the
    # results below.
    strippedPath, bracket, bracketRest = path.partition("[")
    pathStripped = bracket + bracketRest if bracket else None

    # Now search for the path in the root or its parents.
    searchDir = rootDir
    while True:
        paths = glob.glob(os.path.join(searchDir, strippedPath))
        if paths:
            if pathPrefix != rootDir:
                # The caller did not spell the path with root in front, so
                # report the results relative to root as well.
                paths = [p[rootPrefixLen:] for p in paths]
            if pathStripped is not None:
                paths = [p + pathStripped for p in paths]
            return paths
        if not searchParents:
            return None
        searchDir = os.path.join(searchDir, "_parent")
        if not os.path.exists(searchDir):
            return None
609 
@staticmethod
def storageExists(uri):
    """Ask if a storage at the location described by uri exists.

    Parameters
    ----------
    uri : string
        URI to the root location of the storage.

    Returns
    -------
    bool
        True if the path component of uri exists on the filesystem, false
        if not.
    """
    storagePath = PosixStorage._pathFromURI(uri)
    return os.path.exists(storagePath)
625 
626 
# Register PosixStorage with Storage for two URI schemes: the empty scheme
# (a location given without any scheme) and the explicit 'file' scheme.
627 Storage.registerStorageClass(scheme='', cls=PosixStorage)
628 Storage.registerStorageClass(scheme='file', cls=PosixStorage)
Class for logical location of a persisted Persistable instance.
Abstract base class for storage implementations.
Definition: Storage.h:60