24 from past.builtins
import basestring
33 from .
import (LogicalLocation, Persistence, Policy, StorageList,
34 StorageInterface, Storage, ButlerLocation,
35 NoRepositroyAtRoot, RepositoryCfg, doImport)
38 from .safeFileIo
import SafeFilename, safeMakeDir
41 __all__ = [
"PosixStorage"]
45 """Defines the interface for a storage location on the local filesystem. 50 URI or path that is used as the storage location. 52 If True a new repository will be created at the root location if it 53 does not exist. If False then a new repository will not be created. 58 If create is False and a repository does not exist at the root 59 specified by uri then NoRepositroyAtRoot is raised. 63 self.
log = Log.getLogger(
"daf.persistence.butler")
65 if self.
root and not os.path.exists(self.
root):
76 return Persistence.getPersistence(persistencePolicy)
79 return 'PosixStorage(root=%s)' % self.
root 82 def _pathFromURI(uri):
83 """Get the path part of the URI""" 84 return urllib.parse.urlparse(uri).path
88 """Get a relative path from a location to a location. 93 A path at which to start. It can be a relative path or an 96 A target location. It can be a relative path or an absolute path. 101 A relative path that describes the path from fromPath to toPath. 103 fromPath = os.path.realpath(fromPath)
104 return os.path.relpath(toPath, fromPath)
108 """Get an absolute path for the path from fromUri to toUri 112 fromPath : the starting location 113 A location at which to start. It can be a relative path or an 115 relativePath : the location relative to fromPath 121 Path that is an absolute path representation of fromPath + 122 relativePath, if one exists. If relativePath is absolute or if 123 fromPath is not related to relativePath then relativePath will be 126 if os.path.isabs(relativePath):
128 fromPath = os.path.realpath(fromPath)
129 return os.path.normpath(os.path.join(fromPath, relativePath))
133 """Get a persisted RepositoryCfg 137 uri : URI or path to a RepositoryCfg 142 A RepositoryCfg instance or None 144 storage = Storage.makeFromURI(uri)
148 locationList=
'repositoryCfg.yaml',
154 return storage.read(location)
158 storage = Storage.makeFromURI(cfg.root
if loc
is None else loc, create=
True)
162 locationList=
'repositoryCfg.yaml',
168 storage.write(location, cfg)
172 """Get the mapper class associated with a repository root. 174 Supports the legacy _parent symlink search (which was only ever posix-only). This should not be used by 175 new code and repositories; they should use the Repository parentCfg mechanism. 180 The location of a persisted RepositoryCfg is (new style repos), or 181 the location where a _mapper file is (old style repos). 185 A class object or a class instance, depending on the state of the 186 mapper when the repository was created. 191 cfg = PosixStorage.getRepositoryCfg(root)
197 mapperFile =
"_mapper" 198 while not os.path.exists(os.path.join(basePath, mapperFile)):
200 if os.path.exists(os.path.join(basePath,
"_parent")):
201 basePath = os.path.join(basePath,
"_parent")
206 if mapperFile
is not None:
207 mapperFile = os.path.join(basePath, mapperFile)
210 with open(mapperFile,
"r") as f: 211 mapperName = f.readline().strip() 212 components = mapperName.split(".")
213 if len(components) <= 1:
214 raise RuntimeError(
"Unqualified mapper name %s in %s" %
215 (mapperName, mapperFile))
216 pkg = importlib.import_module(
".".join(components[:-1]))
217 return getattr(pkg, components[-1])
223 """For Butler V1 Repositories only, if a _parent symlink exists, get the location pointed to by the 229 A path to the folder on the local filesystem. 234 A path to the parent folder indicated by the _parent symlink, or None if there is no _parent 237 linkpath = os.path.join(root,
'_parent')
238 if os.path.exists(linkpath):
240 return os.readlink(os.path.join(root,
'_parent'))
244 return os.path.join(root,
'_parent')
247 def write(self, butlerLocation, obj):
248 """Writes an object to a location and persistence format specified by 253 butlerLocation : ButlerLocation 254 The location & formatting for the object to be written. 255 obj : object instance 256 The object to be written. 258 self.
log.debug(
"Put location=%s obj=%s", butlerLocation, obj)
261 if not writeFormatter:
264 writeFormatter(butlerLocation, obj)
267 raise(RuntimeError(
"No formatter for location:{}".format(butlerLocation)))
269 def read(self, butlerLocation):
270 """Read from a butlerLocation. 274 butlerLocation : ButlerLocation 275 The location & formatting for the object(s) to be read. 279 A list of objects as described by the butler location. One item for 280 each location in butlerLocation.getLocations() 283 if not readFormatter:
286 return readFormatter(butlerLocation)
288 raise(RuntimeError(
"No formatter for location:{}".format(butlerLocation)))
291 """Implementation of PosixStorage.exists for ButlerLocation objects. 293 storageName = location.getStorageName()
294 if storageName
not in (
'BoostStorage',
'FitsStorage',
'PafStorage',
295 'PickleStorage',
'ConfigStorage',
'FitsCatalogStorage',
297 self.
log.warn(
"butlerLocationExists for non-supported storage %s" % location)
299 for locationString
in location.getLocations():
300 logLoc =
LogicalLocation(locationString, location.getAdditionalData()).locString()
307 """Check if location exists. 311 location : ButlerLocation or string 312 A string or a ButlerLocation that describes the location of an 313 object in this storage. 318 True if exists, else False. 320 if isinstance(location, ButlerLocation):
327 """Get the full path to the location. 332 return os.path.join(self.
root, location)
336 """Test if a Version 1 Repository exists. 338 Version 1 Repositories only exist in posix storages, do not have a 339 RepositoryCfg file, and contain either a registry.sqlite3 file, a 340 _mapper file, or a _parent link. 345 A path to a folder on the local filesystem. 350 True if the repository at root exists, else False. 352 return os.path.exists(root)
and (
353 os.path.exists(os.path.join(root,
"registry.sqlite3"))
or 354 os.path.exists(os.path.join(root,
"_mapper"))
or 355 os.path.exists(os.path.join(root,
"_parent"))
359 """Copy a file from one location to another on the local filesystem. 364 Path and name of existing file. 366 Path and name of new file. 372 shutil.copy(os.path.join(self.
root, fromLocation), os.path.join(self.
root, toLocation))
375 """Get a handle to a local copy of the file, downloading it to a 380 A path to the file in storage, relative to root. 384 A handle to a local copy of the file. If storage is remote it will be 385 a temporary file. If storage is local it may be the original file or 386 a temporary file. The file name can be gotten via the 'name' property 387 of the returned object. 389 p = os.path.join(self.
root, path)
399 """Search for the given path in this storage instance. 401 If the path contains an HDU indicator (a number in brackets before the 402 dot, e.g. 'foo.fits[1]', this will be stripped when searching and so 403 will match filenames without the HDU indicator, e.g. 'foo.fits'. The 404 path returned WILL contain the indicator though, e.g. ['foo.fits[1]']. 409 A filename (and optionally prefix path) to search for within root. 414 The location that was found, or None if no location was found. 419 def search(root, path, searchParents=False):
420 """Look for the given path in the current root. 422 Also supports searching for the path in Butler v1 repositories by 423 following the Butler v1 _parent symlink 425 If the path contains an HDU indicator (a number in brackets, e.g. 426 'foo.fits[1]', this will be stripped when searching and so 427 will match filenames without the HDU indicator, e.g. 'foo.fits'. The 428 path returned WILL contain the indicator though, e.g. ['foo.fits[1]']. 433 The path to the root directory. 435 The path to the file within the root directory. 436 searchParents : bool, optional 437 For Butler v1 repositories only, if true and a _parent symlink 438 exists, then the directory at _parent will be searched if the file 439 is not found in the root repository. Will continue searching the 440 parent of the parent until the file is found or no additional 446 The location that was found, or None if no location was found. 452 while len(rootDir) > 1
and rootDir[-1] ==
'/':
453 rootDir = rootDir[:-1]
455 if path.startswith(rootDir +
"/"):
457 path = path[len(rootDir +
'/'):]
459 elif rootDir ==
"/" and path.startswith(
"/"):
464 pathPrefix = os.path.dirname(path)
465 while pathPrefix !=
"" and pathPrefix !=
"/":
466 if os.path.realpath(pathPrefix) == os.path.realpath(root):
468 pathPrefix = os.path.dirname(pathPrefix)
469 if pathPrefix ==
"/":
471 elif pathPrefix !=
"":
472 path = path[len(pathPrefix)+1:]
478 firstBracket = path.find(
"[")
479 if firstBracket != -1:
480 strippedPath = path[:firstBracket]
481 pathStripped = path[firstBracket:]
485 paths = glob.glob(os.path.join(dir, strippedPath))
487 if pathPrefix != rootDir:
488 paths = [p[len(rootDir+
'/'):]
for p
in paths]
489 if pathStripped
is not None:
490 paths = [p + pathStripped
for p
in paths]
493 dir = os.path.join(dir,
"_parent")
494 if not os.path.exists(dir):
501 """Ask if a storage at the location described by uri exists 506 URI to the root location of the storage 511 True if the storage exists, false if not 513 return os.path.exists(PosixStorage._pathFromURI(uri))
517 """Read from a butlerLocation. 521 butlerLocation : ButlerLocation 522 The location & formatting for the object(s) to be read. 526 A list of objects as described by the butler location. One item for 527 each location in butlerLocation.getLocations() 530 for locationString
in butlerLocation.getLocations():
531 locStringWithRoot = os.path.join(butlerLocation.getStorage().root, locationString)
532 logLoc =
LogicalLocation(locStringWithRoot, butlerLocation.getAdditionalData())
533 if not os.path.exists(logLoc.locString()):
534 raise RuntimeError(
"No such config file: " + logLoc.locString())
535 pythonType = butlerLocation.getPythonType()
536 if pythonType
is not None:
537 if isinstance(pythonType, basestring):
539 finalItem = pythonType()
540 finalItem.load(logLoc.locString())
541 results.append(finalItem)
546 """Writes an object to a location and persistence format specified by 551 butlerLocation : ButlerLocation 552 The location & formatting for the object to be written. 553 obj : object instance 554 The object to be written. 556 filename = os.path.join(butlerLocation.getStorage().root, butlerLocation.getLocations()[0])
558 logLoc =
LogicalLocation(locationString, butlerLocation.getAdditionalData())
559 obj.save(logLoc.locString())
563 """Read from a butlerLocation. 567 butlerLocation : ButlerLocation 568 The location & formatting for the object(s) to be read. 572 A list of objects as described by the butler location. One item for 573 each location in butlerLocation.getLocations() 576 for locationString
in butlerLocation.getLocations():
577 locStringWithRoot = os.path.join(butlerLocation.getStorage().root, locationString)
578 logLoc =
LogicalLocation(locStringWithRoot, butlerLocation.getAdditionalData())
580 storage = PosixStorage.getPersistence().getRetrieveStorage(butlerLocation.getStorageName(),
582 storageList.append(storage)
583 finalItem = PosixStorage.getPersistence().unsafeRetrieve(
584 butlerLocation.getCppType(), storageList, butlerLocation.getAdditionalData())
585 results.append(finalItem)
590 """Writes an object to a location and persistence format specified by 595 butlerLocation : ButlerLocation 596 The location & formatting for the object to be written. 597 obj : object instance 598 The object to be written. 600 location = butlerLocation.getLocations()[0]
601 with
SafeFilename(os.path.join(butlerLocation.getStorage().root, location))
as locationString:
602 logLoc =
LogicalLocation(locationString, butlerLocation.getAdditionalData())
605 storage = PosixStorage.getPersistence().getPersistStorage(butlerLocation.getStorageName(), logLoc)
606 storageList.append(storage)
607 persistence = PosixStorage.getPersistence()
608 if hasattr(obj,
'__deref__'):
610 persistence.persist(obj.__deref__(), storageList, butlerLocation.getAdditionalData())
612 persistence.persist(obj, storageList, butlerLocation.getAdditionalData())
616 """Read from a butlerLocation. 618 The object returned by this is expected to be a subtype 619 of `ParquetTable`, which is a thin wrapper to `pyarrow.ParquetFile` 620 that allows for lazy loading of the data. 624 butlerLocation : ButlerLocation 625 The location & formatting for the object(s) to be read. 629 A list of objects as described by the butler location. One item for 630 each location in butlerLocation.getLocations() 633 additionalData = butlerLocation.getAdditionalData()
635 for locationString
in butlerLocation.getLocations():
636 locStringWithRoot = os.path.join(butlerLocation.getStorage().root, locationString)
638 if not os.path.exists(logLoc.locString()):
639 raise RuntimeError(
"No such parquet file: " + logLoc.locString())
641 pythonType = butlerLocation.getPythonType()
642 if pythonType
is not None:
643 if isinstance(pythonType, basestring):
646 filename = logLoc.locString()
650 results.append(pythonType(filename=filename))
653 Log.getLogger(
"daf.persistence.butler").warning(
'Not using multiple locations!')
659 """Writes pandas dataframe to parquet file 663 butlerLocation : ButlerLocation 664 The location & formatting for the object(s) to be read. 665 obj : `lsst.qa.explorer.parquetTable.ParquetTable` 666 Wrapped DataFrame to write. 669 additionalData = butlerLocation.getAdditionalData()
670 locations = butlerLocation.getLocations()
671 with
SafeFilename(os.path.join(butlerLocation.getStorage().root, locations[0]))
as locationString:
673 filename = logLoc.locString()
678 """Read from a butlerLocation. 682 butlerLocation : ButlerLocation 683 The location & formatting for the object(s) to be read. 687 A list of objects as described by the butler location. One item for 688 each location in butlerLocation.getLocations() 692 additionalData = butlerLocation.getAdditionalData()
693 for locationString
in butlerLocation.getLocations():
694 locStringWithRoot = os.path.join(butlerLocation.getStorage().root, locationString)
696 if not os.path.exists(logLoc.locString()):
697 raise RuntimeError(
"No such pickle file: " + logLoc.locString())
698 with open(logLoc.locString(),
"rb")
as infile:
702 if sys.version_info.major >= 3:
703 finalItem = pickle.load(infile, encoding=
"latin1")
705 finalItem = pickle.load(infile)
706 results.append(finalItem)
711 """Writes an object to a location and persistence format specified by 716 butlerLocation : ButlerLocation 717 The location & formatting for the object to be written. 718 obj : object instance 719 The object to be written. 721 additionalData = butlerLocation.getAdditionalData()
722 locations = butlerLocation.getLocations()
723 with
SafeFilename(os.path.join(butlerLocation.getStorage().root, locations[0]))
as locationString:
725 with open(logLoc.locString(),
"wb")
as outfile:
726 pickle.dump(obj, outfile, pickle.HIGHEST_PROTOCOL)
730 """Read from a butlerLocation. 734 butlerLocation : ButlerLocation 735 The location & formatting for the object(s) to be read. 739 A list of objects as described by the butler location. One item for 740 each location in butlerLocation.getLocations() 742 pythonType = butlerLocation.getPythonType()
743 if pythonType
is not None:
744 if isinstance(pythonType, basestring):
747 additionalData = butlerLocation.getAdditionalData()
748 for locationString
in butlerLocation.getLocations():
749 locStringWithRoot = os.path.join(butlerLocation.getStorage().root, locationString)
751 if not os.path.exists(logLoc.locString()):
752 raise RuntimeError(
"No such FITS catalog file: " + logLoc.locString())
754 if additionalData.exists(
"hdu"):
755 kwds[
"hdu"] = additionalData.getInt(
"hdu")
756 if additionalData.exists(
"flags"):
757 kwds[
"flags"] = additionalData.getInt(
"flags")
758 finalItem = pythonType.readFits(logLoc.locString(), **kwds)
759 results.append(finalItem)
764 """Writes an object to a location and persistence format specified by 769 butlerLocation : ButlerLocation 770 The location & formatting for the object to be written. 771 obj : object instance 772 The object to be written. 774 additionalData = butlerLocation.getAdditionalData()
775 locations = butlerLocation.getLocations()
776 with
SafeFilename(os.path.join(butlerLocation.getStorage().root, locations[0]))
as locationString:
778 if additionalData.exists(
"flags"):
779 kwds = dict(flags=additionalData.getInt(
"flags"))
782 obj.writeFits(logLoc.locString(), **kwds)
787 """Read from a butlerLocation. 791 butlerLocation : ButlerLocation 792 The location & formatting for the object(s) to be read. 796 A list of objects as described by the butler location. One item for 797 each location in butlerLocation.getLocations() 800 for locationString
in butlerLocation.getLocations():
801 logLoc =
LogicalLocation(butlerLocation.getStorage().locationWithRoot(locationString),
802 butlerLocation.getAdditionalData())
803 finalItem = pexPolicy.Policy.createPolicy(logLoc.locString())
804 results.append(finalItem)
809 """Read from a butlerLocation. 813 butlerLocation : ButlerLocation 814 The location & formatting for the object(s) to be read. 818 A list of objects as described by the butler location. One item for 819 each location in butlerLocation.getLocations() 822 for locationString
in butlerLocation.getLocations():
823 logLoc =
LogicalLocation(butlerLocation.getStorage().locationWithRoot(locationString),
824 butlerLocation.getAdditionalData())
825 finalItem =
Policy(filePath=logLoc.locString())
826 results.append(finalItem)
832 additionalData = butlerLocation.getAdditionalData()
833 for locationString
in butlerLocation.getLocations():
834 logLoc =
LogicalLocation(butlerLocation.getStorage().locationWithRoot(locationString),
835 butlerLocation.getAdditionalData())
837 storage = PosixStorage.getPersistence().getRetrieveStorage(butlerLocation.getStorageName(), logLoc)
838 storageList.append(storage)
839 finalItem = PosixStorage.getPersistence().unsafeRetrieve(butlerLocation.getCppType(), storageList,
841 results.append(finalItem)
846 additionalData = butlerLocation.getAdditionalData()
847 location = butlerLocation.getStorage().locationWithRoot(butlerLocation.getLocations()[0])
852 storage = PosixStorage.getPersistence().getPersistStorage(butlerLocation.getStorageName(), logLoc)
853 storageList.append(storage)
855 if hasattr(obj,
'__deref__'):
857 PosixStorage.getPersistence().persist(obj.__deref__(), storageList, additionalData)
859 PosixStorage.getPersistence().persist(obj, storageList, additionalData)
# Register the module-level read/write functions with PosixStorage, keyed by
# storage name. PosixStorage.read/write look a formatter up by the
# ButlerLocation's storage name and call it as readFormatter(butlerLocation)
# or writeFormatter(butlerLocation, obj).
PosixStorage.registerFormatters("FitsStorage", readFitsStorage, writeFitsStorage)
PosixStorage.registerFormatters("ParquetStorage", readParquetStorage, writeParquetStorage)
PosixStorage.registerFormatters("ConfigStorage", readConfigStorage, writeConfigStorage)
PosixStorage.registerFormatters("PickleStorage", readPickleStorage, writePickleStorage)
PosixStorage.registerFormatters("FitsCatalogStorage", readFitsCatalogStorage, writeFitsCatalogStorage)
# PafStorage and YamlStorage only have readers, so only the read slot is
# filled. readPafStorage takes a single butlerLocation argument and must be
# registered as the *read* formatter; registering it as the write formatter
# would make reads fail with "No formatter" and writes call it with an extra
# argument.
PosixStorage.registerFormatters("PafStorage", readFormatter=readPafStorage)
PosixStorage.registerFormatters("YamlStorage", readFormatter=readYamlStorage)
# NOTE(review): BoostStorage is wired to the FITS reader/writer although
# readBoostStorage/writeBoostStorage appear to be defined in this module --
# confirm this is intentional before changing it.
PosixStorage.registerFormatters("BoostStorage", readFitsStorage, writeFitsStorage)

# Register PosixStorage for both the empty URI scheme and the 'file' scheme.
Storage.registerStorageClass(scheme='', cls=PosixStorage)
Storage.registerStorageClass(scheme='file', cls=PosixStorage)
def copyFile(self, fromLocation, toLocation)
def readConfigStorage(butlerLocation)
def readPickleStorage(butlerLocation)
def safeMakeDir(directory)
def getWriteFormatter(cls, objType)
Class for logical location of a persisted Persistable instance.
def writePickleStorage(butlerLocation, obj)
def readBoostStorage(butlerLocation)
def readParquetStorage(butlerLocation)
def relativePath(fromPath, toPath)
def writeFitsCatalogStorage(butlerLocation, obj)
def exists(self, location)
def readPafStorage(butlerLocation)
def butlerLocationExists(self, location)
def search(root, path, searchParents=False)
def writeParquetStorage(butlerLocation, obj)
def writeConfigStorage(butlerLocation, obj)
def readFitsStorage(butlerLocation)
def locationWithRoot(self, location)
def readFitsCatalogStorage(butlerLocation)
def getParentSymlinkPath(root)
def readYamlStorage(butlerLocation)
def absolutePath(fromPath, relativePath)
def instanceSearch(self, path)
def putRepositoryCfg(cfg, loc=None)
def getReadFormatter(cls, objType)
def getLocalFile(self, path)
def read(self, butlerLocation)
def writeBoostStorage(butlerLocation, obj)
def writeFitsStorage(butlerLocation, obj)
def write(self, butlerLocation, obj)
def __init__(self, uri, create)
def getRepositoryCfg(uri)