27 """This module defines the Butler class."""
28 from future
import standard_library
29 standard_library.install_aliases()
30 from builtins
import str
31 from past.builtins
import basestring
32 from builtins
import object
43 from lsst.log
import Log
44 import lsst.pex.policy
as pexPolicy
45 from .
import LogicalLocation, ReadProxy, ButlerSubset, ButlerDataRef, Persistence, \
46 Storage, Policy, NoResults, Repository, DataId, RepositoryCfg, \
47 RepositoryArgs, listify, setify, sequencify, doImport, ButlerComposite, genericAssembler, \
48 genericDisassembler, PosixStorage
preinitedMapperWarning = ("Passing an instantiated mapper into " +
                          "Butler.__init__ will prevent Butler from passing " +
                          "parentRegistry or repositoryCfg information to " +
                          "the mapper, which is done only at init time. " +
                          "It is better to pass an importable string or " +
                          "class object.")
59 """Represents a Butler configuration.
63 cfg is 'wet paint' and very likely to change. Use of it in production
64 code other than via the 'old butler' API is strongly discouraged.
66 yaml_tag =
u"!ButlerCfg"
69 super(ButlerCfg, self).
__init__({
'repoCfg': repoCfg,
'cls': cls})
73 """Container object for repository data used by Butler
78 Arguments used to initialize self.repo
80 Configuration of repository
81 storedCfg - RepositoryCfg or None
82 If the cfg at root and the RepositoryArgs don't match then a new cfg is kept in cfg and the cfg that
83 was read from root is kept in storedCfg.
85 The repository class instance
87 The tags that apply to this repository, if any
def __init__(self, args, cfg, storedCfg=None, isNewRepository=False, isV1Repository=True):

s = "RepoData(args=%s cfg=%s repo=%s tags=%s isNewRepository=%s isV1Repository:%s parentRegistry:%s)"

self.tags = self.tags.union(tags)
116 """Container object for RepoData instances owned by a Butler instance."""
126 """Add a RepoData to the container
130 repoData - RepoData instance to add
133 self.
byCfgRoot[repoData.args.cfgRoot] = repoData
136 """Get a list of RepoData that are used to as inputs to the Butler.
137 The list is created lazily as needed, and cached.
141 A list of RepoData with readable repositories, in the order to be used when searching.
144 raise RuntimeError(
"Inputs not yet initialized.")
148 """Get a list of RepoData that are used to as outputs to the Butler.
149 The list is created lazily as needed, and cached.
153 A list of RepoData with writable repositories, in the order to be use when searching.
156 raise RuntimeError(
"Outputs not yet initialized.")
160 """Get a list of all RepoData that are used to as by the Butler.
161 The list is created lazily as needed, and cached.
165 A list of RepoData with writable repositories, in the order to be use when searching.
167 if self.
_all is None:
168 raise RuntimeError(
"The all list is not yet initialized.")
return "%s(\nbyRepoRoot=%r, \nbyCfgRoot=%r, \n_inputs=%r, \n_outputs=%s, \n_all=%s)" % (
    self.__class__.__name__,
def _buildLookupLists(self, inputs, outputs):
    """Build the lists of inputs, outputs, and all repo datas in lookup

    inputs : list of RepositoryArgs
        The input RepositoryArgs, in order.
    outputs : list of RepositoryArgs
        The output RepositoryArgs, in order.
    def addRepoDataToLists(repoData, inout):
        """Adds the cfg represented by repoData to the _all dict/list, as
        well as the _inputs or _outputs list, as indicated by inout. Then,
        adds all the parents of the cfg to the lists."""
        if inout not in ('in', 'out', 'ref'):
            raise RuntimeError("'inout' must be 'in', 'out', or 'ref', not '%s'" % inout)
        if repoData.cfg.root not in self._all:
            self._all[repoData.cfg.root] = repoData
        if inout == 'in' and repoData not in self._inputs:
            self._inputs.append(repoData)
        elif inout == 'out' and repoData not in self._outputs:
            self._outputs.append(repoData)
            if 'r' in repoData.args.mode:
                self._inputs.append(repoData)
        for parent in repoData.cfg.parents:
            addParentAs = 'in' if 'r' in repoData.args.mode and inout != 'ref' else 'ref'
            addRepoDataToLists(self.byRepoRoot[parent], addParentAs)
    self._all = collections.OrderedDict()

    for repoArgs in outputs:
        repoData = self.byCfgRoot[repoArgs.cfgRoot]
        addRepoDataToLists(repoData, 'out')
    for repoArgs in inputs:
        repoData = self.byCfgRoot[repoArgs.cfgRoot]
        addRepoDataToLists(repoData, 'in')
226 """Butler provides a generic mechanism for persisting and retrieving data using mappers.
228 A Butler manages a collection of datasets known as a repository. Each dataset has a type representing its
229 intended usage and a location. Note that the dataset type is not the same as the C++ or Python type of the
230 object containing the data. For example, an ExposureF object might be used to hold the data for a raw
231 image, a post-ISR image, a calibrated science image, or a difference image. These would all be different
234 A Butler can produce a collection of possible values for a key (or tuples of values for multiple keys) if
235 given a partial data identifier. It can check for the existence of a file containing a dataset given its
236 type and data identifier. The Butler can then retrieve the dataset. Similarly, it can persist an object to
237 an appropriate location when given its associated data identifier.
239 Note that the Butler has two more advanced features when retrieving a data set. First, the retrieval is
240 lazy. Input does not occur until the data set is actually accessed. This allows datasets to be retrieved
241 and placed on a clipboard prospectively with little cost, even if the algorithm of a stage ends up not
242 using them. Second, the Butler will call a standardization hook upon retrieval of the dataset. This
243 function, contained in the input mapper object, must perform any necessary manipulations to force the
244 retrieved object to conform to standards, including translating metadata.
__init__(self, root, mapper=None, **mapperArgs)

defineAlias(self, alias, datasetType)

getKeys(self, datasetType=None, level=None)

queryMetadata(self, datasetType, format=None, dataId={}, **rest)

datasetExists(self, datasetType, dataId={}, **rest)

get(self, datasetType, dataId={}, immediate=False, **rest)

put(self, obj, datasetType, dataId={}, **rest)

subset(self, datasetType, level=None, dataId={}, **rest)

dataRef(self, datasetType, level=None, dataId={}, **rest)
The preferred method of initialization is to pass in a RepositoryArgs instance, or a list of
RepositoryArgs to inputs and/or outputs.

For backward compatibility: this initialization method signature can take a posix root path, and
optionally a mapper class instance or class type that will be instantiated using the mapperArgs input
argument. However, for this to work in a backward compatible way it creates a single repository that is
used as both an input and an output repository. This is NOT preferred, and will likely break any
provenance system we have in place.

.. note:: Deprecated in 12_0
    `root` will be removed in TBD; it is replaced by `inputs` and `outputs` for
    multiple-repository support.
A filesystem path. Will only work with a PosixRepository.
mapper - string or instance
.. note:: Deprecated in 12_0
    `mapper` will be removed in TBD; it is replaced by `inputs` and `outputs` for
    multiple-repository support.
Provides a mapper to be used with Butler.
.. note:: Deprecated in 12_0
    `mapperArgs` will be removed in TBD; it is replaced by `inputs` and `outputs` for
    multiple-repository support.
Provides arguments to be passed to the mapper if the mapper input arg is a class type to be
instantiated by Butler.
inputs - RepositoryArgs or string
    Can be a single item or a list. Provides arguments to load an existing repository (or repositories).
    String is assumed to be a URI and is used as the cfgRoot (URI to the location of the cfg file). (Local
    file system URI does not have to start with 'file://' and in this way can be a relative path).
outputs - RepositoryArgs or string
    Can be a single item or a list. Provides arguments to load one or more existing repositories or create
    new ones. String is assumed to be a URI and is used as the repository root.
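
A minimal usage sketch (the repository paths, the 'calexp' dataset type, and the 'visit'/'ccd'
dataId keys below are hypothetical; valid dataset types and keys depend on the mapper in use):

>>> butler = Butler(inputs='/path/to/inputRepo', outputs='/path/to/outputRepo')
>>> exposure = butler.get('calexp', dataId={'visit': 1, 'ccd': 2})
>>> butler.put(exposure, 'calexp', dataId={'visit': 1, 'ccd': 2})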
def __init__(self, root=None, mapper=None, inputs=None, outputs=None, **mapperArgs):

    self.log = Log.getLogger("daf.persistence.butler")

    self._initArgs = {'root': root, 'mapper': mapper, 'inputs': inputs, 'outputs': outputs,
                      'mapperArgs': mapperArgs}
    inputs = copy.deepcopy(inputs)
    outputs = copy.deepcopy(outputs)

    isV1Args = inputs is None and outputs is None
    if isV1Args:
        inputs, outputs = self._convertV1Args(root=root, mapper=mapper, mapperArgs=mapperArgs)
    elif root or mapper or mapperArgs:
        raise RuntimeError(
            'Butler version 1 API (root, mapper, **mapperArgs) may ' +
            'not be used with version 2 API (inputs, outputs)')
    inputs = [RepositoryArgs(cfgRoot=args)
              if not isinstance(args, RepositoryArgs) else args for args in inputs]
    outputs = [RepositoryArgs(cfgRoot=args)
               if not isinstance(args, RepositoryArgs) else args for args in outputs]
    if args.mode is None:
    elif 'r' not in args.mode:
        raise RuntimeError("The mode of an input should be readable.")

    if args.mode is None:
    elif 'w' not in args.mode:
        raise RuntimeError("The mode of an output should be writable.")

    for args in inputs + outputs:
        if (args.mapper and not isinstance(args.mapper, basestring) and
                not inspect.isclass(args.mapper)):
            self.log.warn(preinitedMapperWarning)
    persistencePolicy = pexPolicy.Policy()

    self._repos._buildLookupLists(inputs, outputs)

    for repoData in reversed(self._repos.all().values()):
        repoData.parentRegistry = parentRegistry
        repoData.repo = Repository(repoData)
def _getParentRegistry(self, repoData):
    """Get the first found registry that matches the passed-in repo.

    "Matches" means the mapper in the passed-in repo is the same type as
    the mapper in the parent.

    The RepoData for the repository for which we are searching for a

    A registry from a parent if one can be found, or None.

    Indicates a butler init order problem; all parents should be initialized before child
    repositories, so this function should be able to get any parent of any child repo.

    if parentRepoData.cfg.mapper == repoData.cfg.mapper:
        if not parentRepoData.repo:
            raise RuntimeError("Parent repo should be initialized before child repos.")
        registry = parentRepoData.repo.getRegistry()
def _getParentRepoDatas(self, repoData):
    """Get the parents & grandparents etc of a given repo data, in depth-first search order.

    repoData : RepoData instance
        The RepoData whose parents should be retrieved.

    A list of the parents & grandparents etc of a given repo data, in depth-first search order.

    for parentCfgRoot in repoData.cfg.parents:
        parentRepoData = self._repos.byCfgRoot[parentCfgRoot]
def _setRepoDataTags(self):
    """Set the tags from each repoArgs into all its parent repoArgs so that they can be included in tagged

    def setTags(butler, repoData, tags):
        tags.update(repoData.args.tags)
        repoData.addTags(tags)
        for parent in repoData.cfg.parents:
            setTags(butler, butler._repos.byRepoRoot[parent], copy.copy(tags))
    for repoData in self._repos.all().values():
        setTags(self, repoData, set())
def _createRepoData(self, args, inout, instanceParents):
    """Make a RepoData object for args, adding it to the RepoDataContainer.

    A RepositoryArgs that describes a new or existing Repository.
    inout : 'in' or 'out'
        Indicates if this Repository should be used by the Butler as an input or an output.
    instanceParents : list of string
        URI/path to the RepositoryCfg of parents in this instance of Butler; inputs and readable outputs
        (but not their parents; grand-parents are looked up when the parents are loaded)

    def parentListWithoutThis(root, instanceParents):
        """instanceParents is typically all the inputs to butler. If 'this' root is in that list (because
        this repo is writable) then remove it, as a repo is never its own parent."""
        parents = copy.copy(instanceParents)

        parents.remove(args.cfgRoot)

    if inout not in ('in', 'out'):
        raise RuntimeError("inout must be either 'in' or 'out'")

    if args.cfgRoot in self._repos.byCfgRoot:

    cfg = Storage.getRepositoryCfg(args.cfgRoot)

    if not cfg.matchesArgs(args):
        raise RuntimeError("Persisted repo cfg does not match input args. cfg:%s, args:%s"

    parents = parentListWithoutThis(args.cfgRoot, instanceParents)
    if inout == 'out' and cfg.parents != parents:
        raise RuntimeError(
            "Persisted repo cfg parents do not match butler parents: cfg:%s, parents:%s"
            % (cfg, instanceParents))

    repoData = RepoData(args=args, cfg=cfg, storedCfg=storedCfg)
    self._repos.add(repoData)
    for parentArgs in cfg.parents:
        self._createRepoData(RepositoryArgs(parentArgs, mode='r'), 'in', instanceParents)
    if Storage.isPosix(args.cfgRoot):
        v1RepoExists = PosixStorage.v1RepoExists(args.cfgRoot)
        if not v1RepoExists and inout == 'in':
            msg = "Input repositories must exist; no repo found at " \
                  "%s. (A Butler V1 Repository 'exists' if the root " \
                  "folder exists AND contains items.)" % args.cfgRoot
            raise RuntimeError(msg)
        if inout == 'out' and not v1RepoExists:
            parents = parentListWithoutThis(args.cfgRoot, instanceParents)

        args.mapper = PosixStorage.getMapperClass(args.cfgRoot)
        cfg = RepositoryCfg.makeFromArgs(args, parents)
        repoData = RepoData(args=args, cfg=cfg, isNewRepository=not v1RepoExists,
                            isV1Repository=v1RepoExists)
        self._repos.add(repoData)

        parent = PosixStorage.getParentSymlinkPath(args.cfgRoot)

        parent = PosixStorage.absolutePath(args.cfgRoot, parent)
        cfg.addParents(parent)
        self._createRepoData(RepositoryArgs(parent, mode='r'), 'in', instanceParents)

    msg = "Input repositories must exist; no repo found at " \

    raise RuntimeError(msg)
    cfg = RepositoryCfg.makeFromArgs(args, parents)
    repoData = RepoData(args=args, cfg=cfg, isNewRepository=True)
    self._repos.add(repoData)
def _getParentsList(inputs, outputs):

    if 'r' in args.mode and args.cfgRoot not in parents:
        parents.append(args.cfgRoot)
    cfg = Storage.getRepositoryCfg(args.cfgRoot)

    for parent in cfg.parents:
        if parent not in parents:
            parents.append(parent)

    if args.cfgRoot not in parents:
        parents.append(args.cfgRoot)
def _createRepoDatas(self, inputs, outputs):
    """Create the RepoDataContainer and put a RepoData object in it for each repository listed in inputs
    and outputs as well as each parent of each repository.

    After this function runs, there will be a RepoData for any Repository that may be used by this Butler

    inputs : list of RepoArgs
        Repositories to be used by the Butler as input repositories.
    outputs : list of RepoArgs
        Repositories to be used by the Butler as output repositories.

    raise RuntimeError("Must not call _createRepoDatas twice.")
    except AttributeError:

    for outputArgs in outputs:

    for inputArgs in inputs:
def _convertV1Args(self, root, mapper, mapperArgs):
    """Convert Butler V1 args (root, mapper, mapperArgs) to V2 args (inputs, outputs)

    Posix path to repository root
    mapper : class, class instance, or string
        Instantiated class, a class object to be instantiated, or a string that refers to a class that
        can be imported & used as the mapper.

    Args & their values used when instantiating the mapper.

    (inputs, outputs) - values to be used for inputs and outputs in Butler.__init__

    if (mapper and not isinstance(mapper, basestring) and
            not inspect.isclass(mapper)):
        self.log.warn(preinitedMapperWarning)

    if hasattr(mapper, 'root'):

    outputs = RepositoryArgs(mode='rw',
                             mapperArgs=mapperArgs)
    return inputs, outputs
return 'Butler(datasetTypeAliasDict=%s, repos=%s, persistence=%s)' % (
def _getDefaultMapper(self):
    """Get the default mapper. Currently this means if all the repos use
    exactly the same mapper, that mapper may be considered the default.

    This definition may be changing; mappers may be able to exclude
    themselves as candidates for default, and they may nominate a different
    mapper instead. Also, we may not want to look at *all* the repos, but
    only a depth-first search on each of the input & output repos, and
    use the first-found mapper for each of those. TBD.

    Returns the class type of the default mapper, or None if a default
    mapper can not be determined.
    for inputRepoData in self._repos.inputs():
        if inputRepoData.cfg.mapper is not None:
            mapper = inputRepoData.cfg.mapper

        if isinstance(mapper, basestring):
        elif not inspect.isclass(mapper):
            mapper = mapper.__class__

        if defaultMapper is None:
            defaultMapper = mapper
        elif mapper == defaultMapper:
        elif mapper is not None:
def _assignDefaultMapper(self, defaultMapper):
    for repoData in self._repos.all().values():
        if repoData.cfg.mapper is None and (repoData.isNewRepository or repoData.isV1Repository):
            if defaultMapper is None:
                "No mapper specified for %s and no default mapper could be determined." %
            repoData.cfg.mapper = defaultMapper
675 """posix-only; gets the mapper class at the path specifed by root (if a file _mapper can be found at
676 that location or in a parent location.
678 As we abstract the storage and support different types of storage locations this method will be
679 moved entirely into Butler Access, or made more dynamic, and the API will very likely change."""
680 return Storage.getMapperClass(root)
683 """Register an alias that will be substituted in datasetTypes.
688 The alias keyword. It may start with @ or not. It may not contain @ except as the first character.
690 The string that will be substituted when @alias is passed into datasetType. It may not contain '@'
694 atLoc = alias.rfind(
'@')
696 alias =
"@" + str(alias)
698 raise RuntimeError(
"Badly formatted alias string: %s" % (alias,))
701 if datasetType.count(
'@') != 0:
702 raise RuntimeError(
"Badly formatted type string: %s" % (datasetType))
707 if key.startswith(alias)
or alias.startswith(key):
708 raise RuntimeError(
"Alias: %s overlaps with existing alias: %s" % (alias, key))
def getKeys(self, datasetType=None, level=None, tag=None):
    """Get the valid data id keys at or above the given level of hierarchy for the dataset type or the
    entire collection if None. The dict values are the basic Python types corresponding to the keys (int,

    The type of dataset to get keys for, entire collection if None.

    The hierarchy level to descend to. None if it should not be restricted. Use an empty string if the
    mapper should look up the default level.
    tag - any, or list of any
        Any object that can be tested to be the same as the tag in a dataId passed into butler input
        functions. Applies only to input repositories: If tag is specified by the dataId then the repo
        will only be read from if the tag in the dataId matches a tag used for that repository.

    Returns a dict. The dict keys are the valid data id keys at or above the given level of hierarchy for
    the dataset type or the entire collection if None. The dict values are the basic Python types
    corresponding to the keys (int, float, str).
    for repoData in self._repos.inputs():
        if not tag or len(tag.intersection(repoData.tags)) > 0:
            keys = repoData.repo.getKeys(datasetType, level)
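
    # Hedged usage sketch for getKeys (the 'raw' dataset type and the key names in the
    # comment are hypothetical; the actual keys come from the mapper of the input repos):
    #
    #     keys = butler.getKeys('raw')
    #     # e.g. {'visit': int, 'ccd': int, 'filter': str}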
749 """Returns the valid values for one or more keys when given a partial
750 input collection data id.
755 The type of dataset to inquire about.
757 A key giving the level of granularity of the inquiry.
759 An optional key or tuple of keys to be returned.
760 dataId - DataId, dict
763 Keyword arguments for the partial data id.
767 A list of valid values or tuples of valid values as specified by the format (defaulting to the same as
768 the key) at the key's level of granularity.
772 dataId = DataId(dataId)
773 dataId.update(**rest)
for repoData in self._repos.inputs():
    if not dataId.tag or len(dataId.tag.intersection(repoData.tags)) > 0:
        tuples = repoData.repo.queryMetadata(datasetType, format, dataId)
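
# Hedged usage sketch for queryMetadata (dataset type, format key, and dataId values are
# hypothetical): ask for all 'visit' values of 'raw' data taken with the r filter.
#
#     visits = butler.queryMetadata('raw', format='visit', dataId={'filter': 'r'})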
802 """Determines if a dataset file exists.
807 The type of dataset to inquire about.
808 dataId - DataId, dict
809 The data id of the dataset.
810 **rest keyword arguments for the data id.
815 True if the dataset exists or is non-file-based.
818 dataId = DataId(dataId)
819 dataId.update(**rest)
822 for repoData
in self._repos.inputs():
823 if not dataId.tag
or len(dataId.tag.intersection(repoData.tags)) > 0:
824 location = repoData.repo.map(datasetType, dataId)
825 if location
and location.repository.exists(location):
830 return bool(location)
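
# Hedged usage sketch for datasetExists (dataset type and dataId keys are hypothetical):
#
#     if butler.datasetExists('raw', dataId={'visit': 1, 'ccd': 2}):
#         raw = butler.get('raw', dataId={'visit': 1, 'ccd': 2})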
def _locate(self, datasetType, dataId, write):
    """Get one or more ButlerLocations and/or ButlerComposites.

    The datasetType that is being searched for. The datasetType may be followed by a dot and
    a component name (component names are specified in the policy), i.e. datasetType.componentName

    dataId : dict or DataId class instance

    True if this is a search to write an object. False if it is a search to read an object. This
    affects what type (an object or a container) is returned.

    If write is False, will return either a single object or None. If write is True, will return a list

    repos = self._repos.outputs() if write else self._repos.inputs()

    for repoData in repos:

        if not write and dataId.tag and len(dataId.tag.intersection(repoData.tags)) == 0:

        components = datasetType.split('.')
        datasetType = components[0]
        components = components[1:]

        location = repoData.repo.map(datasetType, dataId, write=write)

        location.datasetType = datasetType
        if len(components) > 0:
            if not isinstance(location, ButlerComposite):
                raise RuntimeError("The location for a dotted datasetType must be a composite.")

            components[0] = location.componentInfo[components[0]].datasetType

            datasetType = '.'.join(components)
            location = self._locate(datasetType, dataId, write)

        if hasattr(location.mapper, "bypass_" + location.datasetType):

        if isinstance(location, ButlerComposite) or location.repository.exists(location):

            locations.extend(location)

            locations.append(location)
def _getBypassFunc(location, dataId):
    pythonType = location.getPythonType()
    if pythonType is not None:
        if isinstance(pythonType, basestring):

    bypassFunc = getattr(location.mapper, "bypass_" + location.datasetType)
    return lambda: bypassFunc(location.datasetType, pythonType, location, dataId)
def get(self, datasetType, dataId=None, immediate=True, **rest):
    """Retrieves a dataset given an input collection data id.

    The type of dataset to retrieve.

    If False, use a proxy for delayed loading.

    Keyword arguments for the data id.

    An object retrieved from the dataset (or a proxy for one).

    dataId = DataId(dataId)
    dataId.update(**rest)
    location = self._locate(datasetType, dataId, write=False)

    raise NoResults("No locations for get:", datasetType, dataId)
    self.log.debug("Get type=%s keys=%s from %s", datasetType, dataId, str(location))

    if isinstance(location, ButlerComposite):
        for name, componentInfo in location.componentInfo.items():
            if componentInfo.subset:
                subset = self.subset(datasetType=componentInfo.datasetType, dataId=location.dataId)
                componentInfo.obj = [obj.get() for obj in subset]

                obj = self.get(componentInfo.datasetType, location.dataId, immediate=True)
                componentInfo.obj = obj
        assembler = location.assembler or genericAssembler
        obj = assembler(dataId=location.dataId, componentInfo=location.componentInfo, cls=location.python)

    if hasattr(location, 'bypass'):

        callback = lambda: location.bypass

        callback = lambda: self._read(location)
    if location.mapper.canStandardize(location.datasetType):
        innerCallback = callback
        callback = lambda: location.mapper.standardize(location.datasetType, innerCallback(), dataId)

    return ReadProxy(callback)
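
    # Hedged usage sketch for get (dataset type and dataId keys are hypothetical): with
    # immediate=False a ReadProxy is returned and the actual read is deferred until the
    # returned object is first used.
    #
    #     proxy = butler.get('calexp', dataId={'visit': 1, 'ccd': 2}, immediate=False)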
def put(self, obj, datasetType, dataId={}, doBackup=False, **rest):
    """Persists a dataset given an output collection data id.

    The object to persist.

    The type of dataset to persist.

    If True, rename existing instead of overwriting.
    WARNING: Setting doBackup=True is not safe for parallel processing, as it may be subject to race

    Keyword arguments for the data id.

    dataId = DataId(dataId)
    dataId.update(**rest)
    for location in self._locate(datasetType, dataId, write=True):
        if isinstance(location, ButlerComposite):
            disassembler = location.disassembler if location.disassembler else genericDisassembler
            disassembler(obj=obj, dataId=location.dataId, componentInfo=location.componentInfo)
            for name, info in location.componentInfo.items():
                if not info.inputOnly:
                    self.put(info.obj, info.datasetType, location.dataId, doBackup=doBackup)

        location.getRepository().backup(location.datasetType, dataId)
        location.getRepository().write(location, obj)
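
    # Hedged usage sketch for put (dataset type and dataId keys are hypothetical):
    # doBackup=True asks the repository to rename an existing dataset rather than
    # overwrite it (not safe for parallel writes, per the docstring above).
    #
    #     butler.put(exposure, 'calexp', dataId={'visit': 1, 'ccd': 2}, doBackup=True)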
def subset(self, datasetType, level=None, dataId={}, **rest):
    """Return complete dataIds for a dataset type that match a partial (or empty) dataId.

    Given a partial (or empty) dataId specified in dataId and **rest, find all datasets that match the
    dataId. Optionally restrict the results to a given level specified by a dataId key (e.g. visit or
    sensor or amp for a camera). Return an iterable collection of complete dataIds as ButlerDataRefs.
    Datasets with the resulting dataIds may not exist; that needs to be tested with datasetExists().

    The type of dataset collection to subset

    The level of dataId at which to subset. Use an empty string if the mapper should look up the

    Keyword arguments for the data id.

    subset - ButlerSubset
        Collection of ButlerDataRefs for datasets matching the data id.

    To print the full dataIds for all r-band measurements in a source catalog
    (note that the subset call is equivalent to: `butler.subset('src', dataId={'filter':'r'})`):

    >>> subset = butler.subset('src', filter='r')
    >>> for data_ref in subset: print(data_ref.dataId)
    datasetType = self._resolveDatasetTypeAlias(datasetType)

    # Currently expected behavior of subset is that if specified level is None then the mapper's default
    # level should be used. Convention for level within Butler is that an empty string is used to indicate

    dataId = DataId(dataId)
    dataId.update(**rest)
    return ButlerSubset(self, datasetType, level, dataId)
def dataRef(self, datasetType, level=None, dataId={}, **rest):
    """Returns a single ButlerDataRef.

    Given a complete dataId specified in dataId and **rest, find the unique dataset at the given level
    specified by a dataId key (e.g. visit or sensor or amp for a camera) and return a ButlerDataRef.

    The type of dataset collection to reference

    The level of dataId at which to reference

    Keyword arguments for the data id.

    dataRef - ButlerDataRef
        ButlerDataRef for dataset matching the data id

    datasetType = self._resolveDatasetTypeAlias(datasetType)
    dataId = DataId(dataId)
    subset = self.subset(datasetType, level, dataId, **rest)
    if len(subset) != 1:
        raise RuntimeError("No unique dataset for: Dataset type:%s Level:%s Data ID:%s Keywords:%s" %
                           (str(datasetType), str(level), str(dataId), str(rest)))
    return ButlerDataRef(subset, subset.cache[0])
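
    # Hedged usage sketch for dataRef (dataset type and dataId keys are hypothetical): the
    # dataId must be complete enough to identify exactly one dataset; the returned
    # ButlerDataRef can then be used to get or put datasets for that dataId.
    #
    #     ref = butler.dataRef('raw', dataId={'visit': 1, 'ccd': 2})
    #     raw = ref.get('raw')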
def _read(self, location):
    """Unpersist an object using data inside a butlerLocation object.

    location - ButlerLocation
        A butlerLocation instance populated with data needed to read the object.

    object - an instance of the object specified by the butlerLocation.

    self.log.debug("Starting read from %s", location)
    results = location.repository.read(location)
    if len(results) == 1:
        results = results[0]
    self.log.debug("Ending read from %s", location)

def __reduce__(self):
    ret = (_unreduce, (self._initArgs, self.datasetTypeAliasDict))
def _resolveDatasetTypeAlias(self, datasetType):
    """Replaces all the known alias keywords in the given string with the alias value.

    A datasetType string to search & replace on

    The de-aliased string

    for key in self.datasetTypeAliasDict:
        # if all aliases have been replaced, bail out
        if datasetType.find('@') == -1:

        datasetType = datasetType.replace(key, self.datasetTypeAliasDict[key])

    # If an alias specifier can not be resolved then throw.
    if datasetType.find('@') != -1:
        raise RuntimeError("Unresolvable alias specifier in datasetType: %s" % (datasetType))
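
    # Hedged sketch of what the substitution amounts to (illustrative values only): with
    # datasetTypeAliasDict == {'@srcCat': 'deepCoadd_src'}, the string '@srcCat_schema'
    # would be resolved to 'deepCoadd_src_schema'.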
def _unreduce(initArgs, datasetTypeAliasDict):
    mapperArgs = initArgs.pop('mapperArgs')
    initArgs.update(mapperArgs)
    butler = Butler(**initArgs)
    butler.datasetTypeAliasDict = datasetTypeAliasDict