27 """This module defines the Butler class."""
from future import standard_library
standard_library.install_aliases()
from builtins import str
from past.builtins import basestring
from builtins import object

import collections
import copy
import inspect
import yaml

from lsst.log import Log
import lsst.pex.policy as pexPolicy
from . import LogicalLocation, ReadProxy, ButlerSubset, ButlerDataRef, Persistence, \
    Storage, Policy, NoResults, Repository, DataId, RepositoryCfg, \
    RepositoryArgs, listify, setify, sequencify, doImport, ButlerComposite, genericAssembler, \
    genericDisassembler, PosixStorage
preinitedMapperWarning = ("Passing an instantiated mapper into " +
                          "Butler.__init__ will prevent Butler from passing " +
                          "parentRegistry or repositoryCfg information to " +
                          "the mapper, which is done only at init time. " +
                          "It is better to pass an importable string or " +
                          "class object.")
59 """Represents a Butler configuration.
63 cfg is 'wet paint' and very likely to change. Use of it in production
64 code other than via the 'old butler' API is strongly discouraged.
66 yaml_tag =
u"!ButlerCfg"
69 super(ButlerCfg, self).
__init__({
'repoCfg': repoCfg,
'cls': cls})
73 """Container object for repository data used by Butler
78 Arguments used to initialize self.repo
80 Configuration of repository
81 storedCfg - RepositoryCfg or None
82 If the cfg at root and the RepositoryArgs don't match then a new cfg is kept in cfg and the cfg that
83 was read from root is kept in storedCfg.
85 The repository class instance
87 The tags that apply to this repository, if any
90 def __init__(self, args, cfg, storedCfg=None, isNewRepository=False, isV1Repository=True):
107 s =
"RepoData(args=%s cfg=%s repo=%s tags=%s isNewRepository=%s isV1Repository:%s parentRegistry:%s)"
112 self.
tags = self.tags.union(tags)
116 """Container object for RepoData instances owned by a Butler instance."""
126 """Add a RepoData to the container
130 repoData - RepoData instance to add
133 self.
byCfgRoot[repoData.args.cfgRoot] = repoData
136 """Get a list of RepoData that are used to as inputs to the Butler.
137 The list is created lazily as needed, and cached.
141 A list of RepoData with readable repositories, in the order to be used when searching.
144 raise RuntimeError(
"Inputs not yet initialized.")
148 """Get a list of RepoData that are used to as outputs to the Butler.
149 The list is created lazily as needed, and cached.
153 A list of RepoData with writable repositories, in the order to be use when searching.
156 raise RuntimeError(
"Outputs not yet initialized.")
160 """Get a list of all RepoData that are used to as by the Butler.
161 The list is created lazily as needed, and cached.
165 A list of RepoData with writable repositories, in the order to be use when searching.
167 if self.
_all is None:
168 raise RuntimeError(
"The all list is not yet initialized.")
172 return "%s(\nbyRepoRoot=%r, \nbyCfgRoot=%r, \n_inputs=%r, \n_outputs=%s, \n_all=%s)" % (
173 self.__class__.__name__,

    def _buildLookupLists(self, inputs, outputs):
        """Build the lists of inputs, outputs, and all repo datas, in lookup
        order.

        Parameters
        ----------
        inputs : list of RepositoryArgs
            The input RepositoryArgs, in order.
        outputs : list of RepositoryArgs
            The output RepositoryArgs, in order.
        """
        def addRepoDataToLists(repoData, inout):
            """Adds the cfg represented by repoData to the _all dict/list, as
            well as the _inputs or _outputs list, as indicated by inout. Then,
            adds all the parents of the cfg to the lists."""
            if inout not in ('in', 'out', 'ref'):
                raise RuntimeError("'inout' must be 'in', 'out', or 'ref', not '%s'" % inout)
            if repoData.cfg.root not in self._all:
                self._all[repoData.cfg.root] = repoData
            if inout == 'in' and repoData not in self._inputs:
                self._inputs.append(repoData)
            elif inout == 'out' and repoData not in self._outputs:
                self._outputs.append(repoData)
                # readable outputs are also searched as inputs:
                if 'r' in repoData.args.mode:
                    self._inputs.append(repoData)
            for parent in repoData.cfg.parents:
                addParentAs = 'in' if 'r' in repoData.args.mode and inout != 'ref' else 'ref'
                addRepoDataToLists(self.byRepoRoot[parent], addParentAs)

        self._all = collections.OrderedDict()
        self._inputs = []
        self._outputs = []

        for repoArgs in outputs:
            repoData = self.byCfgRoot[repoArgs.cfgRoot]
            addRepoDataToLists(repoData, 'out')
        for repoArgs in inputs:
            repoData = self.byCfgRoot[repoArgs.cfgRoot]
            addRepoDataToLists(repoData, 'in')
226 """Butler provides a generic mechanism for persisting and retrieving data using mappers.
228 A Butler manages a collection of datasets known as a repository. Each dataset has a type representing its
229 intended usage and a location. Note that the dataset type is not the same as the C++ or Python type of the
230 object containing the data. For example, an ExposureF object might be used to hold the data for a raw
231 image, a post-ISR image, a calibrated science image, or a difference image. These would all be different
234 A Butler can produce a collection of possible values for a key (or tuples of values for multiple keys) if
235 given a partial data identifier. It can check for the existence of a file containing a dataset given its
236 type and data identifier. The Butler can then retrieve the dataset. Similarly, it can persist an object to
237 an appropriate location when given its associated data identifier.
239 Note that the Butler has two more advanced features when retrieving a data set. First, the retrieval is
240 lazy. Input does not occur until the data set is actually accessed. This allows datasets to be retrieved
241 and placed on a clipboard prospectively with little cost, even if the algorithm of a stage ends up not
242 using them. Second, the Butler will call a standardization hook upon retrieval of the dataset. This
243 function, contained in the input mapper object, must perform any necessary manipulations to force the
244 retrieved object to conform to standards, including translating metadata.
248 __init__(self, root, mapper=None, **mapperArgs)
250 defineAlias(self, alias, datasetType)
252 getKeys(self, datasetType=None, level=None)
254 queryMetadata(self, datasetType, format=None, dataId={}, **rest)
256 datasetExists(self, datasetType, dataId={}, **rest)
258 get(self, datasetType, dataId={}, immediate=False, **rest)
260 put(self, obj, datasetType, dataId={}, **rest)
262 subset(self, datasetType, level=None, dataId={}, **rest)
264 dataRef(self, datasetType, level=None, dataId={}, **rest)

    Initialization:

    The preferred method of initialization is to pass in a RepositoryArgs instance, or a list of
    RepositoryArgs to inputs and/or outputs.

    For backward compatibility: this initialization method signature can take a posix root path, and
    optionally a mapper class instance or class type that will be instantiated using the mapperArgs input
    argument. However, for this to work in a backward compatible way it creates a single repository that is
    used as both an input and an output repository. This is NOT preferred, and will likely break any
    provenance system we have in place.

    Parameters
    ----------
    root - string
        .. note:: Deprecated in 12_0
            `root` will be removed in TBD, it is replaced by `inputs` and `outputs` for
            multiple-repository support.
        A filesystem path. Will only work with a PosixRepository.
    mapper - string or instance
        .. note:: Deprecated in 12_0
            `mapper` will be removed in TBD, it is replaced by `inputs` and `outputs` for
            multiple-repository support.
        Provides a mapper to be used with Butler.
    mapperArgs - dict
        .. note:: Deprecated in 12_0
            `mapperArgs` will be removed in TBD, it is replaced by `inputs` and `outputs` for
            multiple-repository support.
        Provides arguments to be passed to the mapper if the mapper input arg is a class type to be
        instantiated by Butler.
    inputs - RepositoryArgs or string
        Can be a single item or a list. Provides arguments to load an existing repository (or repositories).
        String is assumed to be a URI and is used as the cfgRoot (URI to the location of the cfg file). (Local
        file system URI does not have to start with 'file://' and in this way can be a relative path).
    outputs - RepositoryArgs or string
        Can be a single item or a list. Provides arguments to load one or more existing repositories or create
        new ones. String is assumed to be a URI and is used as the repository root.
    """
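
    # Example construction (an illustrative sketch, not part of the original module; the
    # repository paths are hypothetical):
    #
    #     >>> from lsst.daf.persistence import Butler, RepositoryArgs
    #     >>> butler = Butler(inputs='/datasets/calibRepo',
    #     ...                 outputs=RepositoryArgs(root='/datasets/outputRepo', mode='rw'))
    #
    # A plain string is treated as the cfgRoot URI of an existing repository; a RepositoryArgs
    # instance gives full control over mode, mapper, and mapperArgs.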

    def __init__(self, root=None, mapper=None, inputs=None, outputs=None, **mapperArgs):
        self.log = Log.getLogger("daf.persistence.butler")

        self._initArgs = {'root': root, 'mapper': mapper, 'inputs': inputs, 'outputs': outputs,
                          'mapperArgs': mapperArgs}

        inputs = copy.deepcopy(inputs)
        outputs = copy.deepcopy(outputs)

        isV1Args = inputs is None and outputs is None
        if isV1Args:
            inputs, outputs = self._convertV1Args(root=root, mapper=mapper, mapperArgs=mapperArgs)
        elif root or mapper or mapperArgs:
            raise RuntimeError(
                'Butler version 1 API (root, mapper, **mapperArgs) may ' +
                'not be used with version 2 API (inputs, outputs)')

        self.datasetTypeAliasDict = {}
        self.storage = Storage()

        inputs = listify(inputs)
        outputs = listify(outputs)
        inputs = [RepositoryArgs(cfgRoot=args) if not isinstance(args, RepositoryArgs) else args
                  for args in inputs]
        outputs = [RepositoryArgs(cfgRoot=args) if not isinstance(args, RepositoryArgs) else args
                   for args in outputs]

        # default the input mode to readable and the output mode to writable, and verify:
        for args in inputs:
            if args.mode is None:
                args.mode = 'r'
            elif 'r' not in args.mode:
                raise RuntimeError("The mode of an input should be readable.")
        for args in outputs:
            if args.mode is None:
                args.mode = 'w'
            elif 'w' not in args.mode:
                raise RuntimeError("The mode of an output should be writable.")

        for args in inputs + outputs:
            if (args.mapper and not isinstance(args.mapper, basestring) and
                    not inspect.isclass(args.mapper)):
                self.log.warn(preinitedMapperWarning)

        persistencePolicy = pexPolicy.Policy()
        self.persistence = Persistence.getPersistence(persistencePolicy)

        self._createRepoDatas(inputs, outputs)
        self._repos._buildLookupLists(inputs, outputs)
        self._setRepoDataTags()

        defaultMapper = self._getDefaultMapper()
        self._assignDefaultMapper(defaultMapper)

        # initialize repositories parents-first (all() is in search order, children before
        # parents), so that each child can be given a matching parent registry:
        for repoData in reversed(list(self._repos.all().values())):
            parentRegistry = self._getParentRegistry(repoData)
            repoData.parentRegistry = parentRegistry
            repoData.repo = Repository(repoData)

    def _getParentRegistry(self, repoData):
        """Get the first found registry that matches the passed-in repo.

        "Matches" means the mapper in the passed-in repo is the same type as
        the mapper in the parent.

        Parameters
        ----------
        repoData : RepoData
            The RepoData for the repository for which we are searching for a
            parent registry.

        Returns
        -------
        A registry from a parent if one can be found, or None.

        Raises
        ------
        RuntimeError
            Indicates a butler init order problem; all parents should be initialized before child
            repositories, so this function should be able to get any parent of any child repo.
        """
        for parentRepoData in self._getParentRepoDatas(repoData):
            if parentRepoData.cfg.mapper == repoData.cfg.mapper:
                if not parentRepoData.repo:
                    raise RuntimeError("Parent repo should be initialized before child repos.")
                registry = parentRepoData.repo.getRegistry()
                if registry is not None:
                    return registry
        return None

    def _getParentRepoDatas(self, repoData):
        """Get the parents & grandparents etc of a given repo data, in depth-first search order.

        Parameters
        ----------
        repoData : RepoData instance
            The RepoData whose parents should be retrieved.

        Returns
        -------
        A list of the parents & grandparents etc of a given repo data, in depth-first search order.
        """
        parents = []
        for parentCfgRoot in repoData.cfg.parents:
            parentRepoData = self._repos.byCfgRoot[parentCfgRoot]
            parents.append(parentRepoData)
            parents += self._getParentRepoDatas(parentRepoData)
        return parents

    def _setRepoDataTags(self):
        """Set the tags from each repoArgs into all its parent repoArgs so that they can be included in
        tagged searches."""
        def setTags(butler, repoData, tags):
            tags.update(repoData.args.tags)
            repoData.addTags(tags)
            for parent in repoData.cfg.parents:
                setTags(butler, butler._repos.byRepoRoot[parent], copy.copy(tags))

        for repoData in self._repos.all().values():
            setTags(self, repoData, set())

    def _createRepoData(self, args, inout, instanceParents):
        """Make a RepoData object for args, adding it to the RepoDataContainer.

        Parameters
        ----------
        args : RepositoryArgs
            A RepositoryArgs that describes a new or existing Repository.
        inout : 'in' or 'out'
            Indicates if this Repository should be used by the Butler as an input or an output.
        instanceParents : list of string
            URI/path to the RepositoryCfg of parents in this instance of Butler; inputs and readable outputs
            (but not their parents; grandparents are looked up when the parents are loaded).
        """
        def parentListWithoutThis(root, instanceParents):
            """instanceParents is typically all the inputs to butler. If 'this' root is in that list (because
            this repo is writable) then remove it, as a repo is never its own parent."""
            parents = copy.copy(instanceParents)
            if root in parents:
                parents.remove(root)
            return parents

        if inout not in ('in', 'out'):
            raise RuntimeError("inout must be either 'in' or 'out'")

        # if the RepoData has already been created, don't create it again:
        if args.cfgRoot in self._repos.byCfgRoot:
            return

        storedCfg = None
        cfg = self.storage.getRepositoryCfg(args.cfgRoot)
        if cfg is not None:
            # an existing repository: verify the persisted cfg is consistent with the args.
            if not cfg.matchesArgs(args):
                raise RuntimeError(
                    "Persisted repo cfg does not match input args. cfg:%s, args:%s"
                    % (cfg, args))
            parents = parentListWithoutThis(args.cfgRoot, instanceParents)
            if inout == 'out' and cfg.parents != parents:
                raise RuntimeError(
                    "Persisted repo cfg parents do not match butler parents: cfg:%s, parents:%s"
                    % (cfg, instanceParents))
            repoData = RepoData(args=args, cfg=cfg, storedCfg=storedCfg)
            self._repos.add(repoData)
            for parentArgs in cfg.parents:
                self._createRepoData(RepositoryArgs(parentArgs, mode='r'), 'in', instanceParents)
        elif Storage.isPosix(args.cfgRoot):
            # no cfg was found; this may be a Butler V1 posix repository.
            v1RepoExists = PosixStorage.v1RepoExists(args.cfgRoot)
            if not v1RepoExists and inout == 'in':
                msg = "Input repositories must exist; no repo found at " \
                      "%s. (A Butler V1 Repository 'exists' if the root " \
                      "folder exists AND contains items.)" % args.cfgRoot
                raise RuntimeError(msg)
            if inout == 'out' and not v1RepoExists:
                parents = parentListWithoutThis(args.cfgRoot, instanceParents)
            else:
                parents = []
            if args.mapper is None:
                args.mapper = PosixStorage.getMapperClass(args.cfgRoot)
            cfg = RepositoryCfg.makeFromArgs(args, parents)
            repoData = RepoData(args=args, cfg=cfg, isNewRepository=not v1RepoExists,
                                isV1Repository=v1RepoExists)
            self._repos.add(repoData)
            # a V1 repository may record its parent as a _parent symlink:
            parent = PosixStorage.getParentSymlinkPath(args.cfgRoot)
            if parent:
                parent = PosixStorage.absolutePath(args.cfgRoot, parent)
                cfg.addParents(parent)
                self._createRepoData(RepositoryArgs(parent, mode='r'), 'in', instanceParents)
        else:
            # no cfg was found and this is not posix storage; only new output repositories
            # may be created here.
            if inout == 'in':
                msg = "Input repositories must exist; no repo found at " \
                      "%s." % args.cfgRoot
                raise RuntimeError(msg)
            parents = parentListWithoutThis(args.cfgRoot, instanceParents)
            cfg = RepositoryCfg.makeFromArgs(args, parents)
            repoData = RepoData(args=args, cfg=cfg, isNewRepository=True)
            self._repos.add(repoData)

    def _getParentsList(self, inputs, outputs):
        """Get the list of parent repositories for this Butler instance: the readable outputs (and the
        parents recorded in their persisted cfgs) followed by the inputs."""
        parents = []
        for args in outputs:
            if 'r' in args.mode and args.cfgRoot not in parents:
                parents.append(args.cfgRoot)
                cfg = self.storage.getRepositoryCfg(args.cfgRoot)
                if cfg is not None:
                    for parent in cfg.parents:
                        if parent not in parents:
                            parents.append(parent)
        for args in inputs:
            if args.cfgRoot not in parents:
                parents.append(args.cfgRoot)
        return parents

    def _createRepoDatas(self, inputs, outputs):
        """Create the RepoDataContainer and put a RepoData object in it for each repository listed in inputs
        and outputs as well as each parent of each repository.

        After this function runs, there will be a RepoData for any Repository that may be used by this Butler
        instance.

        Parameters
        ----------
        inputs : list of RepoArgs
            Repositories to be used by the Butler as input repositories.
        outputs : list of RepoArgs
            Repositories to be used by the Butler as output repositories.
        """
        try:
            self._repos
            raise RuntimeError("Must not call _createRepoDatas twice.")
        except AttributeError:
            pass
        self._repos = RepoDataContainer()
        instanceParents = self._getParentsList(inputs, outputs)
        for outputArgs in outputs:
            self._createRepoData(outputArgs, 'out', instanceParents)
        for inputArgs in inputs:
            self._createRepoData(inputArgs, 'in', instanceParents)

    def _convertV1Args(self, root, mapper, mapperArgs):
        """Convert Butler V1 args (root, mapper, mapperArgs) to V2 args (inputs, outputs)

        Parameters
        ----------
        root : string
            Posix path to repository root
        mapper : class, class instance, or string
            Instantiated class, a class object to be instantiated, or a string that refers to a class that
            can be imported & used as the mapper.
        mapperArgs : dict
            Args & their values used when instantiating the mapper.

        Returns
        -------
        tuple
            (inputs, outputs) - values to be used for inputs and outputs in Butler.__init__
        """
        if (mapper and not isinstance(mapper, basestring) and
                not inspect.isclass(mapper)):
            self.log.warn(preinitedMapperWarning)

        inputs = None
        if root is None and hasattr(mapper, 'root'):
            # in legacy cases the repository root may be recorded on the mapper:
            root = mapper.root
        outputs = RepositoryArgs(mode='rw',
                                 root=root,
                                 mapper=mapper,
                                 mapperArgs=mapperArgs)
        return inputs, outputs

    def __repr__(self):
        return 'Butler(datasetTypeAliasDict=%s, repos=%s, persistence=%s)' % (
            self.datasetTypeAliasDict, self._repos, self.persistence)

    def _getDefaultMapper(self):
        """Get the default mapper. Currently this means if all the repos use
        exactly the same mapper, that mapper may be considered the default.

        This definition may be changing; mappers may be able to exclude
        themselves as candidates for default, and they may nominate a different
        mapper instead. Also, we may not want to look at *all* the repos, but
        only a depth-first search on each of the input & output repos, and
        use the first-found mapper for each of those. TBD.

        Returns
        -------
        Returns the class type of the default mapper, or None if a default
        mapper can not be determined.
        """
        defaultMapper = None
        for inputRepoData in self._repos.inputs():
            mapper = None
            if inputRepoData.cfg.mapper is not None:
                mapper = inputRepoData.cfg.mapper
            # normalize the mapper to a class object: import it if it is a string, and take
            # the class if it is an instance.
            if isinstance(mapper, basestring):
                mapper = doImport(mapper)
            elif mapper is not None and not inspect.isclass(mapper):
                mapper = mapper.__class__
            if defaultMapper is None:
                defaultMapper = mapper
            elif mapper == defaultMapper:
                continue
            elif mapper is not None:
                return None
        return defaultMapper

    def _assignDefaultMapper(self, defaultMapper):
        for repoData in self._repos.all().values():
            if repoData.cfg.mapper is None and (repoData.isNewRepository or repoData.isV1Repository):
                if defaultMapper is None:
                    raise RuntimeError(
                        "No mapper specified for %s and no default mapper could be determined." %
                        repoData.args)
                repoData.cfg.mapper = defaultMapper
676 """posix-only; gets the mapper class at the path specifed by root (if a file _mapper can be found at
677 that location or in a parent location.
679 As we abstract the storage and support different types of storage locations this method will be
680 moved entirely into Butler Access, or made more dynamic, and the API will very likely change."""
681 return Storage.getMapperClass(root)
684 """Register an alias that will be substituted in datasetTypes.
689 The alias keyword. It may start with @ or not. It may not contain @ except as the first character.
691 The string that will be substituted when @alias is passed into datasetType. It may not contain '@'
695 atLoc = alias.rfind(
'@')
697 alias =
"@" + str(alias)
699 raise RuntimeError(
"Badly formatted alias string: %s" % (alias,))
702 if datasetType.count(
'@') != 0:
703 raise RuntimeError(
"Badly formatted type string: %s" % (datasetType))
708 if key.startswith(alias)
or alias.startswith(key):
709 raise RuntimeError(
"Alias: %s overlaps with existing alias: %s" % (alias, key))

    def getKeys(self, datasetType=None, level=None, tag=None):
        """Get the valid data id keys at or above the given level of hierarchy for the dataset type or the
        entire collection if None. The dict values are the basic Python types corresponding to the keys (int,
        float, str).

        Parameters
        ----------
        datasetType - string
            The type of dataset to get keys for, entire collection if None.
        level - string
            The hierarchy level to descend to. None if it should not be restricted. Use an empty string if the
            mapper should lookup the default level.
        tags - any, or list of any
            Any object that can be tested to be the same as the tag in a dataId passed into butler input
            functions. Applies only to input repositories: If tag is specified by the dataId then the repo
            will only be read from if the tag in the dataId matches a tag used for that repository.

        Returns
        -------
        Returns a dict. The dict keys are the valid data id keys at or above the given level of hierarchy for
        the dataset type or the entire collection if None. The dict values are the basic Python types
        corresponding to the keys (int, float, str).
        """
        datasetType = self._resolveDatasetTypeAlias(datasetType)

        keys = None
        tag = setify(tag)
        for repoData in self._repos.inputs():
            if not tag or len(tag.intersection(repoData.tags)) > 0:
                keys = repoData.repo.getKeys(datasetType, level)
                # an empty dict is a valid "found" condition; None is not.
                if keys is not None:
                    break
        return keys
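
    # Example (sketch; the key names depend entirely on the mapper in use):
    #
    #     >>> butler.getKeys('raw')
    #     {'visit': int, 'ccd': int, 'filter': str}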
750 """Returns the valid values for one or more keys when given a partial
751 input collection data id.
756 The type of dataset to inquire about.
758 Key or tuple of keys to be returned.
759 dataId - DataId, dict
762 Keyword arguments for the partial data id.
766 A list of valid values or tuples of valid values as specified by the
771 dataId = DataId(dataId)
772 dataId.update(**rest)
776 for repoData
in self._repos.inputs():
777 if not dataId.tag
or len(dataId.tag.intersection(repoData.tags)) > 0:
778 tuples = repoData.repo.queryMetadata(datasetType, format, dataId)
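
    # Example (sketch; 'raw', 'visit', and 'ccd' are mapper-dependent placeholders):
    #
    #     >>> butler.queryMetadata('raw', 'visit', dataId={'filter': 'r'})
    #     [12345, 12346]
    #     >>> butler.queryMetadata('raw', ('visit', 'ccd'), filter='r')
    #     [(12345, 1), (12345, 2)]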
797 """Determines if a dataset file exists.
802 The type of dataset to inquire about.
803 dataId - DataId, dict
804 The data id of the dataset.
805 **rest keyword arguments for the data id.
810 True if the dataset exists or is non-file-based.
813 dataId = DataId(dataId)
814 dataId.update(**rest)
817 for repoData
in self._repos.inputs():
818 if not dataId.tag
or len(dataId.tag.intersection(repoData.tags)) > 0:
819 location = repoData.repo.map(datasetType, dataId)
820 if location
and location.repository.exists(location):
825 return bool(location)
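
    # Example (sketch; the dataId keys are mapper-dependent placeholders):
    #
    #     >>> if butler.datasetExists('calexp', visit=12345, ccd=1):
    #     ...     exposure = butler.get('calexp', visit=12345, ccd=1)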

    def _locate(self, datasetType, dataId, write):
        """Get one or more ButlerLocations and/or ButlerComposites.

        Parameters
        ----------
        datasetType : string
            The datasetType that is being searched for. The datasetType may be followed by a dot and
            a component name (component names are specified in the policy). IE datasetType.componentName

        dataId : dict or DataId class instance
            The dataId to use for the search.

        write : bool
            True if this is a search to write an object. False if it is a search to read an object. This
            affects what type (an object or a container) is returned.

        Returns
        -------
        If write is False, will return either a single object or None. If write is True, will return a list
        (which may be empty).
        """
        repos = self._repos.outputs() if write else self._repos.inputs()
        locations = []
        for repoData in repos:
            # enforce dataId & repository tags when reading:
            if not write and dataId.tag and len(dataId.tag.intersection(repoData.tags)) == 0:
                continue
            components = datasetType.split('.')
            datasetType = components[0]
            components = components[1:]
            try:
                location = repoData.repo.map(datasetType, dataId, write=write)
            except NoResults:
                continue
            if location is None:
                continue
            location.datasetType = datasetType
            if len(components) > 0:
                if not isinstance(location, ButlerComposite):
                    raise RuntimeError("The location for a dotted datasetType must be a composite.")
                # replace the first component name with its datasetType, rejoin, and recurse:
                components[0] = location.componentInfo[components[0]].datasetType
                datasetType = '.'.join(components)
                location = self._locate(datasetType, dataId, write)
                # if a component location is not found, we can not use this repo; move to the next.
                if location is None:
                    continue
            if not write and hasattr(location.mapper, "bypass_" + location.datasetType):
                # the mapper can satisfy this dataset type directly; install the bypass value on
                # the location so that get() can use it instead of reading from storage.
                try:
                    location.bypass = self._getBypassFunc(location, dataId)()
                except Exception:
                    pass
            # If a location was found but the object does not exist there, keep looking in other
            # repositories (the registry may have had enough data for a lookup even though the
            # object exists in a different repository).
            if write or isinstance(location, ButlerComposite) or location.repository.exists(location):
                if write:
                    if isinstance(location, list):
                        locations.extend(location)
                    else:
                        locations.append(location)
                else:
                    return location
        return locations if write else None

    @staticmethod
    def _getBypassFunc(location, dataId):
        pythonType = location.getPythonType()
        if pythonType is not None:
            if isinstance(pythonType, basestring):
                pythonType = doImport(pythonType)
        bypassFunc = getattr(location.mapper, "bypass_" + location.datasetType)
        return lambda: bypassFunc(location.datasetType, pythonType, location, dataId)
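
    # For reference, a mapper opts into the bypass mechanism by defining a method named
    # 'bypass_<datasetType>'. A hypothetical sketch of such a method on a mapper class
    # (computeChecksum is an invented helper, not part of this package):
    #
    #     def bypass_rawChecksum(self, datasetType, pythonType, location, dataId):
    #         # return the value directly instead of unpersisting it from storage
    #         return computeChecksum(location.getLocations()[0])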

    def get(self, datasetType, dataId=None, immediate=True, **rest):
        """Retrieves a dataset given an input collection data id.

        Parameters
        ----------
        datasetType - string
            The type of dataset to retrieve.
        dataId - dict
            The data id.
        immediate - bool
            If False use a proxy for delayed loading.
        **rest
            Keyword arguments for the data id.

        Returns
        -------
        An object retrieved from the dataset (or a proxy for one).
        """
        datasetType = self._resolveDatasetTypeAlias(datasetType)
        dataId = DataId(dataId)
        dataId.update(**rest)

        location = self._locate(datasetType, dataId, write=False)
        if location is None:
            raise NoResults("No locations for get:", datasetType, dataId)
        self.log.debug("Get type=%s keys=%s from %s", datasetType, dataId, str(location))

        if isinstance(location, ButlerComposite):
            for name, componentInfo in location.componentInfo.items():
                if componentInfo.subset:
                    subset = self.subset(datasetType=componentInfo.datasetType, dataId=location.dataId)
                    componentInfo.obj = [obj.get() for obj in subset]
                else:
                    obj = self.get(componentInfo.datasetType, location.dataId, immediate=True)
                    componentInfo.obj = obj
            assembler = location.assembler or genericAssembler
            obj = assembler(dataId=location.dataId, componentInfo=location.componentInfo, cls=location.python)
            return obj

        if hasattr(location, 'bypass'):
            # the mapper has a bypass function for this dataset type; use its precomputed value.
            callback = lambda: location.bypass
        else:
            callback = lambda: self._read(location)
        if location.mapper.canStandardize(location.datasetType):
            innerCallback = callback
            callback = lambda: location.mapper.standardize(location.datasetType, innerCallback(), dataId)
        if immediate:
            return callback()
        return ReadProxy(callback)
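
    # Example (sketch; 'calexp' and the dataId keys depend on the mapper):
    #
    #     >>> exposure = butler.get('calexp', visit=12345, ccd=1)            # read now
    #     >>> proxy = butler.get('calexp', dataId={'visit': 12345, 'ccd': 1},
    #     ...                    immediate=False)                            # ReadProxy; read on first use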

    def put(self, obj, datasetType, dataId={}, doBackup=False, **rest):
        """Persists a dataset given an output collection data id.

        Parameters
        ----------
        obj -
            The object to persist.
        datasetType - string
            The type of dataset to persist.
        dataId - dict
            The data id.
        doBackup - bool
            If True, rename existing instead of overwriting.
            WARNING: Setting doBackup=True is not safe for parallel processing, as it may be subject to race
            conditions.
        **rest
            Keyword arguments for the data id.
        """
        datasetType = self._resolveDatasetTypeAlias(datasetType)
        dataId = DataId(dataId)
        dataId.update(**rest)

        for location in self._locate(datasetType, dataId, write=True):
            if isinstance(location, ButlerComposite):
                disassembler = location.disassembler if location.disassembler else genericDisassembler
                disassembler(obj=obj, dataId=location.dataId, componentInfo=location.componentInfo)
                for name, info in location.componentInfo.items():
                    if not info.inputOnly:
                        self.put(info.obj, info.datasetType, location.dataId, doBackup=doBackup)
            else:
                if doBackup:
                    location.getRepository().backup(location.datasetType, dataId)
                location.getRepository().write(location, obj)
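
    # Example (sketch; the object is written to every writable output repository that maps
    # the dataset type):
    #
    #     >>> butler.put(exposure, 'calexp', visit=12345, ccd=1)
    #     >>> butler.put(exposure, 'calexp', {'visit': 12345, 'ccd': 1}, doBackup=True)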

    def subset(self, datasetType, level=None, dataId={}, **rest):
        """Return complete dataIds for a dataset type that match a partial (or empty) dataId.

        Given a partial (or empty) dataId specified in dataId and **rest, find all datasets that match the
        dataId. Optionally restrict the results to a given level specified by a dataId key (e.g. visit or
        sensor or amp for a camera). Return an iterable collection of complete dataIds as ButlerDataRefs.
        Datasets with the resulting dataIds may not exist; that needs to be tested with datasetExists().

        Parameters
        ----------
        datasetType - string
            The type of dataset collection to subset
        level - string
            The level of dataId at which to subset. Use an empty string if the mapper should look up the
            default level.
        dataId - dict
            The data id.
        **rest
            Keyword arguments for the data id.

        Returns
        -------
        subset - ButlerSubset
            Collection of ButlerDataRefs for datasets matching the data id.

        Examples
        --------
        To print the full dataIds for all r-band measurements in a source catalog
        (note that the subset call is equivalent to: `butler.subset('src', dataId={'filter':'r'})`):

        >>> subset = butler.subset('src', filter='r')
        >>> for data_ref in subset: print(data_ref.dataId)
        """
        datasetType = self._resolveDatasetTypeAlias(datasetType)

        # Currently expected behavior of subset is that if specified level is None then the mapper's default
        # level should be used. Convention for level within Butler is that an empty string is used to indicate
        # the default level.
        if level is None:
            level = ''

        dataId = DataId(dataId)
        dataId.update(**rest)
        return ButlerSubset(self, datasetType, level, dataId)

    def dataRef(self, datasetType, level=None, dataId={}, **rest):
        """Returns a single ButlerDataRef.

        Given a complete dataId specified in dataId and **rest, find the unique dataset at the given level
        specified by a dataId key (e.g. visit or sensor or amp for a camera) and return a ButlerDataRef.

        Parameters
        ----------
        datasetType - string
            The type of dataset collection to reference
        level - string
            The level of dataId at which to reference
        dataId - dict
            The data id.
        **rest
            Keyword arguments for the data id.

        Returns
        -------
        dataRef - ButlerDataRef
            ButlerDataRef for dataset matching the data id
        """
        datasetType = self._resolveDatasetTypeAlias(datasetType)
        dataId = DataId(dataId)
        subset = self.subset(datasetType, level, dataId, **rest)
        if len(subset) != 1:
            raise RuntimeError("No unique dataset for: Dataset type:%s Level:%s Data ID:%s Keywords:%s" %
                               (str(datasetType), str(level), str(dataId), str(rest)))
        return ButlerDataRef(subset, subset.cache[0])
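
    # Example (sketch; a complete dataId is required so that exactly one dataset matches):
    #
    #     >>> ref = butler.dataRef('calexp', dataId={'visit': 12345, 'ccd': 1})
    #     >>> exposure = ref.get('calexp')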

    def _read(self, location):
        """Unpersist an object using data inside a butlerLocation object.

        Parameters
        ----------
        location - ButlerLocation
            A butlerLocation instance populated with data needed to read the object.

        Returns
        -------
        object - an instance of the object specified by the butlerLocation.
        """
        self.log.debug("Starting read from %s", location)
        results = location.repository.read(location)
        if len(results) == 1:
            results = results[0]
        self.log.debug("Ending read from %s", location)
        return results

    def __reduce__(self):
        ret = (_unreduce, (self._initArgs, self.datasetTypeAliasDict))
        return ret

    def _resolveDatasetTypeAlias(self, datasetType):
        """Replaces all the known alias keywords in the given string with the alias value.

        Parameters
        ----------
        datasetType - string
            A datasetType string to search & replace on

        Returns
        -------
        datasetType - string
            The de-aliased string
        """
        for key in self.datasetTypeAliasDict:
            # if all aliases have been replaced, bail out
            if datasetType.find('@') == -1:
                break
            datasetType = datasetType.replace(key, self.datasetTypeAliasDict[key])

        # If an alias specifier can not be resolved then throw.
        if datasetType.find('@') != -1:
            raise RuntimeError("Unresolvable alias specifier in datasetType: %s" % (datasetType,))

        return datasetType


def _unreduce(initArgs, datasetTypeAliasDict):
    mapperArgs = initArgs.pop('mapperArgs')
    initArgs.update(mapperArgs)
    butler = Butler(**initArgs)
    butler.datasetTypeAliasDict = datasetTypeAliasDict
    return butler
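
# Butler instances are picklable via __reduce__/_unreduce: the init arguments and the alias
# dict are captured, and a new Butler is constructed on unpickling (illustrative sketch;
# the repository path is hypothetical):
#
#     >>> import pickle
#     >>> butler2 = pickle.loads(pickle.dumps(Butler(inputs='/datasets/repo')))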