from future import standard_library
standard_library.install_aliases()
from builtins import zip
from builtins import range
from past.builtins import basestring
from builtins import object

import os
import sys
import time
import types
import pickle
import copyreg
import importlib
import threading
import traceback
from functools import wraps, partial
from contextlib import contextmanager

import mpi4py.MPI as mpi

from lsst.pipe.base import Struct
from future.utils import with_metaclass

__all__ = ["Comm", "Pool", "startPool", "setBatchType", "getBatchType", "abortOnError", "NODE", ]

NODE = "%s:%d" % (os.uname()[1], os.getpid())

def unpickleInstanceMethod(obj, name):
    """Unpickle an instance method

    This has to be a named function rather than a lambda because
    pickle needs to find it.
    """
    return getattr(obj, name)


def pickleInstanceMethod(method):
    """Pickle an instance method

    The instance method is divided into the object and the
    method name.
    """
    obj = method.__self__
    name = method.__name__
    return unpickleInstanceMethod, (obj, name)


copyreg.pickle(types.MethodType, pickleInstanceMethod)
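
# Illustrative sketch: with the copyreg registration above, bound methods
# round-trip through pickle, so they can be shipped between nodes like plain
# functions. The _Adder class and _demoPickleInstanceMethod below are
# hypothetical examples, not part of the pool API.
class _Adder(object):
    def __init__(self, offset):
        self.offset = offset

    def add(self, value):
        return value + self.offset


def _demoPickleInstanceMethod():
    import pickle
    bound = _Adder(10).add
    clone = pickle.loads(pickle.dumps(bound))  # Uses pickleInstanceMethod/unpickleInstanceMethod
    return clone(5)  # == 15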

def unpickleFunction(moduleName, funcName):
    """Unpickle a function

    This has to be a named function rather than a lambda because
    pickle needs to find it.
    """
    module = importlib.import_module(moduleName)
    return getattr(module, funcName)


def pickleFunction(function):
    """Pickle a function

    This assumes that we can recreate the function object by grabbing
    it from the proper module. This may be violated if the function
    is a lambda or in __main__. In that case, I recommend recasting
    the function as an object with a __call__ method.

    Another problematic case may be a wrapped (e.g., decorated) method
    in a class: the 'method' is then a function, and recreating it is
    not as easy as we assume here.
    """
    moduleName = function.__module__
    funcName = function.__name__
    return unpickleFunction, (moduleName, funcName)


copyreg.pickle(types.FunctionType, pickleFunction)
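
# Illustrative sketch of the recommendation above: instead of a lambda (which
# pickleFunction cannot recreate from its module), wrap the behaviour in a small
# object with a __call__ method. _ScaledOffset is a hypothetical example.
class _ScaledOffset(object):
    """Picklable replacement for 'lambda x: scale*x + offset'"""

    def __init__(self, scale, offset):
        self.scale = scale
        self.offset = offset

    def __call__(self, value):
        return self.scale*value + self.offset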

_batchType = "unknown"


def getBatchType():
    """Return a string giving the type of batch system in use"""
    return _batchType


def setBatchType(batchType):
    """Set the type of batch system in use"""
    global _batchType
    _batchType = batchType

def abortOnError(func):
    """Function decorator to throw an MPI abort on an unhandled exception"""
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            sys.stderr.write("%s on %s in %s: %s\n" % (type(e).__name__, NODE, func.__name__, e))
            traceback.print_exc(file=sys.stderr)
            mpi.COMM_WORLD.Abort(1)
    return wrapper
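
# Illustrative sketch: decorating an MPI entry point with abortOnError means an
# uncaught exception on any rank aborts the whole job rather than leaving the
# other ranks blocked in a collective call. _demoEntryPoint is a hypothetical
# example.
@abortOnError
def _demoEntryPoint():
    if mpi.COMM_WORLD.Get_rank() == 0:
        raise RuntimeError("failure on the root rank aborts every rank")
    mpi.COMM_WORLD.Barrier()  # Without the abort, the other ranks would wait here forever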

class PickleHolder(object):
    """Singleton to hold what's about to be pickled.

    We hold onto the object in case there's trouble pickling,
    so we can figure out what class in particular is causing
    the trouble.

    The held object is in the 'obj' attribute.

    Here we use the __new__-style singleton pattern, because
    we specifically want __init__ to be called each time.
    """
    _instance = None

    def __new__(cls, hold=None):
        if cls._instance is None:
            cls._instance = super(PickleHolder, cls).__new__(cls)
            cls._instance.__init__(hold)
            cls._instance.obj = None
        return cls._instance

    def __init__(self, hold=None):
        """Hold onto new object"""
        if hold is not None:
            self.obj = hold

    def __enter__(self):
        pass

    def __exit__(self, excType, excVal, tb):
        """Drop held object if there were no problems"""
        if excType is None:
            self.obj = None

def guessPickleObj():
    """Try to guess what's not pickling after an exception

    This tends to work if the problem is coming from the
    regular pickle module. If it's coming from the bowels
    of mpi4py, there's not much that can be done.
    """
    excType, excValue, tb = sys.exc_info()
    stack = []
    while tb:  # Walk to the innermost traceback frame
        stack.append(tb)
        tb = tb.tb_next
    try:
        # The regular pickle module keeps the object being pickled in "obj"
        return stack[-2].tb_frame.f_locals["obj"]
    except Exception:
        return None

@contextmanager
def pickleSniffer(abort=False):
    """Context manager to sniff out pickle problems

    If there's a pickle error, you're normally told what the problem
    class is. However, all SWIG objects are reported as "SwigPyObject".
    In order to figure out which actual SWIG-ed class is causing
    problems, we need to go digging.

    Example usage:

        with pickleSniffer():
            someOperationInvolvingPickle()

    If 'abort' is True, will call MPI abort in the event of problems.
    """
    try:
        yield
    except Exception as e:
        if "SwigPyObject" not in str(e) or "pickle" not in str(e):
            raise
        sys.stderr.write("Pickling error detected: %s\n" % e)
        traceback.print_exc(file=sys.stderr)
        obj = guessPickleObj()
        heldObj = PickleHolder().obj
        if obj is None and heldObj is not None:
            # Try to reproduce the problem with the regular pickle module,
            # which gives a better chance of digging out the culprit.
            try:
                pickle.dumps(heldObj)
                sys.stderr.write("Hmmm, that's strange: no problem with pickling held object?!?!\n")
            except Exception:
                obj = guessPickleObj()
        if obj is None:
            sys.stderr.write("Unable to determine class causing pickle problems.\n")
        else:
            sys.stderr.write("Object that could not be pickled: %s\n" % obj)
        if abort:
            mpi.COMM_WORLD.Abort(1)

def catchPicklingError(func):
    """Function decorator to catch errors in pickling and print something useful"""
    @wraps(func)
    def wrapper(*args, **kwargs):
        with pickleSniffer(True):
            return func(*args, **kwargs)
    return wrapper
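
# Illustrative sketch of how the pieces above fit together: hold the outgoing
# object in PickleHolder and run the send inside pickleSniffer, so a
# SWIG-related pickle failure is reported together with the object that was
# being sent. _demoGuardedSend is a hypothetical example.
def _demoGuardedSend(comm, obj, dest):
    with PickleHolder(obj):               # Remember what is about to be pickled
        with pickleSniffer(abort=False):  # Dig out and report the culprit on failure
            comm.send(obj, dest, tag=0)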

class Comm(mpi.Intracomm):
    """Wrapper to mpi4py's MPI.Intracomm class to avoid busy-waiting.

    As suggested by Lisandro Dalcin at:
    * http://code.google.com/p/mpi4py/issues/detail?id=4 and
    * https://groups.google.com/forum/?fromgroups=#!topic/mpi4py/nArVuMXyyZI
    """

    def __new__(cls, comm=mpi.COMM_WORLD, recvSleep=0.1, barrierSleep=0.1):
        """!Construct an MPI.Comm wrapper

        @param comm            MPI.Intracomm to wrap a duplicate of
        @param recvSleep       Sleep time (seconds) for recv()
        @param barrierSleep    Sleep time (seconds) for Barrier()
        """
        self = super(Comm, cls).__new__(cls, comm.Dup())
        self._recvSleep = recvSleep
        self._barrierSleep = barrierSleep
        self._barrierComm = None  # Duplicate communicator used for Barrier
        return self

    def recv(self, obj=None, source=0, tag=0, status=None):
        """Version of comm.recv() that doesn't busy-wait"""
        sts = mpi.Status()
        while not self.Iprobe(source=source, tag=tag, status=sts):
            time.sleep(self._recvSleep)
        return super(Comm, self).recv(buf=obj, source=sts.source, tag=sts.tag, status=status)

    def send(self, obj=None, *args, **kwargs):
        with PickleHolder(obj):
            return super(Comm, self).send(obj, *args, **kwargs)

    def _checkBarrierComm(self):
        """Ensure the duplicate communicator is available"""
        if self._barrierComm is None:
            self._barrierComm = self.Dup()

    def Barrier(self, tag=0):
        """Version of comm.Barrier() that doesn't busy-wait

        A duplicate communicator is used so as not to interfere with the user's own communications.
        """
        self._checkBarrierComm()
        size = self._barrierComm.Get_size()
        if size == 1:
            return
        rank = self._barrierComm.Get_rank()
        mask = 1
        while mask < size:
            dst = (rank + mask) % size
            src = (rank - mask + size) % size
            req = self._barrierComm.isend(None, dst, tag)
            while not self._barrierComm.Iprobe(src, tag):
                time.sleep(self._barrierSleep)
            self._barrierComm.recv(None, src, tag)
            req.Wait()
            mask <<= 1

    def broadcast(self, value, root=0):
        with PickleHolder(value):
            return super(Comm, self).bcast(value, root=root)

    def scatter(self, dataList, root=0, tag=0):
        """Scatter data across the nodes

        The default version apparently pickles the entire 'dataList',
        which can cause errors if the pickle size grows over 2^31 bytes
        due to fundamental problems with pickle in python 2. Instead,
        we send the data to each slave node in turn; this reduces the
        pickle size.

        @param dataList  List of data to distribute; one per node
        @param root      Index of root node
        @param tag       Message tag (integer)
        @return Data for this node
        """
        if self.Get_rank() == root:
            for rank, data in enumerate(dataList):
                if rank == root:
                    continue
                self.send(data, rank, tag=tag)
            return dataList[root]
        return self.recv(source=root, tag=tag)

    def Free(self):
        if self._barrierComm is not None:
            self._barrierComm.Free()
        super(Comm, self).Free()
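
# Illustrative sketch of the polling idea used by Comm.recv() and Comm.Barrier()
# above: probe for a matching message and sleep between probes, so the process
# yields the CPU instead of spinning inside a blocking MPI call. This standalone
# helper is a hypothetical example using plain mpi4py.
def _pollingRecv(comm, source=0, tag=0, sleepTime=0.1):
    status = mpi.Status()
    while not comm.Iprobe(source=source, tag=tag, status=status):
        time.sleep(sleepTime)  # Yield the CPU until a matching message arrives
    return comm.recv(source=status.source, tag=status.tag)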

class NoOp(object):
    """Object to signal no operation"""
    pass

class Tags(object):
    """Provides tag numbers by symbolic name in attributes"""

    def __init__(self, *nameList):
        self._nameList = nameList
        for i, name in enumerate(nameList, 1):
            setattr(self, name, i)

    def __repr__(self):
        return self.__class__.__name__ + repr(self._nameList)

    def __reduce__(self):
        return self.__class__, tuple(self._nameList)
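
# Illustrative sketch: Tags maps symbolic names to distinct small integers, so
# message tags can be referred to by name rather than by magic number.
# _demoTags is a hypothetical example.
def _demoTags():
    tags = Tags("request", "work")
    assert tags.request == 1 and tags.work == 2
    return tags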

class Cache(Struct):
    """An object to hold stuff between different scatter calls

    Includes a communicator by default, to allow intercommunication
    between nodes.
    """
    def __init__(self, comm):
        super(Cache, self).__init__(comm=comm)

class SingletonMeta(type):
    """!Metaclass to produce a singleton

    Doing a singleton mixin without a metaclass (via __new__) is
    annoying because the user has to name his __init__ something else
    (otherwise it's called every time, which undoes any changes).
    Using this metaclass, the class's __init__ is called exactly once.

    Because this is a metaclass, note that:
    * "self" here is the class
    * "__init__" is making the class (it's like the body of the
      class definition).
    * "__call__" is making an instance of the class (it's like
      "__new__" in the class).
    """
    def __init__(self, name, bases, dict_):
        super(SingletonMeta, self).__init__(name, bases, dict_)
        self._instance = None

    def __call__(self, *args, **kwargs):
        if self._instance is None:
            self._instance = super(SingletonMeta, self).__call__(*args, **kwargs)
        return self._instance
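
# Illustrative sketch: any class using SingletonMeta yields the same instance on
# every construction, with __init__ run exactly once. _Counter and
# _demoSingleton are hypothetical examples.
class _Counter(with_metaclass(SingletonMeta, object)):
    def __init__(self):
        self.count = 0  # Runs only on the first _Counter() call


def _demoSingleton():
    first = _Counter()
    first.count += 1
    second = _Counter()   # Same object as 'first'
    return second.count   # == 1; __init__ did not reset it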

class Debugger(with_metaclass(SingletonMeta, object)):
    """Debug logger singleton

    Disabled by default; to enable, do: 'Debugger().enabled = True'
    You can also redirect the output by changing the 'out' attribute.
    """
    def __init__(self):
        self.enabled = False
        self.out = sys.stderr

    def log(self, source, msg, *args):
        """!Log message

        The 'args' are only stringified if we're enabled.

        @param source: name of source
        @param msg: message to write
        @param args: additional outputs to append to message
        """
        if self.enabled:
            self.out.write("%s: %s" % (source, msg))
            for arg in args:
                self.out.write(" %s" % arg)
            self.out.write("\n")

class ReductionThread(threading.Thread):
    """Thread to do reduction of results

    "A thread?", you say. "What about the python GIL?"
    Well, because we 'sleep' when there's no immediate response from the
    slaves, that gives the thread a chance to fire; and threads are easier
    to manage (e.g., shared memory) than a process.
    """
    def __init__(self, reducer, initial=None, sleep=0.1):
        """!Constructor

        The 'reducer' should take two values and return a single
        (reduced) value.

        @param reducer  Function that does the reducing
        @param initial  Initial value for reduction, or None
        @param sleep    Time to sleep when there's nothing to do (sec)
        """
        threading.Thread.__init__(self, name="reducer")
        self._queue = []  # Queue of stuff to be reduced
        self._lock = threading.Lock()  # Lock for the queue
        self._reducer = reducer
        self._sleep = sleep
        self._result = initial  # Final result
        self._done = threading.Event()  # Signal that everything is done

    def _doReduce(self):
        """Do the actual work

        We pull the data out of the queue and release the lock before
        operating on it. This stops us from blocking the addition of
        new data to the queue.
        """
        with self._lock:
            queue = self._queue
            self._queue = []
        for data in queue:
            self._result = self._reducer(self._result, data) if self._result is not None else data

    def run(self):
        """Do the work

        Thread entry point, called by Thread.start
        """
        while True:
            self._doReduce()
            if self._done.wait(self._sleep):
                self._doReduce()
                return

    def add(self, data):
        """Add data to the queue to be reduced"""
        with self._lock:
            self._queue.append(data)

    def join(self):
        """Complete the thread

        Unlike Thread.join (which always returns 'None'), we return the result
        we calculated.
        """
        self._done.set()
        threading.Thread.join(self)
        return self._result
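
# Illustrative sketch: the master feeds slave results into a ReductionThread as
# they arrive, and join() hands back the reduced value. _demoReductionThread is
# a hypothetical example using a plain sum.
def _demoReductionThread(values):
    import operator
    thread = ReductionThread(operator.add)
    thread.start()
    for value in values:      # In the pool, these arrive from slaves
        thread.add(value)
    return thread.join()      # Sum of 'values' (None if 'values' is empty)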

class PoolNode(with_metaclass(SingletonMeta, object)):
    """Node in MPI process pool

    WARNING: You should not let a pool instance hang around at program
    termination, as the garbage collection behaves differently, and may
    cause a segmentation fault (signal 11).
    """

    def __init__(self, comm=None, root=0):
        if comm is None:
            comm = Comm()
        self.comm = comm
        self.rank = self.comm.rank
        self.root = root
        self.size = self.comm.size
        self._cache = {}
        self._store = {}
        self.debugger = Debugger()
        self.node = NODE

    def _getCache(self, context, index):
        """Retrieve cache for particular data

        The cache is updated with the contents of the store.
        """
        if context not in self._cache:
            self._cache[context] = {}
        if context not in self._store:
            self._store[context] = {}
        cache = self._cache[context]
        store = self._store[context]
        if index not in cache:
            cache[index] = Cache(self.comm)
        cache[index].__dict__.update(store)
        return cache[index]

    def log(self, msg, *args):
        """Log a debugging message"""
        self.debugger.log("Node %d" % self.rank, msg, *args)

    def _processQueue(self, context, func, queue, *args, **kwargs):
        """!Process a queue of data

        The queue consists of a list of (index, data) tuples,
        where the index maps to the cache, and the data is
        passed to the 'func'.

        The 'func' signature should be func(cache, data, *args, **kwargs)
        if 'context' is non-None; otherwise func(data, *args, **kwargs).

        @param context: Namespace for cache; None to not use cache
        @param func: function for slaves to run
        @param queue: List of (index,data) tuples to process
        @param args: Constant arguments
        @param kwargs: Keyword arguments
        @return list of results from applying 'func' to dataList
        """
        return self._reduceQueue(context, None, func, queue, *args, **kwargs)

    def _reduceQueue(self, context, reducer, func, queue, *args, **kwargs):
        """!Reduce a queue of data

        The queue consists of a list of (index, data) tuples,
        where the index maps to the cache, and the data is
        passed to the 'func', the output of which is reduced
        using the 'reducer' (if non-None).

        The 'func' signature should be func(cache, data, *args, **kwargs)
        if 'context' is non-None; otherwise func(data, *args, **kwargs).

        The 'reducer' signature should be reducer(old, new). If the 'reducer'
        is None, then we will return the full list of results.

        @param context: Namespace for cache; None to not use cache
        @param reducer: function for master to run to reduce slave results; or None
        @param func: function for slaves to run
        @param queue: List of (index,data) tuples to process
        @param args: Constant arguments
        @param kwargs: Keyword arguments
        @return reduced result (if reducer is non-None) or list of results
            from applying 'func' to dataList
        """
        if context is not None:
            resultList = [func(self._getCache(context, i), data, *args, **kwargs) for i, data in queue]
        else:
            resultList = [func(data, *args, **kwargs) for i, data in queue]
        if reducer is None:
            return resultList
        if len(resultList) == 0:
            return None
        output = resultList.pop(0)
        for result in resultList:
            output = reducer(output, result)
        return output

    def storeSet(self, context, **kwargs):
        """Set values in store for a particular context"""
        self.log("storing", context, kwargs)
        if context not in self._store:
            self._store[context] = {}
        for name, value in kwargs.items():
            self._store[context][name] = value

    def storeDel(self, context, *nameList):
        """Delete value in store for a particular context"""
        self.log("deleting from store", context, nameList)
        if context not in self._store:
            raise KeyError("No such context: %s" % context)
        for name in nameList:
            del self._store[context][name]

    def storeClear(self, context):
        """Clear stored data for a particular context"""
        self.log("clearing store", context)
        if context not in self._store:
            raise KeyError("No such context: %s" % context)
        self._store[context] = {}

    def cacheClear(self, context):
        """Reset cache for a particular context"""
        self.log("clearing cache", context)
        if context not in self._cache:
            return
        self._cache[context] = {}

    def cacheList(self, context):
        """List contents of cache"""
        cache = self._cache[context] if context in self._cache else {}
        sys.stderr.write("Cache on %s (%s): %s\n" % (self.node, context, cache))

    def storeList(self, context):
        """List contents of store for a particular context"""
        if context not in self._store:
            raise KeyError("No such context: %s" % context)
        sys.stderr.write("Store on %s (%s): %s\n" % (self.node, context, self._store[context]))
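
# Illustrative sketch of the 'func' signature convention described in
# _processQueue/_reduceQueue above: with a context, the function receives the
# per-datum cache first and may stash state on it for later mapToPrevious
# calls; without a context it receives the data alone. Both functions are
# hypothetical examples.
def _demoFuncWithCache(cache, data):
    cache.result = data**2   # Remembered on the slave for this datum
    return cache.result


def _demoFuncWithoutCache(data):
    return data**2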

class PoolMaster(PoolNode):
    """Master node instance of MPI process pool

    Only the master node should instantiate this.

    WARNING: You should not let a pool instance hang around at program
    termination, as the garbage collection behaves differently, and may
    cause a segmentation fault (signal 11).
    """

    def __init__(self, *args, **kwargs):
        super(PoolMaster, self).__init__(*args, **kwargs)
        assert self.root == self.rank, "This is the master node"

    def __del__(self):
        """Ensure slaves exit when we're done"""
        self.exit()

    def log(self, msg, *args):
        """Log a debugging message"""
        self.debugger.log("Master", msg, *args)

    def command(self, cmd):
        """Send command to slaves

        A command is the name of the PoolSlave method they should run.
        """
        self.log("command", cmd)
        self.comm.broadcast(cmd, root=self.root)

    def map(self, context, func, dataList, *args, **kwargs):
        """!Scatter work to slaves and gather the results

        Work is distributed dynamically, so that slaves that finish
        quickly will receive more work.

        Each slave applies the function to the data they're provided.
        The slaves may optionally be passed a cache instance, which
        they can use to store data for subsequent executions (to ensure
        subsequent data is distributed in the same pattern as before,
        use the 'mapToPrevious' method). The cache also contains
        data that has been stored on the slaves.

        The 'func' signature should be func(cache, data, *args, **kwargs)
        if 'context' is non-None; otherwise func(data, *args, **kwargs).

        @param context: Namespace for cache
        @param func: function for slaves to run; must be picklable
        @param dataList: List of data to distribute to slaves; must be picklable
        @param args: List of constant arguments
        @param kwargs: Dict of constant arguments
        @return list of results from applying 'func' to dataList
        """
        return self.reduce(context, None, func, dataList, *args, **kwargs)

    def reduce(self, context, reducer, func, dataList, *args, **kwargs):
        """!Scatter work to slaves and reduce the results

        Work is distributed dynamically, so that slaves that finish
        quickly will receive more work.

        Each slave applies the function to the data they're provided.
        The slaves may optionally be passed a cache instance, which
        they can use to store data for subsequent executions (to ensure
        subsequent data is distributed in the same pattern as before,
        use the 'mapToPrevious' method). The cache also contains
        data that has been stored on the slaves.

        The 'func' signature should be func(cache, data, *args, **kwargs)
        if 'context' is non-None; otherwise func(data, *args, **kwargs).

        The 'reducer' signature should be reducer(old, new). If the 'reducer'
        is None, then we will return the full list of results.

        @param context: Namespace for cache
        @param reducer: function for master to run to reduce slave results; or None
        @param func: function for slaves to run; must be picklable
        @param dataList: List of data to distribute to slaves; must be picklable
        @param args: List of constant arguments
        @param kwargs: Dict of constant arguments
        @return reduced result (if reducer is non-None) or list of results
            from applying 'func' to dataList
        """
        tags = Tags("request", "work")
        num = len(dataList)
        if self.size == 1 or num <= 1:
            return self._reduceQueue(context, reducer, func, list(zip(list(range(num)), dataList)),
                                     *args, **kwargs)
        if self.size == num:
            # Dynamic distribution would only waste time here
            return self.reduceNoBalance(context, reducer, func, dataList, *args, **kwargs)

        self.command("reduce")

        # Send function
        self.log("instruct")
        self.comm.broadcast((tags, func, reducer, args, kwargs, context), root=self.root)

        # Parcel out first set of data
        queue = list(zip(range(num), dataList))  # index, data
        output = [None]*num if reducer is None else None
        initial = [None if i == self.rank else queue.pop(0) if queue else NoOp() for
                   i in range(self.size)]
        pending = min(num, self.size - 1)
        self.log("scatter initial jobs")
        self.comm.scatter(initial, root=self.rank)

        while queue or pending > 0:
            status = mpi.Status()
            report = self.comm.recv(status=status, tag=tags.request, source=mpi.ANY_SOURCE)
            source = status.source
            self.log("gather from slave", source)
            if reducer is None:
                index, result = report
                output[index] = result

            if queue:
                job = queue.pop(0)
                self.log("send job to slave", job[0], source)
            else:
                job = NoOp()
                pending -= 1
            self.comm.send(job, source, tag=tags.work)

        if reducer is not None:
            results = self.comm.gather(None, root=self.root)
            output = None
            for rank in range(self.size):
                if rank == self.root:
                    continue
                output = reducer(output, results[rank]) if output is not None else results[rank]

        self.log("done")
        return output

    def mapNoBalance(self, context, func, dataList, *args, **kwargs):
        """!Scatter work to slaves and gather the results

        Work is distributed statically, so there is no load balancing.

        Each slave applies the function to the data they're provided.
        The slaves may optionally be passed a cache instance, which
        they can store data in for subsequent executions (to ensure
        subsequent data is distributed in the same pattern as before,
        use the 'mapToPrevious' method). The cache also contains
        data that has been stored on the slaves.

        The 'func' signature should be func(cache, data, *args, **kwargs)
        if 'context' is true; otherwise func(data, *args, **kwargs).

        @param context: Namespace for cache
        @param func: function for slaves to run; must be picklable
        @param dataList: List of data to distribute to slaves; must be picklable
        @param args: List of constant arguments
        @param kwargs: Dict of constant arguments
        @return list of results from applying 'func' to dataList
        """
        return self.reduceNoBalance(context, None, func, dataList, *args, **kwargs)

    def reduceNoBalance(self, context, reducer, func, dataList, *args, **kwargs):
        """!Scatter work to slaves and reduce the results

        Work is distributed statically, so there is no load balancing.

        Each slave applies the function to the data they're provided.
        The slaves may optionally be passed a cache instance, which
        they can store data in for subsequent executions (to ensure
        subsequent data is distributed in the same pattern as before,
        use the 'mapToPrevious' method). The cache also contains
        data that has been stored on the slaves.

        The 'func' signature should be func(cache, data, *args, **kwargs)
        if 'context' is true; otherwise func(data, *args, **kwargs).

        The 'reducer' signature should be reducer(old, new). If the 'reducer'
        is None, then we will return the full list of results.

        @param context: Namespace for cache
        @param reducer: function for master to run to reduce slave results; or None
        @param func: function for slaves to run; must be picklable
        @param dataList: List of data to distribute to slaves; must be picklable
        @param args: List of constant arguments
        @param kwargs: Dict of constant arguments
        @return reduced result (if reducer is non-None) or list of results
            from applying 'func' to dataList
        """
        tags = Tags("result", "work")
        num = len(dataList)
        if self.size == 1 or num <= 1:
            return self._reduceQueue(context, reducer, func, list(zip(range(num), dataList)),
                                     *args, **kwargs)

        self.command("mapNoBalance")

        # Send function
        self.log("instruct")
        self.comm.broadcast((tags, func, args, kwargs, context), root=self.root)

        # Divide up the jobs, trying to give the root the least to do,
        # so it also has time to manage the rest.
        queue = list(zip(range(num), dataList))  # index, data
        if num < self.size:
            distribution = [[queue[i]] for i in range(num)]
            distribution.insert(self.rank, [])
            for i in range(num, self.size - 1):
                distribution.append([])
        elif num % self.size == 0:
            numEach = num//self.size
            distribution = [queue[i*numEach:(i+1)*numEach] for i in range(self.size)]
        else:
            # Not evenly divisible: hand the jobs out round-robin, starting
            # with the node after the root.
            distribution = list([] for i in range(self.size))
            for i, job in enumerate(queue, self.rank + 1):
                distribution[i % self.size].append(job)

        # Send jobs to the slaves
        for source in range(self.size):
            if source == self.rank:
                continue
            self.log("send jobs to ", source)
            self.comm.send(distribution[source], source, tag=tags.work)

        # Run our own jobs while the slaves work
        output = [None]*num if reducer is None else None

        def ingestResults(output, nodeResults, distList):
            if reducer is None:
                for i, result in enumerate(nodeResults):
                    index = distList[i][0]
                    output[index] = result
                return output
            if output is None:
                output = nodeResults.pop(0)
            for result in nodeResults:
                output = reducer(output, result)
            return output

        ourResults = self._processQueue(context, func, distribution[self.rank], *args, **kwargs)
        output = ingestResults(output, ourResults, distribution[self.rank])

        # Collect results from the slaves
        pending = self.size - 1
        while pending > 0:
            status = mpi.Status()
            slaveResults = self.comm.recv(status=status, tag=tags.result, source=mpi.ANY_SOURCE)
            source = status.source
            self.log("gather from slave", source)
            output = ingestResults(output, slaveResults, distribution[source])
            pending -= 1

        self.log("done")
        return output

    def mapToPrevious(self, context, func, dataList, *args, **kwargs):
        """!Scatter work to the same target as before

        Work is distributed so that each slave handles the same
        indices in the dataList as when 'map' was called.
        This allows the right data to go to the right cache.

        It is assumed that the dataList is the same length as when it was
        passed to 'map'.

        The 'func' signature should be func(cache, data, *args, **kwargs).

        @param context: Namespace for cache
        @param func: function for slaves to run; must be picklable
        @param dataList: List of data to distribute to slaves; must be picklable
        @param args: List of constant arguments
        @param kwargs: Dict of constant arguments
        @return list of results from applying 'func' to dataList
        """
        return self.reduceToPrevious(context, None, func, dataList, *args, **kwargs)

    def reduceToPrevious(self, context, reducer, func, dataList, *args, **kwargs):
        """!Reduction where work goes to the same target as before

        Work is distributed so that each slave handles the same
        indices in the dataList as when 'map' was called.
        This allows the right data to go to the right cache.

        It is assumed that the dataList is the same length as when it was
        passed to 'map'.

        The 'func' signature should be func(cache, data, *args, **kwargs).

        The 'reducer' signature should be reducer(old, new). If the 'reducer'
        is None, then we will return the full list of results.

        @param context: Namespace for cache
        @param reducer: function for master to run to reduce slave results; or None
        @param func: function for slaves to run; must be picklable
        @param dataList: List of data to distribute to slaves; must be picklable
        @param args: List of constant arguments
        @param kwargs: Dict of constant arguments
        @return reduced result (if reducer is non-None) or list of results
            from applying 'func' to dataList
        """
        if context is None:
            raise ValueError("context must be set to map to same nodes as previous context")
        tags = Tags("result", "work")
        num = len(dataList)
        if self.size == 1 or num <= 1:
            return self._reduceQueue(context, reducer, func, list(zip(range(num), dataList)),
                                     *args, **kwargs)

        self.command("mapToPrevious")

        # Send function
        self.log("instruct")
        self.comm.broadcast((tags, func, args, kwargs, context), root=self.root)

        requestList = self.comm.gather(None, root=self.root)
        self.log("listen", requestList)
        initial = [dataList[index] if (index is not None and index >= 0) else None for index in requestList]
        self.log("scatter jobs", initial)
        self.comm.scatter(initial, root=self.root)
        pending = min(num, self.size - 1)

        if reducer is None:
            output = [None]*num
        else:
            thread = ReductionThread(reducer)
            thread.start()

        while pending > 0:
            status = mpi.Status()
            index, result, nextIndex = self.comm.recv(status=status, tag=tags.result, source=mpi.ANY_SOURCE)
            source = status.source
            self.log("gather from slave", source)
            if reducer is None:
                output[index] = result
            else:
                thread.add(result)

            if nextIndex >= 0:
                job = dataList[nextIndex]
                self.log("send job to slave", source)
                self.comm.send(job, source, tag=tags.work)
            else:
                pending -= 1
            self.log("waiting on", pending)

        if reducer is not None:
            output = thread.join()

        self.log("done")
        return output

    def storeSet(self, context, **kwargs):
        """!Store data on slave for a particular context

        The data is made available to functions through the cache. The
        stored data differs from the cache in that it is identical for
        all operations, whereas the cache is specific to the data being
        operated upon.

        @param context: namespace for store
        @param kwargs: dict of name=value pairs
        """
        super(PoolMaster, self).storeSet(context, **kwargs)
        self.command("storeSet")
        self.log("give data")
        self.comm.broadcast((context, kwargs), root=self.root)
        self.log("done")

    def storeDel(self, context, *nameList):
        """Delete stored data on slave for a particular context"""
        super(PoolMaster, self).storeDel(context, *nameList)
        self.command("storeDel")
        self.log("tell names")
        self.comm.broadcast((context, nameList), root=self.root)
        self.log("done")

    def storeClear(self, context):
        """Reset data store for a particular context on master and slaves"""
        super(PoolMaster, self).storeClear(context)
        self.command("storeClear")
        self.comm.broadcast(context, root=self.root)

    def cacheClear(self, context):
        """Reset cache for a particular context on master and slaves"""
        super(PoolMaster, self).cacheClear(context)
        self.command("cacheClear")
        self.comm.broadcast(context, root=self.root)

    def cacheList(self, context):
        """List cache contents for a particular context on master and slaves"""
        super(PoolMaster, self).cacheList(context)
        self.command("cacheList")
        self.comm.broadcast(context, root=self.root)

    def storeList(self, context):
        """List store contents for a particular context on master and slaves"""
        super(PoolMaster, self).storeList(context)
        self.command("storeList")
        self.comm.broadcast(context, root=self.root)

    def exit(self):
        """Command slaves to exit"""
        self.command("exit")
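
# Illustrative sketch: a typical master-side sequence, assuming the pool has
# been started with startPool() below. storeSet() pushes constant data to every
# slave, map() fans the work out with dynamic load balancing, and reduce()
# combines the results. _square and _demoMasterUsage are hypothetical examples.
def _square(cache, data):
    return data*data * cache.scale


def _demoMasterUsage(pool, dataList):
    import operator
    pool.storeSet("demo", scale=2)                                 # Available as cache.scale on slaves
    squares = pool.map("demo", _square, dataList)                  # One result per datum
    total = pool.reduce("demo", operator.add, _square, dataList)   # Single reduced value
    return squares, total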

class PoolSlave(PoolNode):
    """Slave node instance of MPI process pool"""

    def log(self, msg, *args):
        """Log a debugging message"""
        assert self.rank != self.root, "This is not the master node."
        self.debugger.log("Slave %d" % self.rank, msg, *args)

    def run(self):
        """Serve commands of master node

        Slave accepts commands, which are the names of methods to execute.
        This exits when a command returns a true value.
        """
        menu = dict((cmd, getattr(self, cmd)) for cmd in ("reduce", "mapNoBalance", "mapToPrevious",
                                                          "storeSet", "storeDel", "storeClear", "storeList",
                                                          "cacheList", "cacheClear", "exit",))
        self.log("waiting for command from", self.root)
        command = self.comm.broadcast(None, root=self.root)
        self.log("command", command)
        while not menu[command]():
            self.log("waiting for command from", self.root)
            command = self.comm.broadcast(None, root=self.root)
            self.log("command", command)
        self.log("exiting")

    def reduce(self):
        """Reduce scattered data and return results"""
        self.log("waiting for instruction")
        tags, func, reducer, args, kwargs, context = self.comm.broadcast(None, root=self.root)
        self.log("waiting for job")
        job = self.comm.scatter(None, root=self.root)

        out = None  # Reduction result
        while not isinstance(job, NoOp):
            index, data = job
            self.log("running job")
            result = self._processQueue(context, func, [(index, data)], *args, **kwargs)[0]
            if reducer is None:
                report = (index, result)
            else:
                report = None
                out = reducer(out, result) if out is not None else result
            self.comm.send(report, self.root, tag=tags.request)
            self.log("waiting for job")
            job = self.comm.recv(tag=tags.work, source=self.root)

        if reducer is not None:
            self.comm.gather(out, root=self.root)
        self.log("done")

    def mapNoBalance(self):
        """Process bulk scattered data and return results"""
        self.log("waiting for instruction")
        tags, func, args, kwargs, context = self.comm.broadcast(None, root=self.root)
        self.log("waiting for job")
        queue = self.comm.recv(tag=tags.work, source=self.root)

        resultList = []
        for index, data in queue:
            self.log("running job", index)
            result = self._processQueue(context, func, [(index, data)], *args, **kwargs)[0]
            resultList.append(result)

        self.comm.send(resultList, self.root, tag=tags.result)
        self.log("done")

    def mapToPrevious(self):
        """Process the same scattered data processed previously"""
        self.log("waiting for instruction")
        tags, func, args, kwargs, context = self.comm.broadcast(None, root=self.root)
        queue = list(self._cache[context].keys()) if context in self._cache else None
        index = queue.pop(0) if queue else -1
        self.log("request job", index)
        self.comm.gather(index, root=self.root)
        self.log("waiting for job")
        data = self.comm.scatter(None, root=self.root)

        while index >= 0:
            self.log("running job")
            result = func(self._getCache(context, index), data, *args, **kwargs)
            self.log("pending", queue)
            nextIndex = queue.pop(0) if queue else -1
            self.comm.send((index, result, nextIndex), self.root, tag=tags.result)
            index = nextIndex
            if index >= 0:
                data = self.comm.recv(tag=tags.work, source=self.root)

        self.log("done")

    def storeSet(self):
        """Set value in store"""
        context, kwargs = self.comm.broadcast(None, root=self.root)
        super(PoolSlave, self).storeSet(context, **kwargs)

    def storeDel(self):
        """Delete value in store"""
        context, nameList = self.comm.broadcast(None, root=self.root)
        super(PoolSlave, self).storeDel(context, *nameList)

    def storeClear(self):
        """Reset data store"""
        context = self.comm.broadcast(None, root=self.root)
        super(PoolSlave, self).storeClear(context)

    def cacheClear(self):
        """Reset cache"""
        context = self.comm.broadcast(None, root=self.root)
        super(PoolSlave, self).cacheClear(context)

    def cacheList(self):
        """List cache contents"""
        context = self.comm.broadcast(None, root=self.root)
        super(PoolSlave, self).cacheList(context)

    def storeList(self):
        """List store contents"""
        context = self.comm.broadcast(None, root=self.root)
        super(PoolSlave, self).storeList(context)

    def exit(self):
        """Allow exit from loop in 'run'"""
        return True

class PoolWrapperMeta(type):
    """Metaclass for PoolWrapper to add methods pointing to PoolMaster

    The 'context' is automatically supplied to these methods as the first argument.
    """

    def __call__(self, context="default"):
        instance = super(PoolWrapperMeta, self).__call__(context)
        pool = PoolMaster._instance
        for name in ("map", "mapNoBalance", "mapToPrevious",
                     "reduce", "reduceNoBalance", "reduceToPrevious",
                     "storeSet", "storeDel", "storeClear", "storeList",
                     "cacheList", "cacheClear",):
            setattr(instance, name, partial(getattr(pool, name), context))
        return instance


class PoolWrapper(with_metaclass(PoolWrapperMeta, object)):
    """Wrap PoolMaster to automatically provide context"""

    def __init__(self, context="default"):
        self._pool = PoolMaster._instance
        self._context = context

    def __getattr__(self, name):
        return getattr(self._pool, name)

class Pool(PoolWrapper):
    """Process pool

    Use this class to automatically provide 'context' to
    the PoolMaster class. If you want to call functions
    that don't take a 'cache' object, use the PoolMaster
    class directly, and specify context=None.
    """
    pass

def startPool(comm=None, root=0, killSlaves=True):
    """!Start a process pool.

    Returns a PoolMaster object for the master node.
    Slave nodes are run and then optionally killed.

    If you elect not to kill the slaves, note that they
    will emerge at the point this function was called,
    which is likely very different from the point the
    master is at, so it will likely be necessary to put
    in some rank dependent code (e.g., look at the 'rank'
    attribute of the returned pools).

    Note that the pool objects should be deleted (either
    by going out of scope or explicit 'del') before program
    termination to avoid a segmentation fault.

    @param comm: MPI communicator
    @param root: Rank of root/master node
    @param killSlaves: Kill slaves on completion?
    """
    if comm is None:
        comm = Comm()
    if comm.rank == root:
        return PoolMaster(comm, root=root)
    slave = PoolSlave(comm, root=root)
    slave.run()
    if killSlaves:
        del slave  # Required to prevent segmentation fault on exit
        sys.exit()
    return slave
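
# Illustrative sketch of the lifecycle described above, assuming the script is
# launched under MPI (e.g. "mpiexec -n N python script.py"): the master gets a
# PoolMaster back, while the slaves serve commands inside startPool() and, when
# killSlaves is True, exit there. _work and _demoMain are hypothetical examples.
def _work(cache, data):
    return data + 1


def _demoMain():
    pool = startPool()                              # Slaves never return from this call
    results = pool.map("demo", _work, list(range(10)))
    print(results)
    del pool                                        # Shut the slaves down before the program ends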