from future import standard_library
standard_library.install_aliases()
from builtins import zip
from builtins import range
from past.builtins import basestring
from builtins import object

import os
import sys
import time
import types
import copyreg
import pickle
import importlib
import threading
import traceback
from functools import wraps, partial
from contextlib import contextmanager

import mpi4py.MPI as mpi

from future.utils import with_metaclass
__all__ = ["Comm", "Pool", "startPool", "setBatchType", "getBatchType", "abortOnError", "NODE", ]
NODE = "%s:%d" % (os.uname()[1], os.getpid())  # Name of node: hostname and process ID
def unpickleInstanceMethod(obj, name):
    """Unpickle an instance method

    This has to be a named function rather than a lambda because
    pickle needs to find it.
    """
    return getattr(obj, name)


def pickleInstanceMethod(method):
    """Pickle an instance method

    The instance method is divided into the object and the
    method name.
    """
    obj = method.__self__
    name = method.__name__
    return unpickleInstanceMethod, (obj, name)


copyreg.pickle(types.MethodType, pickleInstanceMethod)
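
# Hedged usage sketch (not part of the original module): with the copyreg
# registration above, a bound method survives a pickle round trip, which is
# what allows instance methods to be shipped between MPI processes. The names
# below are purely illustrative.
class _PickleDemo(object):
    def work(self, x):
        return x + 1


def _demoInstanceMethodPickle():
    method = _PickleDemo().work
    restored = pickle.loads(pickle.dumps(method))
    return restored(1) == 2  # True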
def unpickleFunction(moduleName, funcName):
    """Unpickle a function

    This has to be a named function rather than a lambda because
    pickle needs to find it.
    """
    module = importlib.import_module(moduleName)
    return getattr(module, funcName)


def pickleFunction(function):
    """Pickle a function

    This assumes that we can recreate the function object by grabbing
    it from the proper module. This may be violated if the function
    is a lambda or in __main__. In that case, I recommend recasting
    the function as an object with a __call__ method.

    Another problematic case may be a wrapped (e.g., decorated) method
    in a class: the 'method' is then a function, and recreating it is
    not as easy as we assume here.
    """
    moduleName = function.__module__
    funcName = function.__name__
    return unpickleFunction, (moduleName, funcName)


copyreg.pickle(types.FunctionType, pickleFunction)
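
# Hedged illustration (assumed, not from the original source): pickleFunction
# cannot reconstruct a lambda or anything defined only in __main__, so the
# recommended workaround is a callable object that pickles by class reference.
class _Doubler(object):
    """Picklable replacement for 'lambda x: 2 * x'."""

    def __call__(self, x):
        return 2 * x


def _demoFunctionPickle():
    func = pickle.loads(pickle.dumps(_Doubler()))
    return func(3) == 6  # True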
_batchType = "unknown"


def getBatchType():
    """Return a string giving the type of batch system in use"""
    return _batchType


def setBatchType(batchType):
    """Set the type of batch system in use"""
    global _batchType
    _batchType = batchType
def abortOnError(func):
    """Function decorator to throw an MPI abort on an unhandled exception"""
    @wraps(func)
    def wrapper(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except Exception as e:
            sys.stderr.write("%s on %s in %s: %s\n" % (type(e).__name__, NODE, func.__name__, e))
            traceback.print_exc(file=sys.stderr)
            mpi.COMM_WORLD.Abort(1)
    return wrapper
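
# Hypothetical usage sketch: wrapping a per-node entry point so that an
# uncaught exception aborts the whole MPI job instead of leaving the other
# ranks deadlocked. '_processOne' is illustrative only.
@abortOnError
def _processOne(data):
    return data["value"]  # a KeyError here triggers MPI_Abort on this node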
class PickleHolder(object):  # class name assumed; not preserved in this listing
    """Singleton to hold what's about to be pickled.

    We hold onto the object in case there's trouble pickling,
    so we can figure out what class in particular is causing
    the trouble.

    The held object is in the 'obj' attribute.

    Here we use the __new__-style singleton pattern, because
    we specifically want __init__ to be called each time.
    """

    _instance = None

    def __new__(cls, hold=None):
        if cls._instance is None:
            cls._instance = super(PickleHolder, cls).__new__(cls)
            cls._instance.obj = None
        return cls._instance

    def __init__(self, hold=None):
        """Hold onto new object"""
        if hold is not None:
            self.obj = hold

    def __enter__(self):
        return self

    def __exit__(self, excType, excVal, tb):
        """Drop held object if there were no problems"""
        if excType is None:
            self.obj = None


def guessPickleObj():  # function name assumed; not preserved in this listing
    """Try to guess what's not pickling after an exception

    This tends to work if the problem is coming from the
    regular pickle module. If it's coming from the bowels
    of mpi4py, there's not much that can be done.
    """
    excType, excValue, tb = sys.exc_info()
    # Walk to the end of the traceback and look for a local variable 'obj'
    stack = []
    while tb:
        stack.append(tb)
        tb = tb.tb_next
    try:
        return stack[-2].tb_frame.f_locals["obj"]
    except Exception:
        return None
@contextmanager
def pickleSniffer(abort=False):
    """Context manager to sniff out pickle problems

    If there's a pickle error, you're normally told what the problem
    class is. However, all SWIG objects are reported as "SwigPyObject".
    In order to figure out which actual SWIG-ed class is causing
    problems, we need to go digging.

    Use like this:

        with pickleSniffer():
            someOperationInvolvingPickle()

    If 'abort' is True, will call MPI abort in the event of problems.
    """
    try:
        yield
    except Exception as e:
        if "SwigPyObject" not in str(e) or "pickle" not in str(e):
            raise
        sys.stderr.write("Pickling error detected: %s\n" % e)
        traceback.print_exc(file=sys.stderr)
        obj = guessPickleObj()
        heldObj = PickleHolder().obj
        if obj is None and heldObj is not None:
            # Try pickling the held object directly, in the hope of a more
            # informative error from the regular pickle module
            try:
                pickle.dumps(heldObj)
                sys.stderr.write("Hmmm, that's strange: no problem with pickling held object?!?!\n")
            except Exception:
                obj = guessPickleObj()
        if obj is None:
            sys.stderr.write("Unable to determine class causing pickle problems.\n")
        else:
            sys.stderr.write("Object that could not be pickled: %s\n" % obj)
        if abort:
            mpi.COMM_WORLD.Abort(1)
def catchPicklingError(func):
    """Function decorator to catch errors in pickling and print something useful"""
    @wraps(func)
    def wrapper(*args, **kwargs):
        with pickleSniffer(abort=True):
            return func(*args, **kwargs)
    return wrapper
class Comm(mpi.Intracomm):
    """Wrapper to mpi4py's MPI.Intracomm class to avoid busy-waiting.

    As suggested by Lisandro Dalcin at:
    * http://code.google.com/p/mpi4py/issues/detail?id=4 and
    * https://groups.google.com/forum/?fromgroups=#!topic/mpi4py/nArVuMXyyZI
    """

    def __new__(cls, comm=mpi.COMM_WORLD, recvSleep=0.1, barrierSleep=0.1):
        """!Construct an MPI.Comm wrapper

        @param comm            MPI.Intracomm to wrap a duplicate of
        @param recvSleep       Sleep time (seconds) for recv()
        @param barrierSleep    Sleep time (seconds) for Barrier()
        """
        self = super(Comm, cls).__new__(cls, comm.Dup())
        self._barrierComm = None  # Duplicate communicator, created on demand for Barrier()
        self._recvSleep = recvSleep
        self._barrierSleep = barrierSleep
        return self

    def recv(self, obj=None, source=0, tag=0, status=None):
        """Version of comm.recv() that doesn't busy-wait"""
        sts = mpi.Status()
        while not self.Iprobe(source=source, tag=tag, status=sts):
            time.sleep(self._recvSleep)
        return super(Comm, self).recv(buf=obj, source=sts.source, tag=sts.tag, status=status)

    def send(self, obj=None, *args, **kwargs):
        return super(Comm, self).send(obj, *args, **kwargs)

    def _checkBarrierComm(self):
        """Ensure the duplicate communicator is available"""
        if self._barrierComm is None:
            self._barrierComm = self.Dup()

    def Barrier(self, tag=0):
        """Version of comm.Barrier() that doesn't busy-wait

        A duplicate communicator is used so as not to interfere with the user's own communications.
        """
        # Dissemination-style barrier over the duplicate communicator, polling
        # with Iprobe and sleeping between checks (following the suggestion
        # referenced in the class docstring); the surviving dst/src lines below
        # are from the original, the surrounding loop is a reconstruction.
        self._checkBarrierComm()
        size = self._barrierComm.Get_size()
        if size == 1:
            return
        rank = self._barrierComm.Get_rank()
        mask = 1
        while mask < size:
            dst = (rank + mask) % size
            src = (rank - mask + size) % size
            req = self._barrierComm.isend(None, dst, tag)
            while not self._barrierComm.Iprobe(src, tag):
                time.sleep(self._barrierSleep)
            self._barrierComm.recv(None, src, tag)
            req.Wait()
            mask <<= 1

    def broadcast(self, value, root=0):
        return super(Comm, self).bcast(value, root=root)

    def scatter(self, dataList, root=0, tag=0):
        """Scatter data across the nodes

        The default version apparently pickles the entire 'dataList',
        which can cause errors if the pickle size grows over 2^31 bytes
        due to fundamental problems with pickle in python 2. Instead,
        we send the data to each slave node in turn; this reduces the
        pickle size.

        @param dataList  List of data to distribute; one per node
        @param root  Index of root node
        @param tag  Message tag (integer)
        @return  Data for this node
        """
        if self.Get_rank() == root:
            for rank, data in enumerate(dataList):
                if rank == root:
                    continue
                self.send(data, rank, tag=tag)
            return dataList[root]
        return self.recv(source=root, tag=tag)

    def Free(self):
        if self._barrierComm is not None:
            self._barrierComm.Free()
        super(Comm, self).Free()
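
# Minimal standalone sketch (assumptions: mpi4py available, run under mpiexec;
# not part of the original module) of the polling idea Comm.recv() is built on:
# probe with Iprobe and sleep between checks rather than blocking inside MPI.
def _politeRecv(comm, source=0, tag=0, sleep=0.1):
    status = mpi.Status()
    while not comm.Iprobe(source=source, tag=tag, status=status):
        time.sleep(sleep)
    return comm.recv(source=status.source, tag=status.tag)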
class NoOp(object):
    """Object to signal no operation"""
    pass


class Tags(object):
    """Provides tag numbers by symbolic name in attributes"""

    def __init__(self, *nameList):
        self._nameList = nameList
        for i, name in enumerate(nameList, 1):
            setattr(self, name, i)

    def __repr__(self):
        return self.__class__.__name__ + repr(self._nameList)

    def __reduce__(self):
        return self.__class__, tuple(self._nameList)
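
# Hedged usage sketch: Tags simply numbers its names from 1, so message tags
# can be referred to symbolically.
def _demoTags():
    tags = Tags("request", "work")
    return (tags.request, tags.work) == (1, 2)  # True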
class Cache(Struct):  # base class name assumed; an attribute-container type is expected here
    """An object to hold stuff between different scatter calls

    Includes a communicator by default, to allow intercommunication
    between the nodes.
    """

    def __init__(self, comm):
        super(Cache, self).__init__(comm=comm)
class SingletonMeta(type):
    """!Metaclass to produce a singleton

    Doing a singleton mixin without a metaclass (via __new__) is
    annoying because the user has to name his __init__ something else
    (otherwise it's called every time, which undoes any changes).
    Using this metaclass, the class's __init__ is called exactly once.

    Because this is a metaclass, note that:
    * "self" here is the class
    * "__init__" is making the class (it's like the body of the
      class definition).
    * "__call__" is making an instance of the class (it's like
      "__new__" in the class).
    """

    def __init__(self, name, bases, dict_):
        super(SingletonMeta, self).__init__(name, bases, dict_)
        self._instance = None

    def __call__(self, *args, **kwargs):
        if self._instance is None:
            self._instance = super(SingletonMeta, self).__call__(*args, **kwargs)
        return self._instance
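
# Hedged usage sketch: a class built with this metaclass (via with_metaclass,
# imported above) is constructed once; later calls return the same instance.
class _Registry(with_metaclass(SingletonMeta, object)):
    def __init__(self):
        self.values = {}


def _demoSingleton():
    return _Registry() is _Registry()  # True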
class Debugger(with_metaclass(SingletonMeta, object)):
    """Debug logger singleton

    Disabled by default; to enable, do: 'Debugger().enabled = True'
    You can also redirect the output by changing the 'out' attribute.
    """

    def __init__(self):
        self.enabled = False
        self.out = sys.stderr

    def log(self, source, msg, *args):
        """Log message

        The 'args' are only stringified if we're enabled.

        @param source: name of source
        @param msg: message to write
        @param args: additional outputs to append to message
        """
        if self.enabled:
            self.out.write("%s: %s" % (source, msg))
            for arg in args:
                self.out.write(" %s" % arg)
            self.out.write("\n")
class ReductionThread(threading.Thread):  # class name assumed; not preserved in this listing
    """Thread to do reduction of results

    "A thread?", you say. "What about the python GIL?"
    Well, because we 'sleep' when there's no immediate response from the
    slaves, that gives the thread a chance to fire; and threads are easier
    to manage (e.g., shared memory) than a process.
    """

    def __init__(self, reducer, initial=None, sleep=0.1):
        """Constructor

        The 'reducer' should take two values and return a single
        (reduced) value.

        @param reducer  Function that does the reducing
        @param initial  Initial value for reduction, or None
        @param sleep    Time to sleep when there's nothing to do (sec)
        """
        threading.Thread.__init__(self, name="reducer")
        self._queue = []                   # Data waiting to be reduced
        self._lock = threading.Lock()      # Guards the queue
        self._reducer = reducer
        self._sleep = sleep
        self._result = initial             # Running result of the reduction
        self._done = threading.Event()     # Signals that no more data will arrive

    def _doReduce(self):
        """Do the actual work

        We pull the data out of the queue and release the lock before
        operating on it. This stops us from blocking the addition of
        new data to the queue.
        """
        with self._lock:
            queue = self._queue
            self._queue = []
        for data in queue:
            self._result = self._reducer(self._result, data) if self._result is not None else data

    def run(self):
        """Thread entry point, called by Thread.start"""
        while not self._done.is_set():
            self._doReduce()
            time.sleep(self._sleep)
        self._doReduce()

    def add(self, data):
        """Add data to the queue to be reduced"""
        with self._lock:
            self._queue.append(data)

    def join(self):
        """Complete the thread

        Unlike Thread.join (which always returns 'None'), we return the result
        of the reduction.
        """
        self._done.set()
        threading.Thread.join(self)
        return self._result
class PoolNode(with_metaclass(SingletonMeta, object)):  # class name assumed; base for PoolMaster and PoolSlave below
    """Node in MPI process pool

    WARNING: You should not let a pool instance hang around at program
    termination, as the garbage collection behaves differently, and may
    cause a segmentation fault (signal 11).
    """

    def __init__(self, comm=None, root=0):
        if comm is None:
            comm = Comm()
        self.comm = comm
        self.rank = self.comm.rank
        self.root = root
        self.size = self.comm.size
        self.node = NODE
        self.debugger = Debugger()
        self._cache = {}
        self._store = {}

    def _getCache(self, context, index):
        """Retrieve cache for particular data

        The cache is updated with the contents of the store.
        """
        if context not in self._cache:
            self._cache[context] = {}
        if context not in self._store:
            self._store[context] = {}
        cache = self._cache[context]
        store = self._store[context]
        if index not in cache:
            cache[index] = Cache(self.comm)
        cache[index].__dict__.update(store)
        return cache[index]

    def log(self, msg, *args):
        """Log a debugging message"""
        self.debugger.log(self.node, msg, *args)

    def _processQueue(self, context, func, queue, *args, **kwargs):
        """!Process a queue of data

        The queue consists of a list of (index, data) tuples,
        where the index maps to the cache, and the data is
        passed to the 'func'.

        The 'func' signature should be func(cache, data, *args, **kwargs)
        if 'context' is non-None; otherwise func(data, *args, **kwargs).

        @param context: Namespace for cache; None to not use cache
        @param func: function for slaves to run
        @param queue: List of (index,data) tuples to process
        @param args: Constant arguments
        @param kwargs: Keyword arguments
        @return list of results from applying 'func' to dataList
        """
        return self._reduceQueue(context, None, func, queue, *args, **kwargs)
    def _reduceQueue(self, context, reducer, func, queue, *args, **kwargs):
        """!Reduce a queue of data

        The queue consists of a list of (index, data) tuples,
        where the index maps to the cache, and the data is
        passed to the 'func', the output of which is reduced
        using the 'reducer' (if non-None).

        The 'func' signature should be func(cache, data, *args, **kwargs)
        if 'context' is non-None; otherwise func(data, *args, **kwargs).

        The 'reducer' signature should be reducer(old, new). If the 'reducer'
        is None, then we will return the full list of results

        @param context: Namespace for cache; None to not use cache
        @param reducer: function for master to run to reduce slave results; or None
        @param func: function for slaves to run
        @param queue: List of (index,data) tuples to process
        @param args: Constant arguments
        @param kwargs: Keyword arguments
        @return reduced result (if reducer is non-None) or list of results
            from applying 'func' to dataList
        """
        if context is not None:
            resultList = [func(self._getCache(context, i), data, *args, **kwargs) for i, data in queue]
        else:
            resultList = [func(data, *args, **kwargs) for i, data in queue]
        if reducer is None:
            return resultList
        if len(resultList) == 0:
            return None
        output = resultList.pop(0)
        for result in resultList:
            output = reducer(output, result)
        return output
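
    # Hedged illustration (hypothetical names, not part of the module): the kind
    # of 'func' and 'reducer' that _processQueue/_reduceQueue expect when a
    # cache context is in use:
    #
    #     def sumOfSquares(cache, data):
    #         cache.lastData = data      # the Cache persists between calls on a node
    #         return data * data
    #
    #     def addUp(old, new):
    #         return old + new           # reducer(old, new) -> one reduced value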
    def storeSet(self, context, **kwargs):
        """Set values in store for a particular context"""
        self.log("storing", context, kwargs)
        if context not in self._store:
            self._store[context] = {}
        for name, value in kwargs.items():
            self._store[context][name] = value

    def storeDel(self, context, *nameList):
        """Delete value in store for a particular context"""
        self.log("deleting from store", context, nameList)
        if context not in self._store:
            raise KeyError("No such context: %s" % context)
        for name in nameList:
            del self._store[context][name]

    def storeClear(self, context):
        """Clear stored data for a particular context"""
        self.log("clearing store", context)
        if context not in self._store:
            raise KeyError("No such context: %s" % context)
        self._store[context] = {}

    def cacheClear(self, context):
        """Reset cache for a particular context"""
        self.log("clearing cache", context)
        if context not in self._cache:
            return
        self._cache[context] = {}

    def cacheList(self, context):
        """List contents of cache"""
        cache = self._cache[context] if context in self._cache else {}
        sys.stderr.write("Cache on %s (%s): %s\n" % (self.node, context, cache))

    def storeList(self, context):
        """List contents of store for a particular context"""
        if context not in self._store:
            raise KeyError("No such context: %s" % context)
        sys.stderr.write("Store on %s (%s): %s\n" % (self.node, context, self._store[context]))
class PoolMaster(PoolNode):
    """Master node instance of MPI process pool

    Only the master node should instantiate this.

    WARNING: You should not let a pool instance hang around at program
    termination, as the garbage collection behaves differently, and may
    cause a segmentation fault (signal 11).
    """

    def __init__(self, *args, **kwargs):
        super(PoolMaster, self).__init__(*args, **kwargs)
        assert self.root == self.rank, "This is the master node"

    def __del__(self):
        """Ensure slaves exit when we're done"""
        self.exit()

    def log(self, msg, *args):
        """Log a debugging message"""
        super(PoolMaster, self).log(msg, *args)

    def command(self, cmd):
        """Send command to slaves

        A command is the name of the PoolSlave method they should run.
        """
        self.log("command", cmd)
        self.comm.broadcast(cmd, root=self.root)
    def map(self, context, func, dataList, *args, **kwargs):
        """!Scatter work to slaves and gather the results

        Work is distributed dynamically, so that slaves that finish
        quickly will receive more work.

        Each slave applies the function to the data they're provided.
        The slaves may optionally be passed a cache instance, which
        they can use to store data for subsequent executions (to ensure
        subsequent data is distributed in the same pattern as before,
        use the 'mapToPrevious' method). The cache also contains
        data that has been stored on the slaves.

        The 'func' signature should be func(cache, data, *args, **kwargs)
        if 'context' is non-None; otherwise func(data, *args, **kwargs).

        @param context: Namespace for cache
        @param func: function for slaves to run; must be picklable
        @param dataList: List of data to distribute to slaves; must be picklable
        @param args: List of constant arguments
        @param kwargs: Dict of constant arguments
        @return list of results from applying 'func' to dataList
        """
        return self.reduce(context, None, func, dataList, *args, **kwargs)
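
    # Hedged usage sketch (requires an MPI launch, e.g. "mpiexec -n 4 python script.py"):
    #
    #     pool = startPool()                                  # defined at the end of this module
    #     results = pool.map("demo", sumOfSquares, [1, 2, 3, 4])
    #     # results == [1, 4, 9, 16], ordered like the input dataList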
    def reduce(self, context, reducer, func, dataList, *args, **kwargs):
        """!Scatter work to slaves and reduce the results

        Work is distributed dynamically, so that slaves that finish
        quickly will receive more work.

        Each slave applies the function to the data they're provided.
        The slaves may optionally be passed a cache instance, which
        they can use to store data for subsequent executions (to ensure
        subsequent data is distributed in the same pattern as before,
        use the 'mapToPrevious' method). The cache also contains
        data that has been stored on the slaves.

        The 'func' signature should be func(cache, data, *args, **kwargs)
        if 'context' is non-None; otherwise func(data, *args, **kwargs).

        The 'reducer' signature should be reducer(old, new). If the 'reducer'
        is None, then we will return the full list of results

        @param context: Namespace for cache
        @param reducer: function for master to run to reduce slave results; or None
        @param func: function for slaves to run; must be picklable
        @param dataList: List of data to distribute to slaves; must be picklable
        @param args: List of constant arguments
        @param kwargs: Dict of constant arguments
        @return reduced result (if reducer is non-None) or list of results
            from applying 'func' to dataList
        """
        tags = Tags("request", "work")
        num = len(dataList)
        if self.size == 1 or num <= 1:
            # Everything can be done here
            return self._reduceQueue(context, reducer, func, list(zip(list(range(num)), dataList)),
                                     *args, **kwargs)
        if self.size == num:
            # Dynamic distribution buys us nothing when there is exactly one job per node
            return self.reduceNoBalance(context, reducer, func, dataList, *args, **kwargs)

        self.command("reduce")

        # Send function
        self.comm.broadcast((tags, func, reducer, args, kwargs, context), root=self.root)

        # Parcel out first set of data
        queue = list(zip(range(num), dataList))  # index, data
        output = [None]*num if reducer is None else None
        initial = [None if i == self.rank else queue.pop(0) if queue else NoOp() for
                   i in range(self.size)]
        pending = min(num, self.size - 1)
        self.log("scatter initial jobs")
        self.comm.scatter(initial, root=self.rank)

        while queue or pending > 0:
            status = mpi.Status()
            report = self.comm.recv(status=status, tag=tags.request, source=mpi.ANY_SOURCE)
            source = status.source
            self.log("gather from slave", source)
            if reducer is None:
                index, result = report
                output[index] = result

            if queue:
                job = queue.pop(0)
                self.log("send job to slave", job[0], source)
            else:
                job = NoOp()
                pending -= 1
            self.comm.send(job, source, tag=tags.work)

        if reducer is not None:
            results = self.comm.gather(None, root=self.root)
            output = None
            for rank in range(self.size):
                if rank == self.root:
                    continue
                output = reducer(output, results[rank]) if output is not None else results[rank]

        return output
    def mapNoBalance(self, context, func, dataList, *args, **kwargs):
        """!Scatter work to slaves and gather the results

        Work is distributed statically, so there is no load balancing.

        Each slave applies the function to the data they're provided.
        The slaves may optionally be passed a cache instance, which
        they can store data in for subsequent executions (to ensure
        subsequent data is distributed in the same pattern as before,
        use the 'mapToPrevious' method). The cache also contains
        data that has been stored on the slaves.

        The 'func' signature should be func(cache, data, *args, **kwargs)
        if 'context' is true; otherwise func(data, *args, **kwargs).

        @param context: Namespace for cache
        @param func: function for slaves to run; must be picklable
        @param dataList: List of data to distribute to slaves; must be picklable
        @param args: List of constant arguments
        @param kwargs: Dict of constant arguments
        @return list of results from applying 'func' to dataList
        """
        return self.reduceNoBalance(context, None, func, dataList, *args, **kwargs)
    def reduceNoBalance(self, context, reducer, func, dataList, *args, **kwargs):
        """!Scatter work to slaves and reduce the results

        Work is distributed statically, so there is no load balancing.

        Each slave applies the function to the data they're provided.
        The slaves may optionally be passed a cache instance, which
        they can store data in for subsequent executions (to ensure
        subsequent data is distributed in the same pattern as before,
        use the 'mapToPrevious' method). The cache also contains
        data that has been stored on the slaves.

        The 'func' signature should be func(cache, data, *args, **kwargs)
        if 'context' is true; otherwise func(data, *args, **kwargs).

        The 'reducer' signature should be reducer(old, new). If the 'reducer'
        is None, then we will return the full list of results

        @param context: Namespace for cache
        @param reducer: function for master to run to reduce slave results; or None
        @param func: function for slaves to run; must be picklable
        @param dataList: List of data to distribute to slaves; must be picklable
        @param args: List of constant arguments
        @param kwargs: Dict of constant arguments
        @return reduced result (if reducer is non-None) or list of results
            from applying 'func' to dataList
        """
        tags = Tags("result", "work")
        num = len(dataList)
        if self.size == 1 or num <= 1:
            return self._reduceQueue(context, reducer, func, list(zip(range(num), dataList)), *args, **kwargs)

        self.command("mapNoBalance")

        # Send function
        self.comm.broadcast((tags, func, args, kwargs, context), root=self.root)

        # Divide up the jobs; try to give the root the least to do, so it can manage
        queue = list(zip(range(num), dataList))  # index, data
        if num < self.size:
            distribution = [[queue[i]] for i in range(num)]
            distribution.insert(self.rank, [])
            for i in range(num, self.size - 1):
                distribution.append([])
        elif num % self.size == 0:
            numEach = num//self.size
            distribution = [queue[i*numEach:(i+1)*numEach] for i in range(self.size)]
        else:
            numEach = num//self.size
            distribution = [queue[i*numEach:(i+1)*numEach] for i in range(self.size)]
            for i in range(numEach*self.size, num):
                distribution[(self.rank + 1) % self.size].append(queue[i])
            # The block division above is superseded: deal all jobs out round-robin,
            # starting with the node after the root
            distribution = list([] for i in range(self.size))
            for i, job in enumerate(queue, self.rank + 1):
                distribution[i % self.size].append(job)

        # Distribute jobs
        for source in range(self.size):
            if source == self.rank:
                continue
            self.log("send jobs to ", source)
            self.comm.send(distribution[source], source, tag=tags.work)

        # Execute our own jobs
        output = [None]*num if reducer is None else None

        def ingestResults(output, nodeResults, distList):
            if reducer is None:
                for i, result in enumerate(nodeResults):
                    index = distList[i][0]
                    output[index] = result
                return output
            if output is None:
                output = nodeResults.pop(0)
            for result in nodeResults:
                output = reducer(output, result)
            return output

        ourResults = self._processQueue(context, func, distribution[self.rank], *args, **kwargs)
        output = ingestResults(output, ourResults, distribution[self.rank])

        # Collect results from the slaves
        pending = self.size - 1
        while pending > 0:
            status = mpi.Status()
            slaveResults = self.comm.recv(status=status, tag=tags.result, source=mpi.ANY_SOURCE)
            source = status.source
            self.log("gather from slave", source)
            output = ingestResults(output, slaveResults, distribution[source])
            pending -= 1

        return output
    def mapToPrevious(self, context, func, dataList, *args, **kwargs):
        """!Scatter work to the same target as before

        Work is distributed so that each slave handles the same
        indices in the dataList as when 'map' was called.
        This allows the right data to go to the right cache.

        It is assumed that the dataList is the same length as when it was
        passed to 'map'.

        The 'func' signature should be func(cache, data, *args, **kwargs).

        @param context: Namespace for cache
        @param func: function for slaves to run; must be picklable
        @param dataList: List of data to distribute to slaves; must be picklable
        @param args: List of constant arguments
        @param kwargs: Dict of constant arguments
        @return list of results from applying 'func' to dataList
        """
        return self.reduceToPrevious(context, None, func, dataList, *args, **kwargs)
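
    # Hedged sketch of the cache-aware pattern described above (function names
    # are hypothetical): a first 'map' call establishes which node handled each
    # item, and 'mapToPrevious' sends follow-up work for the same items back to
    # the same nodes, where the per-item Cache is still available.
    #
    #     pool = startPool()
    #     pool.storeSet("calib", scale=2.0)                    # visible via every cache
    #     first = pool.map("calib", measure, dataList)         # fixes the distribution
    #     second = pool.mapToPrevious("calib", refine, dataList)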
    def reduceToPrevious(self, context, reducer, func, dataList, *args, **kwargs):
        """!Reduction where work goes to the same target as before

        Work is distributed so that each slave handles the same
        indices in the dataList as when 'map' was called.
        This allows the right data to go to the right cache.

        It is assumed that the dataList is the same length as when it was
        passed to 'map'.

        The 'func' signature should be func(cache, data, *args, **kwargs).

        The 'reducer' signature should be reducer(old, new). If the 'reducer'
        is None, then we will return the full list of results

        @param context: Namespace for cache
        @param reducer: function for master to run to reduce slave results; or None
        @param func: function for slaves to run; must be picklable
        @param dataList: List of data to distribute to slaves; must be picklable
        @param args: List of constant arguments
        @param kwargs: Dict of constant arguments
        @return reduced result (if reducer is non-None) or list of results
            from applying 'func' to dataList
        """
        if context is None:
            raise ValueError("context must be set to map to same nodes as previous context")
        tags = Tags("result", "work")
        num = len(dataList)
        if self.size == 1 or num <= 1:
            # Everything can be done here
            return self._reduceQueue(context, reducer, func, list(zip(range(num), dataList)), *args, **kwargs)
        if self.size == num:
            return self.reduceNoBalance(context, reducer, func, dataList, *args, **kwargs)

        self.command("mapToPrevious")

        # Send function
        self.comm.broadcast((tags, func, args, kwargs, context), root=self.root)

        requestList = self.comm.gather(None, root=self.root)
        self.log("listen", requestList)
        initial = [dataList[index] if (index is not None and index >= 0) else None for index in requestList]
        self.log("scatter jobs", initial)
        self.comm.scatter(initial, root=self.root)
        pending = min(num, self.size - 1)

        if reducer is None:
            output = [None]*num
        else:
            thread = ReductionThread(reducer)
            thread.start()
            output = None

        while pending > 0:
            status = mpi.Status()
            index, result, nextIndex = self.comm.recv(status=status, tag=tags.result, source=mpi.ANY_SOURCE)
            source = status.source
            self.log("gather from slave", source)
            if reducer is None:
                output[index] = result
            else:
                thread.add(result)

            if nextIndex >= 0:
                job = dataList[nextIndex]
                self.log("send job to slave", source)
                self.comm.send(job, source, tag=tags.work)
            else:
                pending -= 1

            self.log("waiting on", pending)

        if reducer is not None:
            output = thread.join()

        return output
    def storeSet(self, context, **kwargs):
        """!Store data on slave for a particular context

        The data is made available to functions through the cache. The
        stored data differs from the cache in that it is identical for
        all operations, whereas the cache is specific to the data being
        operated upon.

        @param context: namespace for store
        @param kwargs: dict of name=value pairs
        """
        super(PoolMaster, self).storeSet(context, **kwargs)
        self.command("storeSet")
        self.log("give data")
        self.comm.broadcast((context, kwargs), root=self.root)
    def storeDel(self, context, *nameList):
        """Delete stored data on slave for a particular context"""
        super(PoolMaster, self).storeDel(context, *nameList)
        self.command("storeDel")
        self.log("tell names")
        self.comm.broadcast((context, nameList), root=self.root)

    def storeClear(self, context):
        """Reset data store for a particular context on master and slaves"""
        super(PoolMaster, self).storeClear(context)
        self.command("storeClear")
        self.comm.broadcast(context, root=self.root)

    def cacheClear(self, context):
        """Reset cache for a particular context on master and slaves"""
        super(PoolMaster, self).cacheClear(context)
        self.command("cacheClear")
        self.comm.broadcast(context, root=self.root)

    def cacheList(self, context):
        """List cache contents for a particular context on master and slaves"""
        super(PoolMaster, self).cacheList(context)
        self.command("cacheList")
        self.comm.broadcast(context, root=self.root)

    def storeList(self, context):
        """List store contents for a particular context on master and slaves"""
        super(PoolMaster, self).storeList(context)
        self.command("storeList")
        self.comm.broadcast(context, root=self.root)
    def exit(self):
        """Command slaves to exit"""
        self.command("exit")


class PoolSlave(PoolNode):
    """Slave node instance of MPI process pool"""

    def log(self, msg, *args):
        """Log a debugging message"""
        assert self.rank != self.root, "This is not the master node."
        super(PoolSlave, self).log(msg, *args)

    def run(self):
        """Serve commands of master node

        Slave accepts commands, which are the names of methods to execute.
        This exits when a command returns a true value.
        """
        menu = dict((cmd, getattr(self, cmd)) for cmd in ("reduce", "mapNoBalance", "mapToPrevious",
                                                          "storeSet", "storeDel", "storeClear", "storeList",
                                                          "cacheList", "cacheClear", "exit",))
        self.log("waiting for command from", self.root)
        command = self.comm.broadcast(None, root=self.root)
        self.log("command", command)
        while not menu[command]():
            self.log("waiting for command from", self.root)
            command = self.comm.broadcast(None, root=self.root)
            self.log("command", command)
    def reduce(self):
        """Reduce scattered data and return results"""
        self.log("waiting for instruction")
        tags, func, reducer, args, kwargs, context = self.comm.broadcast(None, root=self.root)
        self.log("waiting for job")
        job = self.comm.scatter(None, root=self.root)

        out = None  # Running reduction result
        while not isinstance(job, NoOp):
            index, data = job
            self.log("running job")
            result = self._processQueue(context, func, [(index, data)], *args, **kwargs)[0]
            if reducer is None:
                report = (index, result)
            else:
                report = None
                out = reducer(out, result) if out is not None else result
            self.comm.send(report, self.root, tag=tags.request)
            self.log("waiting for job")
            job = self.comm.recv(tag=tags.work, source=self.root)

        if reducer is not None:
            self.comm.gather(out, root=self.root)
    def mapNoBalance(self):
        """Process bulk scattered data and return results"""
        self.log("waiting for instruction")
        tags, func, args, kwargs, context = self.comm.broadcast(None, root=self.root)
        self.log("waiting for job")
        queue = self.comm.recv(tag=tags.work, source=self.root)

        resultList = []
        for index, data in queue:
            self.log("running job", index)
            result = self._processQueue(context, func, [(index, data)], *args, **kwargs)[0]
            resultList.append(result)

        self.comm.send(resultList, self.root, tag=tags.result)
    def mapToPrevious(self):
        """Process the same scattered data processed previously"""
        self.log("waiting for instruction")
        tags, func, args, kwargs, context = self.comm.broadcast(None, root=self.root)
        queue = list(self._cache[context].keys()) if context in self._cache else None
        index = queue.pop(0) if queue else -1
        self.log("request job", index)
        self.comm.gather(index, root=self.root)
        self.log("waiting for job")
        data = self.comm.scatter(None, root=self.root)

        while index >= 0:
            self.log("running job")
            result = func(self._getCache(context, index), data, *args, **kwargs)
            self.log("pending", queue)
            nextIndex = queue.pop(0) if queue else -1
            self.comm.send((index, result, nextIndex), self.root, tag=tags.result)
            index = nextIndex
            if index >= 0:
                data = self.comm.recv(tag=tags.work, source=self.root)
    def storeSet(self):
        """Set value in store"""
        context, kwargs = self.comm.broadcast(None, root=self.root)
        super(PoolSlave, self).storeSet(context, **kwargs)

    def storeDel(self):
        """Delete value in store"""
        context, nameList = self.comm.broadcast(None, root=self.root)
        super(PoolSlave, self).storeDel(context, *nameList)

    def storeClear(self):
        """Reset data store"""
        context = self.comm.broadcast(None, root=self.root)
        super(PoolSlave, self).storeClear(context)

    def cacheClear(self):
        """Reset cache"""
        context = self.comm.broadcast(None, root=self.root)
        super(PoolSlave, self).cacheClear(context)

    def cacheList(self):
        """List cache contents"""
        context = self.comm.broadcast(None, root=self.root)
        super(PoolSlave, self).cacheList(context)

    def storeList(self):
        """List store contents"""
        context = self.comm.broadcast(None, root=self.root)
        super(PoolSlave, self).storeList(context)
    def exit(self):
        """Allow exit from loop in 'run'"""
        return True


class PoolWrapperMeta(type):
    """Metaclass for PoolWrapper to add methods pointing to PoolMaster

    The 'context' is automatically supplied to these methods as the first argument.
    """

    def __call__(self, context="default"):
        instance = super(PoolWrapperMeta, self).__call__(context)
        pool = PoolMaster._instance
        for name in ("map", "mapNoBalance", "mapToPrevious",
                     "reduce", "reduceNoBalance", "reduceToPrevious",
                     "storeSet", "storeDel", "storeClear", "storeList",
                     "cacheList", "cacheClear",):
            setattr(instance, name, partial(getattr(pool, name), context))
        return instance
class PoolWrapper(with_metaclass(PoolWrapperMeta, object)):
    """Wrap PoolMaster to automatically provide context"""

    def __init__(self, context="default"):
        self._pool = PoolMaster._instance
        self._context = context

    def __getattr__(self, name):
        return getattr(self._pool, name)
class Pool(PoolWrapper):
    """Process pool

    Use this class to automatically provide 'context' to
    the PoolMaster class. If you want to call functions
    that don't take a 'cache' object, use the PoolMaster
    class directly, and specify context=None.
    """
    pass


def startPool(comm=None, root=0, killSlaves=True):
    """!Start a process pool.

    Returns a PoolMaster object for the master node.
    Slave nodes are run and then optionally killed.

    If you elect not to kill the slaves, note that they
    will emerge at the point this function was called,
    which is likely very different from the point the
    master is at, so it will likely be necessary to put
    in some rank dependent code (e.g., look at the 'rank'
    attribute of the returned pools).

    Note that the pool objects should be deleted (either
    by going out of scope or explicit 'del') before program
    termination to avoid a segmentation fault.

    @param comm: MPI communicator
    @param root: Rank of root/master node
    @param killSlaves: Kill slaves on completion?
    """
    if comm is None:
        comm = Comm()
    if comm.rank == root:
        return PoolMaster(comm, root=root)
    slave = PoolSlave(comm, root=root)
    slave.run()
    if killSlaves:
        del slave  # Required to prevent segmentation fault on exit
        sys.exit()
    return slave
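
# Hedged end-to-end sketch (assumptions: run under MPI, e.g.
# "mpiexec -n 4 python demo.py"; 'square' and the flow below are illustrative):
#
#     def square(cache, data):
#         return data * data
#
#     if __name__ == "__main__":
#         pool = startPool()                       # slaves block in run() here
#         results = pool.map("demo", square, list(range(10)))
#         print(results)                           # [0, 1, 4, ..., 81] on the master
#         pool.exit()                              # tell the slaves to quit
#         del pool                                 # avoid the segfault-at-exit warning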
# Index of functions and methods:
#   def _reduceQueue(self, context, reducer, func, queue, *args, **kwargs) - Reduce a queue of data.
#   def __new__(cls, hold=None)
#   def reduceToPrevious(self, context, reducer, func, dataList, *args, **kwargs) - Reduction where work goes to the same target as before.
#   def log(self, source, msg, *args) - Log message.
#   def pickleInstanceMethod(method)
#   def send(self, obj=None, *args, **kwargs)
#   def storeSet(self, context, **kwargs)
#   def unpickleFunction(moduleName, funcName)
#   def storeDel(self, context, *nameList)
#   def __init__(self, *args, **kwargs)
#   def __init__(self, reducer, initial=None, sleep=0.1) - Constructor.
#   def _processQueue(self, context, func, queue, *args, **kwargs) - Process a queue of data.
#   def cacheList(self, context)
#   def __init__(self, hold=None)
#   def reduce(self, context, reducer, func, dataList, *args, **kwargs) - Scatter work to slaves and reduce the results.
#   def broadcast(self, value, root=0)
#   def mapToPrevious(self, context, func, dataList, *args, **kwargs) - Scatter work to the same target as before.
#   def startPool(comm=None, root=0, killSlaves=True) - Start a process pool.
#   def unpickleInstanceMethod(obj, name)
#   def map(self, context, func, dataList, *args, **kwargs) - Scatter work to slaves and gather the results.
#   def storeClear(self, context)
#   def __new__(cls, comm=mpi.COMM_WORLD, recvSleep=0.1, barrierSleep=0.1) - Construct an MPI.Comm wrapper.
#   def __init__(self, comm=None, root=0)
#   def storeClear(self, context)
#   def storeDel(self, context, *nameList)
#   def recv(self, obj=None, source=0, tag=0, status=None)
#   def _checkBarrierComm(self)
#   def pickleFunction(function)
#   def __exit__(self, excType, excVal, tb)
#   def cacheList(self, context)
#   def storeList(self, context)
#   def storeList(self, context)
#   def cacheClear(self, context)
#   def scatter(self, dataList, root=0, tag=0)
#   def storeSet(self, context, **kwargs) - Store data on slave for a particular context.
#   def setBatchType(batchType)
#   def pickleSniffer(abort=False)
#   def __init__(self, context="default")
#   def mapNoBalance(self, context, func, dataList, *args, **kwargs) - Scatter work to slaves and gather the results.
#   def catchPicklingError(func)
#   def __getattr__(self, name)
#   def reduceNoBalance(self, context, reducer, func, dataList, *args, **kwargs) - Scatter work to slaves and reduce the results.
#   def _getCache(self, context, index)
#   def cacheClear(self, context)