我有一个Python多处理场景,我在这里简化了我的问题。有两个部分需要处理x个作业。在我的代码中,2个作业部分实际上是HTTP请求,其中第2部分取决于第1部分的结果。最后,第3部分简单地报告部分1和2所花费的时间并计算所用时间的运行平均值在所有工作中。
利用多处理功能,我为Job Part 1设置了2x流程工作者,为Job Part 2设置了2x流程工作者,并且只为1x Reporter worker设置了2x流程工作者。为了传达每个工作部分所花费的时间,我正在使用队列。
我已经完成了对指定x个作业的完整工作的代码,但是我想添加一个超时/取消事件,该事件应该停止所有工作并正常终止。
在我的代码中,我使用Report Worker检查此事件以及何时发生这种情况我认为这只是消耗所有队列中剩余作业并添加毒丸以表示终止的情况。 / p>
子流程确实终止但是似乎控制权没有传递回" main"程序,我仍然可以看到主进程挂起,直到我在命令提示符下取消ctrl-c取消。
请帮助查看我的代码并告诉我出错的地方:
#!/usr/bin/python3
import sys, time, datetime, os, math, multiprocessing, time, random
TYPE_JOB_PART_1 = '1'
TYPE_JOB_PART_2 = '2'
class Worker(multiprocessing.Process):
def __init__(self, task_queue, result_queue, task_type):
multiprocessing.Process.__init__(self)
self.task_queue = task_queue
self.result_queue = result_queue
self.task_type = task_type
def run(self):
while True:
next_task = self.task_queue.get()
if next_task is None:
#= Poison pill means shutdown
print('%s: is exiting: %s ===============' % (self.name, self.task_type))
self.task_queue.task_done()
break
job_response = next_task()
self.task_queue.task_done()
if self.task_type == TYPE_JOB_PART_1:
self.result_queue.put(do_jobPart_2(job_response))
elif self.task_type == TYPE_JOB_PART_2:
self.result_queue.put(do_reporting(job_response))
return
class Reporter(multiprocessing.Process):
def __init__(self, task_queue, result_queue, num_tasks, num_workers_jobPart1, num_workers_jobPart2, jobPart1_queue, jobPart2_queue):
multiprocessing.Process.__init__(self)
self.task_queue = task_queue
self.result_queue = result_queue
self.jobPart1_queue = jobPart1_queue
self.jobPart2_queue = jobPart2_queue
self.num_tasks = num_tasks
self.num_workers_jobPart1 = num_workers_jobPart1
self.num_workers_jobPart2 = num_workers_jobPart2
self.time_start = datetime.datetime.now() #= Start the timer
self.time_elapsed = 0
self.time_wait_to_terminate = 3 #= Define the timeout to terminate all jobs
def run(self):
while True:
next_task = self.task_queue.get()
if next_task is None:
#= Poison pill means shutdown
self.task_queue.task_done()
print('==================================================')
print('============ END OF PROCESSING ===================')
print('==================================================')
break
job_response = next_task()
self.task_queue.task_done()
self.result_queue.put(job_response)
queueSize = self.result_queue.qsize()
#= TERMINATTION time
print("================>i:%s" % (queueSize))
self.time_elapsed = (datetime.datetime.now() - self.time_start).total_seconds()
if self.time_elapsed > self.time_wait_to_terminate:
print("TIME IS UP. %s elapsed!" % self.time_wait_to_terminate)
#= Empty the JobPart_1 queue to relieve the workers
while not self.jobPart1_queue.empty():
self.jobPart1_queue.get()
self.jobPart1_queue.task_done()
#= And add poison pills again
for i in range(self.num_workers_jobPart1):
self.jobPart1_queue.put(None)
#= Empty the JobPart_2 queue to relieve the workers
while not self.jobPart2_queue.empty():
self.jobPart2_queue.get()
self.jobPart2_queue.task_done()
#= And add poison pills again
for i in range(self.num_workers_jobPart2):
self.jobPart2_queue.put(None)
#= Empty the report queue to relieve the reporter itself
while not self.task_queue.empty():
self.task_queue.get()
self.task_queue.task_done()
print("TIME IS UP: workers stopped, Reporter shutting itself down....")
break
#= Results queue is filled up when count = num_tasks, so give Poison pill to shutdown JobPart_2 workers
if queueSize == self.num_tasks:
for i in range(self.num_workers_jobPart2):
self.jobPart2_queue.put(None)
print("JobPart_2 workers will be poisoned")
return
class do_reporting(object):
def __init__(self, info):
self.info = info
def __call__(self):
try:
print("%s:do_reporting - is RUNNING " % (self.info['jobPart1_results']['i']))
randtime = 0.5 * random.random()
time.sleep(randtime)
print( 'jobPart1_time:%s, jobPart2_time:%s, report_time;%s' % ( self.info["jobPart1_results"]["jobPart1_time"], self.info["jobPart2_time"], randtime ) )
return {'results':self.info,'report_time':randtime}
except:
print("error:do_reporting")
class do_jobPart_1(object):
def __init__(self, i, t0):
self.t0 = t0
self.i = i
def __call__(self):
try:
print("%s:do_jobPart_1 - is RUNNING " % self.i)
randtime = 0.5 * random.random()
time.sleep(randtime)
time_elapsed = (datetime.datetime.now() - self.t0).total_seconds()
return {'i':self.i, 't0':self.t0, 'time_elapsed_job1':time_elapsed, 'jobPart1_time':randtime}
except:
print("error:do_jobPart_1")
class do_jobPart_2(object):
def __init__(self, info):
self.info = info
def __call__(self):
try:
print("%s:do_jobPart_2 - is RUNNING " % (self.info['i']))
randtime = 0.5 * random.random()
time.sleep(randtime)
return {"jobPart1_results":self.info,'jobPart2_time':randtime}
except:
print("error:do_jobPart_2")
if __name__ == '__main__':
print('==================================================')
print('============ START PROCESSING ====================')
print('==================================================')
#===============================================
#= Establish communication queues
q_jobPart_1 = multiprocessing.JoinableQueue()
q_jobPart_2 = multiprocessing.JoinableQueue()
q_reportTasks = multiprocessing.JoinableQueue()
q_results = multiprocessing.Queue()
#===============================================
#= Start workersReporter !!! Should always be just 1 worker !!!
numJobs = 90
numWorkers_jobPart1 = 2
numWorkers_jobPart2 = 2
workersJobPart_1 = [ Worker(q_jobPart_1, q_jobPart_2, TYPE_JOB_PART_1) for i in range(numWorkers_jobPart1) ]
workersJobPart_2 = [ Worker(q_jobPart_2, q_reportTasks, TYPE_JOB_PART_2) for i in range(numWorkers_jobPart2) ]
workerJobReporter = Reporter(q_reportTasks, q_results, numJobs, numWorkers_jobPart1, numWorkers_jobPart2, q_jobPart_1, q_jobPart_2)
#===============================================
#= Start the workers
print("Main PID:%s" % os.getpid())
for w in workersJobPart_1:
w.start()
print("JobPart_1 PID=%s" % w.pid)
for w in workersJobPart_2:
w.start()
print("JobPart_2 PID=%s" % w.pid)
workerJobReporter.start()
print("JobReporter PID=%s" % workerJobReporter.pid)
#= Start the timer and add tasks to the queues
time_start = datetime.datetime.now()
for i in range(numJobs):
q_jobPart_1.put(do_jobPart_1(i, time_start))
#= Add poison pill for each jobPart_1 workers
for i in range(numWorkers_jobPart1):
q_jobPart_1.put(None)
q_jobPart_1.join()
print("JobPart_1 workers terminated")
q_jobPart_2.join()
print("JobPart_2 workers terminated")
q_reportTasks.put(None)
q_reportTasks.join()
workerJobReporter.terminate()
print("Reporter terminated")
print("FINISHED")