python多进程之间共享变量

2024年3月27日 16:22 by wst

python高级

多进程之间如果要共享变量怎么实现呢?

比如,分别统计10个文件夹中的json文件数,并返回最终结果。

下面实例的内容:

分别统计10个文件夹中json文件的数量,以及各json文件中 count、successes 字段值的总和。

# -*- encoding: utf-8 -*-
'''
@File        :stats_datacomp.py
@Author      :shitao
@Time        :2024/03/25 09:23:10
@Description :统计下载数据
'''

import time
from pathlib import Path
import json
import multiprocessing

# Process-shared running totals that the worker processes add their
# per-folder subtotals to.
# Typecode 'q' (signed 64-bit) is used rather than 'i' (32-bit C int):
# ctypes does no overflow checking, so a total above 2**31 - 1 would wrap
# silently and produce a wrong number instead of an error.
t_count_json = multiprocessing.Value('q', 0)
t_count_success = multiprocessing.Value('q', 0)
t_count_total = multiprocessing.Value('q', 0)

def stats_one_folder(folder):
    """Count the JSON files in ``folder/shards`` and sum their counters.

    Every entry directly inside ``folder/shards`` whose suffix is ``.json``
    is parsed, and its ``count`` and ``successes`` fields are accumulated.
    The per-folder subtotals are then added to the module-level shared
    counters (``t_count_json``, ``t_count_success``, ``t_count_total``),
    printed, and returned.

    Args:
        folder: ``pathlib.Path`` of a data directory that contains a
            ``shards`` sub-directory.

    Returns:
        Tuple ``(count_json, count_success, count_total)`` for this folder.

    Raises:
        OSError: if ``folder/shards`` or one of its files cannot be read.
        KeyError: if a JSON file lacks the ``count`` or ``successes`` key.
    """
    count_json = 0
    count_success = 0
    count_total = 0
    dpath = folder.joinpath("shards")
    print("data_path:", dpath)
    for jp in dpath.iterdir():
        if jp.suffix == '.json':
            count_json += 1
            # Context manager closes the handle right away instead of
            # leaving it open until garbage collection.
            with open(jp, encoding="utf-8") as fp:
                data = json.load(fp)
            count_total += data['count']
            count_success += data['successes']
    # ``value += x`` is a separate read and write, so it is not atomic
    # across processes; hold each counter's lock so concurrent workers
    # cannot overwrite each other's update.
    with t_count_json.get_lock():
        t_count_json.value += count_json
    with t_count_success.get_lock():
        t_count_success.value += count_success
    with t_count_total.get_lock():
        t_count_total.value += count_total
    print('count_json:',count_json, 
          '|count_success:',count_success, 
          '|count_total:',count_total)
    return count_json, count_success, count_total

def stats_img_multi(from_dir, num=10):
    """Collect statistics for ``data_dir0`` .. ``data_dir{num-1}`` in parallel.

    One worker process per folder runs ``stats_one_folder``. After all
    workers have been joined, the shared counters are printed and returned.

    NOTE(review): the workers reach the shared counters as module-level
    globals, not as arguments. That only updates the parent's counters when
    children inherit the parent's memory (the "fork" start method). With
    "spawn" or "forkserver" each child would re-create its own counters;
    confirm the start method on the target platform, or pass the counters
    explicitly via ``args``.

    Args:
        from_dir: Root directory that contains the ``data_dir<i>`` folders.
        num: Number of folders to scan, starting from index 0. Defaults to
            10, the value that was previously hard-coded.

    Returns:
        Tuple ``(json_count, success_count, total_count)`` read from the
        shared counters. The counters are never reset, so the values are
        cumulative if this function is called more than once per run.
    """
    path = Path(from_dir)
    folders = [path.joinpath("data_dir" + str(i)) for i in range(num)]

    # Start every worker first, then wait for all of them, so the folders
    # are processed concurrently rather than one after another.
    processes = []
    for folder in folders:
        process = multiprocessing.Process(target=stats_one_folder, args=(folder,))
        process.start()
        processes.append(process)

    for process in processes:
        process.join()

    totals = (t_count_json.value, t_count_success.value, t_count_total.value)
    print('json count:', totals[0],
          'success count:', totals[1],
          'total count:', totals[2])
    return totals



if __name__ == "__main__":
    # Script entry point: time one full scan of the dataset root and
    # report the elapsed wall-clock seconds.
    aim_dir = "/home/wst/datacomp"
    started_at = time.time()
    stats_img_multi(aim_dir)
    finished_at = time.time()
    print("use time:", finished_at - started_at)

输出:

data_path: /home/wst/datacomp/data_dir0/shards
data_path: /home/wst/datacomp/data_dir1/shards
data_path: /home/wst/datacomp/data_dir2/shards
data_path: /home/wst/datacomp/data_dir3/shards
data_path: /home/wst/datacomp/data_dir4/shards
data_path: /home/wst/datacomp/data_dir5/shards
data_path: /home/wst/datacomp/data_dir6/shards
data_path: /home/wst/datacomp/data_dir7/shards
data_path: /home/wst/datacomp/data_dir8/shards
data_path: /home/wst/datacomp/data_dir9/shards
count_json: 2013 |count_success: 15166260 |count_total: 19929869
count_json: 2395 |count_success: 17953193 |count_total: 23698339
count_json: 2396 |count_success: 17936282 |count_total: 23689078
count_json: 2564 |count_success: 19342910 |count_total: 25380636
count_json: 2635 |count_success: 19619759 |count_total: 26102709
count_json: 2638 |count_success: 19651983 |count_total: 26133653
count_json: 2642 |count_success: 19670029 |count_total: 26135694
count_json: 2734 |count_success: 20753371 |count_total: 27019484
count_json: 3506 |count_success: 26690067 |count_total: 34768344
count_json: 3507 |count_success: 26621927 |count_total: 34704393
json count: 27030 success count: 203405781 total count: 267562199
use time: 15.571681022644043

附:

除了以上共享整型变量,还可以共享其他类型的变量:

# Shared double-precision float (typecode 'd'), initialised to 1
num = multiprocessing.Value('d', 1)
# Shared array of C ints (typecode 'i'), initialised from the given list
num = multiprocessing.Array('i', [1, 2, 3, 4, 5])

 

你还知道其他共享变量的方法吗?

 


Comments(0) Add Your Comment

Not Comment!