# python_使用多线程来处理数据写入Excel文件
# 因为pandas的ExcelWriter不支持并发写入同一个文件，所以每个线程各自写入一个独立的文件；如果需要单个文件的话，后期还要合并。
import pandas as pd
from io import BytesIO
import threading
import multiprocessing
import time
import math# 假设我们有12万行的数据
# Sample data set: 120,001 rows and three integer columns.
data = pd.DataFrame({
    'A': range(120001),
    'B': range(120001),
    'C': range(120001),
})


def write_to_excel(df: pd.DataFrame, filename: str) -> None:
    """Write ``df`` to the Excel workbook ``filename`` without the index column.

    Args:
        df: Frame to export.
        filename: Path of the workbook to create.
    """
    # The writer is used as a context manager so it is closed when the
    # block exits, including when ``to_excel`` raises.
    with pd.ExcelWriter(filename) as writer:
        df.to_excel(writer, index=False)


def thread_write(df: pd.DataFrame, filename: str) -> threading.Thread:
    """Start a thread that writes ``df`` to ``filename`` and return it.

    Args:
        df: Frame to export.
        filename: Path of the workbook the thread creates.

    Returns:
        The already-started thread; the caller is expected to ``join()`` it.
    """
    thread = threading.Thread(target=write_to_excel, args=(df, filename))
    thread.start()
    return thread


def test_threading(
    data: pd.DataFrame,
    split_size: int = 30000,
    output_prefix: str = 'D:\\desktop\\new\\thread_',
) -> None:
    """Write ``data`` to several Excel files, one thread per chunk, and time it.

    ``data`` is cut into consecutive chunks of at most ``split_size`` rows.
    Chunk ``i`` is written to ``f'{output_prefix}{i}.xlsx'`` by its own
    thread, because the chunks cannot be written concurrently into a single
    workbook. The elapsed wall-clock time is printed once every thread has
    finished.

    Args:
        data: Frame to export.
        split_size: Maximum number of rows per output file.
        output_prefix: Leading part of every output path; the chunk index
            and the ``.xlsx`` suffix are appended to it.

    Raises:
        ValueError: If ``split_size`` is smaller than 1.
    """
    if split_size < 1:
        raise ValueError(
            f"split_size must be a positive integer, got {split_size!r}"
        )

    # Slice by position. The number of chunks (and therefore threads) follows
    # from the frame actually passed in, not from a fixed row count, so no
    # rows are dropped and no chunk index is out of range.
    data_splits = [
        data.iloc[start:start + split_size]
        for start in range(0, len(data), split_size)
    ]
    filenames = [f'{output_prefix}{i}.xlsx' for i in range(len(data_splits))]

    start_time = time.perf_counter()
    threads = []
    for chunk, filename in zip(data_splits, filenames):
        threads.append(thread_write(chunk, filename))
    for thread in threads:
        # join() blocks until the worker thread has finished.
        thread.join()
    print(f"Threaded execution took {time.perf_counter() - start_time:.2f} seconds")


# Script entry point.
if __name__ == '__main__':
    test_threading(data)