Python弱引用与内存泄漏防治

📅 2026/6/17 5:02:06
Python弱引用与内存泄漏防治
Python弱引用与内存泄漏防治

weakref模块提供了创建弱引用的能力。弱引用不增加对象的引用计数，当对象只剩下弱引用时，GC可以回收它。

weakref.ref是最基础的弱引用：

```python
import weakref

class ExpensiveObject:
    def __init__(self, name):
        self.name = name
        self.data = bytearray(1024 * 1024)  # 1MB数据

    def __repr__(self):
        return f"ExpensiveObject({self.name})"

obj = ExpensiveObject("test")
ref = weakref.ref(obj)
print(ref())  # ExpensiveObject(test)
del obj  # 删除强引用
print(ref())  # None
```

ref()返回引用的对象，如果对象已被回收则返回None。ref()的返回值必须检查是否为None，否则后续操作会出错。

weakref的回调函数在对象被回收时触发：

```python
def cleanup(ref):
    print(f"对象被回收了: {ref}")

obj = ExpensiveObject("monitored")
ref = weakref.ref(obj, cleanup)
del obj  # 输出: 对象被回收了: <weakref at 0x...; dead>
```

回调在对象即将被销毁时调用（在CPython中，引用计数归零时会立即触发）。回调与对象的__del__之间的先后顺序属于实现细节，不应依赖。回调接收的参数是弱引用对象本身，不是原始对象——此时原始对象已不可访问，ref()返回None。

weakref.proxy创建代理对象，可以直接访问原对象的方法和属性，不需要额外的()调用：

```python
obj = ExpensiveObject("proxy")
proxy = weakref.proxy(obj)
print(proxy.name)  # proxy直接访问属性
del obj
try:
    print(proxy.name)  # ReferenceError: weakly-referenced object no longer exists
except ReferenceError as e:
    print(e)
```

proxy的缺点是无法事先感知对象是否存活，只有实际访问时才会抛出ReferenceError。ref()则通过返回值是否为None明确告知对象状态。

WeakValueDictionary的典型应用是缓存：

```python
import weakref

class ImageCache:
    def __init__(self):
        self._cache = weakref.WeakValueDictionary()

    def get_image(self, path):
        img = self._cache.get(path)
        if img is not None:
            print(f"缓存命中: {path}")
            return img
        print(f"加载图像: {path}")
        img = Image(path)
        self._cache[path] = img
        return img

class Image:
    def __init__(self, path):
        self.path = path
        self.data = f"image_data:{path}"

    def render(self):
        return f"rendering {self.data}"

cache = ImageCache()
img1 = cache.get_image("/photo.jpg")
img2 = cache.get_image("/photo.jpg")  # 缓存命中
del img1, img2  # 删除全部强引用（img1和img2指向同一个对象，只删除img1并不会使其被回收）
import gc
gc.collect()
img3 = cache.get_image("/photo.jpg")  # 条目已被移除，需要重新加载
```

WeakValueDictionary在值对象不再被外部持有时自动移除条目，不需要手动清理缓存中的过期条目。

但WeakValueDictionary有一个陷阱：键的存活期也依赖值。如果值已经被回收，对应的键也会随之从字典中消失，此时get()返回默认值None（用下标访问则会抛出KeyError）：

```python
cache = weakref.WeakValueDictionary()
obj = ExpensiveObject("temp")
cache["key"] = obj
print(cache.get("key"))  # ExpensiveObject(temp)
del obj
gc.collect()
print(cache.get("key"))  # None
```

WeakKeyDictionary是另一种方向的弱引用字典：

```python
class EventHandler:
    def __init__(self):
        self._handlers = weakref.WeakKeyDictionary()

    def register(self, obj, callback):
        self._handlers[obj] = callback

    def notify(self, *args, **kwargs):
        for obj, callback in list(self._handlers.items()):
            if obj is not None:
                callback(obj, *args, **kwargs)
        # 字典自动清理已回收的对象

handler = EventHandler()

class Listener:
    def on_event(self, data):
        print(f"处理: {data}")

listener = Listener()
handler.register(listener, Listener.on_event)
handler.notify("test")  # 调用listener.on_event
del listener
gc.collect()
handler.notify("test")  # 不输出任何内容
```

WeakKeyDictionary在对象被回收时自动移除条目。常用于需要给对象附加元数据、但又不想延长对象生命周期的场景。

weakref.WeakSet是弱引用集合：

```python
class Service:
    def __init__(self):
        self._instances = weakref.WeakSet()

    def track(self, obj):
        self._instances.add(obj)

    def active_count(self):
        return len(self._instances)

service = Service()
for _ in range(10):
    obj = ExpensiveObject("temp")
    service.track(obj)
    # obj在循环结束时可能被回收

import gc
gc.collect()
print(f"活跃实例: {service.active_count()}")  # 可能少于10
```

WeakSet自动管理元素的生存期。元素被回收后自动从集合中移除，不需要手动清理。

使用finalize替代__del__进行资源清理：

```python
import weakref
import tempfile
import os

class TempFileResource:
    def __init__(self, suffix=".tmp"):
        self.file = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
        self.path = self.file.name

        def cleanup(path):
            try:
                if os.path.exists(path):
                    os.unlink(path)
                    print(f"清理临时文件: {path}")
            except OSError:
                pass

        weakref.finalize(self, cleanup, self.path)

rsc = TempFileResource()
print(f"创建临时文件: {rsc.path}")
del rsc  # finalize立即执行清理
```

finalize比__del__更可靠：它在对象被回收时保证执行，不会因为循环引用而受阻。并且finalize可以接收参数，不像__del__只能操作实例属性。

finalize的另一个优势是自带退出时清理：它的atexit属性默认为True，解释器正常退出时，仍未执行的finalizer会按创建的逆序被调用。通过atexit注册的函数在进程退出时也能执行，确保资源释放：

```python
import atexit
import weakref

class DatabaseConnection:
    def __init__(self, conn_string):
        self.conn_string = conn_string
        self._connection = None

        def close_connection(conn_str):
            print(f"关闭数据库连接: {conn_str}")

        self._finalizer = weakref.finalize(self, close_connection, conn_string)
        # finalize默认atexit=True，这里的显式注册是冗余的（finalizer最多只会执行一次）
        atexit.register(self._finalizer)

db = DatabaseConnection("postgresql://localhost/db")
# 即使忘记显式关闭，进程退出时也会清理
```

atexit注册的finalize在Python进程正常退出时执行。注意：如果程序被SIGKILL杀死，atexit处理程序不会运行。

weakref.getweakrefcount和getweakrefs检查对象的弱引用状态：

```python
obj = ExpensiveObject("weak_demo")
ref1 = weakref.ref(obj)
ref2 = weakref.ref(obj, lambda r: None)  # 带回调的ref是独立的弱引用对象
print(weakref.getweakrefcount(obj))  # 2
print(len(weakref.getweakrefs(obj)))  # 2
```

这个接口在调试时有用，可以确认对象被创建了多少弱引用。注意：CPython会复用不带回调的ref，即weakref.ref(obj) is weakref.ref(obj)为True，所以连续两次不带回调的调用只会计为1个弱引用。

循环引用中__del__的问题可以通过weakref解决：

```python
class Node:
    def __init__(self):
        self.parent = None
        self.children = []

    def add_child(self, child):
        self.children.append(child)
        child.parent = weakref.ref(self)  # 使用弱引用

    def __del__(self):
        print(f"Node deleted: {id(self)}")

parent = Node()
child = Node()
parent.add_child(child)
del parent
del child
gc.collect()  # 两个节点都被正确回收
```

如果Node.parent是强引用，parent和child会形成循环引用。使用weakref.ref(self)打破循环后，对象仅靠引用计数就能被及时回收，GC可以正常工作。

weakref的代理模式在实现观察者模式时很实用：

```python
class Observable:
    def __init__(self):
        self._observers = weakref.WeakSet()

    def attach(self, observer):
        self._observers.add(observer)

    def detach(self, observer):
        self._observers.discard(observer)

    def notify(self, data):
        # WeakSet只会迭代出仍然存活的观察者；先复制成列表，避免迭代期间集合发生变化
        for observer in list(self._observers):
            observer.update(data)

class Widget:
    def update(self, data):
        print(f"Widget updated: {data}")

observable = Observable()
widget = Widget()
observable.attach(widget)
observable.notify("event1")  # 正常通知
del widget
gc.collect()
observable.notify("event2")  # widget已被回收，不输出任何内容
```

WeakSet中的观察者在widget被回收后自动移除，不会尝试调用已销毁的对象。注意：不要把绑定方法（如widget.update）直接放入WeakSet——绑定方法是每次访问时新建的临时对象，放入后会立即被回收；需要弱引用绑定方法时应使用weakref.WeakMethod。

Python 3.10中weakref的优化：ref对象的哈希操作速度提升了约40%，因为内部实现从Python层面移到了C层面，不再需要Python函数调用的开销。（注：此数据未注明出处，引用前请对照官方更新日志核实。）

内置类型对弱引用的支持情况：

```python
import weakref

class WithSlots:
    __slots__ = ("x", "__weakref__")

obj = WithSlots()
ref = weakref.ref(obj)  # OK，因为__slots__包含了__weakref__

class WithoutWeak:
    __slots__ = ("x",)

obj2 = WithoutWeak()
ref2 = weakref.ref(obj2)  # TypeError: cannot create weak reference to 'WithoutWeak' object
```

Python内置的list、dict、int、str等类型的实例默认不支持弱引用。其中list、dict、str可以通过子类化获得支持；而int、tuple等类型在CPython中即使子类化也不支持弱引用：

```python
class MyList(list):
    pass

lst = MyList([1, 2, 3])
ref = weakref.ref(lst)  # OK
```

自定义类默认支持弱引用（包含__weakref__预留槽位），但定义了__slots__后需要显式加入__weakref__。

弱引用在框架设计中的实际应用案例——信号系统：

```python
class Signal:
    def __init__(self):
        self._receivers = {}

    def connect(self, receiver, sender=None):
        key = (sender, id(receiver))
        self._receivers[key] = weakref.ref(receiver)

    def send(self, sender=None, **kwargs):
        for (s, _), ref in list(self._receivers.items()):
            if s is sender:
                receiver = ref()
                if receiver is not None:
                    receiver(**kwargs)
                else:
                    # 清理已回收的接收者
                    del self._receivers[(s, _)]

signal = Signal()

class Subscriber:
    def __call__(self, **data):
        print(f"收到信号: {data}")

sub = Subscriber()
signal.connect(sub)
signal.send(sender=None, message="hello")
del sub
gc.collect()
signal.send(sender=None, message="world")  # 不会调用已回收的对象
```

这个信号模式的优点是订阅者的生命周期不受信号系统的影响。即使忘记取消订阅，也不会造成内存泄漏。