1. 核心对象模型 (Include/object.h)
1.1 基础对象结构 PyObject
// Include/object.h typedef struct _object { _PyObject_HEAD_EXTRA // 调试模式下的额外字段 Py_ssize_t ob_refcnt; // 引用计数 PyTypeObject *ob_type; // 类型对象指针 } PyObject; // 变长对象 PyVarObject是Python中用于表示可变长度对象(如列表、元组、字符串等)的基础结构。它扩展了基本的PyObject,增加了一个表示对象中元素数量的字段(ob_size) typedef struct { PyObject ob_base; // 基础对象 Py_ssize_t ob_size; // 对象大小 } PyVarObject;
源码位置: Include/object.h:106-120
内存布局:
PyObject: ┌─────────────┬──────────────┬─────────────┐ │ ob_refcnt │ ob_type │ data... │ │ 8 bytes │ 8 bytes │ variable │ └─────────────┴──────────────┴─────────────┘
1.2 类型对象 PyTypeObject
// Include/cpython/object.h typedef struct _typeobject { PyVarObject ob_base; const char *tp_name; // 类型名称 Py_ssize_t tp_basicsize; // 基本大小 Py_ssize_t tp_itemsize; // 元素大小 // 析构和打印 destructor tp_dealloc; Py_ssize_t tp_vectorcall_offset; // 标准方法 getattrfunc tp_getattr; setattrfunc tp_setattr; PyAsyncMethods *tp_as_async; reprfunc tp_repr; // 数值方法、序列方法、映射方法 PyNumberMethods *tp_as_number; PySequenceMethods *tp_as_sequence; PyMappingMethods *tp_as_mapping; // 更多字段... } PyTypeObject;
2. 整数对象内存模型 (Objects/longobject.c)
2.1 长整数结构
// Include/cpython/longintrepr.h struct _longobject { PyVarObject ob_base; digit ob_digit[1]; // 数字位数组 }; typedef struct _longobject PyLongObject; // digit 是 30 位或 15 位的无符号整数 #if PYLONG_BITS_IN_DIGIT == 30 typedef uint32_t digit; typedef int32_t sdigit; typedef uint64_t twodigits; #elif PYLONG_BITS_IN_DIGIT == 15 typedef unsigned short digit; typedef short sdigit; typedef unsigned long twodigits; #endif
小整数缓存机制:
// Objects/longobject.c #define IS_SMALL_INT(ival) (-_PY_NSMALLNEGINTS <= (ival) && (ival) < _PY_NSMALLPOSINTS) #define IS_SMALL_UINT(ival) ((ival) < _PY_NSMALLPOSINTS)
// Objects/longobject.c #define NSMALLPOSINTS 257 #define NSMALLNEGINTS 5 // 小整数对象池 [-5, 256] static PyLongObject small_ints[NSMALLNEGINTS + NSMALLPOSINTS]; PyObject * PyLong_FromLong(long ival) { // 使用小整数缓存 if (IS_SMALL_INT(ival)) { return get_small_int((sdigit)ival); } // 创建新的长整数对象 return PyLong_FromLongLong(ival); }
2.2 内存布局示例
小整数 (42): ┌───────────┬──────────┬────────┬──────────┐ │ ob_refcnt │ ob_type │ob_size │ ob_digit │ │ ? │ &Long │ 1 │ 42 │ └───────────┴──────────┴────────┴──────────┘ 大整数 (2^100): ┌───────────┬──────────┬────────┬─────────────────────┐ │ ob_refcnt │ ob_type │ob_size │ ob_digit[] │ │ 1 │ &Long │ 4 │ [低位...高位] │ └───────────┴──────────┴────────┴─────────────────────┘
3. 字符串对象内存模型 (Objects/unicodeobject.c)
3.1 Unicode 对象结构
// Include/cpython/unicodeobject.h typedef struct { PyObject_HEAD Py_ssize_t length; // 字符串长度 Py_hash_t hash; // 哈希值缓存 struct { unsigned int interned:2; // 字符串驻留状态 unsigned int kind:3; // 字符串类型 unsigned int compact:1; // 是否紧凑存储 unsigned int ascii:1; // 是否纯ASCII unsigned int ready:1; // 是否就绪 unsigned int :24; } state; wchar_t *wstr; // 宽字符表示(可选) } PyASCIIObject; // 紧凑 Unicode 对象 typedef struct { PyASCIIObject _base; Py_ssize_t utf8_length; // UTF-8 长度 char *utf8; // UTF-8 缓存 Py_ssize_t wstr_length; // 宽字符长度 } PyCompactUnicodeObject;
3.2 字符串驻留机制
// Objects/unicodeobject.c static PyObject *interned = NULL; // 驻留字符串字典 void PyUnicode_InternInPlace(PyObject **p) { PyObject *s = *p; if (PyUnicode_READY(s) == -1) { return; } // 检查是否已驻留 if (PyUnicode_CHECK_INTERNED(s)) { return; } // 添加到驻留字典 PyObject *t = PyDict_SetDefault(interned, s, s); if (t != s) { Py_SETREF(*p, Py_NewRef(t)); } PyUnicode_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL; }
4. 列表对象内存模型 (Objects/listobject.c)
4.1 列表结构
// Include/cpython/listobject.h typedef struct { PyVarObject ob_base; PyObject **ob_item; // 指向元素数组的指针 Py_ssize_t allocated; // 已分配的空间大小 } PyListObject;
// Objects/listobject.c static int list_resize(PyListObject *self, Py_ssize_t newsize) { PyObject **items; size_t new_allocated, num_allocated_bytes; Py_ssize_t allocated = self->allocated; // 扩容策略: newsize + (newsize >> 3) + (newsize < 9 ? 3 : 6) new_allocated = ((size_t)newsize + (newsize >> 3) + 6) & ~(size_t)3; if (newsize == 0) { PyMem_FREE(self->ob_item); self->ob_item = NULL; self->allocated = 0; return 0; } // 重新分配内存 items = (PyObject **)PyMem_Realloc(self->ob_item, new_allocated * sizeof(PyObject *)); if (items == NULL) { PyErr_NoMemory(); return -1; } self->ob_item = items; self->allocated = new_allocated; return 0; }
4.3 内存布局
PyObject: ┌─────────────┬──────────────┬─────────────┐ │ ob_refcnt │ ob_type │ data... │ │ 8 bytes │ 8 bytes │ variable │ └─────────────┴──────────────┴─────────────┘
1.2 类型对象 PyTypeObject
// Include/cpython/object.h typedef struct _typeobject { PyVarObject ob_base; const char *tp_name; // 类型名称 Py_ssize_t tp_basicsize; // 基本大小 Py_ssize_t tp_itemsize; // 元素大小 // 析构和打印 destructor tp_dealloc; Py_ssize_t tp_vectorcall_offset; // 标准方法 getattrfunc tp_getattr; setattrfunc tp_setattr; PyAsyncMethods *tp_as_async; reprfunc tp_repr; // 数值方法、序列方法、映射方法 PyNumberMethods *tp_as_number; PySequenceMethods *tp_as_sequence; PyMappingMethods *tp_as_mapping; // 更多字段... } PyTypeObject;
2. 整数对象内存模型 (Objects/longobject.c)
2.1 长整数结构
// Include/cpython/longintrepr.h struct _longobject { PyVarObject ob_base; digit ob_digit[1]; // 数字位数组 }; typedef struct _longobject PyLongObject; // digit 是 30 位或 15 位的无符号整数 #if PYLONG_BITS_IN_DIGIT == 30 typedef uint32_t digit; typedef int32_t sdigit; typedef uint64_t twodigits; #elif PYLONG_BITS_IN_DIGIT == 15 typedef unsigned short digit; typedef short sdigit; typedef unsigned long twodigits; #endif
小整数缓存机制:
// Objects/longobject.c #define IS_SMALL_INT(ival) (-_PY_NSMALLNEGINTS <= (ival) && (ival) < _PY_NSMALLPOSINTS) #define IS_SMALL_UINT(ival) ((ival) < _PY_NSMALLPOSINTS)
// Objects/longobject.c #define NSMALLPOSINTS 257 #define NSMALLNEGINTS 5 // 小整数对象池 [-5, 256] static PyLongObject small_ints[NSMALLNEGINTS + NSMALLPOSINTS]; PyObject * PyLong_FromLong(long ival) { // 使用小整数缓存 if (IS_SMALL_INT(ival)) { return get_small_int((sdigit)ival); } // 创建新的长整数对象 return PyLong_FromLongLong(ival); }
2.2 内存布局示例
小整数 (42): ┌───────────┬──────────┬────────┬──────────┐ │ ob_refcnt │ ob_type │ob_size │ ob_digit │ │ ? │ &Long │ 1 │ 42 │ └───────────┴──────────┴────────┴──────────┘ 大整数 (2^100): ┌───────────┬──────────┬────────┬─────────────────────┐ │ ob_refcnt │ ob_type │ob_size │ ob_digit[] │ │ 1 │ &Long │ 4 │ [低位...高位] │ └───────────┴──────────┴────────┴─────────────────────┘
3. 字符串对象内存模型 (Objects/unicodeobject.c)
3.1 Unicode 对象结构
// Include/cpython/unicodeobject.h typedef struct { PyObject_HEAD Py_ssize_t length; // 字符串长度 Py_hash_t hash; // 哈希值缓存 struct { unsigned int interned:2; // 字符串驻留状态 unsigned int kind:3; // 字符串类型 unsigned int compact:1; // 是否紧凑存储 unsigned int ascii:1; // 是否纯ASCII unsigned int ready:1; // 是否就绪 unsigned int :24; } state; wchar_t *wstr; // 宽字符表示(可选) } PyASCIIObject; // 紧凑 Unicode 对象 typedef struct { PyASCIIObject _base; Py_ssize_t utf8_length; // UTF-8 长度 char *utf8; // UTF-8 缓存 Py_ssize_t wstr_length; // 宽字符长度 } PyCompactUnicodeObject;
3.2 字符串驻留机制
// Objects/unicodeobject.c static PyObject *interned = NULL; // 驻留字符串字典 void PyUnicode_InternInPlace(PyObject **p) { PyObject *s = *p; if (PyUnicode_READY(s) == -1) { return; } // 检查是否已驻留 if (PyUnicode_CHECK_INTERNED(s)) { return; } // 添加到驻留字典 PyObject *t = PyDict_SetDefault(interned, s, s); if (t != s) { Py_SETREF(*p, Py_NewRef(t)); } PyUnicode_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL; }
4. 列表对象内存模型 (Objects/listobject.c)
4.1 列表结构
// Include/cpython/listobject.h typedef struct { PyVarObject ob_base; PyObject **ob_item; // 指向元素数组的指针 Py_ssize_t allocated; // 已分配的空间大小 } PyListObject;
4.2 动态扩容机制
// Objects/listobject.c static int list_resize(PyListObject *self, Py_ssize_t newsize) { PyObject **items; size_t new_allocated, num_allocated_bytes; Py_ssize_t allocated = self->allocated; // 扩容策略: newsize + (newsize >> 3) + (newsize < 9 ? 3 : 6) new_allocated = ((size_t)newsize + (newsize >> 3) + 6) & ~(size_t)3; if (newsize == 0) { PyMem_FREE(self->ob_item); self->ob_item = NULL; self->allocated = 0; return 0; } // 重新分配内存 items = (PyObject **)PyMem_Realloc(self->ob_item, new_allocated * sizeof(PyObject *)); if (items == NULL) { PyErr_NoMemory(); return -1; } self->ob_item = items; self->allocated = new_allocated; return 0; }
4.3 内存布局
空列表: ┌───────────┬──────────┬────────┬──────────┬───────────┐ │ ob_refcnt │ ob_type │ob_size │ ob_item │ allocated │ │ 1 │ &List │ 0 │ NULL │ 0 │ └───────────┴──────────┴────────┴──────────┴───────────┘ 包含元素的列表 [1, 2, 3]: ┌───────────┬──────────┬────────┬──────────┬───────────┐ │ ob_refcnt │ ob_type │ob_size │ ob_item │ allocated │ │ 1 │ &List │ 3 │ ptr │ 4 │ └───────────┴──────────┴────────┴──────────┴───────────┘ │ ▼ ┌──────────────────┐ │ PyObject *[4] │ │ [0]: &int(1) │ │ [1]: &int(2) │ │ [2]: &int(3) │ │ [3]: NULL │ └──────────────────┘
5. 字典对象内存模型 (Objects/dictobject.c)
5.1 字典结构 (紧凑表示)
// Objects/dictobject.c struct _dictkeysobject { Py_ssize_t dk_refcnt; // 键对象引用计数 Py_ssize_t dk_size; // 哈希表大小 dict_lookup_func dk_lookup; // 查找函数 Py_ssize_t dk_usable; // 可用槽位数 Py_ssize_t dk_nentries; // 已使用条目数 char dk_indices[]; // 索引数组 + 条目数组 }; typedef struct { PyObject_HEAD Py_ssize_t ma_used; // 已使用条目数 uint64_t ma_version_tag; // 版本标签 PyDictKeysObject *ma_keys; // 键对象 PyObject **ma_values; // 值数组(分离表示) } PyDictObject;
5.2 哈希冲突解决
// Objects/dictobject.c static Py_ssize_t lookdict(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr) { PyDictKeysObject *dk = mp->ma_keys; size_t mask = DK_MASK(dk); size_t perturb = hash; size_t i = (size_t)hash & mask; // 开放寻址法解决冲突 for (;;) { Py_ssize_t ix = dk_get_index(dk, i); if (ix == DKIX_EMPTY) { *value_addr = NULL; return ix; } PyDictKeyEntry *ep = &DK_ENTRIES(dk)[ix]; assert(ep->me_key != NULL); if (ep->me_key == key) { *value_addr = ep->me_value; return ix; } // 扰动探测 perturb >>= PERTURB_SHIFT; i = (i*5 + 1 + perturb) & mask; } }
6. 内存管理机制
6.1 对象分配器 (Objects/obmalloc.c)
// Objects/obmalloc.c // 内存池结构 struct pool_header { union { block *_padding; uint count; } ref; // 引用计数 block *freeblock; // 空闲块链表 struct pool_header *nextpool; // 下一个池 struct pool_header *prevpool; // 前一个池 uint arenaindex; // 竞技场索引 uint szidx; // 大小索引 uint nextoffset; // 下一个偏移 uint maxnextoffset; // 最大偏移 }; // PyObject_Malloc 实现 void * PyObject_Malloc(size_t size) { if (size > SMALL_REQUEST_THRESHOLD) { return PyMem_RawMalloc(size); } // 使用对象分配器 uint pool_idx = size_to_pool_idx(size); pool_header *pool = usedpools[pool_idx]; if (pool != NULL) { // 从现有池分配 block *bp = pool->freeblock; if (bp != NULL) { pool->freeblock = *(block **)bp; return (void *)bp; } } // 创建新池或使用系统分配器 return allocate_from_new_pool(size); }
6.2 垃圾回收机制 (Modules/gcmodule.c)
// Modules/gcmodule.c // GC 头结构 typedef union _gc_head { struct { union _gc_head *gc_next; union _gc_head *gc_prev; Py_ssize_t gc_refs; } gc; double dummy; // 对齐 } PyGC_Head; // 垃圾回收主函数 static Py_ssize_t gc_collect_main(PyThreadState *tstate, int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, int nofail) { Py_ssize_t m = 0; // 收集的对象数 Py_ssize_t n = 0; // 无法收集的对象数 PyGC_Head *young; // 年轻代 PyGC_Head *old; // 老年代 PyGC_Head unreachable; // 不可达对象 PyGC_Head finalizers; // 终结器对象 // 标记-清扫算法 // 1. 将引用计数复制到gc_refs update_refs(young); // 2. 减少内部引用 subtract_refs(young); // 3. 标记可达对象 move_unreachable(young, &unreachable); // 4. 处理终结器 move_legacy_finalizers(&unreachable, &finalizers); // 5. 删除不可达对象 delete_garbage(tstate, &unreachable, generation); return n+m; }
// Modules/gcmodule.c 详细展开 static Py_ssize_t gc_collect_main(PyThreadState *tstate, int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable, int nofail) { int i; Py_ssize_t m = 0; /* # objects collected */ Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */ PyGC_Head *young; /* the generation we are examining */ PyGC_Head *old; /* next older generation */ PyGC_Head unreachable; /* non-problematic unreachable trash */ PyGC_Head finalizers; /* objects with, & reachable from, __del__ */ PyGC_Head *gc; _PyTime_t t1 = 0; /* initialize to prevent a compiler warning */ GCState *gcstate = &tstate->interp->gc; // gc_collect_main() must not be called before _PyGC_Init // or after _PyGC_Fini() assert(gcstate->garbage != NULL); assert(!_PyErr_Occurred(tstate)); if (gcstate->debug & DEBUG_STATS) { PySys_WriteStderr("gc: collecting generation %d...\n", generation); show_stats_each_generations(gcstate); t1 = _PyTime_GetPerfCounter(); } if (PyDTrace_GC_START_ENABLED()) PyDTrace_GC_START(generation); **//重置当前代及更年轻代的计数器,增加下一代的计数器,之所以重制当前代的计数器,是为了完成垃圾回收后,当前的计数器还处于较高的峰值,再次触发回收,给下一代增加1 ,这个很好理解,为了出发老年代的对象** /* update collection and allocation counters */ if (generation+1 < NUM_GENERATIONS) gcstate->generations[generation+1].count += 1; for (i = 0; i <= generation; i++) gcstate->generations[i].count = 0; **//这是分代收集的关键:收集第N代时,会同时收集所有更年轻的代(0到N-1代)。** /* merge younger generations with one we are currently collecting */ for (i = 0; i < generation; i++) { gc_list_merge(GEN_HEAD(gcstate, i), GEN_HEAD(gcstate, generation)); } **//young: 指向当前正在收集的代 //old: 指向下一个更老的代(或在特殊情况下指向当前代)** /* handy references */ young = GEN_HEAD(gcstate, generation); if (generation < NUM_GENERATIONS-1) old = GEN_HEAD(gcstate, generation+1); else old = young; validate_list(old, collecting_clear_unreachable_clear); **//将不可达的对象封装到unreachable链表里,可达对象保留在young链表中,分离不可达对象(move_unreachable): 将`gc_refs`为0的对象移到不可达链表,大于0的对象留在可达链表。** deduce_unreachable(young, &unreachable); untrack_tuples(young); /* Move reachable objects to next generation. */ if (young != old) { if (generation == NUM_GENERATIONS - 2) { gcstate->long_lived_pending += gc_list_size(young); } gc_list_merge(young, old); } else { /* We only un-track dicts in full collections, to avoid quadratic dict build-up. See issue #14775. */ untrack_dicts(young); gcstate->long_lived_pending = 0; gcstate->long_lived_total = gc_list_size(young); } **//创建一个空的链表用于存放有终结器的对象 //这个列表将包含所有不能立即删除的"问题"对象** /* All objects in unreachable are trash, but objects reachable from * legacy finalizers (e.g. tp_del) can't safely be deleted. */ gc_list_init(&finalizers); // NEXT_MASK_UNREACHABLE is cleared here. // After move_legacy_finalizers(), unreachable is normal list. // 从unreachable列表中找出所有有遗留终结器的对象,移动到finalizers列表 //具体过程: // 遍历unreachable列表中的每个对象 // 检查对象是否有__del__方法或tp_del函数 // 如果有,将该对象从unreachable移动到finalizers // 同时清除对象的NEXT_MASK_UNREACHABLE标记 move_legacy_finalizers(&unreachable, &finalizers); **//找出从终结器对象可达的其他对象,也移动到finalizers列表** /* finalizers contains the unreachable objects with a legacy finalizer; * unreachable objects reachable *from* those are also uncollectable, * and we move those into the finalizers list too. */ move_legacy_finalizer_reachable(&finalizers); validate_list(&finalizers, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); /* Print debugging information. */ if (gcstate->debug & DEBUG_COLLECTABLE) { for (gc = GC_NEXT(&unreachable); gc != &unreachable; gc = GC_NEXT(gc)) { debug_cycle("collectable", FROM_GC(gc)); } } **//清理弱引用并调用相关回调函数。** /* Clear weakrefs and invoke callbacks as necessary. */ m += handle_weakrefs(&unreachable, old); validate_list(old, collecting_clear_unreachable_clear); validate_list(&unreachable, collecting_set_unreachable_clear); **//调用对象的tp_finalize方法** /* Call tp_finalize on objects which have one. */ finalize_garbage(tstate, &unreachable); /* Handle any objects that may have resurrected after the call * to 'finalize_garbage' and continue the collection with the * objects that are still unreachable */ PyGC_Head final_unreachable; handle_resurrected_objects(&unreachable, &final_unreachable, old); /* Call tp_clear on objects in the final_unreachable set. This will cause * the reference cycles to be broken. It may also cause some objects * in finalizers to be freed. */ m += gc_list_size(&final_unreachable); delete_garbage(tstate, gcstate, &final_unreachable, old); /* Collect statistics on uncollectable objects found and print * debugging information. */ for (gc = GC_NEXT(&finalizers); gc != &finalizers; gc = GC_NEXT(gc)) { n++; if (gcstate->debug & DEBUG_UNCOLLECTABLE) debug_cycle("uncollectable", FROM_GC(gc)); } if (gcstate->debug & DEBUG_STATS) { double d = _PyTime_AsSecondsDouble(_PyTime_GetPerfCounter() - t1); PySys_WriteStderr( "gc: done, %zd unreachable, %zd uncollectable, %.4fs elapsed\n", n+m, n, d); } /* Append instances in the uncollectable set to a Python * reachable list of garbage. The programmer has to deal with * this if they insist on creating this type of structure. */ handle_legacy_finalizers(tstate, gcstate, &finalizers, old); validate_list(old, collecting_clear_unreachable_clear); /* Clear free list only during the collection of the highest * generation */ if (generation == NUM_GENERATIONS-1) { clear_freelists(tstate->interp); } if (_PyErr_Occurred(tstate)) { if (nofail) { _PyErr_Clear(tstate); } else { _PyErr_WriteUnraisableMsg("in garbage collection", NULL); } } /* Update stats */ if (n_collected) { *n_collected = m; } if (n_uncollectable) { *n_uncollectable = n; } struct gc_generation_stats *stats = &gcstate->generation_stats[generation]; stats->collections++; stats->collected += m; stats->uncollectable += n; if (PyDTrace_GC_DONE_ENABLED()) { PyDTrace_GC_DONE(n + m); } assert(!_PyErr_Occurred(tstate)); return n + m; }
move_legacy_finalizers(&unreachable, &finalizers)作用 功能:找出从终结器对象可达的其他对象,也移动到finalizers列表 为什么需要这步? Container: def __del__(self): # 可能访问self.data print(f"删除容器: {self.data}") class Data: pass # 创建循环引用 container = Container() data = Data() container.data = data data.container = container 在这个例子中: Container有__del__方法,会被放入finalizers Data对象虽然没有终结器,但从Container可达 如果先删除Data,Container.__del__执行时会出错 因此Data也必须被标记为不可收集 执行过程: 从finalizers中的每个对象开始 通过BFS/DFS遍历所有可达对象 将这些可达对象也移动到finalizers列表 确保终结器执行时不会访问到已删除的对象
6.2.2. finalize_garbage方法
finalize_garbage(PyThreadState *tstate, PyGC_Head *collectable) { destructor finalize; PyGC_Head seen; /* While we're going through the loop, `finalize(op)` may cause op, or * other objects, to be reclaimed via refcounts falling to zero. So * there's little we can rely on about the structure of the input * `collectable` list across iterations. For safety, we always take the * first object in that list and move it to a temporary `seen` list. * If objects vanish from the `collectable` and `seen` lists we don't * care. */ gc_list_init(&seen); while (!gc_list_is_empty(collectable)) { PyGC_Head *gc = GC_NEXT(collectable); PyObject *op = FROM_GC(gc); gc_list_move(gc, &seen); if (!_PyGCHead_FINALIZED(gc) && (finalize = Py_TYPE(op)->tp_finalize) != NULL) { _PyGCHead_SET_FINALIZED(gc); Py_INCREF(op); finalize(op); assert(!_PyErr_Occurred(tstate)); Py_DECREF(op); } } gc_list_merge(&seen, collectable); }
(Objects/typeobject.c) FLSLOT(__init__, tp_init, slot_tp_init, (wrapperfunc)(void(*)(void))wrap_init, "__init__($self, /, *args, **kwargs)\n--\n\n" "Initialize self. See help(type(self)) for accurate signature.", PyWrapperFlag_KEYWORDS), TPSLOT(__new__, tp_new, slot_tp_new, NULL, "__new__(type, /, *args, **kwargs)\n--\n\n" "Create and return new object. See help(type) for accurate signature."), **TPSLOT(__del__, tp_finalize, slot_tp_finalize, (wrapperfunc)wrap_del, ""),** BUFSLOT(__buffer__, bf_getbuffer, slot_bf_getbuffer, wrap_buffer, "__buffer__($self, flags, /)\n--\n\n" "Return a buffer object that exposes the underlying memory of the object."), BUFSLOT(__release_buffer__, bf_releasebuffer, slot_bf_releasebuffer, wrap_releasebuffer, "__release_buffer__($self, buffer, /)\n--\n\n" "Release the buffer object that exposes the underlying memory of the object."), ...... static void slot_tp_finalize(PyObject *self) { int unbound; PyObject *del, *res; /* Save the current exception, if any. */ PyObject *exc = PyErr_GetRaisedException(); /* Execute __del__ method, if any. */ del = lookup_maybe_method(self, &_Py_ID(__del__), &unbound); if (del != NULL) { res = call_unbound_noarg(unbound, del, self); if (res == NULL) PyErr_WriteUnraisable(del); else Py_DECREF(res); Py_DECREF(del); } /* Restore the saved exception. */ PyErr_SetRaisedException(exc); } // 调用del方法 lookup_maybe_method(PyObject *self, PyObject *attr, int *unbound) { PyObject *res = _PyType_Lookup(Py_TYPE(self), attr); if (res == NULL) { return NULL; } if (_PyType_HasFeature(Py_TYPE(res), Py_TPFLAGS_METHOD_DESCRIPTOR)) { /* Avoid temporary PyMethodObject */ *unbound = 1; Py_INCREF(res); } else { *unbound = 0; descrgetfunc f = Py_TYPE(res)->tp_descr_get; if (f == NULL) { Py_INCREF(res); } else { res = f(res, self, (PyObject *)(Py_TYPE(self))); } } return res; }
(Objects/typeobject.c) FLSLOT(__init__, tp_init, slot_tp_init, (wrapperfunc)(void(*)(void))wrap_init, "__init__($self, /, *args, **kwargs)\n--\n\n" "Initialize self. See help(type(self)) for accurate signature.", PyWrapperFlag_KEYWORDS), TPSLOT(__new__, tp_new, slot_tp_new, NULL, "__new__(type, /, *args, **kwargs)\n--\n\n" "Create and return new object. See help(type) for accurate signature."), **TPSLOT(__del__, tp_finalize, slot_tp_finalize, (wrapperfunc)wrap_del, ""),** BUFSLOT(__buffer__, bf_getbuffer, slot_bf_getbuffer, wrap_buffer, "__buffer__($self, flags, /)\n--\n\n" "Return a buffer object that exposes the underlying memory of the object."), BUFSLOT(__release_buffer__, bf_releasebuffer, slot_bf_releasebuffer, wrap_releasebuffer, "__release_buffer__($self, buffer, /)\n--\n\n" "Release the buffer object that exposes the underlying memory of the object."), ...... static void slot_tp_finalize(PyObject *self) { int unbound; PyObject *del, *res; /* Save the current exception, if any. */ PyObject *exc = PyErr_GetRaisedException(); /* Execute __del__ method, if any. */ del = lookup_maybe_method(self, &_Py_ID(__del__), &unbound); if (del != NULL) { res = call_unbound_noarg(unbound, del, self); if (res == NULL) PyErr_WriteUnraisable(del); else Py_DECREF(res); Py_DECREF(del); } /* Restore the saved exception. */ PyErr_SetRaisedException(exc); } // 调用del方法 lookup_maybe_method(PyObject *self, PyObject *attr, int *unbound) { PyObject *res = _PyType_Lookup(Py_TYPE(self), attr); if (res == NULL) { return NULL; } if (_PyType_HasFeature(Py_TYPE(res), Py_TPFLAGS_METHOD_DESCRIPTOR)) { /* Avoid temporary PyMethodObject */ *unbound = 1; Py_INCREF(res); } else { *unbound = 0; descrgetfunc f = Py_TYPE(res)->tp_descr_get; if (f == NULL) { Py_INCREF(res); } else { res = f(res, self, (PyObject *)(Py_TYPE(self))); } } return res; }
在 CPython 中,slotdefs
数组用于将 Python 的特殊方法(如 __getitem__
、__add__
等)映射到类型对象(PyTypeObject
)的相应槽位(slots)。注册过程发生在类型创建时,具体步骤如下:
/* 伪代码:简化版类型创建流程 */ PyObject* type_new(...) { // 1. 创建类型对象 PyTypeObject *type = ...; // 2. 遍历 slotdefs 数组 for (PySlotDef *slot = slotdefs; slot->name; slot++) { // 3. 在类字典中查找特殊方法 PyObject *method = _PyDict_GetItemId(class_dict, slot->name); if (method) { // 4. 将方法绑定到槽位 int res = _PyType_SetSlotFromSpec(type, slot, method); // 处理错误... } } }
/* Cpython里实际代码 */ /* Update the slots after assignment to a class (type) attribute. */ static int update_slot(PyTypeObject *type, PyObject *name) { pytype_slotdef *ptrs[MAX_EQUIV]; pytype_slotdef *p; pytype_slotdef **pp; int offset; assert(PyUnicode_CheckExact(name)); assert(PyUnicode_CHECK_INTERNED(name)); pp = ptrs; for (p = slotdefs; p->name; p++) { assert(PyUnicode_CheckExact(p->name_strobj)); assert(PyUnicode_CHECK_INTERNED(p->name_strobj)); assert(PyUnicode_CheckExact(name)); /* bpo-40521: Using interned strings. */ if (p->name_strobj == name) { *pp++ = p; } } *pp = NULL; for (pp = ptrs; *pp; pp++) { p = *pp; offset = p->offset; while (p > slotdefs && (p-1)->offset == offset) --p; *pp = p; } if (ptrs[0] == NULL) return 0; /* Not an attribute that affects any slots */ return update_subclasses(type, name, update_slots_callback, (void *)ptrs); }
-
-
验证方法签名
-
使用包装函数 将 Python 方法转为 C 可调用形式
-
将函数指针赋给类型槽位(如
type->tp_as_sequence->sq_item
)
-
-
包装函数的作用(如
wrap_binaryfunc
)
static PyObject* wrap_binaryfunc(PyObject *self, PyObject *args) { // 解包参数,调用用户定义的 __getitem__ 等 }
slotdefs
数组在类型创建阶段自动完成特殊方法到槽位的注册:
-
遍历
slotdefs
匹配类字典中的方法 -
使用包装函数桥接 Python/C 接口
-
填充类型对象的槽位函数指针
-
为未定义的方法设置默认实现
这一机制使得 Python 层面的特殊方法能直接影响对象在解释器中的底层行为,是 CPython 实现面向对象特性的核心机制。
update_refs(PyGC_Head *containers) { PyGC_Head *next; PyGC_Head *gc = GC_NEXT(containers); while (gc != containers) { next = GC_NEXT(gc); /* Move any object that might have become immortal to the * permanent generation as the reference count is not accurately * reflecting the actual number of live references to this object */ if (_Py_IsImmortal(FROM_GC(gc))) { gc_list_move(gc, &get_gc_state()->permanent_generation.head); gc = next; continue; } gc_reset_refs(gc, Py_REFCNT(FROM_GC(gc))); /* Python's cyclic gc should never see an incoming refcount * of 0: if something decref'ed to 0, it should have been * deallocated immediately at that time. * Possible cause (if the assert triggers): a tp_dealloc * routine left a gc-aware object tracked during its teardown * phase, and did something-- or allowed something to happen -- * that called back into Python. gc can trigger then, and may * see the still-tracked dying object. Before this assert * was added, such mistakes went on to allow gc to try to * delete the object again. In a debug build, that caused * a mysterious segfault, when _Py_ForgetReference tried * to remove the object from the doubly-linked list of all * objects a second time. In a release build, an actual * double deallocation occurred, which leads to corruption * of the allocator's internal bookkeeping pointers. That's * so serious that maybe this should be a release-build * check instead of an assert? */ _PyObject_ASSERT(FROM_GC(gc), gc_get_refs(gc) != 0); gc = next; } } 将每个对象的 gc_refs 初始化为其 ob_refcnt(原始引用计数)。ob_refcnt 包含了来自 所有来源 的引用,包括: 栈上的变量(函数局部变量) 全局/静态变量 其他代(非当前回收代)的对象 非容器对象(如整数、字符串等) 这些来源共同构成了 GC 的根。 subtract_refs(base): 遍历当前代(base 链表)的对象,对每个对象引用的 其他同代对象,减少其 gc_refs。这相当于减去了 同代对象间的内部引用。 结果: 若对象最终 gc_refs > 0:表示存在 来自根(外部)的引用。 若 gc_refs = 0:对象仅被同代对象引用(形成孤岛),判定为不可达。
-
GC 的根roots 包括:
-
栈上的变量(局部变量)
-
全局/静态变量
-
其他代的对象
-
非容器对象
-
-
这些根通过
ob_refcnt
的初始值 在update_refs
中间接捕获,并在subtract_refs
后通过gc_refs > 0
体现。显式的根扫描(如遍历栈、全局区)发生在 GC 的其他阶段(如分代回收触发前)。
7. 引用计数机制
7.1 引用计数宏定义
// Include/object.h // 增加引用计数 static inline void _Py_INCREF(PyObject *op) { #ifdef Py_REF_DEBUG _Py_RefTotal++; #endif op->ob_refcnt++; } // 减少引用计数 static inline void _Py_DECREF(PyObject *op) { #ifdef Py_REF_DEBUG _Py_RefTotal--; #endif if (--op->ob_refcnt != 0) { return; } _Py_Dealloc(op); // 引用计数为0时销毁对象 } #define Py_INCREF(op) _Py_INCREF(_PyObject_CAST(op)) #define Py_DECREF(op) _Py_DECREF(_PyObject_CAST(op))
7.2 对象销毁机制
// Objects/object.c void _Py_Dealloc(PyObject *op) { destructor dealloc = Py_TYPE(op)->tp_dealloc; #ifdef Py_TRACE_REFS _Py_ForgetReference(op); #endif // 调用类型特定的析构函数 (*dealloc)(op); }
8. 内存布局总结
8.1 对象内存开销
对象类型 | 固定开销 | 可变部分 | 总大小示例 |
---|---|---|---|
int (小) | 28 bytes | 0 | 28 bytes |
int (大) | 28 bytes | 4*n bytes | 28+4n bytes |
str (ASCII) | 49 bytes | 1*n bytes | 49+n bytes |
str (Unicode) | 80 bytes | 4*n bytes | 80+4n bytes |
list | 56 bytes | 8*cap bytes | 56+8*cap bytes |
dict | 240 bytes | 24*n bytes | 240+24n bytes |
// 内存对齐宏 #define _PyObject_SIZE(typeobj) ( (typeobj)->tp_basicsize ) #define _PyObject_VAR_SIZE(typeobj, nitems) \ (size_t) \ ( ( (typeobj)->tp_basicsize + \ (nitems)*(typeobj)->tp_itemsize + \ (SIZEOF_VOID_P-1) \ ) & ~(SIZEOF_VOID_P-1) \ )
9 执行流程
graph TD A[开始回收] --> B[合并年轻代] B --> C[标记可达对象] C --> D[分离不可达对象] D --> E[处理终结器对象] E --> F[处理弱引用] F --> G[执行__del__方法] G --> H[检查复活对象] H --> I[回收最终不可达对象] I --> J[处理不可回收对象] J --> K[更新统计信息] K --> L[结束]