多线程内存池

如果一个内存池需要线程同步了,估计和默认的内存操作也差不了多远了。现在对内存操作的优化只是在优化线程同步操作上面了。默认的lock和unlock可能实现得过于完美,因而要求更多的cpu周期。如果选择更原子的lock和unlock实现,还是可以加快内存操作的速度的。

多线程内存池在实现上也就是在申请和释放外面包裹了一对加锁和解锁操作而已。

如果我们采用模板编程,就可以实现一个功能强悍的模板类,该模板类有两个参数,单线程内存池和锁。这样的话,可以组合出很多情况。比如说,单线程内存池有对象大小固定和不固定两种实现,锁在Windows下可以用临界区,互斥体,信号量,事件实现。这样最多可以组合出8种不同的实例化。

一般说来互斥体比临界区慢很多,在这里可以进行很好的测试。我实现了临界区和互斥体版本的锁,经过测试发现前者比后者快30多倍。因为互斥体据说是内核对象,而临界区是用户态对象,那么互斥体的使用就需要系统在用户态和内核态之间进行切换,肯定会消耗更多的时间。

互斥体版本代码如下,

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <windows.h>

template <typename T> class MemoryPool
{
public:
MemoryPool(size_t size = EXPANSION_SIZE)
{
expandTheFreeList(size);
}
~MemoryPool();
//allocate a T element from the free list.
void* alloc(size_t size)
{
if (next == NULL)
{
expandTheFreeList();
}
MemoryPool<T>* head = next;
next = head->next;
return head;
}
//return a T element to the free list.
void free(void* doomed)
{
MemoryPool<T>* head = static_cast< MemoryPool<T>* > (doomed);
head->next = next;
next = head;
}

private:
//next element on the free list.
MemoryPool<T>* next;
//if the freelist is empty, expand it by this amount.
enum {EXPANSION_SIZE = 32};
//add free elements to the free list
void expandTheFreeList(int howMany = EXPANSION_SIZE);
};

template <typename T> MemoryPool<T> :: ~MemoryPool()
{
MemoryPool<T>* nextPtr = NULL;
while (nextPtr)
{
nextPtr = next;
next = next->next;
delete [] nextPtr;
}
}

template <typename T> void MemoryPool<T> :: expandTheFreeList(int howMany)
{
//we must allocate an object enough to contain the next pointer
size_t size = sizeof(T) > sizeof(MemoryPool<T>*) ? sizeof(T) :
sizeof(MemoryPool<T>*);

MemoryPool<T>* runner = (MemoryPool<T>*) new char[size];
next = runner;
for (int i = 0; i < howMany; ++i)
{
runner->next = (MemoryPool<T>*) new char[size];
runner = runner->next;
}
runner->next = NULL;
}

class ABClock //abstract base class
{
public:
virtual ~ABClock() {}
virtual void lock() = 0;
virtual void unlock() = 0;
};

class MutexLock : public ABClock
{
public:
MutexLock()
{
hMutex = CreateMutex(NULL, FALSE, NULL);
}
~MutexLock()
{
CloseHandle(hMutex);
}
void lock()
{
WaitForSingleObject(hMutex, INFINITE);
}
void unlock()
{
ReleaseMutex(hMutex);
}
private:
HANDLE hMutex;
};

template <typename POOLTYPE, typename LOCK>
class MTMemoryPool
{
public:
//allocate an element from the freelist.
void* alloc(size_t size)
{
void* mem;
theLock.lock();
mem = stPool.alloc(size);
theLock.unlock();
return mem;
}

//return an element to the freelist
void free(void* someElement)
{
theLock.lock();
stPool.free(someElement);
theLock.unlock();
}

private:
POOLTYPE stPool;//Single-threaded pool.
LOCK theLock;
};

class Rational
{
public:
Rational(int a = 0, int b = 1) : n(a), d(b) {}
void* operator new(size_t size)
{
return memPool->alloc(size);
}
void operator delete(void* doomed, size_t size)
{
memPool->free(doomed);
}
static void newMemPool()
{
memPool = new MTMemoryPool< MemoryPool<Rational>, MutexLock>;
}
static void deleteMemPool()
{
delete memPool;
}

private:
int n;
int d;
static MTMemoryPool< MemoryPool<Rational>, MutexLock>* memPool;
};
MTMemoryPool< MemoryPool<Rational>, MutexLock>* Rational::memPool = NULL;

int main()
{
const int ARRAY_SIZE = 1000;
const int LOOP_TIMES = 5000;

Rational* array[ARRAY_SIZE];
clock_t beg = clock();
Rational::newMemPool();
for (int i = 0; i < LOOP_TIMES; ++i)
{
for (int j = 0; j < ARRAY_SIZE; ++j)
{
array[j] = new Rational(j);
}
for (int j = 0; j < ARRAY_SIZE; ++j)
{
delete array[j];
}
}
clock_t end = clock();
printf("use %f second(s).\n", 1.0 * (end - beg) / CLOCKS_PER_SEC);
system("pause");
Rational::deleteMemPool();

return 0;
}

运行结果,

临界区版本代码如下,

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <windows.h>

template <typename T> class MemoryPool
{
public:
MemoryPool(size_t size = EXPANSION_SIZE)
{
expandTheFreeList(size);
}
~MemoryPool();
//allocate a T element from the free list.
void* alloc(size_t size)
{
if (next == NULL)
{
expandTheFreeList();
}
MemoryPool<T>* head = next;
next = head->next;
return head;
}
//return a T element to the free list.
void free(void* doomed)
{
MemoryPool<T>* head = static_cast< MemoryPool<T>* > (doomed);
head->next = next;
next = head;
}

private:
//next element on the free list.
MemoryPool<T>* next;
//if the freelist is empty, expand it by this amount.
enum {EXPANSION_SIZE = 32};
//add free elements to the free list
void expandTheFreeList(int howMany = EXPANSION_SIZE);
};

template <typename T> MemoryPool<T> :: ~MemoryPool()
{
MemoryPool<T>* nextPtr = NULL;
while (nextPtr)
{
nextPtr = next;
next = next->next;
delete [] nextPtr;
}
}

template <typename T> void MemoryPool<T> :: expandTheFreeList(int howMany)
{
//we must allocate an object enough to contain the next pointer
size_t size = sizeof(T) > sizeof(MemoryPool<T>*) ? sizeof(T) :
sizeof(MemoryPool<T>*);

MemoryPool<T>* runner = (MemoryPool<T>*) new char[size];
next = runner;
for (int i = 0; i < howMany; ++i)
{
runner->next = (MemoryPool<T>*) new char[size];
runner = runner->next;
}
runner->next = NULL;
}

class ABClock //abstract base class
{
public:
virtual ~ABClock() {}
virtual void lock() = 0;
virtual void unlock() = 0;
};

class CriticalSectionLock : public ABClock
{
public:
CriticalSectionLock()
{
InitializeCriticalSection(csMyCriticalSection);
}
~CriticalSectionLock()
{
DeleteCriticalSection(csMyCriticalSection);
}
void lock()
{
EnterCriticalSection(csMyCriticalSection);
}
void unlock()
{
LeaveCriticalSection(csMyCriticalSection);
}
private:
CRITICAL_SECTION csMyCriticalSection;
};

template <typename POOLTYPE, typename LOCK>
class MTMemoryPool
{
public:
//allocate an element from the freelist.
void* alloc(size_t size)
{
void* mem;
theLock.lock();
mem = stPool.alloc(size);
theLock.unlock();
return mem;
}

//return an element to the freelist
void free(void* someElement)
{
theLock.lock();
stPool.free(someElement);
theLock.unlock();
}

private:
POOLTYPE stPool;//Single-threaded pool.
LOCK theLock;
};

class Rational
{
public:
Rational(int a = 0, int b = 1) : n(a), d(b) {}
void* operator new(size_t size)
{
return memPool->alloc(size);
}
void operator delete(void* doomed, size_t size)
{
memPool->free(doomed);
}
static void newMemPool()
{
memPool = new MTMemoryPool< MemoryPool<Rational>, CriticalSectionLock>;
}
static void deleteMemPool()
{
delete memPool;
}

private:
int n;
int d;
static MTMemoryPool< MemoryPool<Rational>, CriticalSectionLock>* memPool;
};
MTMemoryPool< MemoryPool<Rational>, CriticalSectionLock>* Rational::memPool = NULL;

int main()
{
const int ARRAY_SIZE = 1000;
const int LOOP_TIMES = 5000;

Rational* array[ARRAY_SIZE];
clock_t beg = clock();
Rational::newMemPool();
for (int i = 0; i < LOOP_TIMES; ++i)
{
for (int j = 0; j < ARRAY_SIZE; ++j)
{
array[j] = new Rational(j);
}
for (int j = 0; j < ARRAY_SIZE; ++j)
{
delete array[j];
}
}
clock_t end = clock();
printf("use %f second(s).\n", 1.0 * (end - beg) / CLOCKS_PER_SEC);
system("pause");
Rational::deleteMemPool();

return 0;
}

运行结果,

两个版本的代码只在线程同步锁的实现上有差别,但是速度却相差了30多倍。