/***
* Copyright 2011-2015 by Gabriel Parmer. All rights reserved.
* Redistribution of this file is permitted under the BSD 2 clause license.
*
* Author: Gabriel Parmer, [email protected], 2011
*
* History:
* - Initial slab allocator, 2011
* - Adapted for parsec, 2015
*/
#include <ps_slab.h>

/*
 * Default allocation and deallocation functions: they assume the
 * slab header is internal to the slab's memory.
 */
struct ps_slab *
ps_slab_defalloc(struct ps_mem *m, size_t sz, coreid_t coreid)
{
	struct ps_slab *s = ps_plat_alloc(sz, coreid);
	(void)coreid; (void)m;

	if (!s) return NULL;
	s->memory = s;

	return s;
}

void
ps_slab_deffree(struct ps_mem *m, struct ps_slab *s, size_t sz, coreid_t coreid)
{ (void)m; ps_plat_free(s, sz, coreid); }
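
/*
 * Example (a sketch, not part of the library): a custom backing
 * allocator/deallocator pair with the same signatures as the
 * defaults above, here adding simple accounting of live backing
 * allocations.  `my_slab_alloc', `my_slab_free', and `nbacking' are
 * hypothetical names.
 */
#if 0
static unsigned long nbacking = 0;

struct ps_slab *
my_slab_alloc(struct ps_mem *m, size_t sz, coreid_t coreid)
{
	struct ps_slab *s = ps_slab_defalloc(m, sz, coreid);

	if (s) nbacking++;
	return s;
}

void
my_slab_free(struct ps_mem *m, struct ps_slab *s, size_t sz, coreid_t coreid)
{
	nbacking--;
	ps_slab_deffree(m, s, sz, coreid);
}
#endif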

void
__ps_slab_init(struct ps_slab *s, struct ps_slab_info *si, PS_SLAB_PARAMS)
{
	size_t nfree, i;
	size_t objmemsz = __ps_slab_objmemsz(obj_sz);
	struct ps_mheader *alloc, *prev;
	PS_SLAB_DEWARN;

	s->nfree  = nfree = (allocsz - headoff) / objmemsz;
	s->memsz  = allocsz;
	s->coreid = ps_coreid();

	/*
	 * Set up the slab's freelist
	 *
	 * TODO: cache coloring
	 */
	alloc = (struct ps_mheader *)((char *)s->memory + headoff);
	prev  = s->freelist = alloc;
	for (i = 0 ; i < nfree ; i++, prev = alloc, alloc = (struct ps_mheader *)((char *)alloc + objmemsz)) {
		__ps_mhead_init(alloc, s);
		prev->next = alloc;
	}
	/* better not overrun memory */
	assert((void *)alloc <= (void *)((char *)s->memory + allocsz));

	ps_list_init(s, list);
	__slab_freelist_add(&si->fl, s);
	__ps_slab_freelist_check(&si->fl);
}
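
/*
 * Resulting slab layout (a sketch, shown for the common case where
 * the struct ps_slab header is internal to the slab's memory, so
 * headoff covers it):
 *
 *   s->memory                                                 + allocsz
 *   |<- headoff ->|<- objmemsz ->|<- objmemsz ->| ... |<- objmemsz ->|
 *   [  ps_slab    | mhead + obj  | mhead + obj  |     | mhead + obj  ]
 *
 * Each object is preceded by its struct ps_mheader, and those
 * headers are chained together into s->freelist.
 */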

/*
 * This is not thread-safe: it may miss objects that are freed
 * remotely while it runs.  Use it only for approximate accounting or
 * debugging.
 */
int
__ps_remote_free_cnt(struct ps_mheader *h)
{
	struct ps_mheader *t;
	int ret = 0;

	for (t = h; t; t = t->next, ret++) ;

	return ret;
}

void
ps_slabptr_init(struct ps_mem *m)
{
	/*
	 * ns_info, slab_info, and smr_info are all inlined into struct
	 * ps_mem (see ps_global.h), so this single memset initializes
	 * everything.
	 */
	memset(m, 0, sizeof(struct ps_mem));
}
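
/*
 * Example (a sketch): zero-initialize a ps_mem and check that it
 * starts out empty; `example_mem_init' is a hypothetical caller.
 */
#if 0
void
example_mem_init(void)
{
	struct ps_mem m;

	ps_slabptr_init(&m);
	assert(ps_slabptr_isempty(&m));
}
#endif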

void
ps_slabptr_stats(struct ps_mem *m, struct ps_slab_stats *stats)
{
	int i, j, k;
	struct ps_slab *s;
	struct ps_mem_percore *pc;

	memset(stats, 0, sizeof(struct ps_slab_stats));
	for (i = 0 ; i < PS_NUMCORES ; i++) {
		pc = &m->percore[i];
		s  = pc->slab_info.fl.list;
		stats->percore[i].nslabs = pc->slab_info.nslabs;
		/* walk the circular freelist of partially-full slabs */
		do {
			if (!s) break;
			stats->percore[i].npartslabs++;
			stats->percore[i].nfree += s->nfree;
			s = ps_list_next(s, list);
		} while (s != pc->slab_info.fl.list);

		for (j = 0 ; j < PS_NUMLOCALITIES ; j++) {
			for (k = 0 ; k < PS_NUMLOCALITIES ; k++) {
				stats->percore[i].nremote += __ps_remote_free_cnt(pc->slab_remote[j].remote_frees[k]);
			}
		}
	}
}
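
/*
 * Example (a sketch): aggregate the per-core counts gathered above
 * into a single total of free objects; `example_total_free' is a
 * hypothetical helper.
 */
#if 0
unsigned long
example_total_free(struct ps_mem *m)
{
	struct ps_slab_stats stats;
	unsigned long nfree = 0;
	int i;

	ps_slabptr_stats(m, &stats);
	for (i = 0 ; i < PS_NUMCORES ; i++) nfree += stats.percore[i].nfree;

	return nfree;
}
#endif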

int
ps_slabptr_isempty(struct ps_mem *m)
{
	int i, j, k;
	struct ps_mem_percore *pc;

	for (i = 0 ; i < PS_NUMCORES ; i++) {
		pc = &m->percore[i];
		if (pc->slab_info.nslabs) return 0;

		for (j = 0 ; j < PS_NUMLOCALITIES ; j++) {
			for (k = 0 ; k < PS_NUMLOCALITIES ; k++) {
				if (pc->slab_remote[j].remote_frees[k]) return 0;
			}
		}
	}

	return 1;
}

void
__ps_slab_mem_remote_free(struct ps_mem *mem, struct ps_mheader *h, coreid_t core_target)
{
	struct ps_slab_remote_list *r;
	coreid_t tmpcoreid;
	localityid_t numaid;

	ps_tsc_locality(&tmpcoreid, &numaid);
	/* push h onto the target core's remote freelist for this NUMA node */
	r = &mem->percore[core_target].slab_remote[numaid];
	__ps_rfl_stack_push(&(r->remote_frees[tmpcoreid % NUM_REMOTE_LIST]), h);
}

static inline int
__ps_slab_mem_remote_clear(struct ps_mem *mem, int locality, PS_SLAB_PARAMS)
{
	int ret = 0;
	unsigned int i;
	struct ps_mheader *h, *n;
	struct ps_slab_remote_list *r = &mem->percore[coreid].slab_remote[locality];

	for (i = 0 ; i < NUM_REMOTE_LIST ; i++) {
		h = r->remote_frees[i];
		/* detach the whole remote freelist, then drain it into the local slabs */
		if (h) h = __ps_rfl_stack_remove_all(&(r->remote_frees[i]));
		while (h) {
			n       = h->next;
			h->next = NULL;
			__ps_slab_mem_free(__ps_mhead_mem(h), mem, PS_SLAB_ARGS);
			h       = n;
			ret    += 1;
		}
	}

	return ret;
}

/*
 * This function wants to contend on cache-lines with another NUMA
 * node at most once, or else the latency will blow up.  It can
 * detect such contention fairly well from whether there are, or
 * aren't, any items in the remote freelist.  Thus, this function
 * processes the remote free lists of exactly _one_ remote NUMA node
 * each time it is called.
 */
void
__ps_slab_mem_remote_process(struct ps_mem *mem, struct ps_slab_info *si, PS_SLAB_PARAMS)
{
	int ret;
	unsigned long locality = si->remote_token;
	PS_SLAB_DEWARN;

	/* round-robin through the localities, stopping after the first that yields objects */
	do {
		ret      = __ps_slab_mem_remote_clear(mem, locality, PS_SLAB_ARGS);
		locality = (locality + 1) % PS_NUMLOCALITIES;
	} while (!ret && locality != si->remote_token);
	si->remote_token = locality;
}
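
/*
 * Worked example: with PS_NUMLOCALITIES == 4 and remote_token == 2,
 * a call scans localities 2, 3, 0, 1 in that order, stops after the
 * first one that yields freed objects, and leaves remote_token at
 * the locality following it, so successive calls rotate fairly
 * through the NUMA nodes.
 */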