forked from naver/arcus-memcached
-
Notifications
You must be signed in to change notification settings - Fork 0
/
memcached.c
14858 lines (13345 loc) · 521 KB
/
memcached.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* -*- Mode: C; tab-width: 4; c-basic-offset: 4; indent-tabs-mode: nil -*- */
/*
* arcus-memcached - Arcus memory cache server
* Copyright 2010-2014 NAVER Corp.
* Copyright 2014-2015 JaM2in Co., Ltd.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* memcached - memory caching daemon
*
* http://www.danga.com/memcached/
*
* Copyright 2003 Danga Interactive, Inc. All rights reserved.
*
* Use and distribution licensed under the BSD license. See
* the LICENSE file for full text.
*
* Authors:
* Anatoly Vorobey <[email protected]>
* Brad Fitzpatrick <[email protected]>
*/
#include "config.h"
#include "memcached.h"
#include "memcached/extension_loggers.h"
#ifdef ENABLE_ZK_INTEGRATION
#include "arcus_zk.h"
#endif
#if defined(ENABLE_SASL) || defined(ENABLE_ISASL)
#define SASL_ENABLED
#endif
#define ZK_CONNECTIONS 1
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <signal.h>
#include <getopt.h>
#include <fcntl.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <assert.h>
#include <limits.h>
#include <ctype.h>
#include <stdarg.h>
#include <stddef.h>
/*
 * Collection size limits (element counts).
 *
 * MAX_LIST_SIZE, MAX_SET_SIZE and MAX_BTREE_SIZE are the defaults that
 * settings_init() copies into settings.max_list_size, settings.max_set_size
 * and settings.max_btree_size.
 *
 * NOTE(review): ARCUS_COLL_SIZE_LIMIT looks like a hard upper bound for those
 * settings; its use is outside this section of the file - confirm.
 */
static int ARCUS_COLL_SIZE_LIMIT = 1000000;
static int MAX_LIST_SIZE = 50000;
static int MAX_SET_SIZE = 50000;
static int MAX_BTREE_SIZE = 50000;
/* The item must always be called "it" */
#define SLAB_GUTS(conn, thread_stats, slab_op, thread_op) \
thread_stats->slab_stats[info.clsid].slab_op++;
#define THREAD_GUTS(conn, thread_stats, slab_op, thread_op) \
thread_stats->thread_op++;
#define THREAD_GUTS2(conn, thread_stats, slab_op, thread_op) \
thread_stats->slab_op++; \
thread_stats->thread_op++;
#define SLAB_THREAD_GUTS(conn, thread_stats, slab_op, thread_op) \
SLAB_GUTS(conn, thread_stats, slab_op, thread_op) \
THREAD_GUTS(conn, thread_stats, slab_op, thread_op)
#define STATS_INCR1(GUTS, conn, slab_op, thread_op, key, nkey) { \
struct independent_stats *independent_stats = get_independent_stats(conn); \
struct thread_stats *thread_stats = \
&independent_stats->thread_stats[conn->thread->index]; \
topkeys_t *topkeys = independent_stats->topkeys; \
pthread_mutex_lock(&thread_stats->mutex); \
GUTS(conn, thread_stats, slab_op, thread_op); \
pthread_mutex_unlock(&thread_stats->mutex); \
TK(topkeys, slab_op, key, nkey, current_time); \
}
#define STATS_INCR(conn, op, key, nkey) \
STATS_INCR1(THREAD_GUTS, conn, op, op, key, nkey)
#define SLAB_INCR(conn, op, key, nkey) \
STATS_INCR1(SLAB_GUTS, conn, op, op, key, nkey)
#define STATS_TWO(conn, slab_op, thread_op, key, nkey) \
STATS_INCR1(THREAD_GUTS2, conn, slab_op, thread_op, key, nkey)
#define SLAB_TWO(conn, slab_op, thread_op, key, nkey) \
STATS_INCR1(SLAB_THREAD_GUTS, conn, slab_op, thread_op, key, nkey)
#define STATS_HIT(conn, op, key, nkey) \
SLAB_TWO(conn, op##_hits, cmd_##op, key, nkey)
#define STATS_HITS(conn, op, key, nkey) \
STATS_TWO(conn, op##_hits, cmd_##op, key, nkey)
#define STATS_OKS(conn, op, key, nkey) \
STATS_TWO(conn, op##_oks, cmd_##op, key, nkey)
#define STATS_ELEM_HITS(conn, op, key, nkey) \
STATS_TWO(conn, op##_elem_hits, cmd_##op, key, nkey)
#define STATS_NONE_HITS(conn, op, key, nkey) \
STATS_TWO(conn, op##_none_hits, cmd_##op, key, nkey)
#define STATS_MISS(conn, op, key, nkey) \
STATS_TWO(conn, op##_misses, cmd_##op, key, nkey)
#define STATS_NOKEY(conn, op) { \
struct thread_stats *thread_stats = \
get_thread_stats(conn); \
pthread_mutex_lock(&thread_stats->mutex); \
thread_stats->op++; \
pthread_mutex_unlock(&thread_stats->mutex); \
}
#define STATS_NOKEY2(conn, op1, op2) { \
struct thread_stats *thread_stats = \
get_thread_stats(conn); \
pthread_mutex_lock(&thread_stats->mutex); \
thread_stats->op1++; \
thread_stats->op2++; \
pthread_mutex_unlock(&thread_stats->mutex); \
}
#define STATS_ADD(conn, op, amt) { \
struct thread_stats *thread_stats = \
get_thread_stats(conn); \
pthread_mutex_lock(&thread_stats->mutex); \
thread_stats->op += amt; \
pthread_mutex_unlock(&thread_stats->mutex); \
}
#define GET_8ALIGN_SIZE(size) \
(((size) % 8) == 0 ? (size) : ((size) + (8 - ((size) % 8))))
/* NOTE(review): presumably set non-zero to request shutdown; the writer is
 * outside this section of the file - confirm. */
volatile sig_atomic_t memcached_shutdown=0;
/*
 * We keep the current time of day in a global variable that's updated by a
 * timer event. This saves us a bunch of time() system calls (we really only
 * need to get the time once a second, whereas there can be tens of thousands
 * of requests a second) and allows us to use server-start-relative timestamps
 * rather than absolute UNIX timestamps, a space savings on systems where
 * sizeof(time_t) > sizeof(unsigned int).
 */
volatile rel_time_t current_time;
/** exported globals **/
struct settings settings;
struct mc_stats mc_stats;
EXTENSION_LOGGER_DESCRIPTOR *mc_logger;
/*
 * Two typed views of the engine handle.  Engine entry points are called as
 * mc_engine.v1->fn(mc_engine.v0, ...).
 */
static union {
ENGINE_HANDLE *v0;
ENGINE_HANDLE_V1 *v1;
} mc_engine;
static time_t process_started; /* set by stats_init() to time(0) - 2, i.e. slightly before the real start */
/* A 4-byte integer printed in decimal needs at most 10 characters. */
static int lenstr_size = 10;
/** file scope variables **/
static conn *listen_conn = NULL;
static struct event_base *main_base;
static struct independent_stats *default_independent_stats;
/* One singly linked handler list per engine event type; register_callback()
 * pushes onto a list and perform_callbacks() walks it. */
static struct engine_event_handler *engine_event_handlers[MAX_ENGINE_EVENT_TYPE + 1];
#ifdef ENABLE_ZK_INTEGRATION
static char *arcus_zk_cfg = NULL;
#endif
#ifdef COMMAND_LOGGING
static bool cmdlog_in_use = false;
#endif
#ifdef DETECT_LONG_QUERY
static bool lqdetect_in_use = false;
#endif
/*
 * Forward declarations for functions and result codes that are used before
 * their definitions in this file.
 */
static int new_socket(struct addrinfo *ai);
static int try_read_command(conn *c);
static inline struct independent_stats *get_independent_stats(conn *c);
static inline struct thread_stats *get_thread_stats(conn *c);
/* Result of one attempt to read from a connection. */
enum try_read_result {
READ_DATA_RECEIVED,
READ_NO_DATA_RECEIVED,
READ_ERROR, /** an error occurred (on the socket) (or client closed connection) */
READ_MEMORY_ERROR /** failed to allocate more memory */
};
static enum try_read_result try_read_network(conn *c);
static enum try_read_result try_read_udp(conn *c);
/* stats */
static void stats_init(void);
static void server_stats(ADD_STAT add_stats, conn *c, bool aggregate);
static void process_stat_settings(ADD_STAT add_stats, void *c);
/* defaults */
static void settings_init(void);
/* event handling, network IO */
static void event_handler(const int fd, const short which, void *arg);
static bool update_event(conn *c, const int new_flags);
static void complete_nread(conn *c);
static void process_command(conn *c, char *command);
static void write_and_free(conn *c, char *buf, int bytes);
static int ensure_iov_space(conn *c);
static int add_iov(conn *c, const void *buf, int len);
static int add_msghdr(conn *c);
/* Result of one attempt to write pending output to a connection. */
enum transmit_result {
TRANSMIT_COMPLETE, /** All done writing. */
TRANSMIT_INCOMPLETE, /** More data remaining to write. */
TRANSMIT_SOFT_ERROR, /** Can't write any more right now. */
TRANSMIT_HARD_ERROR /** Can't write (c->state is set to conn_closing) */
};
static enum transmit_result transmit(conn *c);
/* time-sensitive callers can call it by hand with this,
* outside the normal ever-1-second timer
*/
static void set_current_time(void)
{
struct timeval timer;
gettimeofday(&timer, NULL);
current_time = (rel_time_t) (timer.tv_sec - process_started);
}
/* Return the cached, server-relative current time. */
static rel_time_t get_current_time(void)
{
    const rel_time_t snapshot = current_time;
    return snapshot;
}
/*
 * Largest expiration value, in seconds (30 days), that is still interpreted
 * as a delta from "now" rather than as an absolute UNIX timestamp.
 * The replacement list is parenthesized so the macro stays a single operand
 * in any surrounding expression.
 */
#define REALTIME_MAXDELTA (60*60*24*30)
/*
 * Convert a client-supplied expiration time into server-relative time.
 *
 * @param exptime either an absolute UNIX timestamp or a delta in seconds
 *                from the current time; values above REALTIME_MAXDELTA are
 *                treated as absolute timestamps.
 * @return 0 when exptime is 0 (never expire); (rel_time_t)(-1) when exptime
 *         is -1 and ENABLE_STICKY_ITEM is defined; otherwise the expiration
 *         expressed in seconds relative to process_started.
 */
static rel_time_t realtime(const time_t exptime) {
    if (exptime == 0) return 0; /* 0 means never expire */
#ifdef ENABLE_STICKY_ITEM
    if (exptime == -1) return (rel_time_t)(-1);
#endif
    if (exptime > REALTIME_MAXDELTA) {
        /* Absolute timestamp.  If it lies at or before server start, report
           1 second after start: 0 would mean "never expire", and the
           subtraction below would otherwise wrap around to a huge value far
           in the future, so an already-expired item would never expire. */
        if (exptime <= process_started) {
            return (rel_time_t)1;
        }
        return (rel_time_t)(exptime - process_started);
    } else {
        /* Delta: offset from the cached current (relative) time. */
        return (rel_time_t)(exptime + current_time);
    }
}
/*
 * Zero the global connection counters, record the process start time and
 * initialize the prefix statistics.
 */
static void stats_init(void) {
    mc_stats.conn_structs = 0;
    mc_stats.total_conns = 0;
    mc_stats.curr_conns = 0;
    mc_stats.quit_conns = 0;
    mc_stats.rejected_conns = 0;
    mc_stats.daemon_conns = 0;

    /* Pretend we started 2 seconds earlier than we really did, so that
       "now - process_started" is never zero.  Values such as
       settings.oldest_live double as booleans, and a zero would read as
       false in boolean context. */
    process_started = time(0) - 2;

    stats_prefix_init();
}
static void stats_reset(const void *cookie) {
struct conn *conn = (struct conn*)cookie;
STATS_LOCK();
mc_stats.rejected_conns = 0;
mc_stats.quit_conns = 0;
mc_stats.total_conns = 0;
stats_prefix_clear();
STATS_UNLOCK();
threadlocal_stats_reset(get_independent_stats(conn)->thread_stats);
mc_engine.v1->reset_stats(mc_engine.v0, cookie);
}
/* Populate the global settings structure with its built-in defaults. */
static void settings_init(void) {
    /* --- listening endpoints --- */
    settings.port = 11211;
    settings.udpport = 11211;
    settings.inter = NULL;                  /* must be NULL by default for getaddrinfo() */
    settings.socketpath = NULL;             /* by default, not using a unix socket */
    settings.access = 0700;
    settings.backlog = 1024;
    settings.binding_protocol = negotiating_prot;
    settings.maxconns = 1024;               /* to limit connections-related memory to about 5MB */

    /* --- memory and item sizing --- */
    settings.maxbytes = 64 * 1024 * 1024;   /* default is 64MB */
    settings.item_size_max = 1024 * 1024;   /* The famous 1MB upper limit. */
    settings.chunk_size = 48;               /* space for a modest key and value */
    settings.factor = 1.25;
    settings.evict_to_free = 1;             /* push old items out of cache when memory runs out */
    settings.sticky_ratio = 0;              /* default: 0 */
    settings.oldest_live = 0;
    settings.use_cas = true;

    /* --- collection limits --- */
    settings.max_list_size = MAX_LIST_SIZE;
    settings.max_set_size = MAX_SET_SIZE;
    settings.max_btree_size = MAX_BTREE_SIZE;

    /* --- threading and request scheduling --- */
    settings.num_threads = 4;               /* N workers */
    settings.reqs_per_event = DEFAULT_REQS_PER_EVENT;

    /* --- statistics and diagnostics --- */
    settings.verbose = 0;
    settings.prefix_delimiter = ':';
    settings.detail_enabled = 0;
    settings.allow_detailed = true;
    settings.topkeys = 0;

    /* --- security and extensions --- */
    settings.require_sasl = false;
    settings.extensions.logger = get_stderr_logger();
}
/*
 * Append a fresh, zeroed message header to the connection's msglist and
 * point it at the next unused iovec.  The list is doubled in size when it
 * is full.
 *
 * Returns 0 on success, -1 on out-of-memory.
 */
static int add_msghdr(conn *c)
{
    assert(c != NULL);

    /* Every slot is taken: double the array before handing one out. */
    if (c->msgused == c->msgsize) {
        struct msghdr *grown =
            realloc(c->msglist, c->msgsize * 2 * sizeof(struct msghdr));
        if (grown == NULL) {
            return -1;
        }
        c->msglist = grown;
        c->msgsize *= 2;
    }

    struct msghdr *hdr = &c->msglist[c->msgused];

    /* Zeroing also clears msg_iovlen, msg_control, msg_controllen and
       msg_flags; the last three aren't defined on solaris. */
    memset(hdr, 0, sizeof(struct msghdr));
    hdr->msg_iov = &c->iov[c->iovused];
    if (c->request_addr_size > 0) {
        hdr->msg_name = &c->request_addr;
        hdr->msg_namelen = c->request_addr_size;
    }

    c->msgbytes = 0;
    c->msgused++;

    if (!IS_UDP(c->transport)) {
        return 0;
    }
    /* Leave room for the UDP header, which we'll fill in later. */
    return add_iov(c, NULL, UDP_HEADER_SIZE);
}
/*
 * Map a protocol enumerator to a short printable name.
 * Values that match none of the cases yield "unknown".
 */
static const char *prot_text(enum protocol prot) {
    switch (prot) {
    case ascii_prot:
        return "ascii";
    case binary_prot:
        return "binary";
    case negotiating_prot:
        return "auto-negotiate";
    }
    return "unknown";
}
/*
 * Close a socket descriptor, retrying while close() fails with EINTR or
 * EAGAIN.  A descriptor of -1 is ignored.  On success the global curr_conns
 * counter is decremented; on any other failure a warning is logged.
 *
 * NOTE(review): the state of a descriptor after close() fails with EINTR is
 * platform dependent, so the retry may be unsafe on some systems - confirm
 * for the supported targets.
 */
void safe_close(int sfd) {
    if (sfd == -1) {
        return;
    }

    int rc;
    do {
        rc = close(sfd);
        /* go ahead and retry on EINTR / EAGAIN */
    } while (rc == -1 && (errno == EINTR || errno == EAGAIN));

    if (rc != -1) {
        STATS_LOCK();
        mc_stats.curr_conns--;
        STATS_UNLOCK();
        return;
    }

    mc_logger->log(EXTENSION_LOG_WARNING, NULL,
                   "Failed to close socket %d (%s)!!\n",
                   (int)sfd, strerror(errno));
}
/*
 * Register a callback for an engine event type.
 *
 * The new handler is pushed onto the front of the list for that type, so
 * perform_callbacks() invokes the most recently registered handler first.
 *
 * @param eh      engine handle (not used by this function)
 * @param type    event type; selects the slot in engine_event_handlers
 * @param cb      function to invoke when the event fires
 * @param cb_data opaque pointer handed back to cb on every invocation
 *
 * If the handler node cannot be allocated, debug builds still stop at the
 * assert; builds with NDEBUG log a warning and leave the callback
 * unregistered instead of dereferencing a NULL pointer.
 */
static void register_callback(ENGINE_HANDLE *eh,
                              ENGINE_EVENT_TYPE type,
                              EVENT_CALLBACK cb, const void *cb_data) {
    (void)eh;

    struct engine_event_handler *h = calloc(1, sizeof(*h));
    assert(h);
    if (h == NULL) {
        mc_logger->log(EXTENSION_LOG_WARNING, NULL,
                       "Failed to allocate memory for an engine event handler\n");
        return;
    }

    h->cb = cb;
    h->cb_data = cb_data;
    h->next = engine_event_handlers[type];
    engine_event_handlers[type] = h;
}
/*
 * Invoke every handler registered for the given event type.
 *
 * @param type event type whose handler list is walked
 * @param data event-specific payload passed through to each handler
 * @param c    the connection the event refers to, passed as the cookie
 */
static void perform_callbacks(ENGINE_EVENT_TYPE type,
                              const void *data,
                              const void *c) {
    struct engine_event_handler *cur = engine_event_handlers[type];

    while (cur != NULL) {
        cur->cb(c, type, data, cur->cb_data);
        cur = cur->next;
    }
}
/*
 * Free list management for connections.
 */
cache_t *conn_cache; /* object cache that conn_new() allocates conn structures from */
/**
 * Allocate a replacement buffer of nbytes bytes.  The old buffer is freed
 * only when the new allocation succeeded.
 *
 * @param old    buffer currently in use (may be NULL)
 * @param nbytes size of the replacement buffer
 * @return the new buffer, or NULL (with old left untouched) on failure
 */
static void *conn_fresh_buffer(void *old, size_t nbytes) {
    void *fresh = malloc(nbytes);
    if (fresh != NULL) {
        free(old);
    }
    return fresh;
}

/**
 * Bring every dynamic buffer of a connection back to its default size.
 *
 * A buffer whose recorded size differs from the default is replaced by a
 * newly allocated one rather than resized with realloc (realloc may not
 * shrink the buffer and would also copy the old contents).  When an
 * allocation fails, that buffer and its recorded size are left unchanged.
 *
 * @param c the connection to resize the buffers for
 * @return true if all allocations succeeded, false if one or more of the
 *         allocations failed.
 */
static bool conn_reset_buffersize(conn *c) {
    bool all_ok = true;
    void *fresh;

    /* read buffer */
    if (c->rsize != DATA_BUFFER_SIZE) {
        fresh = conn_fresh_buffer(c->rbuf, DATA_BUFFER_SIZE);
        if (fresh == NULL) {
            all_ok = false;
        } else {
            c->rbuf = fresh;
            c->rsize = DATA_BUFFER_SIZE;
        }
    }

    /* write buffer */
    if (c->wsize != DATA_BUFFER_SIZE) {
        fresh = conn_fresh_buffer(c->wbuf, DATA_BUFFER_SIZE);
        if (fresh == NULL) {
            all_ok = false;
        } else {
            c->wbuf = fresh;
            c->wsize = DATA_BUFFER_SIZE;
        }
    }

    /* item list */
    if (c->isize != ITEM_LIST_INITIAL) {
        fresh = conn_fresh_buffer(c->ilist, sizeof(item *) * ITEM_LIST_INITIAL);
        if (fresh == NULL) {
            all_ok = false;
        } else {
            c->ilist = fresh;
            c->isize = ITEM_LIST_INITIAL;
        }
    }

    /* suffix list */
    if (c->suffixsize != SUFFIX_LIST_INITIAL) {
        fresh = conn_fresh_buffer(c->suffixlist, sizeof(char *) * SUFFIX_LIST_INITIAL);
        if (fresh == NULL) {
            all_ok = false;
        } else {
            c->suffixlist = fresh;
            c->suffixsize = SUFFIX_LIST_INITIAL;
        }
    }

    /* iovec array */
    if (c->iovsize != IOV_LIST_INITIAL) {
        fresh = conn_fresh_buffer(c->iov, sizeof(struct iovec) * IOV_LIST_INITIAL);
        if (fresh == NULL) {
            all_ok = false;
        } else {
            c->iov = fresh;
            c->iovsize = IOV_LIST_INITIAL;
        }
    }

    /* message header array */
    if (c->msgsize != MSG_LIST_INITIAL) {
        fresh = conn_fresh_buffer(c->msglist, sizeof(struct msghdr) * MSG_LIST_INITIAL);
        if (fresh == NULL) {
            all_ok = false;
        } else {
            c->msglist = fresh;
            c->msgsize = MSG_LIST_INITIAL;
        }
    }

    return all_ok;
}
/**
 * Object-cache constructor for connection objects.  Zeroes the structure,
 * allocates the default-sized transfer buffers and counts the new structure
 * in mc_stats.conn_structs.
 *
 * @param buffer  The memory allocated by the object cache
 * @param unused1 not used
 * @param unused2 not used
 * @return 0 on success, 1 if we failed to allocate memory
 */
static int conn_constructor(void *buffer, void *unused1, int unused2) {
    (void)unused1;
    (void)unused2;

    conn *c = buffer;
    memset(c, 0, sizeof(*c));
    MEMCACHED_CONN_CREATE(c);

    if (conn_reset_buffersize(c)) {
        STATS_LOCK();
        mc_stats.conn_structs++;
        STATS_UNLOCK();
        return 0;
    }

    /* At least one buffer could not be allocated: release whichever ones
       were obtained (the rest are still NULL from the memset above). */
    free(c->rbuf);
    free(c->wbuf);
    free(c->ilist);
    free(c->suffixlist);
    free(c->iov);
    free(c->msglist);
    mc_logger->log(EXTENSION_LOG_WARNING, NULL,
                   "Failed to allocate buffers for connection\n");
    return 1;
}
/**
 * Object-cache destructor for connection objects.  Frees every buffer owned
 * by the connection and removes it from the mc_stats.conn_structs count.
 *
 * @param buffer The memory allocated by the object cache
 * @param unused not used
 */
static void conn_destructor(void *buffer, void *unused) {
    (void)unused;

    conn *c = buffer;
    void *owned[] = {
        c->rbuf, c->wbuf, c->ilist, c->suffixlist, c->iov, c->msglist
    };

    for (size_t i = 0; i < sizeof(owned) / sizeof(owned[0]); i++) {
        free(owned[i]);
    }

    STATS_LOCK();
    mc_stats.conn_structs--;
    STATS_UNLOCK();
}
/**
 * Take a connection object from conn_cache, initialize it for the given
 * socket and register its event with libevent.
 *
 * @param sfd              socket descriptor the connection wraps
 * @param init_state       initial state function of the connection
 * @param event_flags      libevent flags used to register the event
 * @param read_buffer_size minimum size required for the read buffer; the
 *                         buffer is replaced when the cached one is smaller
 * @param transport        network transport of the socket
 * @param base             event base the connection's event is attached to
 * @param timeout          timeout passed to event_add()
 * @return the initialized connection, or NULL when the cache is exhausted,
 *         the read buffer cannot be allocated, or event_add() fails.  On
 *         those failure paths the object is returned to conn_cache; this
 *         function does not close sfd.
 */
conn *conn_new(const int sfd, STATE_FUNC init_state,
               const int event_flags,
               const int read_buffer_size, enum network_transport transport,
               struct event_base *base, struct timeval *timeout) {
    conn *c = cache_alloc(conn_cache);
    if (c == NULL) {
        return NULL;
    }
    assert(c->thread == NULL);

    if (c->rsize < read_buffer_size) {
        void *mem = malloc(read_buffer_size);
        if (mem) {
            c->rsize = read_buffer_size;
            free(c->rbuf);
            c->rbuf = mem;
        } else {
            assert(c->thread == NULL);
            cache_free(conn_cache, c);
            return NULL;
        }
    }

    c->transport = transport;
    c->protocol = settings.binding_protocol;

    /* unix socket mode doesn't need this, so zeroed out. but why
     * is this done for every command? presumably for UDP
     * mode. */
    if (!settings.socketpath) {
        c->request_addr_size = sizeof(c->request_addr);
    } else {
        c->request_addr_size = 0;
    }

    if (settings.verbose > 1) {
        if (init_state == conn_listening) {
            mc_logger->log(EXTENSION_LOG_DEBUG, c,
                           "<%d server listening (%s)\n", sfd, prot_text(c->protocol));
        } else if (IS_UDP(transport)) {
            mc_logger->log(EXTENSION_LOG_DEBUG, c,
                           "<%d server listening (udp)\n", sfd);
        } else if (c->protocol == negotiating_prot) {
            mc_logger->log(EXTENSION_LOG_DEBUG, c,
                           "<%d new auto-negotiating client connection\n", sfd);
        } else if (c->protocol == ascii_prot) {
            mc_logger->log(EXTENSION_LOG_DEBUG, c,
                           "<%d new ascii client connection.\n", sfd);
        } else if (c->protocol == binary_prot) {
            mc_logger->log(EXTENSION_LOG_DEBUG, c,
                           "<%d new binary client connection.\n", sfd);
        } else {
            mc_logger->log(EXTENSION_LOG_DEBUG, c,
                           "<%d new unknown (%d) client connection\n", sfd, c->protocol);
            assert(false);
        }
    }

    c->sfd = sfd;
    c->state = init_state;
    c->cmd = -1;
    c->ascii_cmd = NULL;
    c->rbytes = c->wbytes = 0;
    c->wcurr = c->wbuf;
    c->rcurr = c->rbuf;
    c->ritem = 0;
    c->rlbytes = 0;
    c->icurr = c->ilist;
    c->suffixcurr = c->suffixlist;
    c->ileft = 0;
    c->suffixleft = 0;
    c->iovused = 0;
    c->msgcurr = 0;
    c->msgused = 0;
    c->next = NULL;
    c->conn_prev = NULL;
    c->conn_next = NULL;
#ifdef DETECT_LONG_QUERY
    c->lq_bufcnt = 0;
#endif
    c->write_and_go = init_state;
    c->write_and_free = 0;
    c->item = 0;
    c->coll_strkeys = 0;
    c->coll_eitem = 0;
    c->coll_resps = 0;
    // COMMAND PIPELINING
    c->pipe_state = PIPE_STATE_OFF;
    c->pipe_count = 0;
    c->noreply = false;

    event_set(&c->event, sfd, event_flags, event_handler, (void *)c);
    event_base_set(base, &c->event);
    c->ev_flags = event_flags;
    if (event_add(&c->event, timeout) == -1) {
        mc_logger->log(EXTENSION_LOG_WARNING, NULL,
                       "Failed to add connection to libevent: %s", strerror(errno));
        assert(c->thread == NULL);
        cache_free(conn_cache, c);
        return NULL;
    }

    STATS_LOCK();
    mc_stats.total_conns++;
    STATS_UNLOCK();

    c->aiostat = ENGINE_SUCCESS;
    c->ewouldblock = false;
    c->io_blocked = false;
    c->premature_notify_io_complete = false;

    /* save client ip address in connection object */
    struct sockaddr_in addr;
    socklen_t addrlen = sizeof(addr);
    /* Zero the address first: getpeername() fails on sockets that have no
       peer (for example listening sockets), and without this the
       inet_ntoa() call below would read uninitialized stack memory.  With
       it, client_ip becomes "0.0.0.0" in that case. */
    memset(&addr, 0, sizeof(addr));
    if (getpeername(c->sfd, (struct sockaddr*)&addr, &addrlen) != 0) {
        /* Only worth a warning for connections that start in conn_new_cmd. */
        if (init_state == conn_new_cmd) {
            mc_logger->log(EXTENSION_LOG_WARNING, c,
                           "getpeername(fd=%d) has failed: %s\n",
                           c->sfd, strerror(errno));
        }
    }
    /* NOTE(review): 16 assumes client_ip holds a dotted-quad IPv4 string
       plus terminator - confirm against the conn structure. */
    snprintf(c->client_ip, 16, "%s", inet_ntoa(addr.sin_addr));

    MEMCACHED_CONN_ALLOCATE(c->sfd);
    perform_callbacks(ON_CONNECT, NULL, c);
    return c;
}
/*
 * Release whatever c->coll_eitem currently refers to, according to the
 * collection operation recorded in c->coll_op, and then clear c->coll_eitem.
 * Operations that also own c->coll_resps or c->coll_strkeys free and clear
 * those as well.
 */
static void conn_coll_eitem_free(conn *c) {
    switch (c->coll_op) {
    /* Single-element inserts: the address of coll_eitem is handed to the
       engine as a one-element array. */
    case OPERATION_LOP_INSERT:
        mc_engine.v1->list_elem_release(mc_engine.v0, c, &c->coll_eitem, 1);
        break;
    case OPERATION_SOP_INSERT:
        mc_engine.v1->set_elem_release(mc_engine.v0, c, &c->coll_eitem, 1);
        break;
    case OPERATION_BOP_INSERT:
    case OPERATION_BOP_UPSERT:
        mc_engine.v1->btree_elem_release(mc_engine.v0, c, &c->coll_eitem, 1);
        break;

    /* coll_eitem is a plain heap block: free(NULL) is a no-op, so no guard
       is needed. */
    case OPERATION_SOP_DELETE:
    case OPERATION_SOP_EXIST:
    case OPERATION_BOP_UPDATE:
        free(c->coll_eitem);
        break;

    /* Retrievals: coll_eitem is an array of coll_ecount elements; release
       the elements, then free the array and the response buffer. */
    case OPERATION_LOP_GET:
        mc_engine.v1->list_elem_release(mc_engine.v0, c, c->coll_eitem, c->coll_ecount);
        free(c->coll_eitem);
        free(c->coll_resps);
        c->coll_resps = NULL;
        break;
    case OPERATION_SOP_GET:
        mc_engine.v1->set_elem_release(mc_engine.v0, c, c->coll_eitem, c->coll_ecount);
        free(c->coll_eitem);
        free(c->coll_resps);
        c->coll_resps = NULL;
        break;
    case OPERATION_BOP_GET:
    case OPERATION_BOP_PWG: /* position with get */
    case OPERATION_BOP_GBP: /* get by position */
        mc_engine.v1->btree_elem_release(mc_engine.v0, c, c->coll_eitem, c->coll_ecount);
        free(c->coll_eitem);
        free(c->coll_resps);
        c->coll_resps = NULL;
        break;

    /* Multi-key b+tree retrievals also own the key string buffer. */
#ifdef SUPPORT_BOP_MGET
    case OPERATION_BOP_MGET:
#endif
#ifdef SUPPORT_BOP_SMGET
    case OPERATION_BOP_SMGET:
#endif
#if defined(SUPPORT_BOP_MGET) || defined(SUPPORT_BOP_SMGET)
        mc_engine.v1->btree_elem_release(mc_engine.v0, c, c->coll_eitem, c->coll_ecount);
        free(c->coll_eitem);
        free(c->coll_strkeys);
        c->coll_strkeys = NULL;
        break;
#endif

    default:
        assert(0); /* This case must not happen */
    }

    c->coll_eitem = NULL;
}
/*
 * Release everything a connection still holds (engine items, collection
 * elements, suffix buffers, pending write buffer, SASL context), unlink it
 * from its thread's conn_list and reset the per-connection bookkeeping
 * fields.  On return c->thread is NULL and c->sfd is -1.
 */
static void conn_cleanup(conn *c) {
    assert(c != NULL);

    if (c->item) {
        mc_engine.v1->release(mc_engine.v0, c, c->item);
        c->item = 0;
    }

#ifdef DETECT_LONG_QUERY
    if (c->lq_bufcnt != 0) {
        lqdetect_buffer_release(c->lq_bufcnt);
        c->lq_bufcnt = 0;
    }
#endif

    if (c->coll_eitem != NULL) {
        conn_coll_eitem_free(c);
    }

    /* Hand back every item that has not been sent yet. */
    while (c->ileft > 0) {
        mc_engine.v1->release(mc_engine.v0, c, *(c->icurr));
        c->ileft--;
        c->icurr++;
    }

    /* Return the remaining suffix buffers to the thread's suffix cache. */
    while (c->suffixleft > 0) {
        cache_free(c->thread->suffix_cache, *(c->suffixcurr));
        c->suffixleft--;
        c->suffixcurr++;
    }

    /* free(NULL) is a no-op, so no guard is needed here. */
    free(c->write_and_free);
    c->write_and_free = 0;

    if (c->sasl_conn) {
        sasl_dispose(&c->sasl_conn);
        c->sasl_conn = NULL;
    }

    c->engine_storage = NULL;

    /* disconnect it from the conn_list of a thread in charge */
    if (c->conn_prev == NULL) {
        /* no predecessor: this connection must be the list head */
        assert(c->thread->conn_list == c);
        c->thread->conn_list = c->conn_next;
    } else {
        c->conn_prev->conn_next = c->conn_next;
    }
    if (c->conn_next != NULL) {
        c->conn_next->conn_prev = c->conn_prev;
    }
    c->thread = NULL;

    assert(c->next == NULL);
    c->ascii_cmd = NULL;
    c->sfd = -1;
    c->ewouldblock = false;
    c->io_blocked = false;
    c->premature_notify_io_complete = false;
}
/*
 * Close a connection: remove its event, close the socket, abort any
 * in-flight ascii command, fire the ON_DISCONNECT callbacks, drop it from
 * the owning thread's pending-io list, clean it up and return the object to
 * conn_cache.
 */
void conn_close(conn *c) {
    assert(c != NULL);

    /* delete the event, the socket and the conn */
    if (c->sfd != -1) {
        MEMCACHED_CONN_RELEASE(c->sfd);
        event_del(&c->event);
        if (settings.verbose > 1) {
            mc_logger->log(EXTENSION_LOG_DEBUG, c,
                           "<%d connection closed.\n", c->sfd);
        }
        safe_close(c->sfd);
        c->sfd = -1;
    }

    if (c->ascii_cmd != NULL) {
        c->ascii_cmd->abort(c->ascii_cmd, c);
    }

    assert(c->thread);
    perform_callbacks(ON_DISCONNECT, NULL, c);

    /* remove from pending-io list */
    LOCK_THREAD(c->thread);
    if (settings.verbose > 1) {
        /* the membership test is only needed for the debug message */
        if (list_contains(c->thread->pending_io, c)) {
            mc_logger->log(EXTENSION_LOG_DEBUG, c,
                           "Current connection was in the pending-io list.. Nuking it\n");
        }
    }
    c->thread->pending_io = list_remove(c->thread->pending_io, c);
    UNLOCK_THREAD(c->thread);

    conn_cleanup(c);

    /* The object cache expects objects to come back in a constructed
       state, so bring the buffers back to their default sizes first. */
    conn_reset_buffersize(c);

    assert(c->thread == NULL);
    cache_free(conn_cache, c);
}
/*
 * Shrink a connection's buffers back to their initial sizes when they have
 * grown past their high-water marks, so that an occasional large "get"
 * request does not permanently hold on to a lot of server memory.  UDP
 * connections are left alone.
 *
 * This should only be called in between requests since it can wipe output
 * buffers!
 */
static void conn_shrink(conn *c) {
    assert(c != NULL);

    if (IS_UDP(c->transport)) {
        return;
    }

    if (c->rsize > READ_BUFFER_HIGHWAT && c->rbytes < DATA_BUFFER_SIZE) {
        /* Move unread bytes to the front so they survive the shrink. */
        if (c->rcurr != c->rbuf) {
            memmove(c->rbuf, c->rcurr, (size_t)c->rbytes);
        }

        char *shrunk = realloc(c->rbuf, DATA_BUFFER_SIZE);
        if (shrunk != NULL) {
            c->rbuf = shrunk;
            c->rsize = DATA_BUFFER_SIZE;
        }
        /* TODO check other branch... */

        /* The data now starts at the front, whether or not realloc worked. */
        c->rcurr = c->rbuf;
    }

    if (c->isize > ITEM_LIST_HIGHWAT) {
        item **shrunk = realloc(c->ilist, ITEM_LIST_INITIAL * sizeof(c->ilist[0]));
        if (shrunk != NULL) {
            c->ilist = shrunk;
            c->isize = ITEM_LIST_INITIAL;
        }
        /* TODO check error condition? */
    }

    if (c->msgsize > MSG_LIST_HIGHWAT) {
        struct msghdr *shrunk = realloc(c->msglist, MSG_LIST_INITIAL * sizeof(c->msglist[0]));
        if (shrunk != NULL) {
            c->msglist = shrunk;
            c->msgsize = MSG_LIST_INITIAL;
        }
        /* TODO check error condition? */
    }

    if (c->iovsize > IOV_LIST_HIGHWAT) {
        struct iovec *shrunk = realloc(c->iov, IOV_LIST_INITIAL * sizeof(c->iov[0]));
        if (shrunk != NULL) {
            c->iov = shrunk;
            c->iovsize = IOV_LIST_INITIAL;
        }
        /* TODO check return value */
    }
}
/**
 * Convert a connection state function to a human readable name.
 *
 * @param state the state function to describe
 * @return the name of the state, or "Unknown" when it is not one of the
 *         states listed below
 */
const char *state_text(STATE_FUNC state) {
    const struct {
        STATE_FUNC fn;
        const char *label;
    } known[] = {
        { conn_listening, "conn_listening" },
        { conn_new_cmd,   "conn_new_cmd"   },
        { conn_waiting,   "conn_waiting"   },
        { conn_read,      "conn_read"      },
        { conn_parse_cmd, "conn_parse_cmd" },
        { conn_write,     "conn_write"     },
        { conn_nread,     "conn_nread"     },
        { conn_swallow,   "conn_swallow"   },
        { conn_closing,   "conn_closing"   },
        { conn_mwrite,    "conn_mwrite"    },
    };

    for (size_t i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
        if (state == known[i].fn) {
            return known[i].label;
        }
    }
    return "Unknown";
}
/*
* Sets a connection's current state in the state machine. Any special
* processing that needs to happen on certain state transitions can
* happen here.
*/
void conn_set_state(conn *c, STATE_FUNC state) {
assert(c != NULL);
if (state != c->state) {
if (settings.verbose > 2 || c->state == conn_closing) {
mc_logger->log(EXTENSION_LOG_DETAIL, c, "%d: going from %s to %s\n",
c->sfd, state_text(c->state), state_text(state));