kgio.git  about / heads / tags
kinder, gentler I/O for Ruby
blob a723b689e2cc5cf3df7cf1cf4132c231183d6b88 7928 bytes (raw)
$ git show rbx-wip:ext/kgio/writev.c	# shows this blob on the CLI

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
 
/*
 * we're currently too lazy to use rb_ensure to free an allocation, so we
 * the abuse rb_str_* API for a temporary buffer
 */
#define RSTRING_MODIFIED 1

#include "kgio.h"
#include "my_fileno.h"
#include "nonblock.h"
#ifdef HAVE_WRITEV
#  include <sys/uio.h>
#  define USE_WRITEV 1
#else
#  define USE_WRITEV 0
static ssize_t assert_writev(int fd, void* iov, int len)
{
	assert(0 && "you should not try to call writev");
	return -1;
}
#  define writev assert_writev
#endif
static VALUE sym_wait_writable;

#ifndef HAVE_WRITEV
#define iovec my_iovec
struct my_iovec {
	void  *iov_base;
	size_t iov_len;
};
#endif

/* tests for choosing following constants were done on Linux 3.0 x86_64
 * (Ubuntu 12.04) Core i3 i3-2330M slowed to 1600MHz
 * testing script https://gist.github.com/2850641
 * fill free to make more thorough testing and choose better value
 */

/* test shows that its meaningless to set WRITEV_MEMLIMIT more that 1M
 * even when tcp_wmem set to relatively high value (2M) (in fact, it becomes
 * even slower). 512K performs a bit better in average case. */
#define WRITEV_MEMLIMIT (512*1024)
/* same test shows that custom_writev is faster than glibc writev when
 * average string is smaller than ~500 bytes and slower when average strings
 * is greater then ~600 bytes. 512 bytes were choosen cause current compilers
 * turns x/512 into x>>9 */
#define WRITEV_IMPL_THRESHOLD 512

static unsigned int iov_max = 1024; /* this could be overriden in init */

struct wrv_args {
	VALUE io;
	VALUE buf;
	VALUE vec_buf; /* FIXME: this requires RSTRING_MODIFY for rbx */
	struct iovec *vec;
	unsigned long iov_cnt;
	size_t batch_len;
	int something_written;
	int fd;
};

static ssize_t custom_writev(int fd, const struct iovec *vec, unsigned int iov_cnt, size_t total_len)
{
	unsigned int i;
	ssize_t result;
	char *buf, *curbuf;
	const struct iovec *curvec = vec;

	/* we do not want to use ruby's xmalloc because
	 * it can fire GC, and we'll free buffer shortly anyway */
	curbuf = buf = malloc(total_len);
	if (buf == NULL) return -1;

	for (i = 0; i < iov_cnt; i++, curvec++) {
		memcpy(curbuf, curvec->iov_base, curvec->iov_len);
		curbuf += curvec->iov_len;
	}

	result = write(fd, buf, total_len);

	/* well, it seems that `free` could not change errno
	 * but lets save it anyway */
	i = errno;
	free(buf);
	errno = i;

	return result;
}

static void prepare_writev(struct wrv_args *a, VALUE io, VALUE ary)
{
	a->io = io;
	a->fd = my_fileno(io);
	a->something_written = 0;

	if (TYPE(ary) == T_ARRAY)
		/* rb_ary_subseq will not copy array unless it modified */
		a->buf = rb_ary_subseq(ary, 0, RARRAY_LEN(ary));
	else
		a->buf = rb_Array(ary);

	a->vec_buf = rb_str_new(0, 0);
	a->vec = NULL;
}

static void fill_iovec(struct wrv_args *a)
{
	unsigned long i;
	struct iovec *curvec;

	a->iov_cnt = RARRAY_LEN(a->buf);
	a->batch_len = 0;
	if (a->iov_cnt == 0) return;
	if (a->iov_cnt > iov_max) a->iov_cnt = iov_max;
	rb_str_resize(a->vec_buf, sizeof(struct iovec) * a->iov_cnt);
	curvec = a->vec = (struct iovec*)RSTRING_PTR(a->vec_buf);

	for (i=0; i < a->iov_cnt; i++, curvec++) {
		VALUE str = rb_ary_entry(a->buf, i);
		long str_len, next_len;

		if (TYPE(str) != T_STRING) {
			str = rb_obj_as_string(str);
			rb_ary_store(a->buf, i, str);
		}

		str_len = RSTRING_LEN(str);

		/* lets limit total memory to write,
		 * but always take first string */
		next_len = a->batch_len + str_len;
		if (i && next_len > WRITEV_MEMLIMIT) {
			a->iov_cnt = i;
			break;
		}
		a->batch_len = next_len;

		curvec->iov_base = RSTRING_PTR(str);
		curvec->iov_len = str_len;
	}
}

static long trim_writev_buffer(struct wrv_args *a, long n)
{
	long i;
	long ary_len = RARRAY_LEN(a->buf);

	if (n == (long)a->batch_len) {
		i = a->iov_cnt;
		n = 0;
	} else {
		for (i = 0; n && i < ary_len; i++) {
			VALUE entry = rb_ary_entry(a->buf, i);
			n -= RSTRING_LEN(entry);
			if (n < 0) break;
		}
	}

	/* all done */
	if (i == ary_len) {
		assert(n == 0 && "writev system call is broken");
		a->buf = Qnil;
		return 0;
	}

	/* partially done, remove fully-written buffers */
	if (i > 0)
		a->buf = rb_ary_subseq(a->buf, i, ary_len - i);

	/* setup+replace partially written buffer */
	if (n < 0) {
		VALUE str = rb_ary_entry(a->buf, 0);
		long str_len = RSTRING_LEN(str);
		str = rb_str_subseq(str, str_len + n, -n);
		rb_ary_store(a->buf, 0, str);
	}
	return RARRAY_LEN(a->buf);
}

static int writev_check(struct wrv_args *a, long n, const char *msg, int io_wait)
{
	if (n >= 0) {
		if (n > 0) a->something_written = 1;
		return trim_writev_buffer(a, n);
	} else if (n < 0) {
		if (errno == EINTR) {
			a->fd = my_fileno(a->io);
			return -1;
		}
		if (errno == EAGAIN) {
			if (io_wait) {
				(void)kgio_call_wait_writable(a->io);
				return -1;
			} else if (!a->something_written) {
				a->buf = sym_wait_writable;
			}
			return 0;
		}
		kgio_wr_sys_fail(msg);
	}
	return 0;
}

static VALUE my_writev(VALUE io, VALUE ary, int io_wait)
{
	struct wrv_args a;
	long n;

	prepare_writev(&a, io, ary);
	set_nonblocking(a.fd);

	do {
		fill_iovec(&a);
		if (a.iov_cnt == 0)
			n = 0;
		else if (a.iov_cnt == 1)
			n = (long)write(a.fd, a.vec[0].iov_base,
			                a.vec[0].iov_len);
		/* for big strings use library function */
		else if (USE_WRITEV &&
		        ((a.batch_len / WRITEV_IMPL_THRESHOLD) > a.iov_cnt))
			n = (long)writev(a.fd, a.vec, a.iov_cnt);
		else
			n = (long)custom_writev(a.fd, a.vec, a.iov_cnt,
			                        a.batch_len);
	} while (writev_check(&a, n, "writev", io_wait) != 0);
	rb_str_resize(a.vec_buf, 0);

	if (TYPE(a.buf) != T_SYMBOL)
		kgio_autopush_write(io);
	return a.buf;
}

/*
 * call-seq:
 *
 *	io.kgio_writev(array)	-> nil
 *
 * Returns nil when the write completes.
 *
 * This may block and call any method defined to +kgio_wait_writable+
 * for the class.
 *
 * Note: it uses +Array()+ semantic for converting argument, so that
 * it will succeed if you pass something else.
 */
static VALUE kgio_writev(VALUE io, VALUE ary)
{
	return my_writev(io, ary, 1);
}

/*
 * call-seq:
 *
 *	io.kgio_trywritev(array)	-> nil, Array or :wait_writable
 *
 * Returns nil if the write was completed in full.
 *
 * Returns an Array of strings containing the unwritten portion
 * if EAGAIN was encountered, but some portion was successfully written.
 *
 * Returns :wait_writable if EAGAIN is encountered and nothing
 * was written.
 *
 * Note: it uses +Array()+ semantic for converting argument, so that
 * it will succeed if you pass something else.
 */
static VALUE kgio_trywritev(VALUE io, VALUE ary)
{
	return my_writev(io, ary, 0);
}

/*
 * call-seq:
 *
 *	Kgio.trywritev(io, array)    -> nil, Array or :wait_writable
 *
 * Returns nil if the write was completed in full.
 *
 * Returns a Array of strings containing the unwritten portion if EAGAIN
 * was encountered, but some portion was successfully written.
 *
 * Returns :wait_writable if EAGAIN is encountered and nothing
 * was written.
 *
 * Maybe used in place of PipeMethods#kgio_trywritev for non-Kgio objects
 */
static VALUE s_trywritev(VALUE mod, VALUE io, VALUE ary)
{
	return kgio_trywritev(io, ary);
}

void init_kgio_writev(void)
{
#ifdef IOV_MAX
	unsigned int sys_iov_max = IOV_MAX;
#else
	unsigned int sys_iov_max = sysconf(_SC_IOV_MAX);
#endif

	VALUE mPipeMethods, mSocketMethods;
	VALUE mKgio = rb_define_module("Kgio");

	if (sys_iov_max < iov_max)
		iov_max = sys_iov_max;

	sym_wait_writable = ID2SYM(rb_intern("wait_writable"));

	rb_define_singleton_method(mKgio, "trywritev", s_trywritev, 2);

	mPipeMethods = rb_define_module_under(mKgio, "PipeMethods");
	rb_define_method(mPipeMethods, "kgio_writev", kgio_writev, 1);
	rb_define_method(mPipeMethods, "kgio_trywritev", kgio_trywritev, 1);

	mSocketMethods = rb_define_module_under(mKgio, "SocketMethods");
	rb_define_method(mSocketMethods, "kgio_writev", kgio_writev, 1);
	rb_define_method(mSocketMethods, "kgio_trywritev", kgio_trywritev, 1);
}

git clone git://yhbt.net/kgio.git
git clone https://yhbt.net/kgio.git