QEMU-Devel Archive mirror
* [Qemu-devel] [PATCH]ish NPTL support.
@ 2006-12-09 22:38 David Woodhouse
  2006-12-13 16:02 ` Mulyadi Santosa
  0 siblings, 1 reply; 14+ messages in thread
From: David Woodhouse @ 2006-12-09 22:38 UTC
  To: qemu-devel

I started playing with nspluginwrapper -- and finally got annoyed with
the fact that not even /bin/echo from current i386 userspace will run in
qemu-i386 any more. So I had a go at implementing set_thread_area, futex
and set_tid_address.

Some of the more esoteric futex ops aren't going to work cross-endian
unless we do bi-endian atomic op support in the kernel itself -- and
robust futexes are going to be a complete PITA. But hopefully we can
live without those for a while longer.

When attempting to use older userspace in /usr/qemu-i386 and set
LD_ASSUME_KERNEL, I observed that qemu itself wouldn't start up. So I
also made it set LD_ASSUME_KERNEL to the contents of QEMU_ASSUME_KERNEL,
_after_ it's started.
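
(So one can run e.g. QEMU_ASSUME_KERNEL=2.4.1 qemu-i386 /usr/qemu-i386/bin/echo
-- 2.4.1 being just an example value -- and only the guest's glibc ever sees
LD_ASSUME_KERNEL; the host libc that qemu itself was loaded with never does.)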

This isn't really ready to be applied -- I need to implement
get_thread_area, go through the futex operations and make sure we're
doing the best we can, and fix the duplication of code between write_ldt
and set_thread_area. But since I'm relatively unlikely to get it into a
perfect state in the near future and since it is at least working well
enough to run current userspace, I figured I might as well post it...

Oh, and if we do it this way we should malloc the original GDT. And then
fix the FIXME about freeing them.
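
For illustration, mallocing it could look roughly like the sketch below (not
part of the patch; the descriptor set-up code would stay exactly as it is now):

    uint64_t *gdt_table;                /* was: uint64_t gdt_table[9]; */

    gdt_table = malloc(9 * 8);          /* 6 fixed entries + 3 TLS entries */
    if (!gdt_table) {
        fprintf(stderr, "qemu: cannot allocate GDT\n");
        exit(1);
    }
    memset(gdt_table, 0, 9 * 8);
    /* ... fill in the flat code/data descriptors exactly as today ... */
    env->gdt.base = h2g(gdt_table);
    env->gdt.limit = 9 * 8 - 1;         /* sizeof(gdt_table) no longer works */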

Index: linux-user/main.c
===================================================================
RCS file: /cvsroot/qemu/qemu/linux-user/main.c,v
retrieving revision 1.96
diff -u -p -r1.96 main.c
--- linux-user/main.c	4 Nov 2006 16:46:29 -0000	1.96
+++ linux-user/main.c	9 Dec 2006 22:35:49 -0000
@@ -156,7 +156,7 @@ static void set_gate(void *ptr, unsigned
     p[1] = tswapl(e2);
 }
 
-uint64_t gdt_table[6];
+uint64_t gdt_table[9];
 uint64_t idt_table[256];
 
 /* only dpl matters as we do only user space emulation */
@@ -1566,7 +1566,11 @@ int main(int argc, char **argv)
     int optind;
     const char *r;
     int gdbstub_port = 0;
-    
+    char *assume_kernel = getenv("QEMU_ASSUME_KERNEL");
+
+    if (assume_kernel)
+	setenv("LD_ASSUME_KERNEL", assume_kernel, 1);
+
     if (argc <= 1)
         usage();
 
Index: linux-user/syscall.c
===================================================================
RCS file: /cvsroot/qemu/qemu/linux-user/syscall.c,v
retrieving revision 1.78
diff -u -p -r1.78 syscall.c
--- linux-user/syscall.c	8 Dec 2006 01:32:58 -0000	1.78
+++ linux-user/syscall.c	9 Dec 2006 22:35:50 -0000
@@ -56,6 +56,7 @@
 #define tchars host_tchars /* same as target */
 #define ltchars host_ltchars /* same as target */
 
+#include <linux/futex.h>
 #include <linux/termios.h>
 #include <linux/unistd.h>
 #include <linux/utsname.h>
@@ -1634,6 +1635,80 @@ int do_modify_ldt(CPUX86State *env, int 
     return ret;
 }
 
+int do_set_thread_area(CPUX86State *env, target_ulong ptr)
+{
+    uint64_t *gdt_table = g2h(env->gdt.base);
+    struct target_modify_ldt_ldt_s ldt_info;
+    struct target_modify_ldt_ldt_s *target_ldt_info;
+    int seg_32bit, contents, read_exec_only, limit_in_pages;
+    int seg_not_present, useable;
+    uint32_t *lp, entry_1, entry_2;
+    int i;
+
+    lock_user_struct(target_ldt_info, ptr, 1);
+    ldt_info.entry_number = tswap32(target_ldt_info->entry_number);
+    ldt_info.base_addr = tswapl(target_ldt_info->base_addr);
+    ldt_info.limit = tswap32(target_ldt_info->limit);
+    ldt_info.flags = tswap32(target_ldt_info->flags);
+    if (ldt_info.entry_number == -1) {
+		    for (i=6; i<9; i++)
+		    if (gdt_table[i] == 0) {
+			    ldt_info.entry_number = i;
+			    target_ldt_info->entry_number = tswap32(i);
+			    break;
+		    }
+    }
+    unlock_user_struct(target_ldt_info, ptr, 0);
+    
+    if (ldt_info.entry_number < 6 || ldt_info.entry_number > 8)
+	    return -EINVAL;
+    seg_32bit = ldt_info.flags & 1;
+    contents = (ldt_info.flags >> 1) & 3;
+    read_exec_only = (ldt_info.flags >> 3) & 1;
+    limit_in_pages = (ldt_info.flags >> 4) & 1;
+    seg_not_present = (ldt_info.flags >> 5) & 1;
+    useable = (ldt_info.flags >> 6) & 1;
+
+    if (contents == 3) {
+        if (seg_not_present == 0)
+            return -EINVAL;
+    }
+
+    /* NOTE: same code as Linux kernel */
+    /* Allow LDTs to be cleared by the user. */
+    if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+        if ((contents == 0		&&
+             read_exec_only == 1	&&
+             seg_32bit == 0		&&
+             limit_in_pages == 0	&&
+             seg_not_present == 1	&&
+             useable == 0 )) {
+            entry_1 = 0;
+            entry_2 = 0;
+            goto install;
+        }
+    }
+    
+    entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
+        (ldt_info.limit & 0x0ffff);
+    entry_2 = (ldt_info.base_addr & 0xff000000) |
+        ((ldt_info.base_addr & 0x00ff0000) >> 16) |
+        (ldt_info.limit & 0xf0000) |
+        ((read_exec_only ^ 1) << 9) |
+        (contents << 10) |
+        ((seg_not_present ^ 1) << 15) |
+        (seg_32bit << 22) |
+        (limit_in_pages << 23) |
+	(useable << 20) |
+	0x7000;
+
+    /* Install the new entry ...  */
+install:
+    lp = (uint32_t *)(gdt_table + ldt_info.entry_number);
+    lp[0] = tswap32(entry_1);
+    lp[1] = tswap32(entry_2);
+    return 0;
+}
 #endif /* defined(TARGET_I386) */
 
 /* this stack is the equivalent of the kernel stack associated with a
@@ -1654,9 +1729,14 @@ int do_fork(CPUState *env, unsigned int 
     TaskState *ts;
     uint8_t *new_stack;
     CPUState *new_env;
-    
+#if defined(TARGET_I386)
+    uint64_t *new_gdt_table;
+#endif
+    printf("qemu fork\n");
     if (flags & CLONE_VM) {
         ts = malloc(sizeof(TaskState) + NEW_STACK_SIZE);
+	if (!ts)
+		return -ENOMEM;
         memset(ts, 0, sizeof(TaskState));
         new_stack = ts->stack;
         ts->used = 1;
@@ -1669,6 +1749,29 @@ int do_fork(CPUState *env, unsigned int 
 #if defined(TARGET_I386)
         if (!newsp)
             newsp = env->regs[R_ESP];
+	new_gdt_table = malloc(9 * 8);
+	if (!new_gdt_table) {
+		free(new_env);
+		return -ENOMEM;
+	}
+	/* Copy main GDT table from parent, but clear TLS entries */
+	memcpy(new_gdt_table, g2h(env->gdt.base), 6 * 8);
+	memset(&new_gdt_table[6], 0, 3 * 8); 
+	new_env->gdt.base = h2g(new_gdt_table);
+	if (flags & 0x00080000 /* CLONE_SETTLS */) {
+		ret = do_set_thread_area(new_env, new_env->regs[R_ESI]);
+		if (ret) {
+			free(new_gdt_table);
+			free(new_env);
+			return ret;
+		}
+	}
+	cpu_x86_load_seg(env, R_CS, new_env->regs[R_CS]);
+	cpu_x86_load_seg(env, R_DS, new_env->regs[R_DS]);
+	cpu_x86_load_seg(env, R_ES, new_env->regs[R_ES]);
+	cpu_x86_load_seg(env, R_SS, new_env->regs[R_SS]);
+	cpu_x86_load_seg(env, R_FS, new_env->regs[R_FS]);
+	cpu_x86_load_seg(env, R_GS, new_env->regs[R_GS]);
         new_env->regs[R_ESP] = newsp;
         new_env->regs[R_EAX] = 0;
 #elif defined(TARGET_ARM)
@@ -1916,6 +2019,68 @@ static inline void host_to_target_timesp
     unlock_user_struct(target_ts, target_addr, 1);
 }
 
+static long do_futex(target_ulong uaddr, int op, uint32_t val,
+		     target_ulong utime, target_ulong uaddr2,
+		     uint32_t val3)
+{
+	struct timespec host_utime;
+	unsigned long val2 = utime;
+
+	if (utime && (op == FUTEX_WAIT || op == FUTEX_LOCK_PI)) {
+		target_to_host_timespec(&host_utime, utime);
+		val2 = (unsigned long)&host_utime;
+	}
+ 
+#ifdef BSWAP_NEEDED
+	switch(op) {
+	case FUTEX_CMP_REQUEUE:
+		val3 = tswap32(val3);
+	case FUTEX_REQUEUE:
+		val2 = tswap32(val2);
+	case FUTEX_WAIT:
+	case FUTEX_WAKE:
+		val = tswap32(val);
+	case FUTEX_LOCK_PI: /* This one's icky, but comes out OK */
+	case FUTEX_UNLOCK_PI:
+		break;
+	default: 
+		gemu_log("qemu: Unsupported futex op %d\n", op);
+		return -ENOSYS;
+	} 
+#if 0 /* No, it's worse than this */
+	if (op == FUTEX_WAKE_OP) {
+		/* Need to munge the secondary operation (val3) */
+		val3 = tswap32(val3);
+	        int op2 = (val3 >> 28) & 7;
+		int cmp = (val3 >> 24) & 15;
+		int oparg = (val3 << 8) >> 20;
+		int cmparg = (val3 << 20) >> 20;
+		int shift = val3 & (FUTEX_OP_OPARG_SHIFT << 28);
+
+		if (shift)
+		    oparg = (oparg & 7) + 24 - (oparg & 24);
+		else oparg = 
+		if (op2 == FUTEX_OP_ADD) {
+			gemu_log("qemu: Unsupported wrong-endian FUTEX_OP_ADD\n");
+			return -ENOSYS;
+		}
+		if (cmparg == FUTEX_OP_CMP_LT || cmparg == FUTEX_OP_CMP_GE ||
+		    cmparg == FUTEX_OP_CMP_LE || cmparg == FUTEX_OP_CMP_GT) {
+			gemu_log("qemu: Unsupported wrong-endian futex cmparg %d\n", cmparg);
+			return -ENOSYS;
+		}
+		val3 = shift | (op2<<28) | (cmp<<24) | (oparg<<12) | cmparg;
+	}
+#endif
+#endif
+	return syscall(__NR_futex, g2h(uaddr), op, val, val2, g2h(uaddr2), val3);
+}
+
+int do_set_tid_address(target_ulong tidptr)
+{
+	return syscall(__NR_set_tid_address, g2h(tidptr));
+}
+
 long do_syscall(void *cpu_env, int num, long arg1, long arg2, long arg3, 
                 long arg4, long arg5, long arg6)
 {
@@ -1933,7 +2098,7 @@ long do_syscall(void *cpu_env, int num, 
         _mcleanup();
 #endif
         gdb_exit(cpu_env, arg1);
-        /* XXX: should free thread stack and CPU env */
+        /* XXX: should free thread stack, GDT and CPU env */
         _exit(arg1);
         ret = 0; /* avoid warning */
         break;
@@ -3074,6 +3239,9 @@ long do_syscall(void *cpu_env, int num, 
     case TARGET_NR_vm86:
         ret = do_vm86(cpu_env, arg1, arg2);
         break;
+    case TARGET_NR_set_thread_area:
+	ret = get_errno(do_set_thread_area(cpu_env, arg1));
+	break;
 #endif
     case TARGET_NR_adjtimex:
         goto unimplemented;
@@ -3864,19 +4032,28 @@ long do_syscall(void *cpu_env, int num, 
     case TARGET_NR_fremovexattr:
         goto unimplemented_nowarn;
 #endif
-#ifdef TARGET_NR_set_thread_area
-    case TARGET_NR_set_thread_area:
-    case TARGET_NR_get_thread_area:
-        goto unimplemented_nowarn;
-#endif
 #ifdef TARGET_NR_getdomainname
     case TARGET_NR_getdomainname:
         goto unimplemented_nowarn;
 #endif
+#ifdef TARGET_NR_futex
+    case TARGET_NR_futex:
+	ret = get_errno(do_futex(arg1, arg2, arg3, arg4, arg5, arg6));
+	break;
+#endif
+#ifdef TARGET_NR_set_tid_address
+    case TARGET_NR_set_tid_address:
+        ret = get_errno(do_set_tid_address(arg1));
+	break;
+#endif
+#ifdef TARGET_NR_set_robust_list
+    case TARGET_NR_set_robust_list:
+	    goto unimplemented_nowarn;
+#endif
     default:
     unimplemented:
         gemu_log("qemu: Unsupported syscall: %d\n", num);
-#if defined(TARGET_NR_setxattr) || defined(TARGET_NR_set_thread_area) || defined(TARGET_NR_getdomainname)
+#if defined(TARGET_NR_setxattr) || defined(TARGET_NR_set_robust_list) || defined(TARGET_NR_getdomainname)
     unimplemented_nowarn:
 #endif
         ret = -ENOSYS;

-- 
dwmw2

* Re: [Qemu-devel] [PATCH]ish NPTL support.
  2006-12-09 22:38 [Qemu-devel] [PATCH]ish NPTL support David Woodhouse
@ 2006-12-13 16:02 ` Mulyadi Santosa
  2006-12-13 17:01   ` David Woodhouse
  0 siblings, 1 reply; 14+ messages in thread
From: Mulyadi Santosa @ 2006-12-13 16:02 UTC
  To: qemu-devel, David Woodhouse

Hi David...

> I started playing with nspluginwrapper -- and finally got annoyed with
> the fact that not even /bin/echo from current i386 userspace will run in
> qemu-i386 any more. So I had a go at implementing set_thread_area, futex
> and set_tid_address.

A small request, if you are willing to do it: I think this patch is really
useful (IIRC NPTL has been a long-standing problem with qemu-i386), so instead
of leaving it just archived inside qemu-devel, could you please post it to the
qemu user forum too? That way it will be easy for someone else to get the
patch until it gets merged by Fabrice.

Thanks for sharing your work.... bravo!

regards,

Mulyadi

* Re: [Qemu-devel] [PATCH]ish NPTL support.
  2006-12-13 16:02 ` Mulyadi Santosa
@ 2006-12-13 17:01   ` David Woodhouse
  2006-12-13 17:22     ` Paul Brook
  2006-12-13 17:35     ` Thiemo Seufer
  0 siblings, 2 replies; 14+ messages in thread
From: David Woodhouse @ 2006-12-13 17:01 UTC
  To: a_mulyadi; +Cc: qemu-devel

On Wed, 2006-12-13 at 23:02 +0700, Mulyadi Santosa wrote:
> A small request, if you are willing to do it: I think this patch is really
> useful (IIRC NPTL has been a long-standing problem with qemu-i386), so instead
> of leaving it just archived inside qemu-devel, could you please post it to the
> qemu user forum too? That way it will be easy for someone else to get the
> patch until it gets merged by Fabrice.

Feel free to do so.

Better still, feel free to break it down into simple fixes and feed it
to Fabrice. The only thing that definitely isn't ready is the futex
stuff -- we could merge set_tid_address, set_thread_area and the clone
bits. And especially the QEMU_ASSUME_KERNEL thing.

Could do with testing the CLONE_SETTLS bits too, and making sure
CLONE_PARENT_SETTID and CLONE_CHILD_CLEARTID are implemented. 

-- 
dwmw2

* Re: [Qemu-devel] [PATCH]ish NPTL support.
  2006-12-13 17:01   ` David Woodhouse
@ 2006-12-13 17:22     ` Paul Brook
  2006-12-13 17:32       ` David Woodhouse
  2006-12-13 17:35     ` Thiemo Seufer
  1 sibling, 1 reply; 14+ messages in thread
From: Paul Brook @ 2006-12-13 17:22 UTC
  To: qemu-devel

On Wednesday 13 December 2006 17:01, David Woodhouse wrote:
> On Wed, 2006-12-13 at 23:02 +0700, Mulyadi Santosa wrote:
> > A small request, if you are willing to do it: I think this patch is really
> > useful (IIRC NPTL has been a long-standing problem with qemu-i386), so
> > instead of leaving it just archived inside qemu-devel, could you please post
> > it to the qemu user forum too? That way it will be easy for someone else to
> > get the patch until it gets merged by Fabrice.
>
> Feel free to do so.
>
> Better still, feel free to break it down into simple fixes and feed it
> to Fabrice. The only thing that definitely isn't ready is the futex
> stuff -- we could merge set_tid_address, set_thread_area and the clone
> bits. And especially the QEMU_ASSUME_KERNEL thing.
>
> Could do with testing the CLONE_SETTLS bits too, and making sure
> CLONE_PARENT_SETTID and CLONE_CHILD_CLEARTID are implemented.

I've a nasty feeling you're going to break the host libc if you do threading
this way. One possible solution is to use the pthreads API instead, and map
everything onto that.

Paul

* Re: [Qemu-devel] [PATCH]ish NPTL support.
  2006-12-13 17:22     ` Paul Brook
@ 2006-12-13 17:32       ` David Woodhouse
  2006-12-13 17:42         ` Paul Brook
  0 siblings, 1 reply; 14+ messages in thread
From: David Woodhouse @ 2006-12-13 17:32 UTC
  To: Paul Brook; +Cc: qemu-devel

On Wed, 2006-12-13 at 17:22 +0000, Paul Brook wrote:
> I've a nasty feeling you're going to break the host libc if you do threading 
> this way. One possible solution is to use the pthreads API instead, and map 
> everything onto that. 

Qemu doesn't use the host's threading support, does it?

-- 
dwmw2

* Re: [Qemu-devel] [PATCH]ish NPTL support.
  2006-12-13 17:01   ` David Woodhouse
  2006-12-13 17:22     ` Paul Brook
@ 2006-12-13 17:35     ` Thiemo Seufer
  1 sibling, 0 replies; 14+ messages in thread
From: Thiemo Seufer @ 2006-12-13 17:35 UTC
  To: David Woodhouse; +Cc: qemu-devel

David Woodhouse wrote:
> On Wed, 2006-12-13 at 23:02 +0700, Mulyadi Santosa wrote:
> > A small request, if you are willing to do it: I think this patch is really
> > useful (IIRC NPTL has been a long-standing problem with qemu-i386), so instead
> > of leaving it just archived inside qemu-devel, could you please post it to the
> > qemu user forum too? That way it will be easy for someone else to get the
> > patch until it gets merged by Fabrice.
> 
> Feel free to do so.
> 
> Better still, feel free to break it down into simple fixes and feed it
> to Fabrice.

Or send the broken-down versions to this list; I (or pbrook) can then
commit the easy bits at least.


Thiemo

* Re: [Qemu-devel] [PATCH]ish NPTL support.
  2006-12-13 17:32       ` David Woodhouse
@ 2006-12-13 17:42         ` Paul Brook
  2006-12-13 17:50           ` David Woodhouse
  0 siblings, 1 reply; 14+ messages in thread
From: Paul Brook @ 2006-12-13 17:42 UTC
  To: qemu-devel

On Wednesday 13 December 2006 17:32, David Woodhouse wrote:
> On Wed, 2006-12-13 at 17:22 +0000, Paul Brook wrote:
> > I've a nasty feeling you're going to break the host libc if you do
> > threading this way. One possible solution is to use the pthreads API
> > instead, and map everything onto that.
>
> Qemu doesn't use the host's threading support, does it?

Qemu doesn't currently have any real thread support. It has a few hacks that 
work for simple linuxthreads cases, but I doubt real multithreaded 
applications will work.

My point was that instead of blindly passing the threading syscalls through to 
the host we should consider using the host libc/libpthread thread support.

Paul

* Re: [Qemu-devel] [PATCH]ish NPTL support.
  2006-12-13 17:42         ` Paul Brook
@ 2006-12-13 17:50           ` David Woodhouse
  2006-12-13 18:07             ` Paul Brook
  2006-12-14  2:16             ` Jamie Lokier
  0 siblings, 2 replies; 14+ messages in thread
From: David Woodhouse @ 2006-12-13 17:50 UTC
  To: Paul Brook; +Cc: qemu-devel

On Wed, 2006-12-13 at 17:42 +0000, Paul Brook wrote:
> Qemu doesn't currently have any real thread support. It has a few hacks that 
> work for simple linuxthreads cases, but I doubt real multithreaded 
> applications will work.
> 
> My point was that instead of blindly passing the threading syscalls through to 
> the host we should consider using the host libc/libpthread thread support. 

Well, let's break things down individually.

- sys_set_thread_area():
- sys_get_thread_area():
- clone(CLONE_SETTLS)

This _is_ handled in qemu, since it's just a case of loading new stuff
into the emulated GDT. (Well, I didn't do get_thread_area but it's
simple).

- sys_futex():

We have to translate these into calls to the host's sys_futex() anyway.
I need to go through the rest of the futex operations and see how many
we can emulate cross-endian, and perhaps add some kernel support to make
the answer "all of them".

- sys_set_tid_address():
- clone(CLONE_CHILD_CLEARTID):

We _could_ manage to do this in qemu for controlled thread exit -- it
would be hard for uncontrolled exit though. But I don't see any harm in
just letting the kernel do it either. I don't mind too much, but if we
can let the kernel do it I'm happier that way.
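
(For the record, the do-it-in-qemu variant for a controlled exit would look
something like the sketch below. ts->child_tidptr is invented here -- it would
have to be recorded in the TaskState at clone time -- and none of this is in
the patch:)

    /* on TARGET_NR_exit from a CLONE_VM thread */
    if (ts->child_tidptr) {
        uint32_t *tidp = (uint32_t *)g2h(ts->child_tidptr);
        *tidp = 0;                      /* clear the TID word, as the kernel does */
        /* wake anyone (e.g. pthread_join) doing FUTEX_WAIT on it */
        syscall(__NR_futex, tidp, FUTEX_WAKE, 1, NULL, NULL, 0);
    }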

- clone(CLONE_PARENT_SETTID):
- clone(CLONE_CHILD_SETTID):

We need endianness-mangling on these so we have to get involved somehow.
But I think we do need to use the kernel's support and then marshal the
result back to the guest's memory.
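
(Roughly the sketch below, somewhere in do_fork() after the host clone() has
run; parent_tidptr is made up -- it isn't even passed down to do_fork() yet:)

    /* the host kernel wrote the tid in host byte order at the translated
       address; swap it in place so the guest sees its own byte order */
    #ifdef BSWAP_NEEDED
    if ((flags & 0x00100000 /* CLONE_PARENT_SETTID */) && parent_tidptr) {
        uint32_t *ptid = (uint32_t *)g2h(parent_tidptr);
        *ptid = bswap32(*ptid);
    }
    /* CLONE_CHILD_SETTID is nastier: the kernel writes it in the child, so
       the swap would have to happen there before any guest code runs */
    #endif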

-- 
dwmw2

* Re: [Qemu-devel] [PATCH]ish NPTL support.
  2006-12-13 17:50           ` David Woodhouse
@ 2006-12-13 18:07             ` Paul Brook
  2006-12-13 18:44               ` Fabrice Bellard
  2006-12-14  2:16             ` Jamie Lokier
  1 sibling, 1 reply; 14+ messages in thread
From: Paul Brook @ 2006-12-13 18:07 UTC
  To: David Woodhouse; +Cc: qemu-devel

> - sys_set_tid_address():
> - clone(CLONE_CHILD_CLEARTID):
>
> We _could_ manage to do this in qemu for controlled thread exit -- it
> would be hard for uncontrolled exit though. But I don't see any harm in
> just letting the kernel do it either. I don't mind too much, but if we
> can let the kernel do it I'm happier that way.

The harm occurs if the host libc has per-thread state (e.g. thread-local
variables). If we bypass the host thread library then libc doesn't get a
chance to initialize its per-thread structures for the new thread, and bad
things are liable to happen when that thread uses libc functions.

> We need endianness-mangling on these so we have to get involved somehow.
> But I think we do need to use the kernel's support and then marshal the
> result back to the guest's memory.

Once you start proxying things to convert endianness I'd expect it to be 
easier to just emulate everything.


Even when you implement all the syscalls, qemu still won't work reliably. In
particular, loads and stores will not be atomic. On real hardware a
word-aligned load or store is guaranteed to complete atomically; qemu
sometimes splits these into multiple byte accesses, so the guest could see a
partial access. There are also memory ordering issues (x86 has comparatively
strong memory ordering guarantees; other hosts require a memory barrier to
enforce proper ordering). I've seen both of these cause failures in real
applications.
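
(For concreteness, a guest atomic increment would need to come out as a single
host operation, e.g. via the GCC __sync builtins -- only an illustration of
what's missing, not existing qemu code:)

    /* guest "lock; incl" on a 32-bit word at guest_addr */
    uint32_t *p = (uint32_t *)g2h(guest_addr);
    __sync_fetch_and_add(p, 1);     /* one atomic RMW; implies a full barrier */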

Paul

* Re: [Qemu-devel] [PATCH]ish NPTL support.
  2006-12-13 18:07             ` Paul Brook
@ 2006-12-13 18:44               ` Fabrice Bellard
  0 siblings, 0 replies; 14+ messages in thread
From: Fabrice Bellard @ 2006-12-13 18:44 UTC
  To: qemu-devel

Paul Brook wrote:
>>- sys_set_tid_address():
>>- clone(CLONE_CHILD_CLEARTID):
>>
>>We _could_ manage to do this in qemu for controlled thread exit -- it
>>would be hard for uncontrolled exit though. But I don't see any harm in
>>just letting the kernel do it either. I don't mind too much, but if we
>>can let the kernel do it I'm happier that way.
> 
> 
> The harm occurs if the host libc has per-thread state (e.g. thread-local
> variables). If we bypass the host thread library then libc doesn't get a
> chance to initialize its per-thread structures for the new thread, and bad
> things are liable to happen when that thread uses libc functions.
> 
> 
>>We need endianness-mangling on these so we have to get involved somehow.
>>But I think we do need to use the kernel's support and then marshal the
>>result back to the guest's memory.
> 
> 
> Once you start proxying things to convert endianness I'd expect it to be 
> easier to just emulate everything.
> 
> 
> Even when you implement all the syscalls, qemu still won't work reliably. In
> particular, loads and stores will not be atomic. On real hardware a
> word-aligned load or store is guaranteed to complete atomically; qemu
> sometimes splits these into multiple byte accesses, so the guest could see a
> partial access. There are also memory ordering issues (x86 has comparatively
> strong memory ordering guarantees; other hosts require a memory barrier to
> enforce proper ordering). I've seen both of these cause failures in real
> applications.
> 
> Paul

Another point is that the dynamic translator itself is not thread safe 
(I tried to implement thread safety a long time ago, but it is not 
finished).

Using pthreads may not be necessary provided we assume the host kernel
supports NPTL. I don't think it is worth spending time on the case where the
host kernel does not support NPTL.

Fabrice.

* Re: [Qemu-devel] [PATCH]ish NPTL support.
  2006-12-13 17:50           ` David Woodhouse
  2006-12-13 18:07             ` Paul Brook
@ 2006-12-14  2:16             ` Jamie Lokier
  2006-12-16 13:26               ` David Woodhouse
  1 sibling, 1 reply; 14+ messages in thread
From: Jamie Lokier @ 2006-12-14  2:16 UTC
  To: qemu-devel; +Cc: Paul Brook

David Woodhouse wrote:
> - sys_futex():
> 
> We have to translate these into calls to the host's sys_futex() anyway.

I don't think it's necessary to translate to the host's sys_futex(),
unless the guest will be doing futex operations on memory which the
host _also_ does futex operations on.

CLONE_CHILD_CLEARTID is one of those, if it's simply relayed to the host.
So are locks in shared memory, if they are to work between host and
guest processes.  But I guess they are not expected to work.

The atomicity, queueing etc. semantics, provided they are only among
threads of a single qemu process, can be guaranteed using normal
pthreads locking and atomic operations, analogous to the way the host
kernel maps futex calls to its own waitqueues, semaphores, and atomic
ops.

However, it is probably easier to use the host's, than to write the
equivalent (basically duplicating the kernel's futex code in qemu, the
hashed locks and wait queues etc.).
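
(A minimal sketch of that qemu-internal approach on top of pthreads: one
global lock and condvar where a real version would hash on the futex address,
the wake count is ignored, and the qemu_futex_* names are invented:)

    #include <errno.h>
    #include <pthread.h>
    #include <stdint.h>

    static pthread_mutex_t futex_lock = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t  futex_cond = PTHREAD_COND_INITIALIZER;

    static int qemu_futex_wait(uint32_t *addr, uint32_t val)
    {
        int ret = 0;
        pthread_mutex_lock(&futex_lock);
        if (*addr != val)
            ret = -EWOULDBLOCK;     /* value already changed: don't sleep */
        else                        /* futex allows spurious wakeups, so one wait is enough */
            pthread_cond_wait(&futex_cond, &futex_lock);
        pthread_mutex_unlock(&futex_lock);
        return ret;
    }

    static int qemu_futex_wake(uint32_t *addr, int nr)
    {
        pthread_mutex_lock(&futex_lock);
        pthread_cond_broadcast(&futex_cond);  /* crude: wakes every waiter, not just nr */
        pthread_mutex_unlock(&futex_lock);
        return 0;
    }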

On the other hand, using the host's makes it hard to run Linux guest
binaries on non-Linux hosts (those which don't have futex), or newer
Linux guest binaries on older Linux hosts which have fewer futex ops,
or none at all.

-- Jamie

* Re: [Qemu-devel] [PATCH]ish NPTL support.
  2006-12-14  2:16             ` Jamie Lokier
@ 2006-12-16 13:26               ` David Woodhouse
  2006-12-16 15:17                 ` Paul Brook
  2006-12-16 18:48                 ` Jamie Lokier
  0 siblings, 2 replies; 14+ messages in thread
From: David Woodhouse @ 2006-12-16 13:26 UTC
  To: qemu-devel; +Cc: Paul Brook

On Thu, 2006-12-14 at 02:16 +0000, Jamie Lokier wrote:
> David Woodhouse wrote:
> > - sys_futex():
> > 
> > We have to translate these into calls to the host's sys_futex() anyway.
> 
> I don't think it's necessary to translate to the host's sys_futex(),
> unless the guest will be doing futex operations on memory which the
> host _also_ does futex operations on.

Yes, that observation makes sense. We just need proper locking against
other qemu guest threads within the same process; nothing more.

> CLONE_CHILD_CLEARTID is one of those, if it's simply relayed to the host.
> So are locks in shared memory, if they are to work between host and
> guest processes.  But I guess they are not expected to work.

Right. Those would be robust futexes -- and for the moment we can ignore
them because we haven't implemented sys_set_robust_list() anyway. That
would make life a whole lot more complicated, but I think we can do
without it for now.

> The atomicity, queueing etc. semantics, provided they are only among
> threads of a single qemu process, can be guaranteed using normal
> pthreads locking and atomic operations, analogous to the way the host
> kernel maps futex calls to its own waitqueues, semaphores, and atomic
> ops.

Yes. We might also be able to cheat in a way inspired by the 'Big Kernel
Lock' -- by pinning all threads to the same host CPU to help eliminate
some of the locking issues. 
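
(Something like the sketch below, called once per guest thread --
sched_setaffinity() with pid 0 binds only the calling thread under NPTL --
though it only narrows the races, it doesn't replace real locking:)

    #define _GNU_SOURCE             /* before the first #include in the real file */
    #include <sched.h>
    #include <stdio.h>

    static void pin_to_cpu0(void)
    {
        cpu_set_t mask;
        CPU_ZERO(&mask);
        CPU_SET(0, &mask);
        if (sched_setaffinity(0, sizeof(mask), &mask) < 0)
            perror("sched_setaffinity");
    }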

> However, it is probably easier to use the host's, than to write the
> equivalent (basically duplicating the kernel's futex code in qemu, the
> hashed locks and wait queues etc.).

The kernel's implementation is a _whole_ lot more complicated than ours
needs to be in qemu, because of the security implications of dealing
with arbitrary pointers in userspace. I think it's reasonable enough for
qemu to do its own.

> On the other hand, using the host's makes it hard to run Linux guest
> binaries on non-Linux hosts (those which don't have futex), or newer
> Linux guest binaries on older Linux hosts which have fewer futex ops,
> or none at all.

I don't think we care. You can't run qemu-i386 on a non-Linux box
_anyway_, can you? And having some syscalls return -ENOSYS if you run on
a prehistoric kernel is perfectly normal.

I did briefly think about implementing threading entirely within qemu
_without_ using threads on the host -- having the qemu process itself
schedule between the different CPU contexts. That would make the GDB
stub a whole lot saner for debugging multi-threaded guest programs. But
I don't think it's workable -- the whole point in NPTL was that you
_can't_ emulate proper POSIX-compliant threading with hacks in
userspace; especially the details of signal delivery.

-- 
dwmw2

* Re: [Qemu-devel] [PATCH]ish NPTL support.
  2006-12-16 13:26               ` David Woodhouse
@ 2006-12-16 15:17                 ` Paul Brook
  2006-12-16 18:48                 ` Jamie Lokier
  1 sibling, 0 replies; 14+ messages in thread
From: Paul Brook @ 2006-12-16 15:17 UTC
  To: qemu-devel

> > On the other hand, using the host's makes it hard to run Linux guest
> > binaries on non-Linux hosts (those which don't have futex), or newer
> > Linux guest binaries on older Linux hosts which have fewer futex ops,
> > or none at all.
>
> I don't think we care. You can't run qemu-i386 on a non-Linux box
> _anyway_, can you? And having some syscalls return -ENOSYS if you run on
> a prehistoric kernel is perfectly normal.

Not out of the box, no. However, it's not all that hard to make it work.
Certainly on any sane unix host it should be feasible. Most of the syscalls we
currently translate into C library calls or implement ourselves; we don't use
host syscalls directly. I've even had a fair amount of success running linux
applications on windows hosts via qemu.

> I did briefly think about implementing threading entirely within qemu
> _without_ using threads on the host -- having the qemu process itself
> schedule between the different CPU contexts. That would make the GDB
> stub a whole lot saner for debugging multi-threaded guest programs. But
> I don't think it's workable -- the whole point in NPTL was that you
> _can't_ emulate proper POSIX-compliant threading with hacks in
> userspace; especially the details of signal delivery.

I'm fairly sure some of the BSDs have multiple userspace threads per kernel 
context. There was at least 1 proposed linux implementation like this as 
well. IIRC we only ended up with the current 1:1 mapping because it was 
simpler.

One possibility is to use host threads (to get PID/TID mappings right), but 
still explicitly schedule from userspace. ie. have qemu ensure no more than 
one thread is active at any time.
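
(Roughly the sketch below: one mutex that a guest thread must hold while it
executes translated code and drops around anything that can block. cpu_exec()
stands in for the per-target execution loop and handle_trap() for the existing
cpu_loop() switch; nothing like this is implemented:)

    #include <pthread.h>

    static pthread_mutex_t exec_lock = PTHREAD_MUTEX_INITIALIZER;

    /* body of each host thread backing one guest thread */
    void guest_thread_loop(CPUState *env)
    {
        int trapnr;

        for (;;) {
            pthread_mutex_lock(&exec_lock);    /* at most one guest thread runs */
            trapnr = cpu_exec(env);            /* translated code until it traps */
            pthread_mutex_unlock(&exec_lock);  /* give the other threads a turn */
            handle_trap(env, trapnr);          /* syscalls etc. with the lock dropped */
        }
    }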

Paul

* Re: [Qemu-devel] [PATCH]ish NPTL support.
  2006-12-16 13:26               ` David Woodhouse
  2006-12-16 15:17                 ` Paul Brook
@ 2006-12-16 18:48                 ` Jamie Lokier
  1 sibling, 0 replies; 14+ messages in thread
From: Jamie Lokier @ 2006-12-16 18:48 UTC
  To: qemu-devel; +Cc: Paul Brook

David Woodhouse wrote:
> > However, it is probably easier to use the host's, than to write the
> > equivalent (basically duplicating the kernel's futex code in qemu, the
> > hashed locks and wait queues etc.).
> 
> The kernel's implementation is a _whole_ lot more complicated than ours
> needs to be in qemu, because of the security implications of dealing
> with arbitrary pointers in userspace. I think it's reasonable enough for
> qemu to do its own.

Although there are security issues, things like page table walking and
address hashing would need to be implemented in qemu too, if the
emulated program is allowed to fork.

> > On the other hand, using the host's makes it hard to run Linux guest
> > binaries on non-Linux hosts (those which don't have futex), or newer
> > Linux guest binaries on older Linux hosts which have fewer futex ops,
> > or none at all.
> 
> I don't think we care. You can't run qemu-i386 on a non-Linux box
> _anyway_, can you? And having some syscalls return -ENOSYS if you run on
> a prehistoric kernel is perfectly normal.

If we want non-Linux hosts to behave as prehistoric kernels, that's
fine.  If you want non-Linux hosts to run current Linux binaries, I
guess that's not enough.

> I did briefly think about implementing threading entirely within qemu
> _without_ using threads on the host -- having the qemu process itself
> schedule between the different CPU contexts. That would make the GDB
> stub a whole lot saner for debugging multi-threaded guest programs. But
> I don't think it's workable -- the whole point in NPTL was that you
> _can't_ emulate proper POSIX-compliant threading with hacks in
> userspace; especially the details of signal delivery.

NPTL is a number of changes from LinuxThreads, some for performance
and some for POSIXness.

What you can't sanely do is implement POSIX-thread signals when using
the old style of clone()'d kernel threads which LinuxThreads uses.
That was due to a kernel limitation.

But you can implement POSIX-thread signals when all threads run
within a single POSIX process, and are all scheduled in userspace.
AFAIK, POSIX thread signals are specified the way they are
specifically to allow that kind of implementation.

See GNU Pth for a fairly portable implementation of POSIX threads
entirely in a userspace library, using a single kernel thread.

You might be able to adapt Pth to implement your single process idea
for Qemu.

What's much more difficult is providing Linux thread semantics for
non-POSIX things, particularly blocking operations on devices and
files, and tid-specific things like queued SIGIO delivery.

But any emulation which doesn't simply map those things to the host
Linux kernel will have trouble getting those right.

-- Jamie

end of thread

Thread overview: 14+ messages
2006-12-09 22:38 [Qemu-devel] [PATCH]ish NPTL support David Woodhouse
2006-12-13 16:02 ` Mulyadi Santosa
2006-12-13 17:01   ` David Woodhouse
2006-12-13 17:22     ` Paul Brook
2006-12-13 17:32       ` David Woodhouse
2006-12-13 17:42         ` Paul Brook
2006-12-13 17:50           ` David Woodhouse
2006-12-13 18:07             ` Paul Brook
2006-12-13 18:44               ` Fabrice Bellard
2006-12-14  2:16             ` Jamie Lokier
2006-12-16 13:26               ` David Woodhouse
2006-12-16 15:17                 ` Paul Brook
2006-12-16 18:48                 ` Jamie Lokier
2006-12-13 17:35     ` Thiemo Seufer
