From: rtm@csail.mit.edu
To: Eric Van Hensbergen <ericvh@kernel.org>,
Latchesar Ionkov <lucho@ionkov.net>,
Dominique Martinet <asmadeus@codewreck.org>,
v9fs@lists.linux.dev
Subject: 9p server can confuse client about FIFO vs regular file -> crash
Date: Wed, 18 Dec 2024 15:04:24 -0500 [thread overview]
Message-ID: <67125.1734552264@localhost> (raw)
[-- Attachment #1: Type: text/plain, Size: 6253 bytes --]
If a 9p client asks to creat() a new file, and the server does so but
unexpectedly claims in answer to the ensuing Tgetattr that the new
file is a FIFO, then the client's file->f_op is &pipefifo_fops, but
file->private_data points to a p9_fid, not a pipe_inode_info. This
causes trouble when pipe_*() try to use file->private_data.
Ordinarily, do_dentry_open() both sets file->f_op and calls
file->f_op->open(), which sets file->private_data consistently with
file->f_op.
But v9fs_vfs_atomic_open_dotl() calls finish_open() with the open
argument set to generic_file_open, which causes do_dentry_open() to *not*
call file->f_op->open(). And v9fs_vfs_atomic_open_dotl() sets
file->private_data to a p9_fid.
A summary:
v9fs_vfs_atomic_open_dotl()
v9fs_get_new_inode_from_fid()
v9fs_inode_from_fid_dotl()
v9fs_qid_iget_dotl()
v9fs_init_inode()
case S_IFIFO:
init_special_inode()
} else if (S_ISFIFO(mode))
inode->i_fop = &pipefifo_fops;
finish_open(..., open=generic_file_open)
do_dentry_open(..., open=generic_file_open)
f->f_op = fops_get(inode->i_fop)
if (!open)
open = f->f_op->open;
if (open) {
error = open(inode, f); // calls generic_file_open, not pipe_open
file->private_data = ofid
I've attached a demo, which first gets a mutex error because
pipe_write() thinks file->private_data ought to start with a mutex,
and then a page fault.
# uname -a
Linux xxx 6.13.0-rc3-00017-gf44d154d6e3d #13 SMP Tue Dec 17 07:03:22 EST 2024 x86_64 x86_64 x86_64 GNU/Linux
# cc 9p6c.c
# ./a.out
...
------------[ cut here ]------------
DEBUG_LOCKS_WARN_ON(lock->magic != lock)
WARNING: CPU: 3 PID: 1551 at kernel/locking/mutex.c:564 __mutex_lock.constprop.0
+0x6b9/0x990
CPU: 3 UID: 0 PID: 1551 Comm: a.out Not tainted 6.13.0-rc3-00017-gf44d154d6e3d #
13
Hardware name: FreeBSD BHYVE/BHYVE, BIOS 14.0 10/17/2021
RIP: 0010:__mutex_lock.constprop.0+0x6b9/0x990
Code: ff 85 c0 0f 84 cc f9 ff ff 8b 15 c2 5a 5d 01 85 d2 0f 85 be f9 ff ff 48 c7
c6 4d 5e c7 82 48 c7 c7 8e e1 c6 82 e8 e7 7b d6 fe <0f> 0b e9 a4 f9 ff ff 0f 0b
e9 d1 fa ff ff 48 8b 03 a8 08 0f 85 fa
RSP: 0018:ffffc90001f03d50 EFLAGS: 00010286
RAX: 0000000000000000 RBX: 0000000000000001 RCX: 0000000000000027
RDX: ffff88842dadc848 RSI: 0000000000000001 RDI: ffff88842dadc840
RBP: ffffc90001f03de0 R08: 00000000ffffefff R09: 0000000000000001
R10: 00000000ffffefff R11: ffffffff8365b2c0 R12: ffff8881021b5a80
R13: ffff888113d33280 R14: ffffc90001f03f10 R15: 0000000000000000
FS: 00007f294d1df740(0000) GS:ffff88842dac0000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 0000560867bb0008 CR3: 0000000109c66004 CR4: 00000000003706f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
? __warn+0x7f/0x130
? __mutex_lock.constprop.0+0x6b9/0x990
? report_bug+0x16e/0x1a0
? prb_read_valid+0x16/0x20
? handle_bug+0x53/0x90
? exc_invalid_op+0x17/0x70
? asm_exc_invalid_op+0x1a/0x20
? __mutex_lock.constprop.0+0x6b9/0x990
? do_sys_openat2+0x78/0xc0
? set_track_prepare+0x3b/0x60
? do_sys_openat2+0x78/0xc0
? check_bytes_and_report.isra.0+0x48/0x120
pipe_write+0x48/0x660
? free_to_partial_list+0x116/0x5e0
? do_sys_openat2+0x78/0xc0
vfs_write+0x23d/0x400
ksys_write+0x67/0xe0
do_syscall_64+0x3f/0xd0
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f294d2fe574
Code: c7 00 16 00 00 00 b8 ff ff ff ff c3 66 2e 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 80 3d d5 ea 0e 00 00 74 13 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 55 48 89 e5 48 83 ec 20 48 89
RSP: 002b:00007fffb311e078 EFLAGS: 00000202 ORIG_RAX: 0000000000000001
RAX: ffffffffffffffda RBX: 00007fffb311ea58 RCX: 00007f294d2fe574
RDX: 0000000000000001 RSI: 0000560867bad0a6 RDI: 0000000000000003
RBP: 00007fffb311e930 R08: 00007f294d3e5b20 R09: 0000000000000410
R10: 0000000000000001 R11: 0000000000000202 R12: 0000000000000001
R13: 0000000000000000 R14: 0000560867baece8 R15: 00007f294d440000
</TASK>
---[ end trace 0000000000000000 ]---
BUG: unable to handle page fault for address: 000000000002fcc0
#PF: supervisor write access in kernel mode
#PF: error_code(0x0002) - not-present page
PGD 0 P4D 0
Oops: Oops: 0002 [#1] SMP DEBUG_PAGEALLOC PTI
CPU: 3 UID: 0 PID: 1551 Comm: a.out Tainted: G W 6.13.0-rc3-00017-gf44d154d6e3d #13
Tainted: [W]=WARN
Hardware name: FreeBSD BHYVE/BHYVE, BIOS 14.0 10/17/2021
RIP: 0010:osq_lock+0x57/0xf0
Code: 00 00 00 00 00 89 42 14 87 07 85 c0 0f 84 98 00 00 00 83 e8 01 48 c7 c1 c0 fc 02 00 48 98 48 03 0c c5 c0 49 cf 82 48 89 4a 08 <48> 89 11 8b 42 10 85 c0 75 76 65 48 8b 3d 17 e6 f2 7e eb 09 f3 90
RSP: 0018:ffffc90001f03d48 EFLAGS: 00010206
RAX: fffffffffffffffe RBX: ffff888107269940 RCX: 000000000002fcc0
RDX: ffff88842daefcc0 RSI: ffff888113d332a0 RDI: ffff888113d332a0
RBP: ffffc90001f03de0 R08: 00000000ffffefff R09: 0000000000000001
R10: 00000000ffffefff R11: ffffffff8365b2c0 R12: ffff88810cc6ba00
R13: ffff888113d33280 R14: ffff888113d332a0 R15: 0000000000000000
FS: 00007f294d1df740(0000) GS:ffff88842dac0000(0000) knlGS:0000000000000000
CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000000002fcc0 CR3: 0000000109c66004 CR4: 00000000003706f0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
<TASK>
? __die+0x1e/0x60
? page_fault_oops+0x157/0x450
? __warn+0xa5/0x130
? __mutex_lock.constprop.0+0x6b9/0x990
? nbcon_get_cpu_emergency_nesting+0x5/0x30
? exc_page_fault+0x66/0x140
? asm_exc_page_fault+0x26/0x30
? osq_lock+0x57/0xf0
__mutex_lock.constprop.0+0x2b2/0x990
? do_sys_openat2+0x78/0xc0
? set_track_prepare+0x3b/0x60
? do_sys_openat2+0x78/0xc0
? check_bytes_and_report.isra.0+0x48/0x120
pipe_write+0x48/0x660
? free_to_partial_list+0x116/0x5e0
? do_sys_openat2+0x78/0xc0
vfs_write+0x23d/0x400
ksys_write+0x67/0xe0
do_syscall_64+0x3f/0xd0
entry_SYSCALL_64_after_hwframe+0x76/0x7e
RIP: 0033:0x7f294d2fe574
...
Kernel panic - not syncing: Fatal exception
Kernel Offset: disabled
---[ end Kernel panic - not syncing: Fatal exception ]---
Robert Morris
rtm@mit.edu
[-- Attachment #2: 9p6c.c --]
[-- Type: application/octet-stream, Size: 6910 bytes --]
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <time.h>
#include <sys/stat.h>
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <netinet/in.h>
#include <sys/wait.h>
#include <sys/resource.h>
/*
 * Read exactly n bytes from fd into buf, looping over short reads.
 *
 * Returns n once everything has been read (including immediately when
 * n <= 0, in which case nothing is read).  If read() reports end-of-file
 * or an error before n bytes have arrived, prints a "read" diagnostic via
 * perror() and returns -1.
 */
int readn(int fd, char *buf, int n) {
  char *cursor = buf;
  int remaining;

  for (remaining = n; remaining > 0; ) {
    int got = read(fd, cursor, remaining);
    if (got <= 0) {
      perror("read");
      return -1;
    }
    cursor += got;
    remaining -= got;
  }
  return n;
}
/*
 * Copy a length-prefixed string out of a message buffer.
 *
 * p points at a 2-byte length (interpreted in host byte order) that is
 * immediately followed by that many bytes of string data, with no
 * terminator.
 *
 * Returns a freshly malloc()ed, NUL-terminated copy; the caller owns it and
 * may free() it.  Never returns NULL: if the allocation fails the error is
 * reported with perror() and the process exits.
 */
char *
getstr(unsigned char *p)
{
  /*
   * Callers hand in odd offsets into a char buffer (e.g. ibuf+11), so p is
   * generally not aligned for a 16-bit load.  Copying the two bytes with
   * memcpy avoids the misaligned, type-punned access that dereferencing a
   * cast pointer would perform, while reading the same value.
   */
  unsigned short len;
  memcpy(&len, p, sizeof len);

  char *buf = malloc((size_t)len + 1);
  if (buf == NULL) {
    perror("malloc");
    exit(1);
  }

  memcpy(buf, p + 2, len);
  buf[len] = '\0';
  return buf;
}
/*
 * Reproducer driver.
 *
 * Binds a TCP listener on port 564, then forks two children:
 *   - a client child that runs "mount -t 9p ... 127.0.0.1 /mnt" through
 *     system(), creat()s /mnt/b, writes and reads one byte on it, closes
 *     it and force-unmounts /mnt;
 *   - a server child that accepts one connection and answers every
 *     incoming message with a hand-built reply selected by the message's
 *     type byte (ibuf[4]) and, in a few places, by the running message
 *     counter opno.
 * The parent polls waitpid() every 200 ms and stops when a child has been
 * reaped or after roughly 10 seconds.
 */
int
main(){
// Set the core-file size limit (soft and hard) to zero.
struct rlimit r;
r.rlim_cur = r.rlim_max = 0;
setrlimit(RLIMIT_CORE, &r);
// Listening socket; sin is zeroed apart from the family and port 564.
int s = socket(AF_INET, SOCK_STREAM, 0);
{ int yes = 1;
setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes));
}
struct sockaddr_in sin;
memset(&sin, 0, sizeof(sin));
sin.sin_family = AF_INET;
sin.sin_port = htons(564);
if(bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0){
perror("bind"); exit(1);
}
listen(s, 10);
sync(); sleep(1);
// Client child: mount the fake server and exercise one file on it.
if(fork() == 0){
close(s);
// -o ...,debug=0x10f
if(system("echo -n mount: ; mount -t 9p -o nodevmap,trans=tcp,cache=none,access=any,debug=0x0 127.0.0.1 /mnt") == 0){
system("mount | grep /mnt");
printf("open /mnt/b:\n");
// NOTE(review): if creat() fails, fd is still passed to write/read/close below.
int fd = creat("/mnt/b", 0777);
if(fd < 0) perror("creat");
write(fd, "x", 1);
char junk[1];
read(fd, junk, 1);
printf("close /mnt/b:\n");
close(fd);
system("echo -n umount: ; umount -f /mnt");
}
exit(0);
}
// Server child: accept a single connection and answer messages until a read fails.
int spid = fork();
if(spid == 0){
socklen_t sinlen = sizeof(sin);
int s1 = accept(s, (struct sockaddr *) &sin, &sinlen);
if(s1 < 0) { perror("accept"); exit(1); }
close(s);
// opno counts messages handled so far; some replies below depend on it.
int opno = 0;
while(1){
char ibuf[1024];
// Each message begins with a 4-byte length that includes itself; read
// that first, then the remaining ilen-4 bytes.
// NOTE(review): ilen comes straight from the peer and is not checked
// against sizeof(ibuf) before the second readn().
if(readn(s1, ibuf, 4) < 0) break;
int ilen = *(int*)(ibuf+0);
if(readn(s1, ibuf+4, ilen - 4) < 0) break;
printf("%d: ", opno);
fflush(stdout);
// The reply buffer starts as all 0xff bytes, so every reply field that a
// handler does not set explicitly is sent as 0xff.  The reply length
// defaults to the request length until a handler overrides it.
// NOTE(review): the strings returned by getstr() in the printf calls
// below are not stored anywhere, so they are never freed.
char obuf[sizeof(ibuf)];
memset(obuf, 0xff, sizeof(obuf));
*(int*)(obuf+0) = ilen; // length
if(ibuf[4] == 100){ // Tversion
// Echo the request back unchanged; the type byte and tag are patched later.
printf("version %d %s\n", *(int*)(ibuf+7), getstr(ibuf+11));
memcpy(obuf, ibuf, ilen);
} else if(ibuf[4] == 24){ // Tgetattr (different from Tstat!)
printf("getattr\n");
// https://github.com/chaos/diod/blob/master/protocol.md
// Reply is sz+7 = 168 bytes.  Only uid, gid and mode are filled in.
// The getattr that arrives as message number 7 is answered with a FIFO
// mode; every other getattr is answered with a directory mode.
int sz = 161;
*(int*)(obuf+0) = sz + 7;
*(int*)(obuf+32) = 0; // uid
*(int*)(obuf+36) = 0; // gid
if(opno == 7){
//*(int*)(obuf+28) = 0100777; // S_IFREG, rwxrwxrwx
*(int*)(obuf+28) = 010777; // S_IFIFO, rwxrwxrwx
} else {
*(int*)(obuf+28) = 0040777; // S_IFDIR, rwxrwxrwx
}
} else if(ibuf[4] == 110){ // Twalk
int nwqid = *(short*)(ibuf+15);
printf("walk %d %s\n", nwqid, nwqid?getstr(ibuf+17):"-");
if(opno == 3){
// error...
// Message number 3 is answered with an 11-byte error reply carrying
// ENOENT.  ibuf[4] is overwritten so that the generic "type + 1" step
// further down produces reply type 107.
ibuf[4] = 106; // Terror
*(int*)(obuf+0) = 11;
*(int*)(obuf+7) = ENOENT;
} else {
// Reply with the requested number of qids, 13 bytes each.
*(short*)(obuf+7) = nwqid;
*(int*)(obuf+0) = 9 + nwqid*13;
if(opno == 24){
*(char*)(obuf+21) = 1;
}
}
} else if(ibuf[4] == 104){ // Tattach
printf("attach\n");
*(int*)(obuf+0) = 20;
} else if(ibuf[4] == 120){ // Tclunk
printf("clunk\n");
*(int*)(obuf+0) = 7;
} else if(ibuf[4] == 30){ // Txattrwalk
printf("xattrwalk\n");
*(int*)(obuf+0) = 15;
*(long*)(obuf+7) = 2; // size
} else if(ibuf[4] == 116){ // Tread
unsigned long offset = *(long*)(ibuf+11);
unsigned int count = *(int*)(ibuf+19);
printf("read %ld %d\n", offset, count); fflush(stdout);
int n = 0;
// For a read at offset 0 asking for more than 2 bytes, build one record
// at obuf+11: step over the fixed-width fields (leaving them 0xff), then
// store four one-character strings.  Any other read returns n = 0 bytes.
// NOTE(review): each 'x' store below overlaps the 2-byte length written
// just before it — confirm whether that malformed layout is intended.
if(offset == 0 && count > 2){
unsigned char *p = obuf+11;
unsigned char *p0 = p;
p += 2; // size;
p += 2; // type
p += 4; // dev
p += 1; // qid.type
p += 4; // qid.vers
p += 8; // qid.path
p += 4; // permissions
p += 4; // atime
p += 4; // mtime
p += 8; // length
*(short*)p = 1; // name length
p++;
*p++ = 'x';
*(short*)p = 1; // owner name length
*p++ = 'x';
*(short*)p = 1; // group name length
*p++ = 'x';
*(short*)p = 1; // last modify user name length
*p++ = 'x';
n = p - p0;
printf(" >>> n=%d <<< ", n); fflush(stdout);
*(short*)(p0) = n;
}
*(int*)(obuf+0) = n + 11;
*(int*)(obuf+7) = n;
} else if(ibuf[4] == 12){ // Tlopen
printf("lopen\n");
*(int*)(obuf+0) = 24;
} else if(ibuf[4] == 40){ // Treaddir
printf("readdir\n");
// each dirent is 25 bytes
// Offset 0 yields one entry named "x"; any other offset yields none.
// count is read from the request but not used.
unsigned long offset = *(long*)(ibuf+11);
unsigned int count = *(int*)(ibuf+19);
int n = 0;
if(offset == 0){
n = 1;
unsigned char *p0 = obuf + 11;
unsigned char *p = p0;
p += 13; // qid
p += 8; // offset
p += 1; // type
*(short*)p = 1;
p += 2;
*p++ = 'x';
}
*(int*)(obuf+0) = 11 + n*25;
} else if(ibuf[4] == 8){ // Tstatfs
printf("statfs\n");
*(int*)(obuf+0) = 67;
} else if(ibuf[4] == 72){ // Tmkdir
printf("mkdir %s\n", getstr(ibuf + 11));
*(int*)(obuf+0) = 20;
} else if(ibuf[4] == 74){ // Trenameat
printf("renameat %s\n", getstr(ibuf + 11));
*(int*)(obuf+0) = 7;
} else if(ibuf[4] == 14){ // Tlcreate
printf("lcreate %s\n", getstr(ibuf + 11));
*(int*)(obuf+0) = 24;
} else if(ibuf[4] == 26){ // Tsetattr
printf("setattr %s\n", getstr(ibuf + 11));
*(int*)(obuf+0) = 7;
} else if(ibuf[4] == 76){ // Tunlinkat
printf("unlinkat %s\n", getstr(ibuf + 11));
*(int*)(obuf+0) = 7;
} else {
// Unrecognised type: the reply keeps the default length and 0xff payload.
printf("%d ???\n", ibuf[4] & 0xff);
}
fflush(stdout);
// Common reply header: type is the request type plus one, tag is copied.
obuf[4] = ibuf[4] + 1; // convert Txxx to Rxxx
*(short*)(obuf+5) = *(short*)(ibuf+5); // tag
if(obuf[4] == 25){
printf("Rgetattr #%d: ", opno);
// https://github.com/chaos/diod/blob/master/protocol.md
printf("op %d ", obuf[4]);
printf("mode 0%o ", *(unsigned int *)(obuf+28));
printf("\n");
}
// Send exactly as many bytes as the reply's own length field says.
if(write(s1, obuf, *(int*)(obuf+0))<=0) perror("write");
opno += 1;
}
exit(0);
}
// Parent: drop the listener and wait for a child to exit, giving up after
// about 10 seconds.
close(s);
time_t t0 = time(0);
while(1){
int st;
int ret = waitpid(-1, &st, WNOHANG);
if(ret > 0)
break;
usleep(200000);
time_t t1 = time(0);
if(t1 - t0 >= 10){
printf("9pnew: timeout\n");
break;
}
}
}
reply other threads:[~2024-12-18 20:04 UTC|newest]
Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=67125.1734552264@localhost \
--to=rtm@csail.mit.edu \
--cc=asmadeus@codewreck.org \
--cc=ericvh@kernel.org \
--cc=lucho@ionkov.net \
--cc=v9fs@lists.linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).