diff --git a/sys/compat/linux/arch/amd64/linux_syscall.h b/sys/compat/linux/arch/amd64/linux_syscall.h index 215a2ff4bafeb..4b014c6fa5271 100644 --- a/sys/compat/linux/arch/amd64/linux_syscall.h +++ b/sys/compat/linux/arch/amd64/linux_syscall.h @@ -1,9 +1,9 @@ -/* $NetBSD: linux_syscall.h,v 1.87 2025/11/10 15:41:57 christos Exp $ */ +/* $NetBSD$ */ /* * System call numbers. * - * DO NOT EDIT-- this file is automatically generated. + * DO NOT EDIT-- this file is generated by makesyscalls.sh * created from NetBSD: syscalls.master,v 1.78 2025/11/10 15:41:38 christos Exp */ @@ -135,6 +135,9 @@ /* syscall: "getpid" ret: "pid_t" args: */ #define LINUX_SYS_getpid 39 +/* syscall: "sendfile" ret: "ssize_t" args: "int" "int" "off_t *" "size_t" */ +#define LINUX_SYS_sendfile 40 + /* syscall: "socket" ret: "int" args: "int" "int" "int" */ #define LINUX_SYS_socket 41 diff --git a/sys/compat/linux/arch/amd64/linux_syscallargs.h b/sys/compat/linux/arch/amd64/linux_syscallargs.h index 6f67a4e56ed9e..d56b391276448 100644 --- a/sys/compat/linux/arch/amd64/linux_syscallargs.h +++ b/sys/compat/linux/arch/amd64/linux_syscallargs.h @@ -1,9 +1,9 @@ -/* $NetBSD: linux_syscallargs.h,v 1.87 2025/11/10 15:41:57 christos Exp $ */ +/* $NetBSD$ */ /* * System call argument lists. * - * DO NOT EDIT-- this file is automatically generated. + * DO NOT EDIT-- this file is generated by makesyscalls.sh * created from NetBSD: syscalls.master,v 1.78 2025/11/10 15:41:38 christos Exp */ @@ -186,6 +186,14 @@ check_syscall_args(linux_sys_alarm) struct compat_50_sys_setitimer_args; +struct linux_sys_sendfile_args { + syscallarg(int) out_fd; + syscallarg(int) in_fd; + syscallarg(off_t *) offset; + syscallarg(size_t) count; +}; +check_syscall_args(linux_sys_sendfile) + struct linux_sys_socket_args { syscallarg(int) domain; syscallarg(int) type; @@ -1425,6 +1433,8 @@ int compat_50_sys_setitimer(struct lwp *, const struct compat_50_sys_setitimer_a int sys_getpid(struct lwp *, const void *, register_t *); +int linux_sys_sendfile(struct lwp *, const struct linux_sys_sendfile_args *, register_t *); + int linux_sys_socket(struct lwp *, const struct linux_sys_socket_args *, register_t *); int linux_sys_connect(struct lwp *, const struct linux_sys_connect_args *, register_t *); diff --git a/sys/compat/linux/arch/amd64/linux_syscalls.c b/sys/compat/linux/arch/amd64/linux_syscalls.c index 7b435991531a6..295ad8b2ec462 100644 --- a/sys/compat/linux/arch/amd64/linux_syscalls.c +++ b/sys/compat/linux/arch/amd64/linux_syscalls.c @@ -1,14 +1,14 @@ -/* $NetBSD: linux_syscalls.c,v 1.87 2025/11/10 15:41:57 christos Exp $ */ +/* $NetBSD$ */ /* * System call names. * - * DO NOT EDIT-- this file is automatically generated. + * DO NOT EDIT-- this file is generated by makesyscalls.sh * created from NetBSD: syscalls.master,v 1.78 2025/11/10 15:41:38 christos Exp */ #include -__KERNEL_RCSID(0, "$NetBSD: linux_syscalls.c,v 1.87 2025/11/10 15:41:57 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD$"); #if defined(_KERNEL_OPT) #if defined(_KERNEL_OPT) @@ -89,7 +89,7 @@ const char *const linux_syscallnames[] = { /* 37 */ "alarm", /* 38 */ "setitimer", /* 39 */ "getpid", - /* 40 */ "#40 (unimplemented sendfile)", + /* 40 */ "sendfile", /* 41 */ "socket", /* 42 */ "connect", /* 43 */ "oaccept", @@ -633,7 +633,7 @@ const char *const altlinux_syscallnames[] = { /* 37 */ NULL, /* alarm */ /* 38 */ NULL, /* setitimer */ /* 39 */ NULL, /* getpid */ - /* 40 */ NULL, /* unimplemented sendfile */ + /* 40 */ NULL, /* sendfile */ /* 41 */ NULL, /* socket */ /* 42 */ NULL, /* connect */ /* 43 */ "accept", diff --git a/sys/compat/linux/arch/amd64/linux_sysent.c b/sys/compat/linux/arch/amd64/linux_sysent.c index 8d91d859ebad6..5cc1b65a645a3 100644 --- a/sys/compat/linux/arch/amd64/linux_sysent.c +++ b/sys/compat/linux/arch/amd64/linux_sysent.c @@ -1,14 +1,14 @@ -/* $NetBSD: linux_sysent.c,v 1.87 2025/11/10 15:41:57 christos Exp $ */ +/* $NetBSD$ */ /* * System call switch table. * - * DO NOT EDIT-- this file is automatically generated. + * DO NOT EDIT-- this file is generated by makesyscalls.sh * created from NetBSD: syscalls.master,v 1.78 2025/11/10 15:41:38 christos Exp */ #include -__KERNEL_RCSID(0, "$NetBSD: linux_sysent.c,v 1.87 2025/11/10 15:41:57 christos Exp $"); +__KERNEL_RCSID(0, "$NetBSD$"); #if defined(_KERNEL_OPT) #include "opt_sysv.h" @@ -241,8 +241,10 @@ struct sysent linux_sysent[] = { .sy_call = (sy_call_t *)sys_getpid }, /* 39 = getpid */ { - .sy_call = linux_sys_nosys, - }, /* 40 = filler */ + ns(struct linux_sys_sendfile_args), + .sy_flags = SYCALL_ARG_PTR, + .sy_call = (sy_call_t *)linux_sys_sendfile + }, /* 40 = sendfile */ { ns(struct linux_sys_socket_args), .sy_call = (sy_call_t *)linux_sys_socket diff --git a/sys/compat/linux/arch/amd64/linux_systrace_args.c b/sys/compat/linux/arch/amd64/linux_systrace_args.c index 4effe6f8f11fa..9ab95cc240d06 100644 --- a/sys/compat/linux/arch/amd64/linux_systrace_args.c +++ b/sys/compat/linux/arch/amd64/linux_systrace_args.c @@ -1,9 +1,9 @@ -/* $NetBSD: linux_systrace_args.c,v 1.31 2025/11/10 15:41:57 christos Exp $ */ +/* $NetBSD$ */ /* * System call argument to DTrace register array conversion. * - * DO NOT EDIT-- this file is automatically generated. + * DO NOT EDIT-- this file is generated by makesyscalls.sh * This file is part of the DTrace syscall provider. */ @@ -351,6 +351,16 @@ systrace_args(register_t sysnum, const void *params, uintptr_t *uarg, size_t *n_ *n_args = 0; break; } + /* linux_sys_sendfile */ + case 40: { + const struct linux_sys_sendfile_args *p = params; + iarg[0] = SCARG(p, out_fd); /* int */ + iarg[1] = SCARG(p, in_fd); /* int */ + uarg[2] = (intptr_t) SCARG(p, offset); /* off_t * */ + uarg[3] = SCARG(p, count); /* size_t */ + *n_args = 4; + break; + } /* linux_sys_socket */ case 41: { const struct linux_sys_socket_args *p = params; @@ -2780,6 +2790,25 @@ systrace_entry_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) /* sys_getpid */ case 39: break; + /* linux_sys_sendfile */ + case 40: + switch(ndx) { + case 0: + p = "int"; + break; + case 1: + p = "int"; + break; + case 2: + p = "off_t *"; + break; + case 3: + p = "size_t"; + break; + default: + break; + }; + break; /* linux_sys_socket */ case 41: switch(ndx) { @@ -6113,6 +6142,11 @@ systrace_return_setargdesc(int sysnum, int ndx, char *desc, size_t descsz) break; /* sys_getpid */ case 39: + /* linux_sys_sendfile */ + case 40: + if (ndx == 0 || ndx == 1) + p = "ssize_t"; + break; /* linux_sys_socket */ case 41: if (ndx == 0 || ndx == 1) diff --git a/sys/compat/linux/arch/amd64/syscalls.master b/sys/compat/linux/arch/amd64/syscalls.master index 2279eb36d8182..be492840c28a7 100644 --- a/sys/compat/linux/arch/amd64/syscalls.master +++ b/sys/compat/linux/arch/amd64/syscalls.master @@ -145,7 +145,8 @@ struct itimerval50 *itv, \ struct itimerval50 *oitv); } 39 STD { pid_t|sys||getpid(void); } -40 UNIMPL sendfile +40 STD { ssize_t|linux_sys||sendfile(int out_fd, int in_fd, \ + off_t *offset, size_t count); } 41 STD { int|linux_sys||socket(int domain, \ int type, int protocol); } 42 STD { int|linux_sys||connect(int s, \ diff --git a/sys/compat/linux/common/linux_file.c b/sys/compat/linux/common/linux_file.c index 4c3ead76b6eb0..e60098dc2de79 100644 --- a/sys/compat/linux/common/linux_file.c +++ b/sys/compat/linux/common/linux_file.c @@ -275,6 +275,222 @@ linux_sys_openat(struct lwp *l, const struct linux_sys_openat_args *uap, return 0; } +/* sendfile(2) */ +int +linux_sys_sendfile(struct lwp *l, const struct linux_sys_sendfile_args *uap, + register_t *retval) +{ + /* + * syscallarg(int) out_fd; + * syscallarg(int) in_fd; + * syscallarg(off_t *) offset; + * syscallarg(size_t) count; + */ + + /* Args from the syscall */ + int in_fd = SCARG(uap, in_fd); + int out_fd = SCARG(uap, out_fd); + off_t *user_offset = SCARG(uap, offset); + size_t count = SCARG(uap, count); + + file_t *in_fp = NULL; + file_t *out_fp = NULL; + + off_t in_offset = 0; + off_t in_offset_before_reads; + bool has_user_offset = (user_offset != NULL); + size_t bytes_left; + size_t total_bytes_copied = 0; + const size_t MAX_BYTES_TO_TRANSFER = 2147479552; + const off_t OFF_MAX = __type_max(off_t); + + /* Structures for actual copy */ + char *buffer = NULL; + struct uio auio; + struct iovec aiov; + + int error = 0; + + /* The count must not be more than what the man page specifies */ + if (count > MAX_BYTES_TO_TRANSFER) + count = MAX_BYTES_TO_TRANSFER; + + if (has_user_offset) { + error = copyin(user_offset, &in_offset, sizeof(in_offset)); + if (error) + goto out; + } + + if (count > OFF_MAX - in_offset) { + error = EOVERFLOW; + goto out; + } + + in_fp = fd_getfile(in_fd); + out_fp = fd_getfile(out_fd); + + if ((in_fp == NULL) || (out_fp == NULL)) { + error = EBADF; + goto out; + } + + /* + * Normally, in_fd can only be a regular file, however, if out_fd + * is a pipe the linux sendfile desugars to a splice, allowing + * in_fd to be a socket, but NOT a pipe (even though splice accepts). + */ + switch (in_fp->f_type) { + + case DTYPE_VNODE: + struct vnode *in_vn = in_fp->f_vnode; + + if (in_vn->v_type != VREG) { + error = EINVAL; + goto out; + } + break; + + case DTYPE_SOCKET: + /* If in_fd is a socket, user_offset must be NULL */ + if (has_user_offset) { + error = EINVAL; + goto out; + } + if (out_fp->f_type != DTYPE_PIPE) { + error = EINVAL; + goto out; + } + break; + default: + error = EINVAL; + goto out; + } + + /* out_fp may be a regular file, a pipe or a socket */ + switch (out_fp->f_type) { + + case DTYPE_VNODE: + struct vnode *out_vn = out_fp->f_vnode; + + if (out_vn->v_type != VREG) { + error = EINVAL; + goto out; + } + break; + + case DTYPE_SOCKET: + break; + + case DTYPE_PIPE: + break; + + default: + error = EINVAL; + goto out; + } + + if ((in_fp->f_flag & FREAD) == 0) { + error = EBADF; + goto out; + } + + if (((out_fp->f_flag & FWRITE) == 0) || + ((out_fp->f_flag & FAPPEND) != 0)) { + error = EBADF; + goto out; + } + + buffer = kmem_alloc(MAXBSIZE, KM_SLEEP); + + bytes_left = count; + in_offset_before_reads = in_fp->f_offset; + if (has_user_offset) + in_fp->f_offset = in_offset; + + while (bytes_left > 0) { + + size_t to_copy = MIN(bytes_left, MAXBSIZE); + size_t bytes_read = 0; + size_t bytes_written = 0; + + /* Set up iovec and uio for reading */ + aiov.iov_base = buffer; + aiov.iov_len = to_copy; + auio.uio_resid = to_copy; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_rw = UIO_READ; + UIO_SETUP_SYSSPACE(&auio); + + /* Read the in_fp */ + error = (*in_fp->f_ops->fo_read)(in_fp, &in_fp->f_offset, &auio, + in_fp->f_cred, 0); + + if (error) + break; /* Error when reading */ + + bytes_read = to_copy - auio.uio_resid; + + if (bytes_read == 0) { + /* EOF reached */ + break; + } + + /* Set up iovec and uio for writing */ + aiov.iov_base = buffer; + aiov.iov_len = bytes_read; + auio.uio_resid = bytes_read; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_rw = UIO_WRITE; + UIO_SETUP_SYSSPACE(&auio); + + /* Write to out_fp */ + error = (*out_fp->f_ops->fo_write)(out_fp, &out_fp->f_offset, &auio, + out_fp->f_cred, 0); + + bytes_written = bytes_read - auio.uio_resid; + + if (error) { + if (error == ENOBUFS) { + error = EAGAIN; /* What the syscall expects */ + } + break; /* Error when writing */ + } + + in_fp->f_offset += bytes_written; + out_fp->f_offset += bytes_written; + + total_bytes_copied += bytes_written; + bytes_left -= bytes_written; + } + + if (total_bytes_copied > 0) { + error = 0; + } + + if (has_user_offset) { + int copy_err = copyout(&in_fp->f_offset, user_offset, sizeof(in_offset)); + in_fp->f_offset = in_offset_before_reads; /* returns to original */ + /* Overrides error only if there's something wrong with the copyout */ + if (copy_err) + error = copy_err; + } + + *retval = total_bytes_copied; + + goto out; + +out: + if (buffer) + kmem_free(buffer, MAXBSIZE); + if (in_fp) + fd_putfile(in_fd); + if (out_fp) + fd_putfile(out_fd); + return error; +} + /* * Most actions in the fcntl() call are straightforward; simply * pass control to the NetBSD system call. A few commands need