From 88cbd5598499d976cdf24b5e01faec43ed9bfa64 Mon Sep 17 00:00:00 2001 From: shamb0 Date: Thu, 26 Dec 2024 15:23:41 +0530 Subject: [PATCH] Add callback support to FileDescription - Implementing atomic reads for contiguous and scattered buffers Signed-off-by: shamb0 --- src/concurrency/thread.rs | 18 +- src/shims/files.rs | 177 +++++++++ src/shims/unix/fd.rs | 187 ++++++++- src/shims/unix/foreign_items.rs | 6 + src/shims/unix/fs.rs | 161 +++++++- tests/pass-dep/libc/libc-fs.rs | 645 ++++++++++++++++++++++++++++++++ tests/utils/fs.rs | 47 +++ 7 files changed, 1219 insertions(+), 22 deletions(-) diff --git a/src/concurrency/thread.rs b/src/concurrency/thread.rs index 6d57cb3876..ac9cf813e4 100644 --- a/src/concurrency/thread.rs +++ b/src/concurrency/thread.rs @@ -48,9 +48,23 @@ pub enum MachineCallbackState { TimedOut, } -/// Generic callback trait for machine operations. +/// A generic callback trait for asynchronous machine operations. +/// +/// # Type Parameters +/// * `'tcx`: The typing context lifetime +/// +/// Callbacks are executed while holding mutable access to the interpreter +/// context, so they must maintain interpreter invariants. pub trait MachineCallback<'tcx>: VisitProvenance { - /// Called when the operation completes (successfully or with timeout). + /// Called when an operation completes, either successfully or due to timeout. + /// + /// # Arguments + /// * `self`: Owned callback, boxed to allow for dynamic dispatch + /// * `ecx`: Mutable interpreter context + /// * `result`: Operation completion state + /// + /// # Returns + /// Result indicating if callback execution succeeded fn call( self: Box, ecx: &mut InterpCx<'tcx, MiriMachine<'tcx>>, diff --git a/src/shims/files.rs b/src/shims/files.rs index f673b834be..40b16763d7 100644 --- a/src/shims/files.rs +++ b/src/shims/files.rs @@ -17,6 +17,7 @@ pub trait FileDescription: std::fmt::Debug + Any { /// Reads as much as possible into the given buffer `ptr`. /// `len` indicates how many bytes we should try to read. /// `dest` is where the return value should be stored: number of bytes read, or `-1` in case of error. + #[allow(dead_code)] fn read<'tcx>( &self, _self_ref: &FileDescriptionRef, @@ -29,6 +30,29 @@ pub trait FileDescription: std::fmt::Debug + Any { throw_unsup_format!("cannot read from {}", self.name()); } + /// Performs an atomic read operation on the file. + /// + /// # Arguments + /// * `self_ref` - Strong reference to file description for lifetime management + /// * `communicate_allowed` - Whether external communication is permitted + /// * `op` - The I/O operation containing buffer and layout information + /// * `dest` - Destination for storing operation results + /// * `ecx` - Mutable reference to interpreter context + /// + /// # Returns + /// * `Ok(())` on successful read + /// * `Err(_)` if read fails or is unsupported + fn read_atomic<'tcx>( + &self, + _self_ref: &FileDescriptionRef, + _communicate_allowed: bool, + _op: &mut IoTransferOperation<'tcx>, + _dest: &MPlaceTy<'tcx>, + _ecx: &mut MiriInterpCx<'tcx>, + ) -> InterpResult<'tcx> { + throw_unsup_format!("cannot read from {}", self.name()); + } + /// Writes as much as possible from the given buffer `ptr`. /// `len` indicates how many bytes we should try to write. /// `dest` is where the return value should be stored: number of bytes written, or `-1` in case of error. @@ -409,3 +433,156 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { interp_ok(()) } } + +/// Represents an atomic I/O operation that handles data transfer between memory regions. +/// Supports both contiguous and scattered memory layouts for efficient I/O operations. +#[derive(Clone)] +pub struct IoTransferOperation<'tcx> { + /// Intermediate buffer for atomic transfer operations. + /// For reads: Temporary storage before distribution to destinations + /// For writes: Aggregation point before writing to file + transfer_buffer: Vec, + + /// Memory layout specification for the transfer operation. + layout: IoBufferLayout, + + /// Total number of bytes to be processed in this operation. + total_size: usize, + + /// Interpreter context lifetime marker. + _phantom: std::marker::PhantomData<&'tcx ()>, +} + +/// Specifies how memory regions are organized for I/O operations +#[derive(Clone)] +enum IoBufferLayout { + /// Single continuous memory region for transfer. + Contiguous { address: Pointer }, + /// Multiple discontinuous memory regions. + Scattered { regions: Vec<(Pointer, usize)> }, +} + +impl VisitProvenance for IoTransferOperation<'_> { + fn visit_provenance(&self, _visit: &mut VisitWith<'_>) { + // Visits any references that need provenance tracking. + // Currently a no-op as IoTransferOperation contains no such references. + } +} + +impl<'tcx> IoTransferOperation<'tcx> { + /// Creates a new I/O operation for a contiguous memory region. + pub fn new_contiguous(ptr: Pointer, len: usize) -> Self { + IoTransferOperation { + transfer_buffer: vec![0; len], + layout: IoBufferLayout::Contiguous { address: ptr }, + total_size: len, + _phantom: std::marker::PhantomData, + } + } + + /// Creates a new I/O operation for scattered memory regions. + pub fn new_scattered(buffers: Vec<(Pointer, usize)>) -> Self { + let total_size = buffers.iter().map(|(_, len)| len).sum(); + IoTransferOperation { + transfer_buffer: vec![0; total_size], + layout: IoBufferLayout::Scattered { regions: buffers }, + total_size, + _phantom: std::marker::PhantomData, + } + } + + /// Provides mutable access to the transfer buffer. + pub fn buffer_mut(&mut self) -> &mut [u8] { + &mut self.transfer_buffer + } + + /// Distributes data from the transfer buffer to final destinations. + pub fn distribute_data( + &mut self, + ecx: &mut MiriInterpCx<'tcx>, + dest: &MPlaceTy<'tcx>, + bytes_processed: usize, + ) -> InterpResult<'tcx> { + if bytes_processed > self.total_size { + return ecx.set_last_error_and_return(LibcError("EINVAL"), dest); + } + + match &self.layout { + IoBufferLayout::Contiguous { address } => { + // POSIX Compliance: Verify buffer accessibility before writing + if ecx + .check_ptr_access( + *address, + Size::from_bytes(bytes_processed), + CheckInAllocMsg::MemoryAccessTest, + ) + .report_err() + .is_err() + { + return ecx.set_last_error_and_return(LibcError("EFAULT"), dest); + } + + // Attempt the write operation + if ecx + .write_bytes_ptr( + *address, + self.transfer_buffer[..bytes_processed].iter().copied(), + ) + .report_err() + .is_err() + { + return ecx.set_last_error_and_return(LibcError("EIO"), dest); + } + } + + IoBufferLayout::Scattered { regions } => { + let mut current_pos = 0; + + for (ptr, len) in regions { + if current_pos >= bytes_processed { + break; + } + + // Calculate copy size with safe arithmetic + let remaining_bytes = bytes_processed + .checked_sub(current_pos) + .expect("current_pos should never exceed bytes_read"); + let copy_size = (*len).min(remaining_bytes); + + // POSIX Compliance: Verify each buffer's accessibility + if ecx + .check_ptr_access( + *ptr, + Size::from_bytes(copy_size), + CheckInAllocMsg::MemoryAccessTest, + ) + .report_err() + .is_err() + { + return ecx.set_last_error_and_return(LibcError("EFAULT"), dest); + } + + let end_pos = current_pos + .checked_add(copy_size) + .expect("end position calculation should not overflow"); + + // Attempt the write operation with proper error handling + if ecx + .write_bytes_ptr( + *ptr, + self.transfer_buffer[current_pos..end_pos].iter().copied(), + ) + .report_err() + .is_err() + { + return ecx.set_last_error_and_return(LibcError("EIO"), dest); + } + + current_pos = end_pos; + } + } + } + + interp_ok(()) + } +} diff --git a/src/shims/unix/fd.rs b/src/shims/unix/fd.rs index e5dead1a26..f6599defe9 100644 --- a/src/shims/unix/fd.rs +++ b/src/shims/unix/fd.rs @@ -5,9 +5,10 @@ use std::io; use std::io::ErrorKind; use rustc_abi::Size; +use rustc_middle::ty::layout::TyAndLayout; use crate::helpers::check_min_arg_count; -use crate::shims::files::FileDescription; +use crate::shims::files::{FileDescription, IoTransferOperation}; use crate::shims::unix::linux_like::epoll::EpollReadyEvents; use crate::shims::unix::*; use crate::*; @@ -246,7 +247,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // `usize::MAX` because it is bounded by the host's `isize`. match offset { - None => fd.read(&fd, communicate, buf, count, dest, this)?, + None => { + let mut op = IoTransferOperation::new_contiguous(buf, count); + fd.read_atomic(&fd, communicate, &mut op, dest, this)? + } Some(offset) => { let Ok(offset) = u64::try_from(offset) else { return this.set_last_error_and_return(LibcError("EINVAL"), dest); @@ -257,6 +261,185 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { interp_ok(()) } + /// Reads data from a file descriptor into multiple buffers atomically (vectored I/O). + /// + /// This implementation follows POSIX readv() semantics, reading data from the file descriptor + /// specified by `fd_num` into the buffers described by the array of iovec structures pointed + /// to by `iov_ptr`. The `iovcnt` argument specifies the number of iovec structures in the array. + /// + /// # Arguments + /// * `fd_num` - The file descriptor to read from + /// * `iov_ptr` - Pointer to an array of iovec structures + /// * `iovcnt` - Number of iovec structures in the array + /// * `dest` - Destination for storing the number of bytes read + /// + /// # Returns + /// * `Ok(())` - Operation completed successfully, with total bytes read stored in `dest` + /// * `Err(_)` - Operation failed with appropriate errno set + /// + /// # Errors + /// * `EBADF` - `fd_num` is not a valid file descriptor + /// * `EFAULT` - Part of iovec array or buffers point outside accessible address space + /// * `EINVAL` - `iovcnt` is negative or exceeds system limit + /// * `EIO` - I/O error occurred while reading from the file descriptor + /// + /// # POSIX Compliance + /// Implements standard POSIX readv() behavior: + /// * Performs reads atomically with respect to other threads + /// * Returns exact number of bytes read or -1 on error + /// * Handles partial reads and end-of-file conditions + /// * Respects system-imposed limits on total transfer size + fn readv( + &mut self, + fd_num: i32, + iov_ptr: &OpTy<'tcx>, + iovcnt: i32, + dest: &MPlaceTy<'tcx>, + ) -> InterpResult<'tcx> { + let this = self.eval_context_mut(); + + // POSIX Compliance: Must handle negative values (EINVAL). + if iovcnt < 0 { + return this.set_last_error_and_return(LibcError("EINVAL"), dest); + } + + // POSIX Compliance: Must handle zero properly. + if iovcnt == 0 { + return this.write_scalar(Scalar::from_i32(0), dest); + } + + // POSIX Compliance: Check if iovcnt exceeds system limits. + // Most implementations limit this to IOV_MAX + // Common system default + // https://github.com/turbolent/w2c2/blob/d94227c22f8d78a04fbad70fa744481ca4a1912e/examples/clang/sys/include/limits.h#L50 + const IOV_MAX: i32 = 1024; + if iovcnt > IOV_MAX { + trace!("readv: iovcnt exceeds IOV_MAX"); + return this.set_last_error_and_return(LibcError("EINVAL"), dest); + } + + // POSIX Compliance: Validate file descriptor. + let Some(fd) = this.machine.fds.get(fd_num) else { + return this.set_last_error_and_return(LibcError("EBADF"), dest); + }; + + // Convert iovcnt to usize for array indexing. + let iovcnt = usize::try_from(iovcnt).expect("iovcnt exceeds platform size"); + let iovec_layout = this.libc_ty_layout("iovec"); + + // Gather iovec information. + // Pre-allocate vectors for iovec information + let mut iov_info = Vec::with_capacity(iovcnt); + let mut total_size: usize = 0; + let communicate = this.machine.communicate(); + + // POSIX Compliance: Validate each iovec structure. + // Must check for EFAULT (invalid buffer addresses) and EINVAL (invalid length). + for i in 0..iovcnt { + // Calculate offset to current iovec structure. + let offset = iovec_layout + .size + .bytes() + .checked_mul(i as u64) + .expect("iovec array index overflow"); + + // Access current iovec structure. + let current_iov = match this + .deref_pointer_and_offset_vectored( + iov_ptr, + offset, + iovec_layout, + iovcnt, + iovec_layout, + ) + .report_err() + { + Ok(iov) => iov, + Err(_) => { + return this.set_last_error_and_return(LibcError("EFAULT"), dest); + } + }; + + // Extract and validate buffer pointer and length. + let base_field = this.project_field_named(¤t_iov, "iov_base")?; + let base = this.read_pointer(&base_field)?; + + let len_field = this.project_field_named(¤t_iov, "iov_len")?; + let len: usize = usize::try_from(this.read_target_usize(&len_field)?).unwrap(); + + // Validate buffer alignment and accessibility. + if this + .check_ptr_access(base, Size::from_bytes(len), CheckInAllocMsg::MemoryAccessTest) + .report_err() + .is_err() + { + return this.set_last_error_and_return(LibcError("EFAULT"), dest); + } + + // Update total size safely. + total_size = match total_size.checked_add(len) { + Some(new_size) => new_size, + None => { + return this.set_last_error_and_return(LibcError("EINVAL"), dest); + } + }; + + iov_info.push((base, len)); + } + + let mut op = IoTransferOperation::new_scattered(iov_info); + fd.read_atomic(&fd, communicate, &mut op, dest, this)?; + + interp_ok(()) + } + + /// Dereferences a pointer to access an element within a source array, with specialized bounds checking + /// for vectored I/O operations like readv(). + /// + /// This function provides array-aware bounds checking that is specifically designed for situations + /// where we need to access multiple independent memory regions, such as when processing an array + /// of iovec structures. Unlike simple pointer arithmetic bounds checking, this implementation + /// understands and validates array-based access patterns. + fn deref_pointer_and_offset_vectored( + &self, + op: &impl Projectable<'tcx, Provenance>, + offset_bytes: u64, + base_layout: TyAndLayout<'tcx>, + count: usize, + value_layout: TyAndLayout<'tcx>, + ) -> InterpResult<'tcx, MPlaceTy<'tcx>> { + // 1. Validate the iovec array bounds. + let array_size = base_layout + .size + .bytes() + .checked_mul(count as u64) + .ok_or_else(|| err_ub_format!("iovec array size overflow"))?; + + // 2. Check if our offset is within the array. + if offset_bytes >= array_size { + throw_ub_format!( + "{}", + format!( + "iovec array access out of bounds: offset {} in array of size {}", + offset_bytes, array_size + ) + ); + } + + // 3. Ensure the iovec structure we're accessing is fully contained. + if offset_bytes.checked_add(base_layout.size.bytes()).is_none_or(|end| end > array_size) { + throw_ub_format!("iovec structure would extend past array bounds"); + } + + // 4. Proceed with the dereferencing. + let this = self.eval_context_ref(); + let op_place = this.deref_pointer_as(op, base_layout)?; + let offset = Size::from_bytes(offset_bytes); + + let value_place = op_place.offset(offset, value_layout, this)?; + interp_ok(value_place) + } + fn write( &mut self, fd_num: i32, diff --git a/src/shims/unix/foreign_items.rs b/src/shims/unix/foreign_items.rs index f47a96b10f..399c7d0433 100644 --- a/src/shims/unix/foreign_items.rs +++ b/src/shims/unix/foreign_items.rs @@ -157,6 +157,12 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let count = this.read_target_usize(count)?; this.read(fd, buf, count, None, dest)?; } + "readv" => { + let [fd, iov, iovcnt] = this.check_shim(abi, Conv::C , link_name, args)?; + let fd = this.read_scalar(fd)?.to_i32()?; + let iovcnt = this.read_scalar(iovcnt)?.to_i32()?; + this.readv(fd, iov, iovcnt, dest)?; + } "write" => { let [fd, buf, n] = this.check_shim(abi, Conv::C , link_name, args)?; let fd = this.read_scalar(fd)?.to_i32()?; diff --git a/src/shims/unix/fs.rs b/src/shims/unix/fs.rs index 5682fb659e..b9530dc42b 100644 --- a/src/shims/unix/fs.rs +++ b/src/shims/unix/fs.rs @@ -14,7 +14,10 @@ use rustc_data_structures::fx::FxHashMap; use self::shims::time::system_time_to_duration; use crate::helpers::check_min_arg_count; -use crate::shims::files::{EvalContextExt as _, FileDescription, FileDescriptionRef}; +use crate::shims::files::{ + EvalContextExt as _, FileDescription, FileDescriptionRef, IoTransferOperation, + WeakFileDescriptionRef, +}; use crate::shims::os_str::bytes_to_os_str; use crate::shims::unix::fd::{FlockOp, UnixFileDescription}; use crate::*; @@ -23,6 +26,74 @@ use crate::*; struct FileHandle { file: File, writable: bool, + file_lock: MutexRef, +} + +impl VisitProvenance for FileHandle { + fn visit_provenance(&self, _visit: &mut VisitWith<'_>) { + // No provenance tracking needed for FileHandle as it contains no references. + // This implementation satisfies the trait requirement but performs no operations. + } +} + +impl FileHandle { + fn new(file: File, writable: bool, file_lock: MutexRef) -> Self { + Self { file, writable, file_lock } + } + + fn try_clone<'tcx>(&self) -> InterpResult<'tcx, FileHandle> { + // Explicitly handling errors with more context + let cloned_file = self + .file + .try_clone() + .map_err(|e| err_unsup_format!("Failed to clone file handle: {}", e))?; + + interp_ok(FileHandle { + file: cloned_file, + writable: self.writable, + file_lock: self.file_lock.clone(), + }) + } + + fn perform_read<'tcx>( + this: &mut MiriInterpCx<'tcx>, + dest: MPlaceTy<'tcx>, + mut file_handle: FileHandle, + weak_fd: WeakFileDescriptionRef, + op: std::rc::Rc>>, + ) -> InterpResult<'tcx> { + this.mutex_lock(&file_handle.file_lock); + + let result = (|| { + // Verify file descriptor is still valid + let Some(_fd_ref) = weak_fd.upgrade() else { + throw_unsup_format!("file got closed while blocking"); + }; + + // Perform the actual read operation on the underlying file + let read_result = (file_handle.file).read(op.borrow_mut().buffer_mut()); + + match read_result { + Ok(read_size) => { + // Update the read operation with results + op.borrow_mut().distribute_data(this, &dest, read_size)?; + + // Write the number of bytes read to the destination + this.write_int(u64::try_from(read_size).unwrap(), &dest) + } + Err(_ec) => { + // Propagate any read errors through our error handling system + this.set_last_error_and_return(LibcError("EIO"), &dest) + } + } + })(); + + // Always unlock the mutex, even if the read operation failed + this.mutex_unlock(&file_handle.file_lock)?; + + // Return the result of our read operation + result + } } impl FileDescription for FileHandle { @@ -30,21 +101,67 @@ impl FileDescription for FileHandle { "file" } - fn read<'tcx>( + /// Performs an atomic read operation on the file. + /// + /// # Arguments + /// * `self_ref` - Strong reference to file description for lifetime management + /// * `communicate_allowed` - Whether external communication is permitted + /// * `op` - The I/O operation containing buffer and layout information + /// * `dest` - Destination for storing operation results + /// * `ecx` - Mutable reference to interpreter context + /// + /// # Returns + /// * `Ok(())` on successful read + /// * `Err(_)` if read fails or is unsupported + fn read_atomic<'tcx>( &self, - _self_ref: &FileDescriptionRef, + self_ref: &FileDescriptionRef, communicate_allowed: bool, - ptr: Pointer, - len: usize, + op: &mut IoTransferOperation<'tcx>, dest: &MPlaceTy<'tcx>, ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx> { + let this = ecx; assert!(communicate_allowed, "isolation should have prevented even opening a file"); - let mut bytes = vec![0; len]; - let result = (&mut &self.file).read(&mut bytes); - match result { - Ok(read_size) => ecx.return_read_success(ptr, &bytes, read_size, dest), - Err(e) => ecx.set_last_error_and_return(e, dest), + + // Clone the underlying File + let clone_file_handle = match self.try_clone().report_err() { + Ok(handle) => handle, + Err(_e) => return this.set_last_error_and_return(LibcError("EIO"), dest), + }; + + let weak_fd = self_ref.downgrade(); + let op = std::rc::Rc::new(std::cell::RefCell::new(op.clone())); + let dest = dest.clone(); + + // Rest of the implementation remains the same + if this.mutex_is_locked(&self.file_lock) { + this.block_thread( + BlockReason::Mutex, + None, + callback!( + @capture<'tcx> { + dest: MPlaceTy<'tcx>, + clone_file_handle: FileHandle, + weak_fd: WeakFileDescriptionRef, + op: std::rc::Rc>>, + } + @unblock = |this, state| { + match state { + MachineCallbackState::Ready => { + FileHandle::perform_read(this, dest, clone_file_handle, weak_fd, op) + } + MachineCallbackState::TimedOut => { + panic!("Mutex operation received unexpected timeout state") + }, + } + } + ), + ); + + unreachable!() + } else { + FileHandle::perform_read(this, dest, clone_file_handle, weak_fd, op) } } @@ -586,9 +703,13 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { return this.set_last_error_and_return_i32(ErrorKind::PermissionDenied); } - let fd = options - .open(path) - .map(|file| this.machine.fds.insert_new(FileHandle { file, writable })); + let fd = options.open(path).map(|file| { + this.machine.fds.insert_new(FileHandle::new( + file, + writable, + this.machine.sync.mutex_create(), + )) + }); interp_ok(Scalar::from_i32(this.try_unwrap_io_result(fd)?)) } @@ -1311,7 +1432,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { }; // FIXME: Support ftruncate64 for all FDs - let FileHandle { file, writable } = fd.downcast::().ok_or_else(|| { + let FileHandle { file, writable, .. } = fd.downcast::().ok_or_else(|| { err_unsup_format!("`ftruncate64` is only supported on file-backed file descriptors") })?; @@ -1358,7 +1479,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { return this.set_last_error_and_return_i32(LibcError("EBADF")); }; // Only regular files support synchronization. - let FileHandle { file, writable } = fd.downcast::().ok_or_else(|| { + let FileHandle { file, writable, .. } = fd.downcast::().ok_or_else(|| { err_unsup_format!("`fsync` is only supported on file-backed file descriptors") })?; let io_result = maybe_sync_file(file, *writable, File::sync_all); @@ -1382,7 +1503,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { return this.set_last_error_and_return_i32(LibcError("EBADF")); }; // Only regular files support synchronization. - let FileHandle { file, writable } = fd.downcast::().ok_or_else(|| { + let FileHandle { file, writable, .. } = fd.downcast::().ok_or_else(|| { err_unsup_format!("`fdatasync` is only supported on file-backed file descriptors") })?; let io_result = maybe_sync_file(file, *writable, File::sync_data); @@ -1425,7 +1546,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { return this.set_last_error_and_return_i32(LibcError("EBADF")); }; // Only regular files support synchronization. - let FileHandle { file, writable } = fd.downcast::().ok_or_else(|| { + let FileHandle { file, writable, .. } = fd.downcast::().ok_or_else(|| { err_unsup_format!("`sync_data_range` is only supported on file-backed file descriptors") })?; let io_result = maybe_sync_file(file, *writable, File::sync_data); @@ -1653,7 +1774,11 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { match file { Ok(f) => { - let fd = this.machine.fds.insert_new(FileHandle { file: f, writable: true }); + let fd = this.machine.fds.insert_new(FileHandle::new( + f, + true, + this.machine.sync.mutex_create(), + )); return interp_ok(Scalar::from_i32(fd)); } Err(e) => diff --git a/tests/pass-dep/libc/libc-fs.rs b/tests/pass-dep/libc/libc-fs.rs index f85abe2cc4..007de62313 100644 --- a/tests/pass-dep/libc/libc-fs.rs +++ b/tests/pass-dep/libc/libc-fs.rs @@ -14,6 +14,42 @@ use std::path::PathBuf; #[path = "../../utils/mod.rs"] mod utils; +/// Platform-specific offset type for file seeking operations. +/// the lseek system call typically expects an off_t type, which can be 64 bits +/// even on some 32-bit systems. +#[cfg(any( + target_os = "illumos", + target_os = "solaris", + target_os = "android", + all(target_os = "linux", target_pointer_width = "64"), + all(target_os = "macos", target_arch = "aarch64") +))] +type LseekOffset = i64; + +#[cfg(any( + target_os = "freebsd", + all(target_os = "macos", not(target_arch = "aarch64")), + all(target_os = "linux", target_pointer_width = "32") +))] +type LseekOffset = i32; + +/// Seeks to a specific position in the file +fn seek(fd: i32, offset: LseekOffset) -> LseekOffset { + let result = unsafe { + // the lseek64 function is not part of the POSIX standard and + // may not be available on all systems. + #[cfg(all(target_os = "linux", target_pointer_width = "64"))] + let result = libc::lseek64(fd, offset.into(), libc::SEEK_SET); + + #[cfg(not(all(target_os = "linux", target_pointer_width = "64")))] + let result = libc::lseek(fd, offset.into(), libc::SEEK_SET); + + result + }; + + LseekOffset::try_from(result).expect("seek operation failed") +} + fn main() { test_dup(); test_dup_stdout_stderr(); @@ -38,6 +74,12 @@ fn main() { test_isatty(); test_read_and_uninit(); test_nofollow_not_symlink(); + test_readv_basic(); + test_readv_large_buffers(); + test_readv_partial_and_eof(); + test_readv_error_conditions(); + test_read_atomic_contiguous(); + test_read_atomic_scattered(); } fn test_file_open_unix_allow_two_args() { @@ -431,3 +473,606 @@ fn test_nofollow_not_symlink() { let ret = unsafe { libc::open(cpath.as_ptr(), libc::O_NOFOLLOW | libc::O_CLOEXEC) }; assert!(ret >= 0); } + +/// Tests basic functionality of the readv() system call by reading a small file +/// with multiple buffers. +/// +/// Verifies that: +/// - File contents are read correctly into provided buffers +/// - The total number of bytes read matches file size +/// - Buffer boundaries are respected +/// - Return values match expected behavior +fn test_readv_basic() { + let bytes = b"abcdefgh"; + let path = utils::prepare_with_content("miri_test_libc_readv.txt", bytes); + + // Convert path to a null-terminated CString. + let name = CString::new(path.into_os_string().into_string().unwrap()).unwrap(); + let name_ptr = name.as_ptr(); + + let mut first_buf = [0u8; 4]; + let mut second_buf = [0u8; 8]; + + unsafe { + // Define iovec structures. + let iov: [libc::iovec; 2] = [ + libc::iovec { + iov_len: first_buf.len() as usize, + iov_base: first_buf.as_mut_ptr() as *mut libc::c_void, + }, + libc::iovec { + iov_len: second_buf.len() as usize, + iov_base: second_buf.as_mut_ptr() as *mut libc::c_void, + }, + ]; + + // Open file. + let fd = libc::open(name_ptr, libc::O_RDONLY); + if fd < 0 { + eprintln!("Failed to open file: {}", Error::last_os_error().to_string()); + return; + } + + // Call readv with proper type conversions. + let iovcnt = libc::c_int::try_from(iov.len()).expect("iovec count too large for platform"); + + // Call readv with proper type handling for the count. + let res = libc::readv(fd, iov.as_ptr() as *const libc::iovec, iovcnt); + + if res < 0 { + eprintln!("Failed to readv: {}", Error::last_os_error()); + libc::close(fd); + return; + } + + // Close the file descriptor. + libc::close(fd); + } + + // Validate buffers. + if first_buf != *b"abcd" { + eprintln!("First buffer mismatch: {:?}", first_buf); + } + + if second_buf != *b"efgh\0\0\0\0" { + eprintln!("Second buffer mismatch: {:?}", second_buf); + } +} + +/// Tests readv() system call with large buffer sizes and pattern verification. +/// Uses multiple buffers (16KB, 16KB, 32KB) to read a 64KB file containing +/// a repeating 'ABCD' pattern with markers at buffer boundaries. +/// +/// Verifies that: +/// - Large file contents are read correctly +/// - Markers at buffer boundaries are preserved +/// - Pattern integrity is maintained between markers +/// - Memory safety with large allocations +/// - Buffer boundary handling for larger sizes +fn test_readv_large_buffers() { + const BUFFER_SIZE_1: usize = 16384; // 16KB + const BUFFER_SIZE_2: usize = 16384; // 16KB + const BUFFER_SIZE_3: usize = 32768; // 32KB + + // Define our buffer sizes + let buffer_sizes = &[ + BUFFER_SIZE_1, // 16KB + BUFFER_SIZE_2, // 16KB + BUFFER_SIZE_3, // 32KB + ]; + + // Create large test file with patterns and markers. + // Generate pattern with awareness of buffer boundaries. + let large_content = utils::generate_test_pattern(buffer_sizes); + + let path = utils::prepare_with_content("large_readv_test.txt", &large_content); + + // Create buffers based on our defined sizes. + let mut buffers: Vec> = buffer_sizes.iter().map(|&size| vec![0u8; size]).collect(); + + // Convert path to CString for libc interface. + let path_cstr = CString::new(path.into_os_string().into_string().unwrap()).unwrap(); + + let bytes_read: usize = unsafe { + let fd = libc::open(path_cstr.as_ptr(), libc::O_RDONLY); + assert!(fd > 0, "Failed to open test file"); + + // Create iovec array using our buffers. + let iov = buffers + .iter_mut() + .map(|buf| { + libc::iovec { iov_base: buf.as_mut_ptr() as *mut libc::c_void, iov_len: buf.len() } + }) + .collect::>(); + + // Perform readv operation. + let read_result = libc::readv(fd, iov.as_ptr(), iov.len() as i32); + + libc::close(fd); + read_result.try_into().unwrap() + }; + + // Verify total bytes read. + let expected_total: usize = buffer_sizes.iter().sum(); + assert_eq!( + bytes_read, expected_total, + "Unexpected bytes read. Expected {}, got {}", + expected_total, bytes_read + ); + + // Verify markers in each buffer with correct positioning. + let mut current_pos = 0; + for (i, buf) in buffers.iter().enumerate() { + let marker = format!("##MARKER{}##", i + 1); + let marker_len = marker.len(); + + // Calculate correct position for this buffer + let buffer_size = buf.len(); + let marker_pos = buffer_size - marker_len; + + // Read the exact number of bytes needed for the marker. + let content = std::str::from_utf8(&buf[marker_pos..marker_pos + marker_len]) + .unwrap_or("Invalid UTF-8"); + + assert_eq!( + content, + marker, + "Marker {} mismatch at position {}. Expected '{}', found '{}'", + i + 1, + current_pos + marker_pos, + marker, + content + ); + + // Update position for next buffer + current_pos += buffer_size; + } + + // Helper function to verify the repeating ABCD pattern. + let verify_pattern = |buf: &[u8], start: usize, end: usize, buffer_num: usize| { + // Safety check for range validity + if start >= end || end > buf.len() { + println!( + "Invalid range for buffer {}: start={}, end={}, len={}", + buffer_num, + start, + end, + buf.len() + ); + return false; + } + + let chunk = &buf[start..end]; + + // Calculate the pattern offset for alignment. + let pattern_offset = start % 4; + let expected_pattern = [b'A', b'B', b'C', b'D']; + + // Verify each byte against the expected pattern at the correct offset. + chunk.iter().enumerate().all(|(i, &byte)| { + let expected = expected_pattern[(i + pattern_offset) % 4]; + if byte != expected { + println!( + "Mismatch at position {}: expected {}, found {}", + start + i, + expected as char, + byte as char + ); + false + } else { + true + } + }) + }; + + // Adjust verification ranges and pattern alignment. + for (i, buf) in buffers.iter().enumerate() { + let buffer_num = i + 1; + let buffer_size = buf.len(); + let marker_len = 11; + + // Calculate correct start position based on marker alignment. + let start = if buffer_num == 1 { 0 } else { marker_len }; + let end = buffer_size - marker_len; + + assert!( + verify_pattern(buf, start, end, buffer_num), + "Pattern corruption detected in buffer {}. Expected aligned 'ABCD' pattern \ + in range {}..{}", + buffer_num, + start, + end + ); + } +} + +/// Tests readv() system call behavior with EOF conditions and partial reads. +/// Uses a test file smaller than total buffer size to verify correct handling +/// of file boundaries and partial data transfers. +/// +/// Verifies that: +/// - Partial reads near EOF work correctly +/// - Reading exactly at EOF returns 0 +/// - Buffer contents match expected data +/// - Total bytes read matches available data +/// - Remaining buffer space is unmodified +fn test_readv_partial_and_eof() { + // Let's create a file smaller than our total buffer sizes. + // We'll use a structured pattern to make validation easier. + let test_data = b"HEADER_DATA_SECTION_ONE_DATA_SECTION_TWO_END"; // 41 bytes + let path = utils::prepare_with_content("partial_read_test.txt", test_data); + + // Test Case 1: Normal buffers larger than file size. + { + let mut first_buf = vec![0u8; 20]; // Should be filled completely + let mut second_buf = vec![0u8; 20]; // Should be filled completely + let mut third_buf = vec![0u8; 20]; // Should be partially filled + + let path_cstr = CString::new(path.to_str().unwrap()).unwrap(); + + let bytes_read: usize = unsafe { + let fd = libc::open(path_cstr.as_ptr(), libc::O_RDONLY); + assert!(fd > 0, "Failed to open test file"); + + let iov = [ + libc::iovec { + iov_base: first_buf.as_mut_ptr() as *mut libc::c_void, + iov_len: first_buf.len(), + }, + libc::iovec { + iov_base: second_buf.as_mut_ptr() as *mut libc::c_void, + iov_len: second_buf.len(), + }, + libc::iovec { + iov_base: third_buf.as_mut_ptr() as *mut libc::c_void, + iov_len: third_buf.len(), + }, + ]; + + let result = libc::readv(fd, iov.as_ptr(), iov.len() as i32); + libc::close(fd); + result.try_into().unwrap() + }; + + // Verify total bytes read matches file size. + assert_eq!( + bytes_read, + test_data.len(), + "Expected {} bytes read, got {}", + test_data.len(), + bytes_read + ); + + // Verify buffer contents + assert_eq!(&first_buf[..20], &test_data[..20], "First buffer content mismatch"); + assert_eq!(&second_buf[..20], &test_data[20..40], "Second buffer content mismatch"); + assert_eq!(&third_buf[..1], &test_data[40..41], "Third buffer partial content mismatch"); + } + + // Test Case 2: Reading from an offset near EOF. + { + let mut first_buf = vec![0u8; 10]; + let mut second_buf = vec![0u8; 10]; + + let path_cstr = CString::new(path.to_str().unwrap()).unwrap(); + + let bytes_read: usize = unsafe { + let fd = libc::open(path_cstr.as_ptr(), libc::O_RDONLY); + assert!(fd > 0, "Failed to open test file"); + + // Seek to near end of file + // Use the platform-specific offset type directly + let offset = LseekOffset::try_from(test_data.len() - 15).unwrap(); + let seek_result = seek(fd, offset); + + // Compare using the same types + assert_eq!(LseekOffset::try_from(seek_result).unwrap(), offset); + + let iov = [ + libc::iovec { + iov_base: first_buf.as_mut_ptr() as *mut libc::c_void, + iov_len: first_buf.len(), + }, + libc::iovec { + iov_base: second_buf.as_mut_ptr() as *mut libc::c_void, + iov_len: second_buf.len(), + }, + ]; + + let result = libc::readv(fd, iov.as_ptr(), iov.len() as i32); + libc::close(fd); + result.try_into().unwrap() + }; + + // Should read remaining 15 bytes + assert_eq!(bytes_read, 15, "Expected 15 bytes read from offset, got {}", bytes_read); + } + + // Test Case 3: Reading at EOF. + { + let mut buf = vec![0u8; 10]; + + let path_cstr = CString::new(path.to_str().unwrap()).unwrap(); + + let bytes_read: usize = unsafe { + let fd = libc::open(path_cstr.as_ptr(), libc::O_RDONLY); + assert!(fd > 0, "Failed to open test file"); + + // Seek to EOF + // Cast the offset to the appropriate type for the platform + let offset = LseekOffset::try_from(test_data.len()).unwrap(); + let seek_result = seek(fd, offset); + assert_eq!( + LseekOffset::try_from(seek_result).unwrap(), + LseekOffset::try_from(test_data.len()).unwrap() + ); + + let iov = [libc::iovec { + iov_base: buf.as_mut_ptr() as *mut libc::c_void, + iov_len: buf.len(), + }]; + + let result = libc::readv(fd, iov.as_ptr(), iov.len() as i32); + libc::close(fd); + result.try_into().unwrap() + }; + + // Should read 0 bytes at EOF + assert_eq!(bytes_read, 0, "Expected 0 bytes read at EOF, got {}", bytes_read); + } + + // Test Case 4: Small buffers with exact boundaries. + { + let mut first_buf = vec![0u8; 7]; // "HEADER_" + let mut second_buf = vec![0u8; 5]; // "DATA_" + let mut third_buf = vec![0u8; 7]; // "SECTION" + + let path_cstr = CString::new(path.to_str().unwrap()).unwrap(); + + let bytes_read: usize = unsafe { + let fd = libc::open(path_cstr.as_ptr(), libc::O_RDONLY); + assert!(fd > 0, "Failed to open test file"); + + let iov = [ + libc::iovec { + iov_base: first_buf.as_mut_ptr() as *mut libc::c_void, + iov_len: first_buf.len(), + }, + libc::iovec { + iov_base: second_buf.as_mut_ptr() as *mut libc::c_void, + iov_len: second_buf.len(), + }, + libc::iovec { + iov_base: third_buf.as_mut_ptr() as *mut libc::c_void, + iov_len: third_buf.len(), + }, + ]; + + let result = libc::readv(fd, iov.as_ptr(), iov.len() as i32); + libc::close(fd); + result.try_into().unwrap() + }; + + // Verify exact buffer fills. + assert_eq!( + bytes_read, 19, + "Expected 19 bytes read for exact boundaries, got {}", + bytes_read + ); + assert_eq!(&first_buf, b"HEADER_", "First buffer exact content mismatch"); + assert_eq!(&second_buf, b"DATA_", "Second buffer exact content mismatch"); + assert_eq!(&third_buf[..7], b"SECTION", "Third buffer exact content mismatch"); + } +} + +/// Tests error handling conditions of the readv() system call. +/// Verifies that the implementation properly handles various error scenarios +/// including invalid file descriptors, +/// +/// Test coverage includes: +/// - Invalid file descriptor scenarios +fn test_readv_error_conditions() { + #[cfg(any(target_os = "illumos", target_os = "solaris"))] + use libc::___errno as __errno_location; + #[cfg(target_os = "android")] + use libc::__errno as __errno_location; + #[cfg(target_os = "linux")] + use libc::__errno_location; + #[cfg(any(target_os = "freebsd", target_os = "macos"))] + use libc::__error as __errno_location; + + // Test Case 1: Invalid File Descriptor Scenarios. + { + let mut buffer = vec![0u8; 10]; + + // Create a single valid iovec structure for testing. + let iov = [libc::iovec { + iov_base: buffer.as_mut_ptr() as *mut libc::c_void, + iov_len: buffer.len(), + }]; + + unsafe { + // Test with negative file descriptor. + let result = libc::readv(-1, iov.as_ptr(), 1); + assert_eq!(result, -1, "Expected error for negative file descriptor"); + assert_eq!( + *__errno_location(), + libc::EBADF, + "Expected EBADF for negative file descriptor" + ); + + // Test with unopened but potentially valid fd number. + let result = libc::readv(999999, iov.as_ptr(), 1); + assert_eq!(result, -1, "Expected error for invalid file descriptor"); + assert_eq!( + *__errno_location(), + libc::EBADF, + "Expected EBADF for invalid file descriptor" + ); + } + } +} + +/// Tests concurrent atomic reads with a contiguous buffer. +/// Validates that multiple threads can safely read from the same file +/// using a single continuous memory region for each read operation. +fn test_read_atomic_contiguous() { + // Create test file with known content + let content = b"The quick brown fox jumps over the lazy dog"; + let path = utils::prepare_with_content("miri_test_atomic_read_contiguous.txt", content); + + // Convert path to a string that can be safely cloned for each thread + let path_string = path.into_os_string().into_string().unwrap(); + + // Spawn multiple threads to read concurrently + let mut handles = Vec::new(); + let thread_count = 4; + let read_size = 8; // Each thread reads 8 bytes + + for thread_id in 0..thread_count { + // Clone the path string for this thread + let thread_path = path_string.clone(); + + let handle = std::thread::spawn(move || { + // Create CString inside the thread + let name = CString::new(thread_path).unwrap(); + let name_ptr = name.as_ptr(); + + unsafe { + // Open file descriptor for this thread + let fd = libc::open(name_ptr, libc::O_RDONLY); + assert!(fd >= 0, "Failed to open file in thread {}", thread_id); + + // Seek to thread-specific offset + let offset = thread_id * read_size; + assert_eq!( + libc::lseek(fd, offset as i64, libc::SEEK_SET), + offset as i64, + "Failed to seek in thread {}", + thread_id + ); + + // Perform atomic read + let mut buffer = vec![0u8; read_size]; + let bytes_read = + libc::read(fd, buffer.as_mut_ptr() as *mut libc::c_void, read_size as usize); + + assert_eq!( + bytes_read as usize, read_size, + "Incorrect read size in thread {}", + thread_id + ); + + // Verify read content + assert_eq!( + &buffer, + &content[offset..offset + read_size], + "Incorrect data read in thread {}", + thread_id + ); + + libc::close(fd); + } + }); + + handles.push(handle); + } + + // Wait for all threads to complete + for handle in handles { + handle.join().unwrap(); + } +} + +/// Tests concurrent atomic reads with scattered buffers. +/// Validates that multiple threads can safely read from the same file +/// using multiple discontinuous memory regions for each read operation. +fn test_read_atomic_scattered() { + // Create test file with repeating pattern for easy verification + let pattern = b"ABCDEFGH"; + let repeat_count = 4; + let test_content: Vec = + pattern.iter().cycle().take(pattern.len() * repeat_count).copied().collect(); + let path = utils::prepare_with_content("miri_test_atomic_read_scattered.txt", &test_content); + + // Convert path to a string that can be safely cloned for each thread + let path_string = path.into_os_string().into_string().unwrap(); + + // Spawn threads for concurrent scattered reads + let mut handles = Vec::new(); + let thread_count = 2; + + for thread_id in 0..thread_count { + // Clone the path string for this thread + let thread_path = path_string.clone(); + let content = test_content.clone(); + + let handle = std::thread::spawn(move || { + // Convert path to C string for libc + let name = CString::new(thread_path).unwrap(); + let name_ptr = name.as_ptr(); + + unsafe { + // Open file descriptor + let fd = libc::open(name_ptr, libc::O_RDONLY); + assert!(fd >= 0, "Failed to open file in thread {}", thread_id); + + // Create scattered buffers: two 4-byte regions + let mut buffer1 = vec![0u8; 4]; + let mut buffer2 = vec![0u8; 4]; + + // Set up iovec structures + let iov = [ + libc::iovec { + iov_base: buffer1.as_mut_ptr() as *mut libc::c_void, + iov_len: buffer1.len(), + }, + libc::iovec { + iov_base: buffer2.as_mut_ptr() as *mut libc::c_void, + iov_len: buffer2.len(), + }, + ]; + + // Seek to thread-specific offset + let offset = thread_id * 8; // Each thread reads 8 bytes total + assert_eq!( + libc::lseek(fd, offset as i64, libc::SEEK_SET), + offset as i64, + "Failed to seek in thread {}", + thread_id + ); + + // Perform scattered read + let bytes_read = libc::readv(fd, iov.as_ptr(), 2); + assert_eq!( + bytes_read as usize, 8, + "Incorrect scattered read size in thread {}", + thread_id + ); + + // Verify read content + assert_eq!( + &buffer1, + &content[offset..offset + 4], + "Incorrect data in first buffer of thread {}", + thread_id + ); + assert_eq!( + &buffer2, + &content[offset + 4..offset + 8], + "Incorrect data in second buffer of thread {}", + thread_id + ); + + libc::close(fd); + } + }); + + handles.push(handle); + } + + // Wait for all threads to complete + for handle in handles { + handle.join().unwrap(); + } +} diff --git a/tests/utils/fs.rs b/tests/utils/fs.rs index 7340908626..fcaae1d8e8 100644 --- a/tests/utils/fs.rs +++ b/tests/utils/fs.rs @@ -51,3 +51,50 @@ pub fn prepare_dir(dirname: &str) -> PathBuf { fs::remove_dir_all(&path).ok(); path } + +/// Generates a test pattern with markers placed at buffer boundaries +/// +/// Arguments: +/// * `buffer_sizes` - An array of buffer sizes that will be used in the readv operation +/// +/// Returns: +/// * A vector containing the test pattern with markers placed at buffer boundaries +/// +/// The function creates a pattern by: +/// 1. Filling the content with repeating "ABCD" sequences +/// 2. Placing markers at each buffer boundary +/// 3. Adding an end pattern to detect overruns +pub fn generate_test_pattern(buffer_sizes: &[usize]) -> Vec { + // Calculate total size needed for all buffers. + let total_size: usize = buffer_sizes.iter().sum(); + + // Create our base content vector. + let mut content = Vec::with_capacity(total_size); + + // Fill with repeating ABCD pattern. + let base_pattern = b"ABCD"; + while content.len() < total_size { + content.extend_from_slice(base_pattern); + } + content.truncate(total_size); + + // Calculate marker positions at buffer boundaries. + // We'll accumulate sizes to find boundary positions. + // Calculate correct marker positions based on cumulative buffer boundaries. + let mut cumulative_position = 0; + for (i, &buffer_size) in buffer_sizes.iter().enumerate() { + let marker = format!("##MARKER{}##", i + 1).into_bytes(); + let marker_len = marker.len(); + + // Position marker relative to the current buffer's end + let marker_position = cumulative_position + buffer_size - marker_len; + + if marker_position + marker_len <= total_size { + content[marker_position..marker_position + marker_len].copy_from_slice(&marker); + } + + cumulative_position += buffer_size; + } + + content +}