Skip to content

Commit

Permalink
lib/cpuinfo: Increase the file descriptors limit to handle more CPUs
Browse files Browse the repository at this point in the history
The pqos tool fails with the following errors on systems with 300 or more
CPU cores.
$pqos
NOTE:  Mixed use of MSR and kernel interfaces to manage
       CAT or CMT & MBM may lead to unexpected behavior.
ERROR: Could not open /sys/fs/resctrl directory
ERROR: Failed to stop resctrl events
ERROR: Failed to start all selected OS monitoring events
Monitoring start error on core(s) 339, status 1

By default, the file descriptor limit for a session is 1024. pqos
monitoring uses 3 descriptors per CPU for perf monitoring, so it exceeds
that limit (1024) on systems with 300 or more CPUs.

Fix the issue by detecting the number of CPUs in the system and raising
the descriptor limit: the current limit is read with getrlimit() and, if it
is too low, increased with setrlimit(). The limit is raised to
MAX_FD_PER_CORE (5) descriptors per CPU plus MAX_PQOS_FD (100) additional
descriptors, so that the open files limit is no longer exceeded.

Signed-off-by: Babu Moger <[email protected]>
  • Loading branch information
babumoger committed Apr 30, 2024
1 parent 14e3840 commit 42475e2
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 1 deletion.
31 changes: 31 additions & 0 deletions lib/common.c
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,14 @@
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <unistd.h>

/*
 * Maximum required file descriptors per core.
 * pqos_set_no_files_limit() multiplies this by the core count when it
 * computes the RLIMIT_NOFILE value the process needs.
 */
#define MAX_FD_PER_CORE 5
/*
 * pqos tool opens some file descriptors while using msr interface.
 * This fixed amount is added on top of the per-core total in
 * pqos_set_no_files_limit().
 */
#define MAX_PQOS_FD 100

FILE *
pqos_fopen(const char *name, const char *mode)
{
Expand Down Expand Up @@ -392,3 +398,28 @@ pqos_read(int fd, void *buf, size_t count)

return count;
}

/*
 * Make sure the process may open enough file descriptors for
 * max_core_count cores.
 *
 * The required amount is (max_core_count * MAX_FD_PER_CORE) + MAX_PQOS_FD.
 * When the current RLIMIT_NOFILE limits already cover it, nothing is
 * changed. Otherwise the soft limit is set to the required amount and the
 * hard limit is raised to it as well if it is lower.
 *
 * Returns PQOS_RETVAL_OK on success and PQOS_RETVAL_ERROR when
 * getrlimit() or setrlimit() fails.
 */
int
pqos_set_no_files_limit(unsigned long max_core_count)
{
        const rlim_t fd_needed =
            (max_core_count * MAX_FD_PER_CORE) + MAX_PQOS_FD;
        struct rlimit nofile;

        if (getrlimit(RLIMIT_NOFILE, &nofile) != 0)
                return PQOS_RETVAL_ERROR;

        /* Both limits are already large enough - leave them untouched */
        if (nofile.rlim_max >= fd_needed && nofile.rlim_cur >= fd_needed)
                return PQOS_RETVAL_OK;

        /*
         * The hard limit is only changed when it is below the requirement.
         * NOTE(review): raising a hard limit is presumably refused for
         * unprivileged processes - confirm against setrlimit(2).
         */
        if (nofile.rlim_max < fd_needed)
                nofile.rlim_max = fd_needed;
        nofile.rlim_cur = fd_needed;

        if (setrlimit(RLIMIT_NOFILE, &nofile) != 0)
                return PQOS_RETVAL_ERROR;

        return PQOS_RETVAL_OK;
}
11 changes: 11 additions & 0 deletions lib/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,17 @@ PQOS_LOCAL void pqos_munmap(void *mem, const uint64_t size);
*/
PQOS_LOCAL ssize_t pqos_read(int fd, void *buf, size_t count);

/**
 * @brief Increase the number of open files limit to handle more
 *        than 256 CPUs.
 *
 * @param [in] max_core_count Max CPUs on the system
 *
 * @return Operation status
 * @retval PQOS_RETVAL_OK for success
 * @retval PQOS_RETVAL_ERROR for failure
 */
PQOS_LOCAL int pqos_set_no_files_limit(unsigned long max_core_count);

#ifdef __cplusplus
}
#endif
Expand Down
6 changes: 6 additions & 0 deletions lib/cpuinfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@

#include "allocation.h"
#include "cap.h"
#include "common.h"
#include "cpu_registers.h"
#include "log.h"
#include "machine.h"
Expand Down Expand Up @@ -452,6 +453,11 @@ cpuinfo_build_topo(struct apic_info *apic)
return NULL;
}

if (pqos_set_no_files_limit(max_core_count)) {
LOG_ERROR("Open files limit not sufficient!\n");
return NULL;
}

const size_t mem_sz =
sizeof(*l_cpu) + (max_core_count * sizeof(struct pqos_coreinfo));

Expand Down
5 changes: 5 additions & 0 deletions lib/os_cpuinfo.c
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,11 @@ os_cpuinfo_topology(void)
return NULL;
}

if (pqos_set_no_files_limit(max_core_count)) {
LOG_ERROR("Open files limit not sufficient!\n");
return NULL;
}

const size_t mem_sz =
sizeof(*cpu) + (max_core_count * sizeof(struct pqos_coreinfo));

Expand Down
2 changes: 1 addition & 1 deletion pqos/alloc.c
Original file line number Diff line number Diff line change
Expand Up @@ -840,7 +840,7 @@ set_allocation_assoc(const struct pqos_devinfo *dev)
static void
fill_core_tab(char *str)
{
unsigned max_cores_count = 128;
unsigned max_cores_count = sysconf(_SC_NPROCESSORS_CONF);
uint64_t *cores = calloc(max_cores_count, sizeof(uint64_t));
unsigned i = 0, n = 0, cos = 0;
char *p = NULL;
Expand Down

0 comments on commit 42475e2

Please sign in to comment.