Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Zero timestamp query results #2378

Open
giomasce opened this issue Oct 15, 2024 · 0 comments
Open

Zero timestamp query results #2378

giomasce opened this issue Oct 15, 2024 · 0 comments

Comments

@giomasce
Copy link
Contributor

As with my previous issues, this comes from the vkd3d test suite. I converted it to pure Vulkan and made it as minimal as possible. The Vulkan program doesn't trigger any Metal or Vulkan validation errors and seems correct to me. It should simply issue four timestamp queries one after the other and print the results. The problem is that on my M3 Max MacBook Pro (Sonoma 14.6.1) the last few query results are sometimes zero. Typical results look like this:

giovanni@melanera vktest % ./test
1291647281773750
1291647281780250
1291647281786375
1291647281792958
giovanni@melanera vktest % ./test
1291647708930166
1291647709002291
0
0
giovanni@melanera vktest % ./test
1291648110597000
1291648110603708
0
0
giovanni@melanera vktest % ./test
1291648560792500
0
0
0
giovanni@melanera vktest % ./test
1291648992832500
1291648992839291
1291648992845625
0
giovanni@melanera vktest % ./test
1291649440116750
1291649440151125
1291649440157250
1291649440163791

The feeling I get from these results is that there is a race between filling the buffer with the query results and copying it to CPU memory. But at the Vulkan level, the queue submission containing the query commands and the queue submission that reads the data back are separated by a full wait for the device to become idle, so I don't know what I might be missing.

Test program source code
// Compile with: g++ -g -O2 -std=c++20 -Wall -pedantic -o test test.cpp $(pkg-config --cflags --libs vulkan)

#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <tuple>
#include <utility>
#include <vector>

#include <vulkan/vulkan.h>

// Creates a Vulkan instance that requests API version 1.2 and enables no
// layers or instance extensions. Asserts that creation did not fail.
static inline VkInstance create_instance() {
    VkApplicationInfo application{};
    application.sType = VK_STRUCTURE_TYPE_APPLICATION_INFO;
    application.pApplicationName = "Test";
    application.applicationVersion = VK_MAKE_VERSION(1, 0, 0);
    application.pEngineName = "No Engine";
    application.engineVersion = VK_MAKE_VERSION(1, 0, 0);
    application.apiVersion = VK_API_VERSION_1_2;

    VkInstanceCreateInfo create_info{};
    create_info.sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO;
    create_info.pApplicationInfo = &application;

    VkInstance created;
    const VkResult status = vkCreateInstance(&create_info, nullptr, &created);
    assert(status >= 0);

    return created;
}

// Returns the first physical device reported for `instance`.
//
// Only one slot is offered to the enumeration call, so at most one handle is
// written. Any non-negative result is accepted, and at least one device must
// have been returned.
static inline VkPhysicalDevice select_physical_device(VkInstance instance) {
    uint32_t device_count = 1;
    VkPhysicalDevice first_device;

    const VkResult status = vkEnumeratePhysicalDevices(instance, &device_count, &first_device);
    assert(status >= 0);
    assert(device_count > 0);

    return first_device;
}

// Returns the index of the first queue family that supports both graphics and
// compute operations.
//
// If no such family exists the process is terminated. The termination is
// unconditional (not an assert) so that a build with NDEBUG never flows off
// the end of this non-void function.
static inline uint32_t select_queue_family(VkPhysicalDevice physical_device) {
    // First call only queries how many families exist.
    uint32_t family_count = 0;
    vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &family_count, nullptr);
    assert(family_count > 0);

    std::vector<VkQueueFamilyProperties> families(family_count);
    vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &family_count, families.data());

    const VkQueueFlags required = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT;
    for (uint32_t i = 0; i < family_count; ++i) {
        // All required bits must be present, not just one of them.
        if ((families[i].queueFlags & required) == required) {
            return i;
        }
    }

    std::cerr << "No queue family supports both graphics and compute" << std::endl;
    std::abort();
}

// Creates a logical device with a single queue from `queue_family`.
//
// `extension_names` lists the device extensions to enable. If the physical
// device advertises VK_KHR_portability_subset, it is appended to that list.
// Every feature reported by vkGetPhysicalDeviceFeatures2 (core, Vulkan 1.1,
// Vulkan 1.2 and, when VK_EXT_mesh_shader was requested, mesh shader features)
// is passed through to device creation unchanged.
static inline VkDevice create_device(VkPhysicalDevice physical_device, uint32_t queue_family, std::vector<const char *> extension_names) {
    const float priority = 1.0;
    VkDeviceQueueCreateInfo queue_info{};
    queue_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
    queue_info.queueFamilyIndex = queue_family;
    queue_info.queueCount = 1;
    queue_info.pQueuePriorities = &priority;

    // Two-call idiom: query the count, then fetch the extension list.
    uint32_t available_count;
    VkResult status = vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &available_count, nullptr);
    assert(status >= 0);
    std::vector<VkExtensionProperties> available(available_count);
    status = vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &available_count, available.data());
    assert(status >= 0);

    // The pushed pointer refers to storage inside `available`, which stays
    // alive until after vkCreateDevice below.
    for (const VkExtensionProperties &candidate : available) {
        if (strcmp(candidate.extensionName, "VK_KHR_portability_subset") != 0) {
            continue;
        }
        extension_names.push_back(candidate.extensionName);
    }

    // Head of the pNext chain of feature structs; each struct added below is
    // linked in front of the previous head.
    void *chain_head = nullptr;

    VkPhysicalDeviceMeshShaderFeaturesEXT mesh_shader_features{};
    for (const char *requested : extension_names) {
        if (strcmp(requested, "VK_EXT_mesh_shader") != 0) {
            continue;
        }
        mesh_shader_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MESH_SHADER_FEATURES_EXT;
        mesh_shader_features.pNext = chain_head;
        chain_head = &mesh_shader_features;
    }

    // Vulkan 1.3 features are deliberately left out of the chain:
/*
    VkPhysicalDeviceVulkan13Features physical_device_vulkan13_features{};
    physical_device_vulkan13_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES;
    physical_device_vulkan13_features.pNext = last_struct;
    last_struct = &physical_device_vulkan13_features;
 */

    VkPhysicalDeviceVulkan12Features features12{};
    features12.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
    features12.pNext = chain_head;
    chain_head = &features12;

    VkPhysicalDeviceVulkan11Features features11{};
    features11.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES;
    features11.pNext = chain_head;
    chain_head = &features11;

    VkPhysicalDeviceFeatures2 features2{};
    features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
    features2.pNext = chain_head;

    // Fill the whole chain with what the device supports.
    vkGetPhysicalDeviceFeatures2(physical_device, &features2);

    VkDeviceCreateInfo create_info{};
    create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
    create_info.pNext = &features2;
    create_info.queueCreateInfoCount = 1;
    create_info.pQueueCreateInfos = &queue_info;
    create_info.enabledExtensionCount = static_cast<uint32_t>(extension_names.size());
    create_info.ppEnabledExtensionNames = extension_names.data();

    VkDevice created;
    status = vkCreateDevice(physical_device, &create_info, nullptr, &created);
    assert(status >= 0);

    return created;
}

// Fetches the queue at index 0 of `queue_family` on `device`.
static inline VkQueue get_queue(VkDevice device, uint32_t queue_family) {
    const uint32_t first_queue_index = 0;

    VkQueue handle;
    vkGetDeviceQueue(device, queue_family, first_queue_index, &handle);
    return handle;
}

// Allocates `memory_requirements.size` bytes of device memory from the first
// memory type that is both permitted by `memory_requirements.memoryTypeBits`
// and has every property in `memory_property_flags`.
//
// Only the first `memoryTypeCount` entries of the memory type array are
// examined. Passing 0 for `memory_property_flags` accepts any permitted type.
// Asserts that a suitable type exists and that the allocation did not fail.
static inline VkDeviceMemory allocate_memory(VkDevice device, VkPhysicalDevice physical_device, const VkMemoryRequirements &memory_requirements, VkMemoryPropertyFlags memory_property_flags) {
    VkPhysicalDeviceMemoryProperties memory_properties;
    vkGetPhysicalDeviceMemoryProperties(physical_device, &memory_properties);

    const uint32_t type_count = memory_properties.memoryTypeCount;
    uint32_t type_index = 0;
    for (; type_index < type_count; ++type_index) {
        const bool permitted = (memory_requirements.memoryTypeBits & (1u << type_index)) != 0;
        if (!permitted) {
            continue;
        }

        // Require all requested property bits, not merely an overlap.
        const VkMemoryPropertyFlags available = memory_properties.memoryTypes[type_index].propertyFlags;
        if ((available & memory_property_flags) == memory_property_flags) {
            break;
        }
    }
    assert(type_index < type_count);

    VkMemoryAllocateInfo allocate_info{};
    allocate_info.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO;
    allocate_info.allocationSize = memory_requirements.size;
    allocate_info.memoryTypeIndex = type_index;

    VkDeviceMemory device_memory;
    VkResult result = vkAllocateMemory(device, &allocate_info, nullptr, &device_memory);
    assert(result >= 0);

    return device_memory;
}

// Creates an exclusive-sharing buffer of `size` bytes with the given `usage`,
// allocates memory for it via allocate_memory() using `memory_property_flags`,
// and binds that memory at offset 0.
//
// Returns the buffer together with its backing memory.
static inline std::pair<VkBuffer, VkDeviceMemory> create_buffer_and_memory(VkPhysicalDevice physical_device, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usage, VkMemoryPropertyFlags memory_property_flags) {
    VkBufferCreateInfo info{};
    info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
    info.size = size;
    info.usage = usage;
    info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;

    VkBuffer created;
    VkResult status = vkCreateBuffer(device, &info, nullptr, &created);
    assert(status >= 0);

    VkMemoryRequirements requirements;
    vkGetBufferMemoryRequirements(device, created, &requirements);
    const VkDeviceMemory backing = allocate_memory(device, physical_device, requirements, memory_property_flags);

    status = vkBindBufferMemory(device, created, backing, 0);
    assert(status >= 0);

    return std::pair<VkBuffer, VkDeviceMemory>(created, backing);
}

// Convenience wrapper around create_buffer_and_memory() that returns only the
// buffer. The backing memory handle is dropped, so callers of this function
// have no way to free it.
static inline VkBuffer create_buffer(VkPhysicalDevice physical_device, VkDevice device, VkDeviceSize size, VkBufferUsageFlags usage, VkMemoryPropertyFlags memory_property_flags) {
    return create_buffer_and_memory(physical_device, device, size, usage, memory_property_flags).first;
}

// Creates a query pool holding `query_count` queries of type `query_type`.
// Asserts that creation did not fail.
static inline VkQueryPool create_query_pool(VkDevice device, VkQueryType query_type, uint32_t query_count) {
    VkQueryPoolCreateInfo info{};
    info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
    info.queryType = query_type;
    info.queryCount = query_count;

    VkQueryPool created;
    const VkResult status = vkCreateQueryPool(device, &info, nullptr, &created);
    assert(status >= 0);

    return created;
}

// Copies the first `size` bytes of `device_memory` into a host vector.
//
// The memory is mapped, the mapped range is invalidated so that device writes
// become visible through the mapping (needed for non-coherent memory), the
// bytes are copied out, and the memory is unmapped again. Unmapping leaves the
// allocation in the state it was found in, so this function can be called
// repeatedly on the same memory.
static inline std::vector<char> read_device_memory(VkDevice device, VkDeviceMemory device_memory, VkDeviceSize size) {
    // Map into a real void * rather than type-punning a char * variable.
    void *mapped = nullptr;
    VkResult result = vkMapMemory(device, device_memory, 0, size, 0, &mapped);
    assert(result >= 0);

    VkMappedMemoryRange mapped_memory_range{};
    mapped_memory_range.sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE;
    mapped_memory_range.memory = device_memory;
    mapped_memory_range.offset = 0;
    mapped_memory_range.size = VK_WHOLE_SIZE;

    result = vkInvalidateMappedMemoryRanges(device, 1, &mapped_memory_range);
    assert(result >= 0);

    const char *bytes = static_cast<const char *>(mapped);
    std::vector<char> contents(bytes, bytes + size);

    vkUnmapMemory(device, device_memory);

    return contents;
}

// Creates a new command pool for `queue_family` and allocates one primary
// command buffer from it.
//
// `queue_family` must be the family of the queue the command buffer will be
// submitted to. It defaults to 0, the value that used to be hard-coded, so
// existing callers keep their behaviour; callers that selected a different
// family should pass it explicitly.
//
// The pool is created with the reset-command-buffer flag. Only the command
// buffer is returned; the pool handle is not exposed to the caller.
static inline VkCommandBuffer create_command_buffer(VkDevice device, uint32_t queue_family = 0) {
    VkCommandPoolCreateInfo command_pool_create_info{};
    command_pool_create_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
    command_pool_create_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
    command_pool_create_info.queueFamilyIndex = queue_family;

    VkCommandPool command_pool;
    VkResult result = vkCreateCommandPool(device, &command_pool_create_info, nullptr, &command_pool);
    assert(result >= 0);

    VkCommandBufferAllocateInfo command_buffer_allocate_info{};
    command_buffer_allocate_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
    command_buffer_allocate_info.commandPool = command_pool;
    command_buffer_allocate_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
    command_buffer_allocate_info.commandBufferCount = 1;

    VkCommandBuffer command_buffer;
    result = vkAllocateCommandBuffers(device, &command_buffer_allocate_info, &command_buffer);
    assert(result >= 0);
    assert(command_buffer);

    return command_buffer;
}

// Puts `command_buffer` into the recording state with default (zero) usage
// flags. Asserts that the call did not fail.
static inline void begin_command_buffer(VkCommandBuffer command_buffer) {
    VkCommandBufferBeginInfo begin_info{};
    begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;

    const VkResult status = vkBeginCommandBuffer(command_buffer, &begin_info);
    assert(status >= 0);
}

// Finishes recording `command_buffer`, submits it to `queue` without a fence
// or semaphores, and blocks until the queue is idle.
static inline void submit_command_buffer(VkQueue queue, VkCommandBuffer command_buffer) {
    VkResult status = vkEndCommandBuffer(command_buffer);
    assert(status >= 0);

    VkSubmitInfo submission{};
    submission.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
    submission.commandBufferCount = 1;
    submission.pCommandBuffers = &command_buffer;

    status = vkQueueSubmit(queue, 1, &submission, VK_NULL_HANDLE);
    assert(status >= 0);

    // Synchronous by design: callers rely on the work being complete on return.
    status = vkQueueWaitIdle(queue);
    assert(status >= 0);
}

// Reads back the first `size` bytes of `buffer` through a temporary
// host-visible staging buffer.
//
// A copy from `buffer` to the staging buffer is recorded, followed by a memory
// barrier that makes the transfer write visible to host reads. Waiting for the
// queue to go idle only orders execution; the barrier is what defines the
// device-to-host memory dependency. The staging buffer and its memory are
// destroyed before returning. The command buffer obtained from
// create_command_buffer() is not released here.
static inline std::vector<char> read_buffer(VkPhysicalDevice physical_device, VkDevice device, VkQueue queue, VkDeviceSize size, VkBuffer buffer) {
    VkCommandBuffer command_buffer = create_command_buffer(device);
    const auto [temp_buffer, device_memory] = create_buffer_and_memory(physical_device, device, size, VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);

    begin_command_buffer(command_buffer);

    VkBufferCopy buffer_copy{};
    buffer_copy.srcOffset = 0;
    buffer_copy.dstOffset = 0;
    buffer_copy.size = size;

    vkCmdCopyBuffer(command_buffer, buffer, temp_buffer, 1, &buffer_copy);

    // Make the copy's writes available to, and visible for, host reads.
    VkMemoryBarrier host_read_barrier{};
    host_read_barrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
    host_read_barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
    host_read_barrier.dstAccessMask = VK_ACCESS_HOST_READ_BIT;

    vkCmdPipelineBarrier(command_buffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0,
            1, &host_read_barrier, 0, nullptr, 0, nullptr);

    submit_command_buffer(queue, command_buffer);

    std::vector<char> ret = read_device_memory(device, device_memory, size);

    vkDestroyBuffer(device, temp_buffer, nullptr);
    vkFreeMemory(device, device_memory, nullptr);

    return ret;
}

int main() {
    VkInstance instance = create_instance();
    VkPhysicalDevice physical_device = select_physical_device(instance);
    uint32_t queue_family = select_queue_family(physical_device);
    VkDevice device = create_device(physical_device, queue_family, {});
    VkQueue queue = get_queue(device, queue_family);

    VkBuffer buffer = create_buffer(physical_device, device, 4 * sizeof(uint64_t),
            VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);

    VkQueryPool query_pool = create_query_pool(device, VK_QUERY_TYPE_TIMESTAMP, 4);
    VkCommandBuffer command_buffer = create_command_buffer(device);
    begin_command_buffer(command_buffer);

    vkCmdResetQueryPool(command_buffer, query_pool, 0, 1);
    vkCmdWriteTimestamp(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, query_pool, 0);
    vkCmdResetQueryPool(command_buffer, query_pool, 1, 1);
    vkCmdWriteTimestamp(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, query_pool, 1);
    vkCmdResetQueryPool(command_buffer, query_pool, 2, 1);
    vkCmdWriteTimestamp(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, query_pool, 2);
    vkCmdResetQueryPool(command_buffer, query_pool, 3, 1);
    vkCmdWriteTimestamp(command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, query_pool, 3);

    vkCmdCopyQueryPoolResults(command_buffer, query_pool, 0, 4, buffer, 0, sizeof(uint64_t),
            VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT);

    submit_command_buffer(queue, command_buffer);

    std::vector<char> content = read_buffer(physical_device, device, queue, 4 * sizeof(uint64_t), buffer);
    auto data = reinterpret_cast<uint64_t *>(content.data());
    for (unsigned i = 0; i < 4; ++i) {
        std::cout << data[i] << std::endl;
    }

    return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant