Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

prov/sharp: Add mocks for SHARP #6

Draft
wants to merge 10 commits into
base: main
Choose a base branch
from
1 change: 1 addition & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ install:
--disable-udp
--disable-usnic
--disable-verbs
--disable-sharp
- make -j2 $MAKE_FLAGS
- make install
- make test
Expand Down
2 changes: 2 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
# Copyright (c) 2017-2018 Intel Corporation, Inc. All right reserved.
# Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All rights reserved.
# (C) Copyright 2020 Hewlett Packard Enterprise Development LP
# Copyright (c) 2022 Intel Corporation. All rights reserved.
#
# Makefile.am for libfabric

Expand Down Expand Up @@ -449,6 +450,7 @@ include prov/rxd/Makefile.include
include prov/bgq/Makefile.include
include prov/opx/Makefile.include
include prov/shm/Makefile.include
include prov/sharp/Makefile.include
include prov/tcp/Makefile.include
include prov/net/Makefile.include
include prov/rstream/Makefile.include
Expand Down
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,15 @@ over MSG endpoints of a core provider.

See [`fi_rxm`(7)](https://ofiwg.github.io/libfabric/main/man/fi_rxm.7.html) for more information.

### sharp

***

The `off_sharp` provider is a utility provider that supports collective endpoints utilizing
the SHARP protocol for barrier and allreduce operations.

See [`fi_sharp`(7)](https://ofiwg.github.io/libfabric/main/man/fi_sharp.7.html) for more information.

### sockets

***
Expand Down
1 change: 1 addition & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,7 @@ FI_PROVIDER_SETUP([hook_debug])
FI_PROVIDER_SETUP([hook_hmem])
FI_PROVIDER_SETUP([dmabuf_peer_mem])
FI_PROVIDER_SETUP([opx])
FI_PROVIDER_SETUP([sharp])
FI_PROVIDER_FINI
dnl Configure the .pc file
FI_PROVIDER_SETUP_PC
Expand Down
4 changes: 4 additions & 0 deletions include/ofi_coll.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,4 +162,8 @@ struct util_coll_operation {
uint64_t flags;
};

/*
 * Create a peer CQ on @domain, driving it with the caller-supplied
 * @progress function instead of a fixed one.
 *
 * @attr must be non-NULL and have FI_PEER set in attr->flags, and @context
 * must point to a struct fi_peer_cq_context whose size field is at least
 * sizeof(struct fi_peer_cq_context); otherwise -EINVAL is returned.
 * The provider used for logging and for ofi_cq_init() is taken from the
 * fabric that @domain belongs to.
 *
 * NOTE(review): on success the new CQ is presumably returned through
 * @cq_fid -- confirm against the definition in prov/coll/src/coll_cq.c.
 */
int coll_cq_init(struct fid_domain *domain, struct fi_cq_attr *attr,
struct fid_cq **cq_fid, ofi_cq_progress_func progress,
void *context);

#endif // _OFI_COLL_H_
11 changes: 11 additions & 0 deletions include/ofi_prov.h
Original file line number Diff line number Diff line change
Expand Up @@ -323,4 +323,15 @@ OPX_INI ;
#define COLL_INIT fi_coll_ini()
COLL_INI ;

/*
 * SHARP provider registration hooks.
 *  - HAVE_SHARP and HAVE_SHARP_DL: SHARP_INI is FI_EXT_INI and SHARP_INIT is
 *    NULL (presumably the provider is then loaded as a separate DL object --
 *    confirm against the other provider stanzas in this header).
 *  - HAVE_SHARP only: fi_sharp_ini() is declared through INI_SIG and
 *    SHARP_INIT expands to a call to it.
 *  - neither: SHARP_INIT is NULL and SHARP_INI is left undefined.
 */
#if (HAVE_SHARP) && (HAVE_SHARP_DL)
# define SHARP_INI FI_EXT_INI
# define SHARP_INIT NULL
#elif (HAVE_SHARP)
# define SHARP_INI INI_SIG(fi_sharp_ini)
# define SHARP_INIT fi_sharp_ini()
SHARP_INI ;
#else
# define SHARP_INIT NULL
#endif

#endif /* _OFI_PROV_H_ */
70 changes: 70 additions & 0 deletions include/ofi_sharp.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
/*
* Copyright (c) 2022 Intel Corporation. All rights reserved.
*
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* BSD license below:
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/

/*
 * Common definitions for the SHARP collective offload provider.
 * At present this header only pulls in shared utility headers and defines
 * the version and debug-flag macros below.
 */
#ifndef _OFI_SHARP_H_
#define _OFI_SHARP_H_

#include "config.h"

#include <stdint.h>
#include <stddef.h>
#include <sys/un.h>

#include <ofi_atom.h>
#include <ofi_proto.h>
#include <ofi_mem.h>
#include <ofi_rbuf.h>
#include <ofi_tree.h>
#include <ofi_hmem.h>

#include <rdma/providers/fi_prov.h>

#include "ofi_coll.h"

#ifdef __cplusplus
extern "C" {
#endif

/*
 * NOTE(review): it is not visible from this header what SHARP_VERSION
 * versions (provider, wire protocol, ...) -- confirm with its users.
 */
#define SHARP_VERSION 1


/*
 * Debug flag: bit 1 when ENABLE_DEBUG is non-zero, otherwise 0.
 * NOTE(review): nothing in this header consumes the flag yet.
 */
#if ENABLE_DEBUG
#define SHARP_FLAG_DEBUG (1 << 1)
#else
#define SHARP_FLAG_DEBUG (0 << 1)
#endif

#ifdef __cplusplus
}
#endif

#endif /* _OFI_SHARP_H_ */
65 changes: 65 additions & 0 deletions man/fi_sharp.7.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
---
layout: page
title: fi_sharp(7)
tagline: Libfabric Programmer's Manual
---
{% include JB/setup %}

# NAME

fi_sharp \- The SHARP Fabric Provider

# OVERVIEW

The SHARP provider is a collectives offload provider that can be used on Linux
systems supporting SHARP protocol.

# SUPPORTED FEATURES

This release contains an initial implementation of the SHARP provider that
offers the following support:

*Endpoint types*
: The provider supports only endpoint type *FI_EP_COLLECTIVE*.

*Endpoint capabilities*
: Endpoints can support only fi_barrier and fi_allreduce operations.

*Modes*
: The provider does not require the use of any mode bits.

*Progress*
: The SHARP provider supports *FI_PROGRESS_MANUAL*.

*Address Format*
: TBD

*Msg flags*
: The provider does not support messaging.

*MR registration mode*
: The provider implements FI_MR_VIRT_ADDR memory mode.

*Atomic operations*
: The provider does not support any atomic operation.

# LIMITATIONS

The SHARP provider has hard-coded maximums for supported queue sizes and data
transfers. These values are reflected in the related fabric attribute
structures.

No support for counters.

# RUNTIME PARAMETERS

The *SHARP* provider checks for the following environment variables:

*FI_SHARP_PARAM1*
: TBD Default: 720401

# SEE ALSO

[`fabric`(7)](fabric.7.html),
[`fi_provider`(7)](fi_provider.7.html),
[`fi_getinfo`(3)](fi_getinfo.3.html)
69 changes: 69 additions & 0 deletions man/man7/fi_sharp.7
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
.\" Automatically generated by Pandoc 2.0.6
.\"
.TH "fi_sharp" "7" "2022\-11\-10" "Libfabric Programmer's Manual" "#VERSION#"
.hy
.SH NAME
.PP
fi_sharp \- The SHARP Fabric Provider
.SH OVERVIEW
.PP
The SHARP provider is a collectives offload provider that can be used on
Linux systems supporting SHARP protocol.
.SH SUPPORTED FEATURES
.PP
This release contains an initial implementation of the SHARP provider that
offers the following support:
.TP
.B \f[I]Endpoint types\f[]
The provider supports only endpoint type \f[I]FI_EP_COLLECTIVE\f[].
.RS
.RE
.TP
.B \f[I]Endpoint capabilities\f[]
Endpoints can support only fi_barrier and fi_allreduce operations.
.RS
.RE
.TP
.B \f[I]Modes\f[]
The provider does not require the use of any mode bits.
.RS
.RE
.TP
.B \f[I]Progress\f[]
The SHARP provider supports \f[I]FI_PROGRESS_MANUAL\f[].
.RS
.RE
.TP
.B \f[I]Address Format\f[]
TBD
.RS
.RE
.TP
.B \f[I]Msg flags\f[]
The provider does not support messaging.
.RS
.RE
.TP
.B \f[I]MR registration mode\f[]
The provider implements FI_MR_VIRT_ADDR memory mode.
.RS
.RE
.TP
.B \f[I]Atomic operations\f[]
The provider does not support any atomic operation.
.RS
.RE
.SH LIMITATIONS
.PP
The SHARP provider has hard\-coded maximums for supported queue sizes
and data transfers.
These values are reflected in the related fabric attribute structures.
.PP
No support for counters.
.SH RUNTIME PARAMETERS
.PP
The \f[I]SHARP\f[] provider checks for the following environment
variables:
.TP
.B \f[I]FI_SHARP_PARAM1\f[]
TBD Default: 720401
.RS
.RE
.SH SEE ALSO
.PP
\f[C]fabric\f[](7), \f[C]fi_provider\f[](7), \f[C]fi_getinfo\f[](3)
.SH AUTHORS
OpenFabrics.
1 change: 1 addition & 0 deletions prov/coll/src/coll.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
#include <ofi_hmem.h>
#include <ofi_prov.h>
#include <ofi_atomic.h>
#include <ofi_coll.h>

#define COLL_IOV_LIMIT 4
#define COLL_MR_MODES (OFI_MR_BASIC_MAP | FI_MR_LOCAL)
Expand Down
20 changes: 16 additions & 4 deletions prov/coll/src/coll_cq.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,18 +68,31 @@ static struct fi_ops_cq coll_cq_ops = {

/*
 * Open a CQ for the coll provider. This is a thin wrapper that forwards all
 * arguments to coll_cq_init() with ofi_cq_progress as the progress function
 * and returns its result unchanged.
 */
int coll_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,
struct fid_cq **cq_fid, void *context)
{
return coll_cq_init(domain, attr, cq_fid, &ofi_cq_progress, context);
}

int coll_cq_init(struct fid_domain *domain,
struct fi_cq_attr *attr, struct fid_cq **cq_fid,
ofi_cq_progress_func progress, void *context)
{
struct coll_cq *cq;
struct fi_peer_cq_context *peer_context = context;
int ret;

const struct coll_domain *coll_domain;
const struct fi_provider* provider;

coll_domain = container_of(domain, struct coll_domain, util_domain.domain_fid.fid);
provider = coll_domain->util_domain.fabric->prov;

if (!attr || !(attr->flags & FI_PEER)) {
FI_WARN(&coll_prov, FI_LOG_CORE, "FI_PEER flag required\n");
FI_WARN(provider, FI_LOG_CORE, "FI_PEER flag required\n");
return -EINVAL;
}

if (!peer_context || peer_context->size < sizeof(*peer_context)) {
FI_WARN(&coll_prov, FI_LOG_CORE, "invalid peer CQ context\n");
FI_WARN(provider, FI_LOG_CORE, "invalid peer CQ context\n");
return -EINVAL;
}

Expand All @@ -89,8 +102,7 @@ int coll_cq_open(struct fid_domain *domain, struct fi_cq_attr *attr,

cq->peer_cq = peer_context->cq;

ret = ofi_cq_init(&coll_prov, domain, attr, &cq->util_cq, &ofi_cq_progress,
context);
ret = ofi_cq_init(provider, domain, attr, &cq->util_cq, progress, context);
if (ret)
goto err;

Expand Down
17 changes: 17 additions & 0 deletions prov/rxm/src/rxm_domain.c
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,9 @@ static int rxm_query_collective(struct fid_domain *domain,

if (!rxm_domain->util_coll_domain)
return -FI_ENOSYS;
if (rxm_domain->offload_coll_domain)
return fi_query_collective(rxm_domain->offload_coll_domain,
coll, attr, flags);

return fi_query_collective(rxm_domain->util_coll_domain,
coll, attr, flags);
Expand Down Expand Up @@ -841,6 +844,8 @@ int rxm_domain_open(struct fid_fabric *fabric, struct fi_info *info,
struct rxm_fabric *rxm_fabric;
struct fi_info *msg_info, *base_info;
struct fi_peer_domain_context peer_context;
struct fi_collective_attr attr;

int ret;

rxm_domain = calloc(1, sizeof(*rxm_domain));
Expand Down Expand Up @@ -890,6 +895,18 @@ int rxm_domain_open(struct fid_fabric *fabric, struct fi_info *info,
FI_PEER, &peer_context);
if (ret)
goto err5;

attr.op = FI_MIN;
attr.datatype = FI_INT8;
attr.datatype_attr.count =1;
attr.datatype_attr.size =1;
attr.mode = 0;
for (int i = FI_BARRIER; i < FI_GATHER; i++) {
ret = fi_query_collective(rxm_domain->offload_coll_domain,
i, &attr, 0);
if (FI_SUCCESS == ret)
rxm_domain->offload_coll_mask |= BIT(i);
}
}
}

Expand Down
Loading