rcu: add resource reclamation APIs

Add resource reclamation using defer queues to make it simple for
applications and libraries to integrate rte_rcu library.

Signed-off-by: Honnappa Nagarahalli <honnappa.nagarahalli@arm.com>
Reviewed-by: Ola Liljedahl <ola.liljedahl@arm.com>
Reviewed-by: Ruifeng Wang <ruifeng.wang@arm.com>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
This commit is contained in:
Honnappa Nagarahalli 2020-04-21 22:30:03 -05:00 committed by David Marchand
parent fed5ee5f18
commit 706d306ea3
8 changed files with 497 additions and 6 deletions

View file

@ -117,7 +117,7 @@ DEPDIRS-librte_ipsec := librte_eal librte_mbuf librte_cryptodev librte_security
DIRS-$(CONFIG_RTE_LIBRTE_TELEMETRY) += librte_telemetry
DEPDIRS-librte_telemetry := librte_eal librte_metrics librte_ethdev
DIRS-$(CONFIG_RTE_LIBRTE_RCU) += librte_rcu
DEPDIRS-librte_rcu := librte_eal
DEPDIRS-librte_rcu := librte_eal librte_ring
ifeq ($(CONFIG_RTE_EXEC_ENV_LINUX),y)
DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni

View file

@ -7,7 +7,7 @@ include $(RTE_SDK)/mk/rte.vars.mk
LIB = librte_rcu.a
CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3
LDLIBS += -lrte_eal
LDLIBS += -lrte_eal -lrte_ring
EXPORT_MAP := rte_rcu_version.map

View file

@ -3,3 +3,5 @@
sources = files('rte_rcu_qsbr.c')
headers = files('rte_rcu_qsbr.h')
deps += ['ring']

View file

@ -0,0 +1,66 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) 2020 Arm Limited
*/
#ifndef _RTE_RCU_QSBR_PVT_H_
#define _RTE_RCU_QSBR_PVT_H_
/**
* This file is private to the RCU library. It should not be included
* by the user of this library.
*/
#ifdef __cplusplus
extern "C" {
#endif
#include <rte_ring.h>
#include <rte_ring_elem.h>
#include "rte_rcu_qsbr.h"
/* Defer queue structure.
* This structure holds the defer queue. The defer queue is used to
* hold the deleted entries from the data structure that are not
* yet freed.
* All fields are filled in by rte_rcu_qsbr_dq_create() from the
* parameters supplied by the user.
*/
struct rte_rcu_qsbr_dq {
struct rte_rcu_qsbr *v; /**< RCU QSBR variable used by this queue.*/
struct rte_ring *r; /**< Ring that backs the RCU QSBR defer queue. */
uint32_t size;
/**< Number of entries requested for the defer queue when it was
* created. This is the configured size, not the current occupancy.
*/
uint32_t esize;
/**< Size (in bytes) of data, including the token, stored on the
* defer queue.
*/
uint32_t trigger_reclaim_limit;
/**< Automatic reclamation is triggered from the enqueue API when
* the defer queue holds more than this many resources.
*/
uint32_t max_reclaim_size;
/**< Reclaim at the max these many resources during auto
* reclamation.
*/
rte_rcu_qsbr_free_resource_t free_fn;
/**< Function to call to free the resource. */
void *p;
/**< Pointer passed to the free function. Typically, this is the
* pointer to the data structure to which the resource to free
* belongs.
*/
};
/* Internal representation of one element stored on the defer queue:
 * the grace period token, immediately followed by the user's resource
 * data.
 *
 * The type is marked may_alias because the library overlays it on a raw
 * buffer that is copied to/from the ring.
 */
typedef struct {
	uint64_t token; /**< Token returned by rte_rcu_qsbr_start() */
	uint8_t elem[]; /**< User element data, stored inline (not a pointer) */
} __attribute__((__may_alias__)) __rte_rcu_qsbr_dq_elem_t;
#ifdef __cplusplus
}
#endif
#endif /* _RTE_RCU_QSBR_PVT_H_ */

View file

@ -1,6 +1,6 @@
/* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 2018 Arm Limited
* Copyright (c) 2018-2020 Arm Limited
*/
#include <stdio.h>
@ -18,8 +18,10 @@
#include <rte_per_lcore.h>
#include <rte_lcore.h>
#include <rte_errno.h>
#include <rte_ring_elem.h>
#include "rte_rcu_qsbr.h"
#include "rcu_qsbr_pvt.h"
/* Get the memory size of QSBR variable */
size_t
@ -270,6 +272,229 @@ rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v)
return 0;
}
/* Allocate and set up a 'defer queue': a ring that holds the elements
 * removed from a data structure until they can be freed.
 *
 * Returns the new defer queue, or NULL on failure. rte_errno is set to
 * EINVAL when the parameters are rejected and to ENOMEM when the queue
 * descriptor cannot be allocated.
 */
struct rte_rcu_qsbr_dq *
rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters *params)
{
	struct rte_rcu_qsbr_dq *defer_q;
	unsigned int ring_flags;
	uint32_t ring_entries;
	uint32_t elem_bytes;

	/* All mandatory fields must be present and the element size must
	 * be a non-zero multiple of 4 bytes.
	 */
	if (params == NULL || params->free_fn == NULL ||
			params->v == NULL || params->name == NULL ||
			params->size == 0 || params->esize == 0 ||
			(params->esize % 4 != 0)) {
		rte_log(RTE_LOG_ERR, rte_rcu_log_type,
			"%s(): Invalid input parameter\n", __func__);
		rte_errno = EINVAL;

		return NULL;
	}

	/* When auto reclamation is configured (trigger limit within the
	 * queue size), the amount to reclaim must be non-zero.
	 */
	if (params->max_reclaim_size == 0 &&
			params->trigger_reclaim_limit <= params->size) {
		rte_log(RTE_LOG_ERR, rte_rcu_log_type,
			"%s(): Invalid input parameter, size = %u, trigger_reclaim_limit = %u, max_reclaim_size = %u\n",
			__func__, params->size, params->trigger_reclaim_limit,
			params->max_reclaim_size);
		rte_errno = EINVAL;

		return NULL;
	}

	defer_q = rte_zmalloc(NULL, sizeof(struct rte_rcu_qsbr_dq),
			RTE_CACHE_LINE_SIZE);
	if (defer_q == NULL) {
		rte_errno = ENOMEM;

		return NULL;
	}

	/* Ring synchronization modes:
	 * - enqueue is RTS when MT safety is wanted (dq-enqueue mostly
	 *   happens on the control plane), single producer otherwise;
	 * - dequeue is always HTS because a dequeue may be reverted.
	 */
	ring_flags = (params->flags & RTE_RCU_QSBR_DQ_MT_UNSAFE) ?
			RING_F_SP_ENQ : RING_F_MP_RTS_ENQ;
	ring_flags |= RING_F_MC_HTS_DEQ;

	/* The ring size is the next power of two above the requested size */
	ring_entries = rte_align32pow2(params->size + 1);

	/* Each ring element carries the token in front of the user data */
	elem_bytes = __RTE_QSBR_TOKEN_SIZE + params->esize;

	defer_q->r = rte_ring_create_elem(params->name, elem_bytes,
			ring_entries, SOCKET_ID_ANY, ring_flags);
	if (defer_q->r == NULL) {
		rte_log(RTE_LOG_ERR, rte_rcu_log_type,
			"%s(): defer queue create failed\n", __func__);
		rte_free(defer_q);

		return NULL;
	}

	defer_q->v = params->v;
	defer_q->size = params->size;
	defer_q->esize = elem_bytes;
	defer_q->trigger_reclaim_limit = params->trigger_reclaim_limit;
	defer_q->max_reclaim_size = params->max_reclaim_size;
	defer_q->free_fn = params->free_fn;
	defer_q->p = params->p;

	return defer_q;
}
/* Enqueue one resource to the defer queue to free after the grace
 * period is over.
 *
 * dq: defer queue to add the resource to.
 * e:  pointer to the resource data; (element size - token size) bytes
 *     are copied from it.
 *
 * Returns 0 on success. Returns 1 on failure with rte_errno set to
 * EINVAL (NULL argument) or ENOSPC (no room on the defer queue).
 */
int rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e)
{
	__rte_rcu_qsbr_dq_elem_t *dq_elem;
	uint32_t cur_size;

	if (dq == NULL || e == NULL) {
		rte_log(RTE_LOG_ERR, rte_rcu_log_type,
			"%s(): Invalid input parameter\n", __func__);
		rte_errno = EINVAL;

		return 1;
	}

	/* Scratch copy of the element (token + user data). It is declared
	 * as an array of uint64_t, rounded up to cover dq->esize bytes, so
	 * that the 64-bit token accessed through dq_elem is guaranteed to
	 * be suitably aligned.
	 */
	uint64_t data[(dq->esize + sizeof(uint64_t) - 1) / sizeof(uint64_t)];
	dq_elem = (__rte_rcu_qsbr_dq_elem_t *)data;

	/* Start the grace period */
	dq_elem->token = rte_rcu_qsbr_start(dq->v);

	/* Reclaim resources if the queue size has hit the reclaim
	 * limit. This keeps the queue from growing too large and
	 * allows time for reader threads to report their quiescent state.
	 *
	 * max_reclaim_size may legitimately be 0 when auto reclamation is
	 * disabled; skip the call in that case, as the reclaim API rejects
	 * n == 0 with an error log and rte_errno = EINVAL.
	 */
	cur_size = rte_ring_count(dq->r);
	if (cur_size > dq->trigger_reclaim_limit &&
			dq->max_reclaim_size != 0) {
		rte_log(RTE_LOG_INFO, rte_rcu_log_type,
			"%s(): Triggering reclamation\n", __func__);
		rte_rcu_qsbr_dq_reclaim(dq, dq->max_reclaim_size,
						NULL, NULL, NULL);
	}

	/* Enqueue the token and resource. Generating the token and
	 * enqueuing (token + resource) on the queue is not an
	 * atomic operation. When the defer queue is shared by multiple
	 * writers, this might result in tokens enqueued out of order
	 * on the queue. So, some tokens might wait longer than they
	 * are required to be reclaimed.
	 */
	memcpy(dq_elem->elem, e, dq->esize - __RTE_QSBR_TOKEN_SIZE);

	/* Check the status as enqueue might fail since the other threads
	 * might have used up the freed space.
	 * Enqueue uses the configured flags when the DQ was created.
	 */
	if (rte_ring_enqueue_elem(dq->r, data, dq->esize) != 0) {
		rte_log(RTE_LOG_ERR, rte_rcu_log_type,
			"%s(): Enqueue failed\n", __func__);
		/* Note that the token generated above is not used.
		 * Other than wasting tokens, it should not cause any
		 * other issues.
		 */
		rte_log(RTE_LOG_INFO, rte_rcu_log_type,
			"%s(): Skipped enqueuing token = %"PRIu64"\n",
			__func__, dq_elem->token);

		rte_errno = ENOSPC;
		return 1;
	}

	rte_log(RTE_LOG_INFO, rte_rcu_log_type,
		"%s(): Enqueued token = %"PRIu64"\n", __func__, dq_elem->token);

	return 0;
}
/* Reclaim resources from the defer queue.
 *
 * Elements are taken from the head of the queue one at a time. An element
 * is freed (through the registered free function) only if the grace period
 * identified by its token is over; the first element that is not ready is
 * put back and reclamation stops.
 *
 * dq:        defer queue to reclaim from.
 * n:         maximum number of resources to reclaim, must be non-zero.
 * freed:     if not NULL, receives the number of resources freed.
 * pending:   if not NULL, receives the number of resources still on the
 *            defer queue.
 * available: if not NULL, receives the number of resources that can still
 *            be added to the defer queue.
 *
 * Returns 0 when the parameters are valid (even if nothing was reclaimed),
 * 1 with rte_errno = EINVAL otherwise.
 */
int
rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq, unsigned int n,
			unsigned int *freed, unsigned int *pending,
			unsigned int *available)
{
	uint32_t cnt;
	__rte_rcu_qsbr_dq_elem_t *dq_elem;

	if (dq == NULL || n == 0) {
		rte_log(RTE_LOG_ERR, rte_rcu_log_type,
			"%s(): Invalid input parameter\n", __func__);
		rte_errno = EINVAL;

		return 1;
	}

	cnt = 0;

	/* Scratch copy of the dequeued element (token + user data). It is
	 * declared as an array of uint64_t, rounded up to cover dq->esize
	 * bytes, so that the 64-bit token accessed through dq_elem is
	 * guaranteed to be suitably aligned.
	 */
	uint64_t data[(dq->esize + sizeof(uint64_t) - 1) / sizeof(uint64_t)];

	/* Check reader threads quiescent state and reclaim resources.
	 * The dequeue is split in start/finish so that it can be reverted
	 * when the grace period of the head element is not over yet.
	 */
	while (cnt < n &&
		rte_ring_dequeue_bulk_elem_start(dq->r, data,
					dq->esize, 1, NULL) != 0) {
		dq_elem = (__rte_rcu_qsbr_dq_elem_t *)data;

		/* Reclaim the resource */
		if (rte_rcu_qsbr_check(dq->v, dq_elem->token, false) != 1) {
			/* Not ready: leave the element on the queue */
			rte_ring_dequeue_elem_finish(dq->r, 0);
			break;
		}
		rte_ring_dequeue_elem_finish(dq->r, 1);

		rte_log(RTE_LOG_INFO, rte_rcu_log_type,
			"%s(): Reclaimed token = %"PRIu64"\n",
			__func__, dq_elem->token);

		/* The free function works on the local copy of the element */
		dq->free_fn(dq->p, dq_elem->elem, 1);

		cnt++;
	}

	rte_log(RTE_LOG_INFO, rte_rcu_log_type,
		"%s(): Reclaimed %u resources\n", __func__, cnt);

	if (freed != NULL)
		*freed = cnt;
	if (pending != NULL)
		*pending = rte_ring_count(dq->r);
	/* The 'available' output of a ring dequeue is the number of entries
	 * remaining on the ring, not the free space, so the free space is
	 * queried explicitly here.
	 */
	if (available != NULL)
		*available = rte_ring_free_count(dq->r);

	return 0;
}
/* Delete a defer queue.
 *
 * Every resource still on the queue is reclaimed first. The ring and the
 * queue itself are released only if nothing is left pending; otherwise
 * rte_errno is set to EAGAIN and 1 is returned. A NULL queue is accepted
 * and reported as success.
 */
int
rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq)
{
	unsigned int remaining;

	if (dq == NULL) {
		rte_log(RTE_LOG_DEBUG, rte_rcu_log_type,
			"%s(): Invalid input parameter\n", __func__);

		return 0;
	}

	/* Try to drain the whole queue */
	rte_rcu_qsbr_dq_reclaim(dq, ~0, NULL, &remaining, NULL);

	if (remaining == 0) {
		rte_ring_free(dq->r);
		rte_free(dq);

		return 0;
	}

	/* Some resources have not completed their grace period yet */
	rte_errno = EAGAIN;

	return 1;
}
int rte_rcu_log_type;
RTE_INIT(rte_rcu_register)

View file

@ -1,5 +1,5 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) 2018 Arm Limited
* Copyright (c) 2018-2020 Arm Limited
*/
#ifndef _RTE_RCU_QSBR_H_
@ -34,6 +34,7 @@ extern "C" {
#include <rte_lcore.h>
#include <rte_debug.h>
#include <rte_atomic.h>
#include <rte_ring.h>
extern int rte_rcu_log_type;
@ -84,6 +85,7 @@ struct rte_rcu_qsbr_cnt {
#define __RTE_QSBR_CNT_THR_OFFLINE 0
#define __RTE_QSBR_CNT_INIT 1
#define __RTE_QSBR_CNT_MAX ((uint64_t)~0)
#define __RTE_QSBR_TOKEN_SIZE sizeof(uint64_t)
/* RTE Quiescent State variable structure.
* This structure has two elements that vary in size based on the
@ -114,6 +116,86 @@ struct rte_rcu_qsbr {
*/
} __rte_cache_aligned;
/**
* Call back function called to free the resources.
*
* It is invoked by rte_rcu_qsbr_dq_reclaim() for each resource taken off
* the defer queue once its grace period is over.
*
* @param p
* Pointer provided while creating the defer queue
* @param e
* Pointer to the resource data stored on the defer queue. It points to
* a temporary copy owned by the library and must not be used after the
* call back returns.
* @param n
* Number of resources to free. Currently, this is set to 1.
*
* @return
* None
*/
typedef void (*rte_rcu_qsbr_free_resource_t)(void *p, void *e, unsigned int n);
#define RTE_RCU_QSBR_DQ_NAMESIZE RTE_RING_NAMESIZE
/**
* Various flags supported.
*/
/** Enqueue and reclaim operations are multi-thread safe by default.
* The call back functions registered to free the resources are
* assumed to be multi-thread safe.
* Set this flag if multi-thread safety is not required.
*/
#define RTE_RCU_QSBR_DQ_MT_UNSAFE 1
/**
* Parameters used when creating the defer queue.
*/
struct rte_rcu_qsbr_dq_parameters {
const char *name;
/**< Name of the queue. Must not be NULL. */
uint32_t flags;
/**< Flags to control API behaviors */
uint32_t size;
/**< Number of entries in queue. Must not be 0. Typically, this
* will be the same as the maximum number of entries supported in
* the lock free data structure.
* Data structures with an unbounded number of entries are not
* supported currently.
*/
uint32_t esize;
/**< Size (in bytes) of each element in the defer queue.
* This has to be a non-zero multiple of 4B.
*/
uint32_t trigger_reclaim_limit;
/**< Trigger automatic reclamation when the defer queue holds
* more than this many resources. This auto reclamation is
* triggered in rte_rcu_qsbr_dq_enqueue API call.
* If this is greater than 'size', auto reclamation is
* considered to be off.
* If this is set to 0, auto reclamation is triggered
* in every call to rte_rcu_qsbr_dq_enqueue API that finds
* the defer queue non-empty.
*/
uint32_t max_reclaim_size;
/**< When automatic reclamation is enabled, reclaim at the max
* these many resources. This must be non-zero if auto
* reclamation is on. Setting this to 'size' or greater will
* reclaim all possible resources currently on the defer queue.
*/
rte_rcu_qsbr_free_resource_t free_fn;
/**< Function to call to free the resource. Must not be NULL. */
void *p;
/**< Pointer passed to the free function. Typically, this is the
* pointer to the data structure to which the resource to free
* belongs. This can be NULL.
*/
struct rte_rcu_qsbr *v;
/**< RCU QSBR variable to use for this defer queue. Must not be NULL. */
};
/* RTE defer queue structure.
* This structure holds the defer queue. The defer queue is used to
* hold the deleted entries from the data structure that are not
* yet freed.
*/
struct rte_rcu_qsbr_dq;
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
@ -692,6 +774,116 @@ __rte_experimental
int
rte_rcu_qsbr_dump(FILE *f, struct rte_rcu_qsbr *v);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
* Create a queue used to store the data structure elements that can
* be freed later. This queue is referred to as 'defer queue'.
*
* @param params
* Parameters to create a defer queue.
* @return
* On success - Valid pointer to defer queue
* On error - NULL
* Possible rte_errno codes are:
* - EINVAL - NULL parameters are passed
* - ENOMEM - Not enough memory
*/
__rte_experimental
struct rte_rcu_qsbr_dq *
rte_rcu_qsbr_dq_create(const struct rte_rcu_qsbr_dq_parameters *params);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
* Enqueue one resource to the defer queue and start the grace period.
* The resource will be freed later after at least one grace period
* is over.
*
* If the number of resources already on the defer queue exceeds the
* configured 'trigger_reclaim_limit', it will first attempt to reclaim
* up to 'max_reclaim_size' resources. This prevents the defer queue
* from growing too big.
*
* Multi-thread safety is provided as per the defer queue configuration.
* When multi-thread safety is requested, it is possible that the
* resources are not stored in their order of deletion. This results
* in resources being held in the defer queue longer than they should.
*
* @param dq
* Defer queue to allocate an entry from.
* @param e
* Pointer to resource data to copy to the defer queue. The size of
* the data to copy is equal to the element size provided when the
* defer queue was created.
* @return
* On success - 0
* On error - 1 with rte_errno set to
* - EINVAL - NULL parameters are passed
* - ENOSPC - Defer queue is full. This condition can not happen
* if the defer queue size is equal (or larger) than the
* number of elements in the data structure.
*/
__rte_experimental
int
rte_rcu_qsbr_dq_enqueue(struct rte_rcu_qsbr_dq *dq, void *e);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
* Free resources from the defer queue.
*
* This API is multi-thread safe.
*
* @param dq
* Defer queue to free an entry from.
* @param n
* Maximum number of resources to free.
* @param freed
* Number of resources that were freed.
* @param pending
* Number of resources pending on the defer queue. This number might not
* be accurate if multi-thread safety is configured.
* @param available
* Number of resources that can be added to the defer queue.
* This number might not be accurate if multi-thread safety is configured.
* @return
* On success - 0. Zero or more resources may have been reclaimed;
* the exact number is returned in 'freed'.
* On error - 1 with rte_errno set to
* - EINVAL - NULL parameters are passed
*/
__rte_experimental
int
rte_rcu_qsbr_dq_reclaim(struct rte_rcu_qsbr_dq *dq, unsigned int n,
unsigned int *freed, unsigned int *pending, unsigned int *available);
/**
* @warning
* @b EXPERIMENTAL: this API may change without prior notice
*
* Delete a defer queue.
*
* It tries to reclaim all the resources on the defer queue.
* If any of the resources have not completed the grace period
* the reclamation stops and returns immediately. The rest of
* the resources are not reclaimed and the defer queue is not
* freed.
*
* @param dq
* Defer queue to delete.
* @return
* On success - 0
* On error - 1
* Possible rte_errno codes are:
* - EAGAIN - Some of the resources have not completed at least 1 grace
* period, try again.
*/
__rte_experimental
int
rte_rcu_qsbr_dq_delete(struct rte_rcu_qsbr_dq *dq);
#ifdef __cplusplus
}
#endif

View file

@ -8,6 +8,10 @@ EXPERIMENTAL {
rte_rcu_qsbr_synchronize;
rte_rcu_qsbr_thread_register;
rte_rcu_qsbr_thread_unregister;
rte_rcu_qsbr_dq_create;
rte_rcu_qsbr_dq_enqueue;
rte_rcu_qsbr_dq_reclaim;
rte_rcu_qsbr_dq_delete;
local: *;
};

View file

@ -11,7 +11,9 @@
libraries = [
'kvargs', # eal depends on kvargs
'eal', # everything depends on eal
'ring', 'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core
'ring',
'rcu', # rcu depends on ring
'mempool', 'mbuf', 'net', 'meter', 'ethdev', 'pci', # core
'cmdline',
'metrics', # bitrate/latency stats depends on this
'hash', # efd depends on this
@ -22,7 +24,7 @@ libraries = [
'gro', 'gso', 'ip_frag', 'jobstats',
'kni', 'latencystats', 'lpm', 'member',
'power', 'pdump', 'rawdev',
'rcu', 'rib', 'reorder', 'sched', 'security', 'stack', 'vhost',
'rib', 'reorder', 'sched', 'security', 'stack', 'vhost',
# ipsec lib depends on net, crypto and security
'ipsec',
#fib lib depends on rib