vhost: fix async copy on multi-page buffers

Async copy fails when a single ring buffer vector is split across
multiple physical pages. This happens because the current hpa address
translation function doesn't handle multi-page buffers. This patch
implements a new gpa-to-hpa address conversion function, which returns
the hpa of the first host page hit by the given range. The async data
path calls this new function iteratively to construct a multi-segment
async copy descriptor for ring buffers crossing physical page
boundaries.

Fixes: cd6760da10 ("vhost: introduce async enqueue for split ring")

Signed-off-by: Patrick Fu <patrick.fu@intel.com>
Reviewed-by: Maxime Coquelin <maxime.coquelin@redhat.com>
commit 6563cf9238 (parent 570ee25677)
Author: Patrick Fu <patrick.fu@intel.com>
Date:   2020-07-29 10:04:56 +08:00
Committed-by: Ferruh Yigit

2 changed files with 74 additions and 15 deletions
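
To see why one ring buffer vector can need several copy segments, here is a
minimal, self-contained sketch (not part of the patch: the page table, the
addresses, and the first_hpa() helper are invented for illustration) that
mirrors the first-hit translation this patch introduces:

#include <stdint.h>
#include <stdio.h>

/* Invented stand-in for the vhost guest page table: each entry maps one
 * guest-physically contiguous 4 KiB page to a host-physical address. */
struct page_map {
	uint64_t gpa;   /* guest physical start */
	uint64_t hpa;   /* host physical start */
	uint64_t size;  /* region size */
};

/* Guest pages are contiguous; the backing host pages are not. */
static const struct page_map table[] = {
	{ 0x0000, 0x70000, 0x1000 },
	{ 0x1000, 0x30000, 0x1000 },
	{ 0x2000, 0x90000, 0x1000 },
};

/* First-hit translation, mirroring the semantics of gpa_to_first_hpa():
 * return the hpa of the first page hit and report via *mapped how many
 * bytes of the range are contiguous within that page. */
static uint64_t
first_hpa(uint64_t gpa, uint64_t len, uint64_t *mapped)
{
	unsigned int i;

	for (i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
		const struct page_map *p = &table[i];

		if (gpa >= p->gpa && gpa < p->gpa + p->size) {
			uint64_t avail = p->gpa + p->size - gpa;

			*mapped = len < avail ? len : avail;
			return p->hpa + (gpa - p->gpa);
		}
	}
	*mapped = 0;
	return 0;
}

int main(void)
{
	/* A 6 KiB buffer starting 1 KiB before a page boundary: one
	 * guest-contiguous vector, three host-physical segments. */
	uint64_t gpa = 0x0c00, len = 0x1800, mapped;

	while (len) {
		uint64_t hpa = first_hpa(gpa, len, &mapped);

		if (!hpa)
			return 1; /* unmapped hole in the range */
		printf("seg: hpa=0x%llx len=0x%llx\n",
		       (unsigned long long)hpa, (unsigned long long)mapped);
		gpa += mapped;
		len -= mapped;
	}
	return 0;
}

The pre-patch gpa_to_hpa() returns 0 unless the whole requested size fits
within a single mapped page, so a range like the one above fails outright;
that is the multi-page case the commit message describes.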

lib/librte_vhost/vhost.h

@@ -619,6 +619,56 @@ gpa_to_hpa(struct virtio_net *dev, uint64_t gpa, uint64_t size)
 	return 0;
 }
 
+static __rte_always_inline rte_iova_t
+gpa_to_first_hpa(struct virtio_net *dev, uint64_t gpa,
+	uint64_t gpa_size, uint64_t *hpa_size)
+{
+	uint32_t i;
+	struct guest_page *page;
+	struct guest_page key;
+
+	*hpa_size = gpa_size;
+	if (dev->nr_guest_pages >= VHOST_BINARY_SEARCH_THRESH) {
+		key.guest_phys_addr = gpa & ~(dev->guest_pages[0].size - 1);
+		page = bsearch(&key, dev->guest_pages, dev->nr_guest_pages,
+			       sizeof(struct guest_page), guest_page_addrcmp);
+		if (page) {
+			if (gpa + gpa_size <=
+					page->guest_phys_addr + page->size) {
+				return gpa - page->guest_phys_addr +
+					page->host_phys_addr;
+			} else if (gpa < page->guest_phys_addr +
+					page->size) {
+				*hpa_size = page->guest_phys_addr +
+					page->size - gpa;
+				return gpa - page->guest_phys_addr +
+					page->host_phys_addr;
+			}
+		}
+	} else {
+		for (i = 0; i < dev->nr_guest_pages; i++) {
+			page = &dev->guest_pages[i];
+
+			if (gpa >= page->guest_phys_addr) {
+				if (gpa + gpa_size <=
+					page->guest_phys_addr + page->size) {
+					return gpa - page->guest_phys_addr +
+						page->host_phys_addr;
+				} else if (gpa < page->guest_phys_addr +
+						page->size) {
+					*hpa_size = page->guest_phys_addr +
+						page->size - gpa;
+					return gpa - page->guest_phys_addr +
+						page->host_phys_addr;
+				}
+			}
+		}
+	}
+
+	*hpa_size = 0;
+	return 0;
+}
+
 static __rte_always_inline uint64_t
 hva_to_gpa(struct virtio_net *dev, uint64_t vva, uint64_t len)
 {
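
For orientation, gpa_to_first_hpa() reuses guest page table machinery that
already exists in vhost.h. A sketch of those pre-existing definitions,
roughly as they appear at this point in the tree (reproduced for context,
not part of this diff; the threshold value in particular should be checked
against the actual header):

/* One entry of the per-device guest-to-host physical mapping table,
 * kept sorted by guest_phys_addr. */
struct guest_page {
	uint64_t guest_phys_addr;
	uint64_t host_phys_addr;
	uint64_t size;
};

/* Above this many pages, lookups switch from a linear scan to bsearch(). */
#define VHOST_BINARY_SEARCH_THRESH 256

/* Comparator handed to bsearch() in the fast-path lookup. */
static inline int
guest_page_addrcmp(const void *p1, const void *p2)
{
	const struct guest_page *page1 = (const struct guest_page *)p1;
	const struct guest_page *page2 = (const struct guest_page *)p2;

	if (page1->guest_phys_addr > page2->guest_phys_addr)
		return 1;
	if (page1->guest_phys_addr < page2->guest_phys_addr)
		return -1;

	return 0;
}

Note that the bsearch() key construction masks with guest_pages[0].size - 1,
i.e. it assumes uniformly sized pages, the same trick the existing
gpa_to_hpa() lookup uses.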

lib/librte_vhost/virtio_net.c

@@ -980,6 +980,7 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 	struct batch_copy_elem *batch_copy = vq->batch_copy_elems;
 	struct virtio_net_hdr_mrg_rxbuf tmp_hdr, *hdr = NULL;
 	int error = 0;
+	uint64_t mapped_len;
 
 	uint32_t tlen = 0;
 	int tvec_idx = 0;
@@ -1072,24 +1073,31 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 
 		cpy_len = RTE_MIN(buf_avail, mbuf_avail);
 
-		if (unlikely(cpy_len >= cpy_threshold)) {
-			hpa = (void *)(uintptr_t)gpa_to_hpa(dev,
-					buf_iova + buf_offset, cpy_len);
+		while (unlikely(cpy_len && cpy_len >= cpy_threshold)) {
+			hpa = (void *)(uintptr_t)gpa_to_first_hpa(dev,
+					buf_iova + buf_offset,
+					cpy_len, &mapped_len);
 
-			if (unlikely(!hpa)) {
-				error = -1;
-				goto out;
-			}
+			if (unlikely(!hpa || mapped_len < cpy_threshold))
+				break;
 
 			async_fill_vec(src_iovec + tvec_idx,
 				(void *)(uintptr_t)rte_pktmbuf_iova_offset(m,
-					mbuf_offset), cpy_len);
+				mbuf_offset), (size_t)mapped_len);
 
-			async_fill_vec(dst_iovec + tvec_idx, hpa, cpy_len);
+			async_fill_vec(dst_iovec + tvec_idx,
+					hpa, (size_t)mapped_len);
 
-			tlen += cpy_len;
+			tlen += (uint32_t)mapped_len;
+			cpy_len -= (uint32_t)mapped_len;
+			mbuf_avail -= (uint32_t)mapped_len;
+			mbuf_offset += (uint32_t)mapped_len;
+			buf_avail -= (uint32_t)mapped_len;
+			buf_offset += (uint32_t)mapped_len;
 			tvec_idx++;
-		} else {
+		}
+
+		if (likely(cpy_len)) {
 			if (unlikely(vq->batch_copy_nb_elems >= vq->size)) {
 				rte_memcpy(
 				(void *)((uintptr_t)(buf_addr + buf_offset)),
@@ -1110,12 +1118,13 @@ async_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq,
 					cpy_len;
 				vq->batch_copy_nb_elems++;
 			}
+
+			mbuf_avail -= cpy_len;
+			mbuf_offset += cpy_len;
+			buf_avail -= cpy_len;
+			buf_offset += cpy_len;
 		}
 
-		mbuf_avail -= cpy_len;
-		mbuf_offset += cpy_len;
-		buf_avail -= cpy_len;
-		buf_offset += cpy_len;
 	}
 
 out:
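
Taken together, the virtio_net.c hunks change the per-vector copy from a
single all-or-nothing translation into a peel-off loop: each iteration
offloads one host-contiguous chunk of at least cpy_threshold bytes to the
async engine, and whatever remains (a short tail, or a chunk too small to
be worth offloading) falls through to the synchronous batch-copy path.
A toy, compilable model of that control flow (the page size, threshold,
byte counters, and every name here are invented; only the flow mirrors
the patch):

#include <stdint.h>
#include <stdio.h>

#define PAGE_SZ   0x1000u  /* assume 4 KiB host pages */
#define THRESHOLD 256u     /* assume a 256 B offload threshold */

/* Toy translation: bytes mapped before the next host page boundary. */
static uint64_t
first_mapped_len(uint64_t gpa, uint64_t len)
{
	uint64_t avail = PAGE_SZ - (gpa & (PAGE_SZ - 1));

	return len < avail ? len : avail;
}

int main(void)
{
	uint64_t gpa = 0x0e00;      /* 512 B before a page boundary */
	uint32_t cpy_len = 0x1280;  /* 512 B + 4 KiB + 128 B to copy */
	uint32_t async_bytes = 0, cpu_bytes = 0, segs = 0;

	/* Peel off page-contiguous chunks large enough for the engine. */
	while (cpy_len && cpy_len >= THRESHOLD) {
		uint64_t mapped = first_mapped_len(gpa, cpy_len);

		if (mapped < THRESHOLD)
			break; /* too small to offload; CPU takes the rest */
		async_bytes += (uint32_t)mapped;
		segs++;
		gpa += mapped;
		cpy_len -= (uint32_t)mapped;
	}

	/* Remainder goes through the synchronous batch-copy path. */
	if (cpy_len)
		cpu_bytes = cpy_len;

	printf("async: %u bytes in %u segs, cpu: %u bytes\n",
	       async_bytes, segs, cpu_bytes);
	return 0;
}

With these numbers the loop emits two async segments (512 B and 4 KiB) and
leaves the 128 B tail to the CPU copy: the mixed split that the old
single-shot gpa_to_hpa() path could not express.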