From df98d7ccc2e9e3e5080cce30a6d9c09dd827dc15 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 24 Jun 2019 10:13:04 +0100 Subject: [PATCH 01/12] docs: clarify multiqueue vs multiple virtqueues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vhost-user specification does not explain when VHOST_USER_PROTOCOL_F_MQ must be implemented. This may lead implementors of vhost-user masters to believe that this protocol feature is required for any device that has multiple virtqueues. That would be a mistake since existing vhost-user slaves offer multiple virtqueues but do not advertise VHOST_USER_PROTOCOL_F_MQ. For example, a vhost-net device with one rx/tx queue pair is not multiqueue. The slave does not need to advertise VHOST_USER_PROTOCOL_F_MQ. Therefore the master must assume it has these virtqueues and cannot rely on askingt the slave how many virtqueues exist. Extend the specification to explain the different between true multiqueue and regular devices with a fixed virtqueue layout. Signed-off-by: Stefan Hajnoczi Message-Id: <20190624091304.666-1-stefanha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Marc-André Lureau --- docs/interop/vhost-user.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index 5750668aba..7827b710aa 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -324,6 +324,15 @@ must support changing some configuration aspects on the fly. Multiple queue support ---------------------- +Many devices have a fixed number of virtqueues. In this case the master +already knows the number of available virtqueues without communicating with the +slave. + +Some devices do not have a fixed number of virtqueues. Instead the maximum +number of virtqueues is chosen by the slave. The number can depend on host +resource availability or slave implementation details. Such devices are called +multiple queue devices. + Multiple queue support allows the slave to advertise the maximum number of queues. This is treated as a protocol extension, hence the slave has to implement protocol features first. The multiple queues feature is supported @@ -339,6 +348,14 @@ queue in the sent message to identify a specified queue. The master enables queues by sending message ``VHOST_USER_SET_VRING_ENABLE``. vhost-user-net has historically automatically enabled the first queue pair. +Slaves should always implement the ``VHOST_USER_PROTOCOL_F_MQ`` protocol +feature, even for devices with a fixed number of virtqueues, since it is simple +to implement and offers a degree of introspection. + +Masters must not rely on the ``VHOST_USER_PROTOCOL_F_MQ`` protocol feature for +devices with a fixed number of virtqueues. Only true multiqueue devices +require this protocol feature. + Migration --------- From 21e2acd583126db94f6d881005cd58e835160582 Mon Sep 17 00:00:00 2001 From: Evgeny Yakovlev Date: Thu, 18 Jul 2019 19:14:23 +0300 Subject: [PATCH 02/12] i386/acpi: fix gint overflow in crs_range_compare When very large regions (32GB sized in our case, PCI pass-through of GPUs) are compared substraction result does not fit into gint. As a result crs_replace_with_free_ranges does not get sorted ranges and incorrectly computes PCI64 free space regions. Which then makes linux guest complain about device and PCI64 hole intersection and device becomes unusable. Fix that by returning exactly fitting ranges. Also fix indentation of an entire crs_replace_with_free_ranges to make checkpatch happy. Cc: qemu-stable@nongnu.org Signed-off-by: Evgeny Yakovlev Message-Id: <1563466463-26012-1-git-send-email-wrfsh@yandex-team.ru> Signed-off-by: Evgeny Yakovlev --- hw/i386/acpi-build.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index d281ffa89e..e7b756b51b 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -755,10 +755,16 @@ static void crs_range_set_free(CrsRangeSet *range_set) static gint crs_range_compare(gconstpointer a, gconstpointer b) { - CrsRangeEntry *entry_a = *(CrsRangeEntry **)a; - CrsRangeEntry *entry_b = *(CrsRangeEntry **)b; + CrsRangeEntry *entry_a = *(CrsRangeEntry **)a; + CrsRangeEntry *entry_b = *(CrsRangeEntry **)b; - return (int64_t)entry_a->base - (int64_t)entry_b->base; + if (entry_a->base < entry_b->base) { + return -1; + } else if (entry_a->base > entry_b->base) { + return 1; + } else { + return 0; + } } /* From be1927c97e564346cbd409cb17fe611df74b84e5 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Sun, 2 Jun 2019 13:42:13 +0200 Subject: [PATCH 03/12] ioapic: kvm: Skip route updates for masked pins Masked entries will not generate interrupt messages, thus do no need to be routed by KVM. This is a cosmetic cleanup, just avoiding warnings of the kind qemu-system-x86_64: vtd_irte_get: detected non-present IRTE (index=0, high=0xff00, low=0x100) if the masked entry happens to reference a non-present IRTE. Cc: qemu-stable@nongnu.org Signed-off-by: Jan Kiszka Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Peter Xu --- hw/intc/ioapic.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/hw/intc/ioapic.c b/hw/intc/ioapic.c index c408749876..e99c37cceb 100644 --- a/hw/intc/ioapic.c +++ b/hw/intc/ioapic.c @@ -197,9 +197,11 @@ static void ioapic_update_kvm_routes(IOAPICCommonState *s) MSIMessage msg; struct ioapic_entry_info info; ioapic_entry_parse(s->ioredtbl[i], &info); - msg.address = info.addr; - msg.data = info.data; - kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL); + if (!info.masked) { + msg.address = info.addr; + msg.data = info.data; + kvm_irqchip_update_msi_route(kvm_state, i, msg, NULL); + } } kvm_irqchip_commit_routes(kvm_state); } From ee4b0c8686f781987879508d7c6dd605b5435bac Mon Sep 17 00:00:00 2001 From: Evgeny Yakovlev Date: Fri, 19 Jul 2019 11:54:29 +0300 Subject: [PATCH 04/12] i386/acpi: show PCI Express bus on pxb-pcie expanders Show PCIe host bridge PNP id with PCI host bridge as a compatible id when expanding a pcie bus. Cc: qemu-stable@nongnu.org Signed-off-by: Evgeny Yakovlev Message-Id: <1563526469-15588-1-git-send-email-wrfsh@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index e7b756b51b..f3fdfefcd5 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -1914,10 +1914,13 @@ build_dsdt(GArray *table_data, BIOSLinker *linker, scope = aml_scope("\\_SB"); dev = aml_device("PC%.02X", bus_num); aml_append(dev, aml_name_decl("_UID", aml_int(bus_num))); - aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); aml_append(dev, aml_name_decl("_BBN", aml_int(bus_num))); if (pci_bus_is_express(bus)) { + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A08"))); + aml_append(dev, aml_name_decl("_CID", aml_eisaid("PNP0A03"))); aml_append(dev, build_q35_osc_method()); + } else { + aml_append(dev, aml_name_decl("_HID", aml_eisaid("PNP0A03"))); } if (numa_node != NUMA_NODE_UNASSIGNED) { From ffa207d08253ffffb3993a1dbe09e40af4fc91f1 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 22 Jul 2019 15:41:03 +0200 Subject: [PATCH 05/12] virtio-balloon: Fix wrong sign extension of PFNs If we directly cast from int to uint64_t, we will first sign-extend to an int64_t, which is wrong. We actually want to treat the PFNs like unsigned values. As far as I can see, this dates back to the initial virtio-balloon commit, but wasn't triggered as fairly big guests would be required. Cc: qemu-stable@nongnu.org Reported-by: Michael S. Tsirkin Signed-off-by: David Hildenbrand Message-Id: <20190722134108.22151-2-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: David Gibson --- hw/virtio/virtio-balloon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index e85d1c0d5c..515abf6553 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -343,8 +343,8 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) } while (iov_to_buf(elem->out_sg, elem->out_num, offset, &pfn, 4) == 4) { + unsigned int p = virtio_ldl_p(vdev, &pfn); hwaddr pa; - int p = virtio_ldl_p(vdev, &pfn); pa = (hwaddr) p << VIRTIO_BALLOON_PFN_SHIFT; offset += 4; From 483f13524bb2a08b7ff6a7560b846564ed3b0c33 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 22 Jul 2019 15:41:04 +0200 Subject: [PATCH 06/12] virtio-balloon: Fix QEMU crashes on pagesize > BALLOON_PAGE_SIZE We are using the wrong functions to set/clear bits, effectively touching multiple bits, writing out of range of the bitmap, resulting in memory corruptions. We have to use set_bit()/clear_bit() instead. Can easily be reproduced by starting a qemu guest on hugetlbfs memory, inflating the balloon. QEMU crashes. This never could have worked properly - especially, also pages would have been discarded when the first sub-page would be inflated (the whole bitmap would be set). While testing I realized, that on hugetlbfs it is pretty much impossible to discard a page - the guest just frees the 4k sub-pages in random order most of the time. I was only able to discard a hugepage a handful of times - so I hope that now works correctly. Fixes: ed48c59875b6 ("virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size") Fixes: b27b32391404 ("virtio-balloon: Fix possible guest memory corruption with inflates & deflates") Cc: qemu-stable@nongnu.org #v4.0.0 Acked-by: David Gibson Signed-off-by: David Hildenbrand Message-Id: <20190722134108.22151-3-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-balloon.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 515abf6553..a78d2d2184 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -94,9 +94,8 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, balloon->pbp->base = host_page_base; } - bitmap_set(balloon->pbp->bitmap, - (ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, - subpages); + set_bit((ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, + balloon->pbp->bitmap); if (bitmap_full(balloon->pbp->bitmap, subpages)) { /* We've accumulated a full host page, we can actually discard @@ -140,9 +139,8 @@ static void balloon_deflate_page(VirtIOBalloon *balloon, * for a guest to do this in practice, but handle it anyway, * since getting it wrong could mean discarding memory the * guest is still using. */ - bitmap_clear(balloon->pbp->bitmap, - (ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, - subpages); + clear_bit((ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, + balloon->pbp->bitmap); if (bitmap_empty(balloon->pbp->bitmap, subpages)) { g_free(balloon->pbp); From 2ffc49eea1bbd454913a88a0ad872c2649b36950 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 22 Jul 2019 15:41:05 +0200 Subject: [PATCH 07/12] virtio-balloon: Simplify deflate with pbp Let's simplify this - the case we are optimizing for is very hard to trigger and not worth the effort. If we're switching from inflation to deflation, let's reset the pbp. Acked-by: David Gibson Signed-off-by: David Hildenbrand Message-Id: <20190722134108.22151-4-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-balloon.c | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index a78d2d2184..04a7e6c772 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -117,7 +117,7 @@ static void balloon_deflate_page(VirtIOBalloon *balloon, void *addr = memory_region_get_ram_ptr(mr) + offset; RAMBlock *rb; size_t rb_page_size; - ram_addr_t ram_offset, host_page_base; + ram_addr_t ram_offset; void *host_addr; int ret; @@ -125,27 +125,11 @@ static void balloon_deflate_page(VirtIOBalloon *balloon, * host address? */ rb = qemu_ram_block_from_host(addr, false, &ram_offset); rb_page_size = qemu_ram_pagesize(rb); - host_page_base = ram_offset & ~(rb_page_size - 1); - if (balloon->pbp - && rb == balloon->pbp->rb - && host_page_base == balloon->pbp->base) { - int subpages = rb_page_size / BALLOON_PAGE_SIZE; - - /* - * This means the guest has asked to discard some of the 4kiB - * subpages of a host page, but then changed its mind and - * asked to keep them after all. It's exceedingly unlikely - * for a guest to do this in practice, but handle it anyway, - * since getting it wrong could mean discarding memory the - * guest is still using. */ - clear_bit((ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, - balloon->pbp->bitmap); - - if (bitmap_empty(balloon->pbp->bitmap, subpages)) { - g_free(balloon->pbp); - balloon->pbp = NULL; - } + if (balloon->pbp) { + /* Let's play safe and always reset the pbp on deflation requests. */ + g_free(balloon->pbp); + balloon->pbp = NULL; } host_addr = (void *)((uintptr_t)addr & ~(rb_page_size - 1)); From e6129b271b9dccca22c84870e313c315f2c70063 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 22 Jul 2019 15:41:06 +0200 Subject: [PATCH 08/12] virtio-balloon: Better names for offset variables in inflate/deflate code "host_page_base" is really confusing, let's make this clearer, also rename the other offsets to indicate to which base they apply. offset -> mr_offset ram_offset -> rb_offset host_page_base -> rb_aligned_offset While at it, use QEMU_ALIGN_DOWN() instead of a handcrafted computation and move the computation to the place where it is needed. Acked-by: David Gibson Signed-off-by: David Hildenbrand Message-Id: <20190722134108.22151-5-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-balloon.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 04a7e6c772..f206cc8bf7 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -41,24 +41,23 @@ struct PartiallyBalloonedPage { }; static void balloon_inflate_page(VirtIOBalloon *balloon, - MemoryRegion *mr, hwaddr offset) + MemoryRegion *mr, hwaddr mr_offset) { - void *addr = memory_region_get_ram_ptr(mr) + offset; + void *addr = memory_region_get_ram_ptr(mr) + mr_offset; + ram_addr_t rb_offset, rb_aligned_offset; RAMBlock *rb; size_t rb_page_size; int subpages; - ram_addr_t ram_offset, host_page_base; /* XXX is there a better way to get to the RAMBlock than via a * host address? */ - rb = qemu_ram_block_from_host(addr, false, &ram_offset); + rb = qemu_ram_block_from_host(addr, false, &rb_offset); rb_page_size = qemu_ram_pagesize(rb); - host_page_base = ram_offset & ~(rb_page_size - 1); if (rb_page_size == BALLOON_PAGE_SIZE) { /* Easy case */ - ram_block_discard_range(rb, ram_offset, rb_page_size); + ram_block_discard_range(rb, rb_offset, rb_page_size); /* We ignore errors from ram_block_discard_range(), because it * has already reported them, and failing to discard a balloon * page is not fatal */ @@ -74,11 +73,12 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, warn_report_once( "Balloon used with backing page size > 4kiB, this may not be reliable"); + rb_aligned_offset = QEMU_ALIGN_DOWN(rb_offset, rb_page_size); subpages = rb_page_size / BALLOON_PAGE_SIZE; if (balloon->pbp && (rb != balloon->pbp->rb - || host_page_base != balloon->pbp->base)) { + || rb_aligned_offset != balloon->pbp->base)) { /* We've partially ballooned part of a host page, but now * we're trying to balloon part of a different one. Too hard, * give up on the old partial page */ @@ -91,10 +91,10 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, size_t bitlen = BITS_TO_LONGS(subpages) * sizeof(unsigned long); balloon->pbp = g_malloc0(sizeof(PartiallyBalloonedPage) + bitlen); balloon->pbp->rb = rb; - balloon->pbp->base = host_page_base; + balloon->pbp->base = rb_aligned_offset; } - set_bit((ram_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, + set_bit((rb_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, balloon->pbp->bitmap); if (bitmap_full(balloon->pbp->bitmap, subpages)) { @@ -112,18 +112,18 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, } static void balloon_deflate_page(VirtIOBalloon *balloon, - MemoryRegion *mr, hwaddr offset) + MemoryRegion *mr, hwaddr mr_offset) { - void *addr = memory_region_get_ram_ptr(mr) + offset; + void *addr = memory_region_get_ram_ptr(mr) + mr_offset; + ram_addr_t rb_offset; RAMBlock *rb; size_t rb_page_size; - ram_addr_t ram_offset; void *host_addr; int ret; /* XXX is there a better way to get to the RAMBlock than via a * host address? */ - rb = qemu_ram_block_from_host(addr, false, &ram_offset); + rb = qemu_ram_block_from_host(addr, false, &rb_offset); rb_page_size = qemu_ram_pagesize(rb); if (balloon->pbp) { From 1c5cfc2b7153dd72bf4b8ddc456408eb2b9b66d8 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 22 Jul 2019 15:41:07 +0200 Subject: [PATCH 09/12] virtio-balloon: Rework pbp tracking data Using the address of a RAMBlock to test for a matching pbp is not really safe. Instead, let's use the guest physical address of the base page along with the page size (via the number of subpages). Also, let's allocate the bitmap separately. This makes the code easier to read and maintain - we can reuse bitmap_new(). Prepare the code to move the PBP out of the device. Fixes: ed48c59875b6 ("virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size") Fixes: b27b32391404 ("virtio-balloon: Fix possible guest memory corruption with inflates & deflates") Cc: qemu-stable@nongnu.org #v4.0.0 Signed-off-by: David Hildenbrand Message-Id: <20190722134108.22151-6-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-balloon.c | 69 +++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 23 deletions(-) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index f206cc8bf7..40d493a31a 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -35,16 +35,44 @@ #define BALLOON_PAGE_SIZE (1 << VIRTIO_BALLOON_PFN_SHIFT) struct PartiallyBalloonedPage { - RAMBlock *rb; - ram_addr_t base; - unsigned long bitmap[]; + ram_addr_t base_gpa; + long subpages; + unsigned long *bitmap; }; +static void virtio_balloon_pbp_free(PartiallyBalloonedPage *pbp) +{ + if (!pbp) { + return; + } + g_free(pbp->bitmap); + g_free(pbp); +} + +static PartiallyBalloonedPage *virtio_balloon_pbp_alloc(ram_addr_t base_gpa, + long subpages) +{ + PartiallyBalloonedPage *pbp = g_new0(PartiallyBalloonedPage, 1); + + pbp->base_gpa = base_gpa; + pbp->subpages = subpages; + pbp->bitmap = bitmap_new(subpages); + + return pbp; +} + +static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp, + ram_addr_t base_gpa, long subpages) +{ + return pbp->subpages == subpages && pbp->base_gpa == base_gpa; +} + static void balloon_inflate_page(VirtIOBalloon *balloon, MemoryRegion *mr, hwaddr mr_offset) { void *addr = memory_region_get_ram_ptr(mr) + mr_offset; - ram_addr_t rb_offset, rb_aligned_offset; + ram_addr_t rb_offset, rb_aligned_offset, base_gpa; + PartiallyBalloonedPage **pbp = &balloon->pbp; RAMBlock *rb; size_t rb_page_size; int subpages; @@ -75,39 +103,34 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, rb_aligned_offset = QEMU_ALIGN_DOWN(rb_offset, rb_page_size); subpages = rb_page_size / BALLOON_PAGE_SIZE; + base_gpa = memory_region_get_ram_addr(mr) + mr_offset - + (rb_offset - rb_aligned_offset); - if (balloon->pbp - && (rb != balloon->pbp->rb - || rb_aligned_offset != balloon->pbp->base)) { + if (*pbp && !virtio_balloon_pbp_matches(*pbp, base_gpa, subpages)) { /* We've partially ballooned part of a host page, but now * we're trying to balloon part of a different one. Too hard, * give up on the old partial page */ - g_free(balloon->pbp); - balloon->pbp = NULL; + virtio_balloon_pbp_free(*pbp); + *pbp = NULL; } - if (!balloon->pbp) { - /* Starting on a new host page */ - size_t bitlen = BITS_TO_LONGS(subpages) * sizeof(unsigned long); - balloon->pbp = g_malloc0(sizeof(PartiallyBalloonedPage) + bitlen); - balloon->pbp->rb = rb; - balloon->pbp->base = rb_aligned_offset; + if (!*pbp) { + *pbp = virtio_balloon_pbp_alloc(base_gpa, subpages); } - set_bit((rb_offset - balloon->pbp->base) / BALLOON_PAGE_SIZE, - balloon->pbp->bitmap); + set_bit((rb_offset - rb_aligned_offset) / BALLOON_PAGE_SIZE, + (*pbp)->bitmap); - if (bitmap_full(balloon->pbp->bitmap, subpages)) { + if (bitmap_full((*pbp)->bitmap, subpages)) { /* We've accumulated a full host page, we can actually discard * it now */ - ram_block_discard_range(rb, balloon->pbp->base, rb_page_size); + ram_block_discard_range(rb, rb_aligned_offset, rb_page_size); /* We ignore errors from ram_block_discard_range(), because it * has already reported them, and failing to discard a balloon * page is not fatal */ - - g_free(balloon->pbp); - balloon->pbp = NULL; + virtio_balloon_pbp_free(*pbp); + *pbp = NULL; } } @@ -128,7 +151,7 @@ static void balloon_deflate_page(VirtIOBalloon *balloon, if (balloon->pbp) { /* Let's play safe and always reset the pbp on deflation requests. */ - g_free(balloon->pbp); + virtio_balloon_pbp_free(balloon->pbp); balloon->pbp = NULL; } From a8cd64d488325f3be5c4ddec4bf07efb3b8c7330 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Mon, 22 Jul 2019 15:41:08 +0200 Subject: [PATCH 10/12] virtio-balloon: Use temporary PBP only We still have multiple issues in the current code - The PBP is not freed during unrealize() - The PBP is not reset on device resets: After a reset, the PBP is stale. - We are not indicating VIRTIO_BALLOON_F_MUST_TELL_HOST, therefore guests (esp. legacy guests) will reuse pages without deflating, turning the PBP stale. Adding that would require compat handling. Instead, let's use the PBP only temporarily, when processing one bulk of inflation requests. This will keep guest_page_size > 4k working (with Linux guests). There is nothing to do for deflation requests anymore. The pbp is only used for a limited amount of time. Fixes: ed48c59875b6 ("virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size") Cc: qemu-stable@nongnu.org #v4.0.0 Suggested-by: Michael S. Tsirkin Signed-off-by: David Hildenbrand Message-Id: <20190722134108.22151-7-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: David Gibson --- hw/virtio/virtio-balloon.c | 21 +++++++++------------ include/hw/virtio/virtio-balloon.h | 3 --- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 40d493a31a..a6282d58d4 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -34,11 +34,11 @@ #define BALLOON_PAGE_SIZE (1 << VIRTIO_BALLOON_PFN_SHIFT) -struct PartiallyBalloonedPage { +typedef struct PartiallyBalloonedPage { ram_addr_t base_gpa; long subpages; unsigned long *bitmap; -}; +} PartiallyBalloonedPage; static void virtio_balloon_pbp_free(PartiallyBalloonedPage *pbp) { @@ -68,11 +68,11 @@ static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp, } static void balloon_inflate_page(VirtIOBalloon *balloon, - MemoryRegion *mr, hwaddr mr_offset) + MemoryRegion *mr, hwaddr mr_offset, + PartiallyBalloonedPage **pbp) { void *addr = memory_region_get_ram_ptr(mr) + mr_offset; ram_addr_t rb_offset, rb_aligned_offset, base_gpa; - PartiallyBalloonedPage **pbp = &balloon->pbp; RAMBlock *rb; size_t rb_page_size; int subpages; @@ -149,12 +149,6 @@ static void balloon_deflate_page(VirtIOBalloon *balloon, rb = qemu_ram_block_from_host(addr, false, &rb_offset); rb_page_size = qemu_ram_pagesize(rb); - if (balloon->pbp) { - /* Let's play safe and always reset the pbp on deflation requests. */ - virtio_balloon_pbp_free(balloon->pbp); - balloon->pbp = NULL; - } - host_addr = (void *)((uintptr_t)addr & ~(rb_page_size - 1)); /* When a page is deflated, we hint the whole host page it lives @@ -336,6 +330,7 @@ static void balloon_stats_set_poll_interval(Object *obj, Visitor *v, static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) { VirtIOBalloon *s = VIRTIO_BALLOON(vdev); + PartiallyBalloonedPage *pbp = NULL; VirtQueueElement *elem; MemoryRegionSection section; @@ -344,7 +339,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) uint32_t pfn; elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); if (!elem) { - return; + break; } while (iov_to_buf(elem->out_sg, elem->out_num, offset, &pfn, 4) == 4) { @@ -373,7 +368,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) if (!qemu_balloon_is_inhibited()) { if (vq == s->ivq) { balloon_inflate_page(s, section.mr, - section.offset_within_region); + section.offset_within_region, &pbp); } else if (vq == s->dvq) { balloon_deflate_page(s, section.mr, section.offset_within_region); } else { @@ -387,6 +382,8 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) virtio_notify(vdev, vq); g_free(elem); } + + virtio_balloon_pbp_free(pbp); } static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq) diff --git a/include/hw/virtio/virtio-balloon.h b/include/hw/virtio/virtio-balloon.h index 5a99293a45..d1c968d237 100644 --- a/include/hw/virtio/virtio-balloon.h +++ b/include/hw/virtio/virtio-balloon.h @@ -33,8 +33,6 @@ typedef struct virtio_balloon_stat_modern { uint64_t val; } VirtIOBalloonStatModern; -typedef struct PartiallyBalloonedPage PartiallyBalloonedPage; - enum virtio_balloon_free_page_report_status { FREE_PAGE_REPORT_S_STOP = 0, FREE_PAGE_REPORT_S_REQUESTED = 1, @@ -70,7 +68,6 @@ typedef struct VirtIOBalloon { int64_t stats_last_update; int64_t stats_poll_interval; uint32_t host_features; - PartiallyBalloonedPage *pbp; bool qemu_4_0_config_size; } VirtIOBalloon; From 9a7ca8a7c920360db9dcaf616ca6f1440c025043 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 25 Jul 2019 13:36:38 +0200 Subject: [PATCH 11/12] virtio-balloon: don't track subpages for the PBP As ramblocks cannot get removed/readded while we are processing a bulk of inflation requests, there is no more need to track the page size in form of the number of subpages. Suggested-by: David Gibson Signed-off-by: David Hildenbrand Message-Id: <20190725113638.4702-8-david@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-balloon.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index a6282d58d4..fe9664e42c 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -36,7 +36,6 @@ typedef struct PartiallyBalloonedPage { ram_addr_t base_gpa; - long subpages; unsigned long *bitmap; } PartiallyBalloonedPage; @@ -55,16 +54,15 @@ static PartiallyBalloonedPage *virtio_balloon_pbp_alloc(ram_addr_t base_gpa, PartiallyBalloonedPage *pbp = g_new0(PartiallyBalloonedPage, 1); pbp->base_gpa = base_gpa; - pbp->subpages = subpages; pbp->bitmap = bitmap_new(subpages); return pbp; } static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp, - ram_addr_t base_gpa, long subpages) + ram_addr_t base_gpa) { - return pbp->subpages == subpages && pbp->base_gpa == base_gpa; + return pbp->base_gpa == base_gpa; } static void balloon_inflate_page(VirtIOBalloon *balloon, @@ -106,7 +104,7 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, base_gpa = memory_region_get_ram_addr(mr) + mr_offset - (rb_offset - rb_aligned_offset); - if (*pbp && !virtio_balloon_pbp_matches(*pbp, base_gpa, subpages)) { + if (*pbp && !virtio_balloon_pbp_matches(*pbp, base_gpa)) { /* We've partially ballooned part of a host page, but now * we're trying to balloon part of a different one. Too hard, * give up on the old partial page */ From 1b47b37c33ec01ae1efc527f4c97f97f93723bc4 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 25 Jul 2019 07:54:25 -0400 Subject: [PATCH 12/12] virtio-balloon: free pbp more aggressively Previous patches switched to a temporary pbp but that does not go far enough: after device uses a buffer, guest is free to reuse it, so tracking the page and freeing it later is wrong. Free and reset the pbp after we push each element. Fixes: ed48c59875b6 ("virtio-balloon: Safely handle BALLOON_PAGE_SIZE < host page size") Cc: qemu-stable@nongnu.org #v4.0.0 Cc: David Hildenbrand Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-balloon.c | 37 ++++++++++++++++--------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index fe9664e42c..25de154307 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -41,22 +41,19 @@ typedef struct PartiallyBalloonedPage { static void virtio_balloon_pbp_free(PartiallyBalloonedPage *pbp) { - if (!pbp) { + if (!pbp->bitmap) { return; } g_free(pbp->bitmap); - g_free(pbp); + pbp->bitmap = NULL; } -static PartiallyBalloonedPage *virtio_balloon_pbp_alloc(ram_addr_t base_gpa, - long subpages) +static void virtio_balloon_pbp_alloc(PartiallyBalloonedPage *pbp, + ram_addr_t base_gpa, + long subpages) { - PartiallyBalloonedPage *pbp = g_new0(PartiallyBalloonedPage, 1); - pbp->base_gpa = base_gpa; pbp->bitmap = bitmap_new(subpages); - - return pbp; } static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp, @@ -67,7 +64,7 @@ static bool virtio_balloon_pbp_matches(PartiallyBalloonedPage *pbp, static void balloon_inflate_page(VirtIOBalloon *balloon, MemoryRegion *mr, hwaddr mr_offset, - PartiallyBalloonedPage **pbp) + PartiallyBalloonedPage *pbp) { void *addr = memory_region_get_ram_ptr(mr) + mr_offset; ram_addr_t rb_offset, rb_aligned_offset, base_gpa; @@ -104,22 +101,21 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, base_gpa = memory_region_get_ram_addr(mr) + mr_offset - (rb_offset - rb_aligned_offset); - if (*pbp && !virtio_balloon_pbp_matches(*pbp, base_gpa)) { + if (pbp->bitmap && !virtio_balloon_pbp_matches(pbp, base_gpa)) { /* We've partially ballooned part of a host page, but now * we're trying to balloon part of a different one. Too hard, * give up on the old partial page */ - virtio_balloon_pbp_free(*pbp); - *pbp = NULL; + virtio_balloon_pbp_free(pbp); } - if (!*pbp) { - *pbp = virtio_balloon_pbp_alloc(base_gpa, subpages); + if (!pbp->bitmap) { + virtio_balloon_pbp_alloc(pbp, base_gpa, subpages); } set_bit((rb_offset - rb_aligned_offset) / BALLOON_PAGE_SIZE, - (*pbp)->bitmap); + pbp->bitmap); - if (bitmap_full((*pbp)->bitmap, subpages)) { + if (bitmap_full(pbp->bitmap, subpages)) { /* We've accumulated a full host page, we can actually discard * it now */ @@ -127,8 +123,7 @@ static void balloon_inflate_page(VirtIOBalloon *balloon, /* We ignore errors from ram_block_discard_range(), because it * has already reported them, and failing to discard a balloon * page is not fatal */ - virtio_balloon_pbp_free(*pbp); - *pbp = NULL; + virtio_balloon_pbp_free(pbp); } } @@ -328,13 +323,14 @@ static void balloon_stats_set_poll_interval(Object *obj, Visitor *v, static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) { VirtIOBalloon *s = VIRTIO_BALLOON(vdev); - PartiallyBalloonedPage *pbp = NULL; VirtQueueElement *elem; MemoryRegionSection section; for (;;) { + PartiallyBalloonedPage pbp = {}; size_t offset = 0; uint32_t pfn; + elem = virtqueue_pop(vq, sizeof(VirtQueueElement)); if (!elem) { break; @@ -379,9 +375,8 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) virtqueue_push(vq, elem, offset); virtio_notify(vdev, vq); g_free(elem); + virtio_balloon_pbp_free(&pbp); } - - virtio_balloon_pbp_free(pbp); } static void virtio_balloon_receive_stats(VirtIODevice *vdev, VirtQueue *vq)