xemu/hw/remote/proxy.c
Jagannathan Raman bd36adb8df multi-process: create IOHUB object to handle irq
IOHUB object is added to manage PCI IRQs. It uses KVM_IRQFD
ioctl to create irqfd to injecting PCI interrupts to the guest.
IOHUB object forwards the irqfd to the remote process. Remote process
uses this fd to directly send interrupts to the guest, bypassing QEMU.

Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com>
Message-id: 51d5c3d54e28a68b002e3875c59599c9f5a424a1.1611938319.git.jag.raman@oracle.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
2021-02-10 09:23:28 +00:00

276 lines
7.1 KiB
C

/*
* Copyright © 2018, 2021 Oracle and/or its affiliates.
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "hw/remote/proxy.h"
#include "hw/pci/pci.h"
#include "qapi/error.h"
#include "io/channel-util.h"
#include "hw/qdev-properties.h"
#include "monitor/monitor.h"
#include "migration/blocker.h"
#include "qemu/sockets.h"
#include "hw/remote/mpqemu-link.h"
#include "qemu/error-report.h"
#include "hw/remote/proxy-memory-listener.h"
#include "qom/object.h"
#include "qemu/event_notifier.h"
#include "sysemu/kvm.h"
#include "util/event_notifier-posix.c"
static void proxy_intx_update(PCIDevice *pci_dev)
{
PCIProxyDev *dev = PCI_PROXY_DEV(pci_dev);
PCIINTxRoute route;
int pin = pci_get_byte(pci_dev->config + PCI_INTERRUPT_PIN) - 1;
if (dev->virq != -1) {
kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, &dev->intr, dev->virq);
dev->virq = -1;
}
route = pci_device_route_intx_to_irq(pci_dev, pin);
dev->virq = route.irq;
if (dev->virq != -1) {
kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &dev->intr,
&dev->resample, dev->virq);
}
}
static void setup_irqfd(PCIProxyDev *dev)
{
PCIDevice *pci_dev = PCI_DEVICE(dev);
MPQemuMsg msg;
Error *local_err = NULL;
event_notifier_init(&dev->intr, 0);
event_notifier_init(&dev->resample, 0);
memset(&msg, 0, sizeof(MPQemuMsg));
msg.cmd = MPQEMU_CMD_SET_IRQFD;
msg.num_fds = 2;
msg.fds[0] = event_notifier_get_fd(&dev->intr);
msg.fds[1] = event_notifier_get_fd(&dev->resample);
msg.size = 0;
if (!mpqemu_msg_send(&msg, dev->ioc, &local_err)) {
error_report_err(local_err);
}
dev->virq = -1;
proxy_intx_update(pci_dev);
pci_device_set_intx_routing_notifier(pci_dev, proxy_intx_update);
}
static void pci_proxy_dev_realize(PCIDevice *device, Error **errp)
{
ERRP_GUARD();
PCIProxyDev *dev = PCI_PROXY_DEV(device);
int fd;
if (!dev->fd) {
error_setg(errp, "fd parameter not specified for %s",
DEVICE(device)->id);
return;
}
fd = monitor_fd_param(monitor_cur(), dev->fd, errp);
if (fd == -1) {
error_prepend(errp, "proxy: unable to parse fd %s: ", dev->fd);
return;
}
if (!fd_is_socket(fd)) {
error_setg(errp, "proxy: fd %d is not a socket", fd);
close(fd);
return;
}
dev->ioc = qio_channel_new_fd(fd, errp);
error_setg(&dev->migration_blocker, "%s does not support migration",
TYPE_PCI_PROXY_DEV);
migrate_add_blocker(dev->migration_blocker, errp);
qemu_mutex_init(&dev->io_mutex);
qio_channel_set_blocking(dev->ioc, true, NULL);
proxy_memory_listener_configure(&dev->proxy_listener, dev->ioc);
setup_irqfd(dev);
}
static void pci_proxy_dev_exit(PCIDevice *pdev)
{
PCIProxyDev *dev = PCI_PROXY_DEV(pdev);
if (dev->ioc) {
qio_channel_close(dev->ioc, NULL);
}
migrate_del_blocker(dev->migration_blocker);
error_free(dev->migration_blocker);
proxy_memory_listener_deconfigure(&dev->proxy_listener);
event_notifier_cleanup(&dev->intr);
event_notifier_cleanup(&dev->resample);
}
static void config_op_send(PCIProxyDev *pdev, uint32_t addr, uint32_t *val,
int len, unsigned int op)
{
MPQemuMsg msg = { 0 };
uint64_t ret = -EINVAL;
Error *local_err = NULL;
msg.cmd = op;
msg.data.pci_conf_data.addr = addr;
msg.data.pci_conf_data.val = (op == MPQEMU_CMD_PCI_CFGWRITE) ? *val : 0;
msg.data.pci_conf_data.len = len;
msg.size = sizeof(PciConfDataMsg);
ret = mpqemu_msg_send_and_await_reply(&msg, pdev, &local_err);
if (local_err) {
error_report_err(local_err);
}
if (ret == UINT64_MAX) {
error_report("Failed to perform PCI config %s operation",
(op == MPQEMU_CMD_PCI_CFGREAD) ? "READ" : "WRITE");
}
if (op == MPQEMU_CMD_PCI_CFGREAD) {
*val = (uint32_t)ret;
}
}
static uint32_t pci_proxy_read_config(PCIDevice *d, uint32_t addr, int len)
{
uint32_t val;
config_op_send(PCI_PROXY_DEV(d), addr, &val, len, MPQEMU_CMD_PCI_CFGREAD);
return val;
}
static void pci_proxy_write_config(PCIDevice *d, uint32_t addr, uint32_t val,
int len)
{
/*
* Some of the functions access the copy of remote device's PCI config
* space which is cached in the proxy device. Therefore, maintain
* it updated.
*/
pci_default_write_config(d, addr, val, len);
config_op_send(PCI_PROXY_DEV(d), addr, &val, len, MPQEMU_CMD_PCI_CFGWRITE);
}
static Property proxy_properties[] = {
DEFINE_PROP_STRING("fd", PCIProxyDev, fd),
DEFINE_PROP_END_OF_LIST(),
};
static void pci_proxy_dev_class_init(ObjectClass *klass, void *data)
{
DeviceClass *dc = DEVICE_CLASS(klass);
PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
k->realize = pci_proxy_dev_realize;
k->exit = pci_proxy_dev_exit;
k->config_read = pci_proxy_read_config;
k->config_write = pci_proxy_write_config;
device_class_set_props(dc, proxy_properties);
}
static const TypeInfo pci_proxy_dev_type_info = {
.name = TYPE_PCI_PROXY_DEV,
.parent = TYPE_PCI_DEVICE,
.instance_size = sizeof(PCIProxyDev),
.class_init = pci_proxy_dev_class_init,
.interfaces = (InterfaceInfo[]) {
{ INTERFACE_CONVENTIONAL_PCI_DEVICE },
{ },
},
};
static void pci_proxy_dev_register_types(void)
{
type_register_static(&pci_proxy_dev_type_info);
}
type_init(pci_proxy_dev_register_types)
static void send_bar_access_msg(PCIProxyDev *pdev, MemoryRegion *mr,
bool write, hwaddr addr, uint64_t *val,
unsigned size, bool memory)
{
MPQemuMsg msg = { 0 };
long ret = -EINVAL;
Error *local_err = NULL;
msg.size = sizeof(BarAccessMsg);
msg.data.bar_access.addr = mr->addr + addr;
msg.data.bar_access.size = size;
msg.data.bar_access.memory = memory;
if (write) {
msg.cmd = MPQEMU_CMD_BAR_WRITE;
msg.data.bar_access.val = *val;
} else {
msg.cmd = MPQEMU_CMD_BAR_READ;
}
ret = mpqemu_msg_send_and_await_reply(&msg, pdev, &local_err);
if (local_err) {
error_report_err(local_err);
}
if (!write) {
*val = ret;
}
}
static void proxy_bar_write(void *opaque, hwaddr addr, uint64_t val,
unsigned size)
{
ProxyMemoryRegion *pmr = opaque;
send_bar_access_msg(pmr->dev, &pmr->mr, true, addr, &val, size,
pmr->memory);
}
static uint64_t proxy_bar_read(void *opaque, hwaddr addr, unsigned size)
{
ProxyMemoryRegion *pmr = opaque;
uint64_t val;
send_bar_access_msg(pmr->dev, &pmr->mr, false, addr, &val, size,
pmr->memory);
return val;
}
const MemoryRegionOps proxy_mr_ops = {
.read = proxy_bar_read,
.write = proxy_bar_write,
.endianness = DEVICE_NATIVE_ENDIAN,
.impl = {
.min_access_size = 1,
.max_access_size = 8,
},
};