driver/virtio: Add Memory Balloon Support for SEV/SEV-ES
Commit Message
Currently, SEV pins the guest's memory to avoid swapping or
moving ciphertext, but this also inhibits
Memory Ballooning.
In Memory Ballooning, only guest's free pages will be relocated
in balloon inflation and deflation, so the difference of plaintext
doesn't matter to guest.
Memory Ballooning is a nice memory overcommitment technology that can
be used in CVM based on SEV and SEV-ES, so userspace tools can
provide an option to allow SEV not to pin memory and enable
Memory Ballooning. Guest kernel may not inhibit Balloon and
should set shared memory for Balloon decrypted.
Signed-off-by: Zheyun Shen <szy0127@sjtu.edu.cn>
---
drivers/virtio/virtio_balloon.c | 18 ++++++++++++++++++
drivers/virtio/virtio_ring.c | 7 +++++++
2 files changed, 25 insertions(+)
--
2.34.1
Comments
On Wed, Jan 10, 2024 at 02:22:42PM +0800, Zheyun Shen wrote:
> For now, SEV pins guest's memory to avoid swapping or
> moving ciphertext, but leading to the inhibition of
> Memory Ballooning.
>
> In Memory Ballooning, only guest's free pages will be relocated
> in balloon inflation and deflation, so the difference of plaintext
> doesn't matter to guest.
>
> Memory Ballooning is a nice memory overcommitment technology can
> be used in CVM based on SEV and SEV-ES, so userspace tools can
> provide an option to allow SEV not to pin memory and enable
> Memory Ballooning. Guest kernel may not inhibit Balloon and
> should set shared memory for Balloon decrypted.
>
> Signed-off-by: Zheyun Shen <szy0127@sjtu.edu.cn>
Sorry I don't get what you are saying at all.
Please format the commit log along the following lines:
Currently .....
This is bad because ...
To fix ...
As a result ...
> ---
> drivers/virtio/virtio_balloon.c | 18 ++++++++++++++++++
> drivers/virtio/virtio_ring.c | 7 +++++++
> 2 files changed, 25 insertions(+)
>
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 1fe93e93f..aca4c8a58 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -18,6 +18,9 @@
> #include <linux/wait.h>
> #include <linux/mm.h>
> #include <linux/page_reporting.h>
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> +#include <linux/set_memory.h>
> +#endif
>
> /*
> * Balloon device works in 4K page units. So each page is pointed to by
> @@ -870,6 +873,9 @@ static int virtio_balloon_register_shrinker(struct virtio_balloon *vb)
> static int virtballoon_probe(struct virtio_device *vdev)
> {
> struct virtio_balloon *vb;
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> + size_t vb_size = PAGE_ALIGN(sizeof(*vb));
> +#endif
> int err;
>
> if (!vdev->config->get) {
> @@ -878,11 +884,19 @@ static int virtballoon_probe(struct virtio_device *vdev)
> return -EINVAL;
> }
>
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> + vdev->priv = vb = kzalloc(vb_size, GFP_KERNEL);
> +#else
> vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL);
> +#endif
> if (!vb) {
> err = -ENOMEM;
> goto out;
> }
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> + set_memory_decrypted((unsigned long)vb, vb_size / PAGE_SIZE);
> + memset(vb, 0, vb_size);
> +#endif
>
> INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func);
> INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func);
> @@ -1101,7 +1115,11 @@ static int virtballoon_validate(struct virtio_device *vdev)
> else if (!virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON))
> __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_REPORTING);
>
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> + __virtio_set_bit(vdev, VIRTIO_F_ACCESS_PLATFORM);
> +#else
> __virtio_clear_bit(vdev, VIRTIO_F_ACCESS_PLATFORM);
> +#endif
> return 0;
> }
>
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 49299b1f9..875612a2e 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -14,6 +14,9 @@
> #include <linux/kmsan.h>
> #include <linux/spinlock.h>
> #include <xen/xen.h>
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> +#include <linux/set_memory.h>
> +#endif
>
> #ifdef DEBUG
> /* For development, we want to crash whenever the ring is screwed. */
> @@ -321,6 +324,10 @@ static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
> if (queue) {
> phys_addr_t phys_addr = virt_to_phys(queue);
> *dma_handle = (dma_addr_t)phys_addr;
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> + set_memory_decrypted((unsigned long)queue, PAGE_ALIGN(size) / PAGE_SIZE);
> + memset(queue, 0, PAGE_ALIGN(size));
> +#endif
>
> /*
> * Sanity check: make sure we dind't truncate
No way I am going to spread CONFIG_AMD_MEM_ENCRYPT all over the place
like this.
> --
> 2.34.1
On Wed, Jan 10, 2024 at 2:23 PM Zheyun Shen <szy0127@sjtu.edu.cn> wrote:
>
> For now, SEV pins guest's memory to avoid swapping or
> moving ciphertext, but leading to the inhibition of
> Memory Ballooning.
>
> In Memory Ballooning, only guest's free pages will be relocated
> in balloon inflation and deflation, so the difference of plaintext
> doesn't matter to guest.
This seems only true if the page is zeroed, is this true here?
Thanks
On 10.01.24 07:22, Zheyun Shen wrote:
> For now, SEV pins guest's memory to avoid swapping or
> moving ciphertext, but leading to the inhibition of
> Memory Ballooning.
>
> In Memory Ballooning, only guest's free pages will be relocated
> in balloon inflation and deflation, so the difference of plaintext
> doesn't matter to guest.
A Linux hypervisor will always give you a fresh, zeroed page. I don't
recall what the spec says, could be that that is a guarantee.
>
> Memory Ballooning is a nice memory overcommitment technology can
> be used in CVM based on SEV and SEV-ES, so userspace tools can
> provide an option to allow SEV not to pin memory and enable
> Memory Ballooning. Guest kernel may not inhibit Balloon and
> should set shared memory for Balloon decrypted.
Two points:
1) Memory overcommit means that you promise to have more memory than you
actually have.
To be able to use that in a *safe* way in the hypervisor, to fulfill
that promise, you need some backup strategy, which is usually swap space
in the hypervisor. Further, one might apply other techniques (RAM
compression, memory deduplication) in the hypervisor to make that
swapping unlikely to ever happen when overcommitting (because nobody
wants to swap).
Assume you run a lot of VMs that mostly have private/encrypted memory
(which is the default). Imagine you previously inflated the balloon on
VM0, and that VM needs more memory (you promised it could have more!).
You reach out to other VMs to inflate the balloon so you get memory
back, but they cannot give up memory safely.
In that scenario (a) you cannot swap something out because all pages are
pinned (b) memory compression cannot be applied because pages are pinned
and (c) memory deduplication cannot be applied because pages are pinned.
Pinned memory is a resource that cannot be overcommitted.
So I am not convinced the use case you are targeting can be considered
any sane form of memory overcommit. It would be better to call it resizing VM
memory instead. Then, it's clearer that the hypervisor cannot promise to
ever give you that memory when you are in need.
2) What about other features?
What if the hypervisor enabled free-page-reporting? Would that work
(unlikely, I assume)? Don't we have to block that?
@@ -18,6 +18,9 @@
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/page_reporting.h>
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+#include <linux/set_memory.h>
+#endif
/*
* Balloon device works in 4K page units. So each page is pointed to by
@@ -870,6 +873,9 @@ static int virtio_balloon_register_shrinker(struct virtio_balloon *vb)
static int virtballoon_probe(struct virtio_device *vdev)
{
struct virtio_balloon *vb;
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ size_t vb_size = PAGE_ALIGN(sizeof(*vb));
+#endif
int err;
if (!vdev->config->get) {
@@ -878,11 +884,19 @@ static int virtballoon_probe(struct virtio_device *vdev)
return -EINVAL;
}
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ vdev->priv = vb = kzalloc(vb_size, GFP_KERNEL);
+#else
vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL);
+#endif
if (!vb) {
err = -ENOMEM;
goto out;
}
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ set_memory_decrypted((unsigned long)vb, vb_size / PAGE_SIZE);
+ memset(vb, 0, vb_size);
+#endif
INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func);
INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func);
@@ -1101,7 +1115,11 @@ static int virtballoon_validate(struct virtio_device *vdev)
else if (!virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON))
__virtio_clear_bit(vdev, VIRTIO_BALLOON_F_REPORTING);
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ __virtio_set_bit(vdev, VIRTIO_F_ACCESS_PLATFORM);
+#else
__virtio_clear_bit(vdev, VIRTIO_F_ACCESS_PLATFORM);
+#endif
return 0;
}
@@ -14,6 +14,9 @@
#include <linux/kmsan.h>
#include <linux/spinlock.h>
#include <xen/xen.h>
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+#include <linux/set_memory.h>
+#endif
#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
@@ -321,6 +324,10 @@ static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
if (queue) {
phys_addr_t phys_addr = virt_to_phys(queue);
*dma_handle = (dma_addr_t)phys_addr;
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+ set_memory_decrypted((unsigned long)queue, PAGE_ALIGN(size) / PAGE_SIZE);
+ memset(queue, 0, PAGE_ALIGN(size));
+#endif
/*
* Sanity check: make sure we dind't truncate