driver/virtio: Add Memory Balloon Support for SEV/SEV-ES

Message ID 2035137075.1083380.1704867762955.JavaMail.zimbra@sjtu.edu.cn
State New
Headers
Series driver/virtio: Add Memory Balloon Support for SEV/SEV-ES |

Commit Message

Zheyun Shen Jan. 10, 2024, 6:22 a.m. UTC
  For now, SEV pins guest's memory to avoid swapping or
moving ciphertext, but leading to the inhibition of
Memory Ballooning.

In Memory Ballooning, only guest's free pages will be relocated
in balloon inflation and deflation, so the difference of plaintext
doesn't matter to guest.

Memory Ballooning is a nice memory overcommitment technology can
be used in CVM based on SEV and SEV-ES, so userspace tools can
provide an option to allow SEV not to pin memory and enable 
Memory Ballooning. Guest kernel may not inhibit Balloon and 
should set shared memory for Balloon decrypted.

Signed-off-by: Zheyun Shen <szy0127@sjtu.edu.cn>
---
 drivers/virtio/virtio_balloon.c | 18 ++++++++++++++++++
 drivers/virtio/virtio_ring.c    |  7 +++++++
 2 files changed, 25 insertions(+)

--
2.34.1
  

Comments

Michael S. Tsirkin Jan. 10, 2024, 8:01 a.m. UTC | #1
On Wed, Jan 10, 2024 at 02:22:42PM +0800, Zheyun Shen wrote:
> For now, SEV pins guest's memory to avoid swapping or
> moving ciphertext, but leading to the inhibition of
> Memory Ballooning.
> 
> In Memory Ballooning, only guest's free pages will be relocated
> in balloon inflation and deflation, so the difference of plaintext
> doesn't matter to guest.
> 
> Memory Ballooning is a nice memory overcommitment technology can
> be used in CVM based on SEV and SEV-ES, so userspace tools can
> provide an option to allow SEV not to pin memory and enable 
> Memory Ballooning. Guest kernel may not inhibit Balloon and 
> should set shared memory for Balloon decrypted.
> 
> Signed-off-by: Zheyun Shen <szy0127@sjtu.edu.cn>

Sorry I don't get what you are saying at all.
Please format the commit log along the following lines:

Currently .....
This is bad because ...
To fix ...
As a result ...


> ---
>  drivers/virtio/virtio_balloon.c | 18 ++++++++++++++++++
>  drivers/virtio/virtio_ring.c    |  7 +++++++
>  2 files changed, 25 insertions(+)
> 
> diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
> index 1fe93e93f..aca4c8a58 100644
> --- a/drivers/virtio/virtio_balloon.c
> +++ b/drivers/virtio/virtio_balloon.c
> @@ -18,6 +18,9 @@
>  #include <linux/wait.h>
>  #include <linux/mm.h>
>  #include <linux/page_reporting.h>
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> +#include <linux/set_memory.h>
> +#endif
>  
>  /*
>   * Balloon device works in 4K page units.  So each page is pointed to by
> @@ -870,6 +873,9 @@ static int virtio_balloon_register_shrinker(struct virtio_balloon *vb)
>  static int virtballoon_probe(struct virtio_device *vdev)
>  {
>          struct virtio_balloon *vb;
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> +        size_t vb_size = PAGE_ALIGN(sizeof(*vb));
> +#endif
>          int err;
>  
>          if (!vdev->config->get) {
> @@ -878,11 +884,19 @@ static int virtballoon_probe(struct virtio_device *vdev)
>                  return -EINVAL;
>          }
>  
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> +        vdev->priv = vb = kzalloc(vb_size, GFP_KERNEL);
> +#else
>          vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL);
> +#endif
>          if (!vb) {
>                  err = -ENOMEM;
>                  goto out;
>          }
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> +        set_memory_decrypted((unsigned long)vb, vb_size / PAGE_SIZE);
> +        memset(vb, 0, vb_size);
> +#endif
>  
>          INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func);
>          INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func);
> @@ -1101,7 +1115,11 @@ static int virtballoon_validate(struct virtio_device *vdev)
>          else if (!virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON))
>                  __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_REPORTING);
>  
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> +        __virtio_set_bit(vdev, VIRTIO_F_ACCESS_PLATFORM);
> +#else
>          __virtio_clear_bit(vdev, VIRTIO_F_ACCESS_PLATFORM);
> +#endif
>          return 0;
>  }
>  
> diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
> index 49299b1f9..875612a2e 100644
> --- a/drivers/virtio/virtio_ring.c
> +++ b/drivers/virtio/virtio_ring.c
> @@ -14,6 +14,9 @@
>  #include <linux/kmsan.h>
>  #include <linux/spinlock.h>
>  #include <xen/xen.h>
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> +#include <linux/set_memory.h>
> +#endif
>  
>  #ifdef DEBUG
>  /* For development, we want to crash whenever the ring is screwed. */
> @@ -321,6 +324,10 @@ static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
>                  if (queue) {
>                          phys_addr_t phys_addr = virt_to_phys(queue);
>                          *dma_handle = (dma_addr_t)phys_addr;
> +#ifdef CONFIG_AMD_MEM_ENCRYPT
> +                        set_memory_decrypted((unsigned long)queue, PAGE_ALIGN(size) / PAGE_SIZE);
> +                        memset(queue, 0, PAGE_ALIGN(size));
> +#endif
>  
>                          /*
>                           * Sanity check: make sure we dind't truncate

No way I am going to spead CONFIG_AMD_MEM_ENCRYPT all over the place
like this.


> --
> 2.34.1
  
Jason Wang Jan. 11, 2024, 3:20 a.m. UTC | #2
On Wed, Jan 10, 2024 at 2:23 PM Zheyun Shen <szy0127@sjtu.edu.cn> wrote:
>
> For now, SEV pins guest's memory to avoid swapping or
> moving ciphertext, but leading to the inhibition of
> Memory Ballooning.
>
> In Memory Ballooning, only guest's free pages will be relocated
> in balloon inflation and deflation, so the difference of plaintext
> doesn't matter to guest.

This seems only true if the page is zeroed, is this true here?

Thanks
  
David Hildenbrand Jan. 11, 2024, 8:35 a.m. UTC | #3
On 10.01.24 07:22, Zheyun Shen wrote:
> For now, SEV pins guest's memory to avoid swapping or
> moving ciphertext, but leading to the inhibition of
> Memory Ballooning.
> 
> In Memory Ballooning, only guest's free pages will be relocated
> in balloon inflation and deflation, so the difference of plaintext
> doesn't matter to guest.

A Linux hypervisor will always give you a fresh, zeroed page. I don't 
recall what the spec says, could be that that is a guarantee.

> 
> Memory Ballooning is a nice memory overcommitment technology can
> be used in CVM based on SEV and SEV-ES, so userspace tools can
> provide an option to allow SEV not to pin memory and enable
> Memory Ballooning. Guest kernel may not inhibit Balloon and
> should set shared memory for Balloon decrypted.

Two points:

1) Memory overcommit means that you promise to have more memory than you 
actually have.

To be able to use that in a *safe* way in the hypervisor, to fulfill 
that promise, you need some backup strategy, which is usually swap space 
in the hypervisor. Further one might apply other techniques (ram 
compression, memory deduplication) in the hypervisor to make that 
swapping unlikely to ever happen when overcommitting (because nobody 
wants to swap).

Assume you run a lot of VMs that mostly have private/encrypted memory 
(which is the default). Imagine you previously inflated the balloon on 
VM0, and that VM needs more memory (you promised it could have more!). 
You reach out to other VMs to inflate the balloon so you get memory 
back, but they cannot give up memory safely.

In that scenario (a) you cannot swap something out because all pages are 
pinned (b) memory compression cannot be applied because pages are pinned 
and (c) memory deduplication cannot be applied because pages are pinned.

Pinned memory is a resource that cannot be overcomitted.

So I am not convinced the use case you are targeting can be considered 
any way of sane memory overcommit. You should better call it resizing VM 
memory instead. Then, it's clearer that the hypervisor cannot promise to 
ever give you that memory when you are in need.


2) What about other features?

What if the hypervisor enabled free-page-reporting? Would that work 
(unlikely, I assunme). Don't we have to block that?
  

Patch

diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c
index 1fe93e93f..aca4c8a58 100644
--- a/drivers/virtio/virtio_balloon.c
+++ b/drivers/virtio/virtio_balloon.c
@@ -18,6 +18,9 @@ 
 #include <linux/wait.h>
 #include <linux/mm.h>
 #include <linux/page_reporting.h>
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+#include <linux/set_memory.h>
+#endif
 
 /*
  * Balloon device works in 4K page units.  So each page is pointed to by
@@ -870,6 +873,9 @@  static int virtio_balloon_register_shrinker(struct virtio_balloon *vb)
 static int virtballoon_probe(struct virtio_device *vdev)
 {
         struct virtio_balloon *vb;
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+        size_t vb_size = PAGE_ALIGN(sizeof(*vb));
+#endif
         int err;
 
         if (!vdev->config->get) {
@@ -878,11 +884,19 @@  static int virtballoon_probe(struct virtio_device *vdev)
                 return -EINVAL;
         }
 
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+        vdev->priv = vb = kzalloc(vb_size, GFP_KERNEL);
+#else
         vdev->priv = vb = kzalloc(sizeof(*vb), GFP_KERNEL);
+#endif
         if (!vb) {
                 err = -ENOMEM;
                 goto out;
         }
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+        set_memory_decrypted((unsigned long)vb, vb_size / PAGE_SIZE);
+        memset(vb, 0, vb_size);
+#endif
 
         INIT_WORK(&vb->update_balloon_stats_work, update_balloon_stats_func);
         INIT_WORK(&vb->update_balloon_size_work, update_balloon_size_func);
@@ -1101,7 +1115,11 @@  static int virtballoon_validate(struct virtio_device *vdev)
         else if (!virtio_has_feature(vdev, VIRTIO_BALLOON_F_PAGE_POISON))
                 __virtio_clear_bit(vdev, VIRTIO_BALLOON_F_REPORTING);
 
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+        __virtio_set_bit(vdev, VIRTIO_F_ACCESS_PLATFORM);
+#else
         __virtio_clear_bit(vdev, VIRTIO_F_ACCESS_PLATFORM);
+#endif
         return 0;
 }
 
diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 49299b1f9..875612a2e 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -14,6 +14,9 @@ 
 #include <linux/kmsan.h>
 #include <linux/spinlock.h>
 #include <xen/xen.h>
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+#include <linux/set_memory.h>
+#endif
 
 #ifdef DEBUG
 /* For development, we want to crash whenever the ring is screwed. */
@@ -321,6 +324,10 @@  static void *vring_alloc_queue(struct virtio_device *vdev, size_t size,
                 if (queue) {
                         phys_addr_t phys_addr = virt_to_phys(queue);
                         *dma_handle = (dma_addr_t)phys_addr;
+#ifdef CONFIG_AMD_MEM_ENCRYPT
+                        set_memory_decrypted((unsigned long)queue, PAGE_ALIGN(size) / PAGE_SIZE);
+                        memset(queue, 0, PAGE_ALIGN(size));
+#endif
 
                         /*
                          * Sanity check: make sure we dind't truncate