Re: [PATCH v4 1/3] KVM: s390: Add map/unmap ioctl and clean mappings post-guest
From: Matthew Rosato
Date: Thu Apr 30 2026 - 16:11:23 EST
On 4/29/26 10:44 AM, Matthew Rosato wrote:
>
>> +static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
>> +{
>> + struct mm_struct *mm = kvm->mm;
>> + struct page *page = NULL;
>> + int locked = 1;
>> +
>> + if (mmget_not_zero(mm)) {
>> + mmap_read_lock(mm);
>> + get_user_pages_remote(mm, uaddr, 1, FOLL_WRITE,
>> + &page, &locked);
>
> I have wondered this before, and Sashiko mentions it now: Would it make
> sense to also FOLL_LONGTERM here?
>
> I recognize that the old ioctl code that you are resurrecting here did
> not use FOLL_LONGTERM, but I can't think of a reason why.
>
> The mapping may indeed be held long-term (life of the guest or at least
> the associated adapter in the guest), and it's effectively under
> userspace control, waiting for a corresponding unmap ioctl or for the
> guest to go away or enter pv mode.
>
> Can you please test?
>
OK, I did some more looking into this. FOLL_LONGTERM will require
FOLL_PIN, and you're using FOLL_GET here. So that won't work as-is with
this code.
So let's step back and consider what these pages are intended to be used
for; we want to write to them prior to injecting an adapter interrupt
into a guest vm. That seems to me to fall under case 5 in
Documentation/core-api/pin_user_pages.rst which says to use FOLL_PIN.
Whether we keep the page pinned for a long period of time or not depends
on whether or not it was mapped by userspace via the ioctl in this patch
-- but the intent is the same.
So I wonder if we should really be using pin_user_pages_remote() here:
with (FOLL_WRITE | FOLL_LONGTERM) for the map/unmap ioctl case added by
this patch, and with only FOLL_WRITE (in place of the current
get_user_pages_remote() call) for the existing 'slow' path, where we
expect to pin, write some bits into the page and then unpin the page
right away.
And of course unpin_user_page() instead of put_page() for both cases...
Christian / Janosch / Claudio, does this sound sane to you?
I did a quick test with the following diff on top of this series (ran
some I/O over virtio-ccw and virtio-pci, saw io_390_inatomic numbers
going up) -- Doug, can you give it more thorough testing and, if it
looks good and there are no objections, work it into your next version?
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index ee3376b017cb..0960b6726f03 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -2453,7 +2453,8 @@ int kvm_s390_mask_adapter(struct kvm *kvm, unsigned int id, bool masked)
return ret;
}
-static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
+static struct page *pin_map_page(struct kvm *kvm, u64 uaddr,
+ unsigned int gup_flags)
{
struct mm_struct *mm = kvm->mm;
struct page *page = NULL;
@@ -2461,7 +2462,7 @@ static struct page *get_map_page(struct kvm *kvm, u64 uaddr)
if (mmget_not_zero(mm)) {
mmap_read_lock(mm);
- get_user_pages_remote(mm, uaddr, 1, FOLL_WRITE,
+ pin_user_pages_remote(mm, uaddr, 1, FOLL_WRITE | gup_flags,
&page, &locked);
if (locked)
mmap_read_unlock(mm);
@@ -2498,7 +2499,7 @@ static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
srcu_read_unlock(&kvm->srcu, idx);
map->guest_addr = addr;
map->addr = host_addr;
- map->page = get_map_page(kvm, host_addr);
+ map->page = pin_map_page(kvm, host_addr, FOLL_LONGTERM);
if (!map->page) {
ret = -EINVAL;
goto out;
@@ -2509,7 +2510,7 @@ static int kvm_s390_adapter_map(struct kvm *kvm, unsigned int id, __u64 addr)
adapter->nr_maps++;
ret = 0;
} else {
- put_page(map->page);
+ unpin_user_page(map->page);
ret = -EINVAL;
}
raw_spin_unlock_irqrestore(&adapter->maps_lock, flags);
@@ -2550,7 +2551,7 @@ static int kvm_s390_adapter_unmap(struct kvm *kvm, unsigned int id, __u64 addr)
mark_page_dirty(kvm, map_addr_to_mark >> PAGE_SHIFT);
set_page_dirty_lock(map_page_to_put);
srcu_read_unlock(&kvm->srcu, idx);
- put_page(map_page_to_put);
+ unpin_user_page(map_page_to_put);
}
return found ? 0 : -ENOENT;
@@ -2569,7 +2570,7 @@ void kvm_s390_destroy_adapters(struct kvm *kvm)
list_for_each_entry_safe(map, tmp,
&kvm->arch.adapters[i]->maps, list) {
list_del(&map->list);
- put_page(map->page);
+ unpin_user_page(map->page);
kfree(map);
}
raw_spin_unlock_irqrestore(&kvm->arch.adapters[i]->maps_lock, flags);
@@ -2899,7 +2900,7 @@ static int adapter_indicators_set(struct kvm *kvm,
ind_info = get_map_info(adapter, adapter_int->ind_addr);
if (!ind_info) {
raw_spin_unlock_irqrestore(&adapter->maps_lock, flags);
- ind_page = get_map_page(kvm, adapter_int->ind_addr);
+ ind_page = pin_map_page(kvm, adapter_int->ind_addr, 0);
if (!ind_page)
return -1;
idx = srcu_read_lock(&kvm->srcu);
@@ -2921,11 +2922,11 @@ static int adapter_indicators_set(struct kvm *kvm,
summary_info = get_map_info(adapter, adapter_int->summary_addr);
if (!summary_info) {
raw_spin_unlock_irqrestore(&adapter->maps_lock, flags);
- summary_page = get_map_page(kvm, adapter_int->summary_addr);
+ summary_page = pin_map_page(kvm, adapter_int->summary_addr, 0);
if (!summary_page) {
if (!ind_info) {
WARN_ON_ONCE(!ind_page);
- put_page(ind_page);
+ unpin_user_page(ind_page);
}
return -1;
}
@@ -2946,9 +2947,9 @@ static int adapter_indicators_set(struct kvm *kvm,
}
if (!ind_info)
- put_page(ind_page);
+ unpin_user_page(ind_page);
if (!summary_info)
- put_page(summary_page);
+ unpin_user_page(summary_page);
return summary_set ? 0 : 1;
}