/*- * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0 * * Copyright (c) 2005 Mellanox Technologies. All rights reserved. * Copyright 2018-2019 Amazon.com, Inc. or its affiliates. All rights reserved. * Copyright 2019 Marvell. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include "uverbs.h" #include "core_priv.h" /** * rdma_umap_priv_init() - Initialize the private data of a vma * * @priv: The already allocated private data * @vma: The vm area struct that needs private data * @entry: entry into the mmap_xa that needs to be linked with * this vma * * Each time we map IO memory into user space this keeps track of the * mapping. 
When the device is hot-unplugged we 'zap' the mmaps in user space * to point to the zero page and allow the hot unplug to proceed. * * This is necessary for cases like PCI physical hot unplug as the actual BAR * memory may vanish after this and access to it from userspace could MCE. * * RDMA drivers supporting disassociation must have their user space designed * to cope in some way with their IO pages going to the zero page. * */ void rdma_umap_priv_init(struct rdma_umap_priv *priv, struct vm_area_struct *vma, struct rdma_user_mmap_entry *entry) { struct ib_uverbs_file *ufile = vma->vm_file->private_data; priv->vma = vma; if (entry) { kref_get(&entry->ref); priv->entry = entry; } vma->vm_private_data = priv; /* vm_ops is setup in ib_uverbs_mmap() to avoid module dependencies */ mutex_lock(&ufile->umap_lock); list_add(&priv->list, &ufile->umaps); mutex_unlock(&ufile->umap_lock); } EXPORT_SYMBOL(rdma_umap_priv_init); /** * rdma_user_mmap_io() - Map IO memory into a process * * @ucontext: associated user context * @vma: the vma related to the current mmap call * @pfn: pfn to map * @size: size to map * @prot: pgprot to use in remap call * @entry: mmap_entry retrieved from rdma_user_mmap_entry_get(), or NULL * if mmap_entry is not used by the driver * * This is to be called by drivers as part of their mmap() functions if they * wish to send something like PCI-E BAR memory to userspace. * * Return -EINVAL on wrong flags or size, -EAGAIN on failure to map. 0 on * success. 
*/
int rdma_user_mmap_io(struct ib_ucontext *ucontext, struct vm_area_struct *vma,
		      unsigned long pfn, unsigned long size, pgprot_t prot,
		      struct rdma_user_mmap_entry *entry)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	struct rdma_umap_priv *priv;

	/* Only MAP_SHARED mappings are supported for IO memory */
	if (!(vma->vm_flags & VM_SHARED))
		return -EINVAL;

	/* The vma must cover exactly the requested size */
	if (vma->vm_end - vma->vm_start != size)
		return -EINVAL;

	/* Driver is using this wrong, must be called by ib_uverbs_mmap */
	if (WARN_ON(!vma->vm_file || vma->vm_file->private_data != ufile))
		return -EINVAL;

	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	vma->vm_page_prot = prot;
	if (io_remap_pfn_range(vma, vma->vm_start, pfn, size, prot)) {
		kfree(priv);
		return -EAGAIN;
	}

	/* Links the priv to the ufile so the mapping can be zapped later */
	rdma_umap_priv_init(priv, vma, entry);
	return 0;
}
EXPORT_SYMBOL(rdma_user_mmap_io);

/**
 * rdma_user_mmap_entry_get_pgoff() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @pgoff: The mmap offset >> PAGE_SHIFT
 *
 * This function is called when a user tries to mmap with an offset (returned
 * by rdma_user_mmap_get_offset()) it initially received from the driver. The
 * rdma_user_mmap_entry was created by the function
 * rdma_user_mmap_entry_insert(). This function increases the refcnt of the
 * entry so that it won't be deleted from the xarray in the meantime.
 *
 * Return a reference to an entry if one exists or NULL if there is no
 * match. rdma_user_mmap_entry_put() must be called to put the reference.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get_pgoff(struct ib_ucontext *ucontext,
			       unsigned long pgoff)
{
	struct rdma_user_mmap_entry *entry;

	/* Offsets in the xarray are limited to u32 by the insert path */
	if (pgoff > U32_MAX)
		return NULL;

	xa_lock(&ucontext->mmap_xa);

	entry = xa_load(&ucontext->mmap_xa, pgoff);

	/*
	 * If refcount is zero, entry is already being deleted, driver_removed
	 * indicates that no further mmaps are possible and we are waiting for
	 * the active VMAs to be closed.
	 *
	 * start_pgoff != pgoff means pgoff landed in the middle of a
	 * multi-page entry; only the first page offset is a valid handle.
	 */
	if (!entry || entry->start_pgoff != pgoff || entry->driver_removed ||
	    !kref_get_unless_zero(&entry->ref))
		goto err;

	xa_unlock(&ucontext->mmap_xa);
	return entry;

err:
	xa_unlock(&ucontext->mmap_xa);
	return NULL;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get_pgoff);

/**
 * rdma_user_mmap_entry_get() - Get an entry from the mmap_xa
 *
 * @ucontext: associated user context
 * @vma: the vma being mmap'd into
 *
 * This function is like rdma_user_mmap_entry_get_pgoff() except that it also
 * checks that the VMA is correct.
 */
struct rdma_user_mmap_entry *
rdma_user_mmap_entry_get(struct ib_ucontext *ucontext,
			 struct vm_area_struct *vma)
{
	struct rdma_user_mmap_entry *entry;

	if (!(vma->vm_flags & VM_SHARED))
		return NULL;
	entry = rdma_user_mmap_entry_get_pgoff(ucontext, vma->vm_pgoff);
	if (!entry)
		return NULL;
	/* The mmap length must match the length recorded at insert time */
	if (entry->npages * PAGE_SIZE != vma->vm_end - vma->vm_start) {
		rdma_user_mmap_entry_put(entry);
		return NULL;
	}
	return entry;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_get);

/* kref release callback: erases the entry's pages from the xarray and hands
 * the memory back to the driver via the optional mmap_free() op.
 */
static void rdma_user_mmap_entry_free(struct kref *kref)
{
	struct rdma_user_mmap_entry *entry =
		container_of(kref, struct rdma_user_mmap_entry, ref);
	struct ib_ucontext *ucontext = entry->ucontext;
	unsigned long i;

	/*
	 * Erase all entries occupied by this single entry, this is deferred
	 * until all VMA are closed so that the mmap offsets remain unique.
	 */
	xa_lock(&ucontext->mmap_xa);
	for (i = 0; i < entry->npages; i++)
		__xa_erase(&ucontext->mmap_xa, entry->start_pgoff + i);
	xa_unlock(&ucontext->mmap_xa);

	if (ucontext->device->mmap_free)
		ucontext->device->mmap_free(entry);
}

/**
 * rdma_user_mmap_entry_put() - Drop reference to the mmap entry
 *
 * @entry: an entry in the mmap_xa
 *
 * This function is called when the mapping is closed if it was
 * an io mapping or when the driver is done with the entry for
 * some other reason.
 * Should be called after rdma_user_mmap_entry_get was called
 * and entry is no longer needed. This function will erase the
 * entry and free it if its refcnt reaches zero.
*/
void rdma_user_mmap_entry_put(struct rdma_user_mmap_entry *entry)
{
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_put);

/**
 * rdma_user_mmap_entry_remove() - Drop reference to entry and
 *				   mark it as unmappable
 *
 * @entry: the entry to insert into the mmap_xa
 *
 * Drivers can call this to prevent userspace from creating more mappings for
 * entry, however existing mmaps continue to exist and ops->mmap_free() will
 * not be called until all user mmaps are destroyed.
 */
void rdma_user_mmap_entry_remove(struct rdma_user_mmap_entry *entry)
{
	if (!entry)
		return;

	/* driver_removed blocks new gets; existing references stay valid */
	xa_lock(&entry->ucontext->mmap_xa);
	entry->driver_removed = true;
	xa_unlock(&entry->ucontext->mmap_xa);
	kref_put(&entry->ref, rdma_user_mmap_entry_free);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_remove);

/**
 * rdma_user_mmap_entry_insert_range() - Insert an entry to the mmap_xa
 *					 in a given range.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 * @min_pgoff: minimum pgoff to be returned
 * @max_pgoff: maximum pgoff to be returned
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for implementing their mmap syscall. A database of mmap offsets
 * is handled in the core and helper functions are provided to insert entries
 * into the database and extract entries when the user calls mmap with the
 * given offset. The function allocates a unique page offset in a given range
 * that should be provided to user, the user will use the offset to retrieve
 * information such as address to be mapped and how.
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert_range(struct ib_ucontext *ucontext,
				      struct rdma_user_mmap_entry *entry,
				      size_t length, u32 min_pgoff,
				      u32 max_pgoff)
{
	struct ib_uverbs_file *ufile = ucontext->ufile;
	u32 xa_first, xa_last, npages;
	int err;
	u32 i;
	u32 j;

	if (!entry)
		return -EINVAL;

	kref_init(&entry->ref);
	entry->ucontext = ucontext;

	/*
	 * We want the whole allocation to be done without interruption from a
	 * different thread. The allocation requires finding a free range and
	 * storing. During the xa_insert the lock could be released, possibly
	 * allowing another thread to choose the same range.
	 */
	mutex_lock(&ufile->umap_lock);

	xa_lock(&ucontext->mmap_xa);

	/* We want to find an empty range */
	npages = (u32)DIV_ROUND_UP(length, PAGE_SIZE);
	entry->npages = npages;

	/*
	 * Find an empty range: i is the candidate start, j counts the free
	 * pages found so far at i.  A collision restarts the run just past
	 * the occupied slot; the loop exits with j == npages only when a
	 * full contiguous free run fits inside [min_pgoff, max_pgoff].
	 */
	for (i = min_pgoff, j = 0; (i + j) <= max_pgoff && j != npages; ) {
		if (xa_load(&ucontext->mmap_xa, i + j) != NULL) {
			if (unlikely(i + j == max_pgoff))
				break;
			i = i + j + 1;
			j = 0;
		} else {
			if (unlikely(i + j == max_pgoff))
				break;
			j++;
		}
	}

	if (j != npages)
		goto err_unlock;

	xa_first = i;
	xa_last = i + j;

	for (i = xa_first; i < xa_last; i++) {
		err = __xa_insert(&ucontext->mmap_xa, i, entry, GFP_KERNEL);
		if (err)
			goto err_undo;
	}

	/*
	 * Internally the kernel uses a page offset, in libc this is a byte
	 * offset. Drivers should not return pgoff to userspace.
	 */
	entry->start_pgoff = xa_first;
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);
	return 0;

err_undo:
	/* Roll back the slots inserted before the failing __xa_insert() */
	for (; i > xa_first; i--)
		__xa_erase(&ucontext->mmap_xa, i - 1);

err_unlock:
	xa_unlock(&ucontext->mmap_xa);
	mutex_unlock(&ufile->umap_lock);
	return -ENOMEM;
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert_range);

/**
 * rdma_user_mmap_entry_insert() - Insert an entry to the mmap_xa.
 *
 * @ucontext: associated user context.
 * @entry: the entry to insert into the mmap_xa
 * @length: length of the address that will be mmapped
 *
 * This function should be called by drivers that use the rdma_user_mmap
 * interface for handling user mmapped addresses. The database is handled in
 * the core and helper functions are provided to insert entries into the
 * database and extract entries when the user calls mmap with the given offset.
 * The function allocates a unique page offset that should be provided to user,
 * the user will use the offset to retrieve information such as address to
 * be mapped and how.
 *
 * Return: 0 on success and -ENOMEM on failure
 */
int rdma_user_mmap_entry_insert(struct ib_ucontext *ucontext,
				struct rdma_user_mmap_entry *entry,
				size_t length)
{
	return rdma_user_mmap_entry_insert_range(ucontext, entry, length, 0,
						 U32_MAX);
}
EXPORT_SYMBOL(rdma_user_mmap_entry_insert);