// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corporation, 2021
 *
 * Author: Mike Rapoport <rppt@linux.ibm.com>
 */
71507f512SMike Rapoport
81507f512SMike Rapoport #include <linux/mm.h>
91507f512SMike Rapoport #include <linux/fs.h>
101507f512SMike Rapoport #include <linux/swap.h>
111507f512SMike Rapoport #include <linux/mount.h>
121507f512SMike Rapoport #include <linux/memfd.h>
131507f512SMike Rapoport #include <linux/bitops.h>
141507f512SMike Rapoport #include <linux/printk.h>
151507f512SMike Rapoport #include <linux/pagemap.h>
161507f512SMike Rapoport #include <linux/syscalls.h>
171507f512SMike Rapoport #include <linux/pseudo_fs.h>
181507f512SMike Rapoport #include <linux/secretmem.h>
191507f512SMike Rapoport #include <linux/set_memory.h>
201507f512SMike Rapoport #include <linux/sched/signal.h>
211507f512SMike Rapoport
221507f512SMike Rapoport #include <uapi/linux/magic.h>
231507f512SMike Rapoport
241507f512SMike Rapoport #include <asm/tlbflush.h>
251507f512SMike Rapoport
261507f512SMike Rapoport #include "internal.h"
271507f512SMike Rapoport
#undef pr_fmt
#define pr_fmt(fmt) "secretmem: " fmt

/*
 * Masks used to validate the flags argument of the memfd_secret() system
 * call. No mode bits are defined, so both masks are currently empty.
 */
#define SECRETMEM_MODE_MASK	(0x0)
#define SECRETMEM_FLAGS_MASK	SECRETMEM_MODE_MASK
371507f512SMike Rapoport
38b758fe6dSMike Rapoport (IBM) static bool secretmem_enable __ro_after_init = 1;
391507f512SMike Rapoport module_param_named(enable, secretmem_enable, bool, 0400);
401507f512SMike Rapoport MODULE_PARM_DESC(secretmem_enable,
411507f512SMike Rapoport "Enable secretmem and memfd_secret(2) system call");
421507f512SMike Rapoport
4387066fddSLinus Torvalds static atomic_t secretmem_users;
449a436f8fSMike Rapoport
secretmem_active(void)459a436f8fSMike Rapoport bool secretmem_active(void)
469a436f8fSMike Rapoport {
4787066fddSLinus Torvalds return !!atomic_read(&secretmem_users);
489a436f8fSMike Rapoport }
499a436f8fSMike Rapoport
secretmem_fault(struct vm_fault * vmf)501507f512SMike Rapoport static vm_fault_t secretmem_fault(struct vm_fault *vmf)
511507f512SMike Rapoport {
521507f512SMike Rapoport struct address_space *mapping = vmf->vma->vm_file->f_mapping;
531507f512SMike Rapoport struct inode *inode = file_inode(vmf->vma->vm_file);
541507f512SMike Rapoport pgoff_t offset = vmf->pgoff;
551507f512SMike Rapoport gfp_t gfp = vmf->gfp_mask;
561507f512SMike Rapoport unsigned long addr;
577e2fca52SZhangPeng struct folio *folio;
5884ac0130SMike Rapoport vm_fault_t ret;
591507f512SMike Rapoport int err;
601507f512SMike Rapoport
611507f512SMike Rapoport if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
621507f512SMike Rapoport return vmf_error(-EINVAL);
631507f512SMike Rapoport
6484ac0130SMike Rapoport filemap_invalidate_lock_shared(mapping);
6584ac0130SMike Rapoport
661507f512SMike Rapoport retry:
6732925ee6SMatthew Wilcox (Oracle) folio = filemap_lock_folio(mapping, offset);
6832925ee6SMatthew Wilcox (Oracle) if (IS_ERR(folio)) {
697e2fca52SZhangPeng folio = folio_alloc(gfp | __GFP_ZERO, 0);
707e2fca52SZhangPeng if (!folio) {
7184ac0130SMike Rapoport ret = VM_FAULT_OOM;
7284ac0130SMike Rapoport goto out;
7384ac0130SMike Rapoport }
741507f512SMike Rapoport
7532925ee6SMatthew Wilcox (Oracle) err = set_direct_map_invalid_noflush(folio_page(folio, 0));
761507f512SMike Rapoport if (err) {
777e2fca52SZhangPeng folio_put(folio);
7884ac0130SMike Rapoport ret = vmf_error(err);
7984ac0130SMike Rapoport goto out;
801507f512SMike Rapoport }
811507f512SMike Rapoport
827e2fca52SZhangPeng __folio_mark_uptodate(folio);
837e2fca52SZhangPeng err = filemap_add_folio(mapping, folio, offset, gfp);
841507f512SMike Rapoport if (unlikely(err)) {
851507f512SMike Rapoport /*
861507f512SMike Rapoport * If a split of large page was required, it
871507f512SMike Rapoport * already happened when we marked the page invalid
881507f512SMike Rapoport * which guarantees that this call won't fail
891507f512SMike Rapoport */
9032925ee6SMatthew Wilcox (Oracle) set_direct_map_default_noflush(folio_page(folio, 0));
916f86d053SLance Yang folio_put(folio);
921507f512SMike Rapoport if (err == -EEXIST)
931507f512SMike Rapoport goto retry;
941507f512SMike Rapoport
9584ac0130SMike Rapoport ret = vmf_error(err);
9684ac0130SMike Rapoport goto out;
971507f512SMike Rapoport }
981507f512SMike Rapoport
9932925ee6SMatthew Wilcox (Oracle) addr = (unsigned long)folio_address(folio);
1001507f512SMike Rapoport flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
1011507f512SMike Rapoport }
1021507f512SMike Rapoport
10332925ee6SMatthew Wilcox (Oracle) vmf->page = folio_file_page(folio, vmf->pgoff);
10484ac0130SMike Rapoport ret = VM_FAULT_LOCKED;
10584ac0130SMike Rapoport
10684ac0130SMike Rapoport out:
10784ac0130SMike Rapoport filemap_invalidate_unlock_shared(mapping);
10884ac0130SMike Rapoport return ret;
1091507f512SMike Rapoport }
1101507f512SMike Rapoport
1111507f512SMike Rapoport static const struct vm_operations_struct secretmem_vm_ops = {
1121507f512SMike Rapoport .fault = secretmem_fault,
1131507f512SMike Rapoport };
1141507f512SMike Rapoport
secretmem_release(struct inode * inode,struct file * file)1159a436f8fSMike Rapoport static int secretmem_release(struct inode *inode, struct file *file)
1169a436f8fSMike Rapoport {
11787066fddSLinus Torvalds atomic_dec(&secretmem_users);
1189a436f8fSMike Rapoport return 0;
1199a436f8fSMike Rapoport }
1209a436f8fSMike Rapoport
secretmem_mmap_prepare(struct vm_area_desc * desc)121439b3fb0SLorenzo Stoakes static int secretmem_mmap_prepare(struct vm_area_desc *desc)
1221507f512SMike Rapoport {
12354c58a2fSLorenzo Stoakes const unsigned long len = vma_desc_size(desc);
1241507f512SMike Rapoport
125*e650bb30SLorenzo Stoakes (Oracle) if (!vma_desc_test_any(desc, VMA_SHARED_BIT, VMA_MAYSHARE_BIT))
1261507f512SMike Rapoport return -EINVAL;
1271507f512SMike Rapoport
128fd3196eeSLorenzo Stoakes vma_desc_set_flags(desc, VMA_LOCKED_BIT, VMA_DONTDUMP_BIT);
129fd3196eeSLorenzo Stoakes if (!mlock_future_ok(desc->mm, /*is_vma_locked=*/ true, len))
1301507f512SMike Rapoport return -EAGAIN;
131439b3fb0SLorenzo Stoakes desc->vm_ops = &secretmem_vm_ops;
1321507f512SMike Rapoport
1331507f512SMike Rapoport return 0;
1341507f512SMike Rapoport }
1351507f512SMike Rapoport
vma_is_secretmem(struct vm_area_struct * vma)1361507f512SMike Rapoport bool vma_is_secretmem(struct vm_area_struct *vma)
1371507f512SMike Rapoport {
1381507f512SMike Rapoport return vma->vm_ops == &secretmem_vm_ops;
1391507f512SMike Rapoport }
1401507f512SMike Rapoport
1411507f512SMike Rapoport static const struct file_operations secretmem_fops = {
1429a436f8fSMike Rapoport .release = secretmem_release,
143439b3fb0SLorenzo Stoakes .mmap_prepare = secretmem_mmap_prepare,
1441507f512SMike Rapoport };
1451507f512SMike Rapoport
secretmem_migrate_folio(struct address_space * mapping,struct folio * dst,struct folio * src,enum migrate_mode mode)1465409548dSMatthew Wilcox (Oracle) static int secretmem_migrate_folio(struct address_space *mapping,
1475409548dSMatthew Wilcox (Oracle) struct folio *dst, struct folio *src, enum migrate_mode mode)
1481507f512SMike Rapoport {
1491507f512SMike Rapoport return -EBUSY;
1501507f512SMike Rapoport }
1511507f512SMike Rapoport
/*
 * ->free_folio() handler: restore the default direct map entry for the
 * folio's first page, then zero the folio's full contents.
 */
static void secretmem_free_folio(struct folio *folio)
{
	struct page *page = folio_page(folio, 0);

	set_direct_map_default_noflush(page);
	folio_zero_segment(folio, 0, folio_size(folio));
}
1571507f512SMike Rapoport
1581507f512SMike Rapoport const struct address_space_operations secretmem_aops = {
15946de8b97SMatthew Wilcox (Oracle) .dirty_folio = noop_dirty_folio,
1606612ed24SMatthew Wilcox (Oracle) .free_folio = secretmem_free_folio,
1615409548dSMatthew Wilcox (Oracle) .migrate_folio = secretmem_migrate_folio,
1621507f512SMike Rapoport };
1631507f512SMike Rapoport
secretmem_setattr(struct mnt_idmap * idmap,struct dentry * dentry,struct iattr * iattr)164c1632a0fSChristian Brauner static int secretmem_setattr(struct mnt_idmap *idmap,
165f9b141f9SAxel Rasmussen struct dentry *dentry, struct iattr *iattr)
166f9b141f9SAxel Rasmussen {
167f9b141f9SAxel Rasmussen struct inode *inode = d_inode(dentry);
16884ac0130SMike Rapoport struct address_space *mapping = inode->i_mapping;
169f9b141f9SAxel Rasmussen unsigned int ia_valid = iattr->ia_valid;
17084ac0130SMike Rapoport int ret;
17184ac0130SMike Rapoport
17284ac0130SMike Rapoport filemap_invalidate_lock(mapping);
173f9b141f9SAxel Rasmussen
174f9b141f9SAxel Rasmussen if ((ia_valid & ATTR_SIZE) && inode->i_size)
17584ac0130SMike Rapoport ret = -EINVAL;
17684ac0130SMike Rapoport else
177c1632a0fSChristian Brauner ret = simple_setattr(idmap, dentry, iattr);
178f9b141f9SAxel Rasmussen
17984ac0130SMike Rapoport filemap_invalidate_unlock(mapping);
18084ac0130SMike Rapoport
18184ac0130SMike Rapoport return ret;
182f9b141f9SAxel Rasmussen }
183f9b141f9SAxel Rasmussen
184f9b141f9SAxel Rasmussen static const struct inode_operations secretmem_iops = {
185f9b141f9SAxel Rasmussen .setattr = secretmem_setattr,
186f9b141f9SAxel Rasmussen };
187f9b141f9SAxel Rasmussen
1881507f512SMike Rapoport static struct vfsmount *secretmem_mnt;
1891507f512SMike Rapoport
secretmem_file_create(unsigned long flags)1901507f512SMike Rapoport static struct file *secretmem_file_create(unsigned long flags)
1911507f512SMike Rapoport {
19298001fd6SColin Ian King struct file *file;
1931507f512SMike Rapoport struct inode *inode;
1942bfe15c5SChristian Göttsche const char *anon_name = "[secretmem]";
1951507f512SMike Rapoport
196cbe4134eSShivank Garg inode = anon_inode_make_secure_inode(secretmem_mnt->mnt_sb, anon_name, NULL);
1971507f512SMike Rapoport if (IS_ERR(inode))
1981507f512SMike Rapoport return ERR_CAST(inode);
1991507f512SMike Rapoport
2001507f512SMike Rapoport file = alloc_file_pseudo(inode, secretmem_mnt, "secretmem",
2011812de14SAl Viro O_RDWR | O_LARGEFILE, &secretmem_fops);
2021507f512SMike Rapoport if (IS_ERR(file))
2031507f512SMike Rapoport goto err_free_inode;
2041507f512SMike Rapoport
2051507f512SMike Rapoport mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
2061507f512SMike Rapoport mapping_set_unevictable(inode->i_mapping);
2071507f512SMike Rapoport
208f9b141f9SAxel Rasmussen inode->i_op = &secretmem_iops;
2091507f512SMike Rapoport inode->i_mapping->a_ops = &secretmem_aops;
2101507f512SMike Rapoport
2111507f512SMike Rapoport /* pretend we are a normal file with zero size */
2121507f512SMike Rapoport inode->i_mode |= S_IFREG;
2131507f512SMike Rapoport inode->i_size = 0;
2141507f512SMike Rapoport
2151812de14SAl Viro atomic_inc(&secretmem_users);
2161812de14SAl Viro
2171507f512SMike Rapoport return file;
2181507f512SMike Rapoport
2191507f512SMike Rapoport err_free_inode:
2201507f512SMike Rapoport iput(inode);
2211507f512SMike Rapoport return file;
2221507f512SMike Rapoport }
2231507f512SMike Rapoport
/*
 * memfd_secret() system call entry point.
 *
 * Returns -ENOSYS when secretmem is disabled or the direct map cannot be
 * changed, -EINVAL for flag bits outside SECRETMEM_FLAGS_MASK | O_CLOEXEC,
 * and -ENFILE when the user counter reads negative. Otherwise the result
 * of FD_ADD() on a newly created secretmem file is returned.
 */
SYSCALL_DEFINE1(memfd_secret, unsigned int, flags)
{
	const unsigned int valid_flags = SECRETMEM_FLAGS_MASK | O_CLOEXEC;

	/* make sure local flags do not conflict with global fcntl.h */
	BUILD_BUG_ON(SECRETMEM_FLAGS_MASK & O_CLOEXEC);

	if (!secretmem_enable || !can_set_direct_map())
		return -ENOSYS;

	if (flags & ~valid_flags)
		return -EINVAL;

	/* NOTE(review): presumably guards against counter wraparound - confirm. */
	if (atomic_read(&secretmem_users) < 0)
		return -ENFILE;

	return FD_ADD(flags & O_CLOEXEC, secretmem_file_create(flags));
}
2391507f512SMike Rapoport
secretmem_init_fs_context(struct fs_context * fc)2401507f512SMike Rapoport static int secretmem_init_fs_context(struct fs_context *fc)
2411507f512SMike Rapoport {
24298f99394SChristian Brauner struct pseudo_fs_context *ctx;
24398f99394SChristian Brauner
24498f99394SChristian Brauner ctx = init_pseudo(fc, SECRETMEM_MAGIC);
24598f99394SChristian Brauner if (!ctx)
24698f99394SChristian Brauner return -ENOMEM;
24798f99394SChristian Brauner
24898f99394SChristian Brauner fc->s_iflags |= SB_I_NOEXEC;
24998f99394SChristian Brauner fc->s_iflags |= SB_I_NODEV;
25098f99394SChristian Brauner return 0;
2511507f512SMike Rapoport }
2521507f512SMike Rapoport
2531507f512SMike Rapoport static struct file_system_type secretmem_fs = {
2541507f512SMike Rapoport .name = "secretmem",
2551507f512SMike Rapoport .init_fs_context = secretmem_init_fs_context,
2561507f512SMike Rapoport .kill_sb = kill_anon_super,
2571507f512SMike Rapoport };
2581507f512SMike Rapoport
secretmem_init(void)2591ea41595SXiu Jianfeng static int __init secretmem_init(void)
2601507f512SMike Rapoport {
261532b53ceSPatrick Roy if (!secretmem_enable || !can_set_direct_map())
262f7c5b1aaSXiu Jianfeng return 0;
2631507f512SMike Rapoport
2641507f512SMike Rapoport secretmem_mnt = kern_mount(&secretmem_fs);
2651507f512SMike Rapoport if (IS_ERR(secretmem_mnt))
2664eb5bbdeSBinyi Han return PTR_ERR(secretmem_mnt);
2671507f512SMike Rapoport
268f7c5b1aaSXiu Jianfeng return 0;
2691507f512SMike Rapoport }
2701507f512SMike Rapoport fs_initcall(secretmem_init);
271