|
83 | 83 | #include <linux/pid_namespace.h>
|
84 | 84 | #include <linux/fs_struct.h>
|
85 | 85 | #include <linux/slab.h>
|
| 86 | +#include <linux/flex_array.h> |
86 | 87 | #ifdef CONFIG_HARDWALL
|
87 | 88 | #include <asm/hardwall.h>
|
88 | 89 | #endif
|
@@ -134,6 +135,8 @@ struct pid_entry {
|
134 | 135 | NULL, &proc_single_file_operations, \
|
135 | 136 | { .proc_show = show } )
|
136 | 137 |
|
| 138 | +static int proc_fd_permission(struct inode *inode, int mask); |
| 139 | + |
137 | 140 | /*
|
138 | 141 | * Count the number of hardlinks for the pid_entry table, excluding the .
|
139 | 142 | * and .. links.
|
@@ -2046,6 +2049,355 @@ static const struct file_operations proc_fd_operations = {
|
2046 | 2049 | .llseek = default_llseek,
|
2047 | 2050 | };
|
2048 | 2051 |
|
| 2052 | +#ifdef CONFIG_CHECKPOINT_RESTORE |
| 2053 | + |
| 2054 | +/* |
| 2055 | + * dname_to_vma_addr - maps a dentry name into two unsigned longs |
| 2056 | + * which represent vma start and end addresses. |
| 2057 | + */ |
| 2058 | +static int dname_to_vma_addr(struct dentry *dentry, |
| 2059 | + unsigned long *start, unsigned long *end) |
| 2060 | +{ |
| 2061 | + if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2) |
| 2062 | + return -EINVAL; |
| 2063 | + |
| 2064 | + return 0; |
| 2065 | +} |
| 2066 | + |
| 2067 | +static int map_files_d_revalidate(struct dentry *dentry, struct nameidata *nd) |
| 2068 | +{ |
| 2069 | + unsigned long vm_start, vm_end; |
| 2070 | + bool exact_vma_exists = false; |
| 2071 | + struct mm_struct *mm = NULL; |
| 2072 | + struct task_struct *task; |
| 2073 | + const struct cred *cred; |
| 2074 | + struct inode *inode; |
| 2075 | + int status = 0; |
| 2076 | + |
| 2077 | + if (nd && nd->flags & LOOKUP_RCU) |
| 2078 | + return -ECHILD; |
| 2079 | + |
| 2080 | + if (!capable(CAP_SYS_ADMIN)) { |
| 2081 | + status = -EACCES; |
| 2082 | + goto out_notask; |
| 2083 | + } |
| 2084 | + |
| 2085 | + inode = dentry->d_inode; |
| 2086 | + task = get_proc_task(inode); |
| 2087 | + if (!task) |
| 2088 | + goto out_notask; |
| 2089 | + |
| 2090 | + if (!ptrace_may_access(task, PTRACE_MODE_READ)) |
| 2091 | + goto out; |
| 2092 | + |
| 2093 | + mm = get_task_mm(task); |
| 2094 | + if (!mm) |
| 2095 | + goto out; |
| 2096 | + |
| 2097 | + if (!dname_to_vma_addr(dentry, &vm_start, &vm_end)) { |
| 2098 | + down_read(&mm->mmap_sem); |
| 2099 | + exact_vma_exists = !!find_exact_vma(mm, vm_start, vm_end); |
| 2100 | + up_read(&mm->mmap_sem); |
| 2101 | + } |
| 2102 | + |
| 2103 | + mmput(mm); |
| 2104 | + |
| 2105 | + if (exact_vma_exists) { |
| 2106 | + if (task_dumpable(task)) { |
| 2107 | + rcu_read_lock(); |
| 2108 | + cred = __task_cred(task); |
| 2109 | + inode->i_uid = cred->euid; |
| 2110 | + inode->i_gid = cred->egid; |
| 2111 | + rcu_read_unlock(); |
| 2112 | + } else { |
| 2113 | + inode->i_uid = 0; |
| 2114 | + inode->i_gid = 0; |
| 2115 | + } |
| 2116 | + security_task_to_inode(task, inode); |
| 2117 | + status = 1; |
| 2118 | + } |
| 2119 | + |
| 2120 | +out: |
| 2121 | + put_task_struct(task); |
| 2122 | + |
| 2123 | +out_notask: |
| 2124 | + if (status <= 0) |
| 2125 | + d_drop(dentry); |
| 2126 | + |
| 2127 | + return status; |
| 2128 | +} |
| 2129 | + |
| 2130 | +static const struct dentry_operations tid_map_files_dentry_operations = { |
| 2131 | + .d_revalidate = map_files_d_revalidate, |
| 2132 | + .d_delete = pid_delete_dentry, |
| 2133 | +}; |
| 2134 | + |
| 2135 | +static int proc_map_files_get_link(struct dentry *dentry, struct path *path) |
| 2136 | +{ |
| 2137 | + unsigned long vm_start, vm_end; |
| 2138 | + struct vm_area_struct *vma; |
| 2139 | + struct task_struct *task; |
| 2140 | + struct mm_struct *mm; |
| 2141 | + int rc; |
| 2142 | + |
| 2143 | + rc = -ENOENT; |
| 2144 | + task = get_proc_task(dentry->d_inode); |
| 2145 | + if (!task) |
| 2146 | + goto out; |
| 2147 | + |
| 2148 | + mm = get_task_mm(task); |
| 2149 | + put_task_struct(task); |
| 2150 | + if (!mm) |
| 2151 | + goto out; |
| 2152 | + |
| 2153 | + rc = dname_to_vma_addr(dentry, &vm_start, &vm_end); |
| 2154 | + if (rc) |
| 2155 | + goto out_mmput; |
| 2156 | + |
| 2157 | + down_read(&mm->mmap_sem); |
| 2158 | + vma = find_exact_vma(mm, vm_start, vm_end); |
| 2159 | + if (vma && vma->vm_file) { |
| 2160 | + *path = vma->vm_file->f_path; |
| 2161 | + path_get(path); |
| 2162 | + rc = 0; |
| 2163 | + } |
| 2164 | + up_read(&mm->mmap_sem); |
| 2165 | + |
| 2166 | +out_mmput: |
| 2167 | + mmput(mm); |
| 2168 | +out: |
| 2169 | + return rc; |
| 2170 | +} |
| 2171 | + |
/*
 * One map_files directory entry as collected by the readdir pass:
 * the mapped file, the entry name, and the length of that name.
 */
struct map_files_info {
	struct file	*file;
	unsigned long	len;
	/* Room for "%lx-%lx": two longs in hex, the '-' and a trailing NUL. */
	unsigned char	name[4 * sizeof(long) + 2];
};
| 2177 | + |
| 2178 | +static struct dentry * |
| 2179 | +proc_map_files_instantiate(struct inode *dir, struct dentry *dentry, |
| 2180 | + struct task_struct *task, const void *ptr) |
| 2181 | +{ |
| 2182 | + const struct file *file = ptr; |
| 2183 | + struct proc_inode *ei; |
| 2184 | + struct inode *inode; |
| 2185 | + |
| 2186 | + if (!file) |
| 2187 | + return ERR_PTR(-ENOENT); |
| 2188 | + |
| 2189 | + inode = proc_pid_make_inode(dir->i_sb, task); |
| 2190 | + if (!inode) |
| 2191 | + return ERR_PTR(-ENOENT); |
| 2192 | + |
| 2193 | + ei = PROC_I(inode); |
| 2194 | + ei->op.proc_get_link = proc_map_files_get_link; |
| 2195 | + |
| 2196 | + inode->i_op = &proc_pid_link_inode_operations; |
| 2197 | + inode->i_size = 64; |
| 2198 | + inode->i_mode = S_IFLNK; |
| 2199 | + |
| 2200 | + if (file->f_mode & FMODE_READ) |
| 2201 | + inode->i_mode |= S_IRUSR; |
| 2202 | + if (file->f_mode & FMODE_WRITE) |
| 2203 | + inode->i_mode |= S_IWUSR; |
| 2204 | + |
| 2205 | + d_set_d_op(dentry, &tid_map_files_dentry_operations); |
| 2206 | + d_add(dentry, inode); |
| 2207 | + |
| 2208 | + return NULL; |
| 2209 | +} |
| 2210 | + |
| 2211 | +static struct dentry *proc_map_files_lookup(struct inode *dir, |
| 2212 | + struct dentry *dentry, struct nameidata *nd) |
| 2213 | +{ |
| 2214 | + unsigned long vm_start, vm_end; |
| 2215 | + struct vm_area_struct *vma; |
| 2216 | + struct task_struct *task; |
| 2217 | + struct dentry *result; |
| 2218 | + struct mm_struct *mm; |
| 2219 | + |
| 2220 | + result = ERR_PTR(-EACCES); |
| 2221 | + if (!capable(CAP_SYS_ADMIN)) |
| 2222 | + goto out; |
| 2223 | + |
| 2224 | + result = ERR_PTR(-ENOENT); |
| 2225 | + task = get_proc_task(dir); |
| 2226 | + if (!task) |
| 2227 | + goto out; |
| 2228 | + |
| 2229 | + result = ERR_PTR(-EACCES); |
| 2230 | + if (lock_trace(task)) |
| 2231 | + goto out_put_task; |
| 2232 | + |
| 2233 | + result = ERR_PTR(-ENOENT); |
| 2234 | + if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) |
| 2235 | + goto out_unlock; |
| 2236 | + |
| 2237 | + mm = get_task_mm(task); |
| 2238 | + if (!mm) |
| 2239 | + goto out_unlock; |
| 2240 | + |
| 2241 | + down_read(&mm->mmap_sem); |
| 2242 | + vma = find_exact_vma(mm, vm_start, vm_end); |
| 2243 | + if (!vma) |
| 2244 | + goto out_no_vma; |
| 2245 | + |
| 2246 | + result = proc_map_files_instantiate(dir, dentry, task, vma->vm_file); |
| 2247 | + |
| 2248 | +out_no_vma: |
| 2249 | + up_read(&mm->mmap_sem); |
| 2250 | + mmput(mm); |
| 2251 | +out_unlock: |
| 2252 | + unlock_trace(task); |
| 2253 | +out_put_task: |
| 2254 | + put_task_struct(task); |
| 2255 | +out: |
| 2256 | + return result; |
| 2257 | +} |
| 2258 | + |
| 2259 | +static const struct inode_operations proc_map_files_inode_operations = { |
| 2260 | + .lookup = proc_map_files_lookup, |
| 2261 | + .permission = proc_fd_permission, |
| 2262 | + .setattr = proc_setattr, |
| 2263 | +}; |
| 2264 | + |
| 2265 | +static int |
| 2266 | +proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) |
| 2267 | +{ |
| 2268 | + struct dentry *dentry = filp->f_path.dentry; |
| 2269 | + struct inode *inode = dentry->d_inode; |
| 2270 | + struct vm_area_struct *vma; |
| 2271 | + struct task_struct *task; |
| 2272 | + struct mm_struct *mm; |
| 2273 | + ino_t ino; |
| 2274 | + int ret; |
| 2275 | + |
| 2276 | + ret = -EACCES; |
| 2277 | + if (!capable(CAP_SYS_ADMIN)) |
| 2278 | + goto out; |
| 2279 | + |
| 2280 | + ret = -ENOENT; |
| 2281 | + task = get_proc_task(inode); |
| 2282 | + if (!task) |
| 2283 | + goto out; |
| 2284 | + |
| 2285 | + ret = -EACCES; |
| 2286 | + if (lock_trace(task)) |
| 2287 | + goto out_put_task; |
| 2288 | + |
| 2289 | + ret = 0; |
| 2290 | + switch (filp->f_pos) { |
| 2291 | + case 0: |
| 2292 | + ino = inode->i_ino; |
| 2293 | + if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0) |
| 2294 | + goto out_unlock; |
| 2295 | + filp->f_pos++; |
| 2296 | + case 1: |
| 2297 | + ino = parent_ino(dentry); |
| 2298 | + if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) |
| 2299 | + goto out_unlock; |
| 2300 | + filp->f_pos++; |
| 2301 | + default: |
| 2302 | + { |
| 2303 | + unsigned long nr_files, pos, i; |
| 2304 | + struct flex_array *fa = NULL; |
| 2305 | + struct map_files_info info; |
| 2306 | + struct map_files_info *p; |
| 2307 | + |
| 2308 | + mm = get_task_mm(task); |
| 2309 | + if (!mm) |
| 2310 | + goto out_unlock; |
| 2311 | + down_read(&mm->mmap_sem); |
| 2312 | + |
| 2313 | + nr_files = 0; |
| 2314 | + |
| 2315 | + /* |
| 2316 | + * We need two passes here: |
| 2317 | + * |
| 2318 | + * 1) Collect vmas of mapped files with mmap_sem taken |
| 2319 | + * 2) Release mmap_sem and instantiate entries |
| 2320 | + * |
| 2321 | + * otherwise we get lockdep complained, since filldir() |
| 2322 | + * routine might require mmap_sem taken in might_fault(). |
| 2323 | + */ |
| 2324 | + |
| 2325 | + for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { |
| 2326 | + if (vma->vm_file && ++pos > filp->f_pos) |
| 2327 | + nr_files++; |
| 2328 | + } |
| 2329 | + |
| 2330 | + if (nr_files) { |
| 2331 | + fa = flex_array_alloc(sizeof(info), nr_files, |
| 2332 | + GFP_KERNEL); |
| 2333 | + if (!fa || flex_array_prealloc(fa, 0, nr_files, |
| 2334 | + GFP_KERNEL)) { |
| 2335 | + ret = -ENOMEM; |
| 2336 | + if (fa) |
| 2337 | + flex_array_free(fa); |
| 2338 | + up_read(&mm->mmap_sem); |
| 2339 | + mmput(mm); |
| 2340 | + goto out_unlock; |
| 2341 | + } |
| 2342 | + for (i = 0, vma = mm->mmap, pos = 2; vma; |
| 2343 | + vma = vma->vm_next) { |
| 2344 | + if (!vma->vm_file) |
| 2345 | + continue; |
| 2346 | + if (++pos <= filp->f_pos) |
| 2347 | + continue; |
| 2348 | + |
| 2349 | + get_file(vma->vm_file); |
| 2350 | + info.file = vma->vm_file; |
| 2351 | + info.len = snprintf(info.name, |
| 2352 | + sizeof(info.name), "%lx-%lx", |
| 2353 | + vma->vm_start, vma->vm_end); |
| 2354 | + if (flex_array_put(fa, i++, &info, GFP_KERNEL)) |
| 2355 | + BUG(); |
| 2356 | + } |
| 2357 | + } |
| 2358 | + up_read(&mm->mmap_sem); |
| 2359 | + |
| 2360 | + for (i = 0; i < nr_files; i++) { |
| 2361 | + p = flex_array_get(fa, i); |
| 2362 | + ret = proc_fill_cache(filp, dirent, filldir, |
| 2363 | + p->name, p->len, |
| 2364 | + proc_map_files_instantiate, |
| 2365 | + task, p->file); |
| 2366 | + if (ret) |
| 2367 | + break; |
| 2368 | + filp->f_pos++; |
| 2369 | + fput(p->file); |
| 2370 | + } |
| 2371 | + for (; i < nr_files; i++) { |
| 2372 | + /* |
| 2373 | + * In case of error don't forget |
| 2374 | + * to put rest of file refs. |
| 2375 | + */ |
| 2376 | + p = flex_array_get(fa, i); |
| 2377 | + fput(p->file); |
| 2378 | + } |
| 2379 | + if (fa) |
| 2380 | + flex_array_free(fa); |
| 2381 | + mmput(mm); |
| 2382 | + } |
| 2383 | + } |
| 2384 | + |
| 2385 | +out_unlock: |
| 2386 | + unlock_trace(task); |
| 2387 | +out_put_task: |
| 2388 | + put_task_struct(task); |
| 2389 | +out: |
| 2390 | + return ret; |
| 2391 | +} |
| 2392 | + |
| 2393 | +static const struct file_operations proc_map_files_operations = { |
| 2394 | + .read = generic_read_dir, |
| 2395 | + .readdir = proc_map_files_readdir, |
| 2396 | + .llseek = default_llseek, |
| 2397 | +}; |
| 2398 | + |
| 2399 | +#endif /* CONFIG_CHECKPOINT_RESTORE */ |
| 2400 | + |
2049 | 2401 | /*
|
2050 | 2402 | * /proc/pid/fd needs a special permission handler so that a process can still
|
2051 | 2403 | * access /proc/self/fd after it has executed a setuid().
|
@@ -2661,6 +3013,9 @@ static const struct inode_operations proc_task_inode_operations;
|
2661 | 3013 | static const struct pid_entry tgid_base_stuff[] = {
|
2662 | 3014 | DIR("task", S_IRUGO|S_IXUGO, proc_task_inode_operations, proc_task_operations),
|
2663 | 3015 | DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations),
|
| 3016 | +#ifdef CONFIG_CHECKPOINT_RESTORE |
| 3017 | + DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), |
| 3018 | +#endif |
2664 | 3019 | DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations),
|
2665 | 3020 | DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations),
|
2666 | 3021 | #ifdef CONFIG_NET
|
|
0 commit comments