1. fork()函数介绍（百度百科）

fork系统调用用于创建一个新进程，称为子进程；它与发起fork系统调用的进程（称为父进程）同时运行。创建新的子进程后，两个进程都将执行fork()系统调用之后的下一条指令。子进程使用相同的pc（程序计数器）、相同的CPU寄存器，以及在父进程中使用的相同打开文件。

它不需要参数并返回一个整数值。下面是fork()返回的不同值：
- 负值：创建子进程失败。
- 零：返回到新创建的子进程。
- 正值：返回父进程或调用者。该值包含新创建的子进程的进程ID。

2. fork()使用示例（百度百科）
#includeunistd.h
#includestdio.h
#includestdlib.hint main(int argc,char *argv[]){pid_t pidfork();if ( pid 0 ) {fprintf(stderr,错误);} else if( pid 0 ) {printf(子进程空间);exit(0);} else {printf(父进程空间子进程pid为%d,pid);}// 可以使用wait或waitpid函数等待子进程的结束并获取结束状态exit(0);
}3. Linux中fork()代码实现分析
3.1 fork()系统调用定义
#ifdef __ARCH_WANT_SYS_FORK
SYSCALL_DEFINE0(fork)
{
#ifdef CONFIG_MMUstruct kernel_clone_args args {.exit_signal SIGCHLD,};return kernel_clone(args);
#else/* can not support in nommu mode */return -EINVAL;
#endif
}
#endiffork()函数不需要传递任何参数因此他的系统调用声明为DEFINE0。我们继续跟踪fork()系统调用的实现这里发现是直接调用kernel_clone()函数进行后续处理。
3.2 跟踪kernel_clone()函数实现
/** Ok, this is the main fork-routine.** It copies the process, and if successful kick-starts* it and waits for it to finish using the VM if required.** args-exit_signal is expected to be checked for sanity by the caller.*/
pid_t kernel_clone(struct kernel_clone_args *args)
{u64 clone_flags args-flags;struct completion vfork;struct pid *pid;struct task_struct *p;int trace 0;pid_t nr;/** For legacy clone() calls, CLONE_PIDFD uses the parent_tid argument* to return the pidfd. Hence, CLONE_PIDFD and CLONE_PARENT_SETTID are* mutually exclusive. With clone3() CLONE_PIDFD has grown a separate* field in struct clone_args and it still doesnt make sense to have* them both point at the same memory location. Performing this check* here has the advantage that we dont need to have a separate helper* to check for legacy clone().*/if ((args-flags CLONE_PIDFD) (args-flags CLONE_PARENT_SETTID) (args-pidfd args-parent_tid))return -EINVAL;/** Determine whether and which event to report to ptracer. When* called from kernel_thread or CLONE_UNTRACED is explicitly* requested, no event is reported; otherwise, report if the event* for the type of forking is enabled.*/if (!(clone_flags CLONE_UNTRACED)) {if (clone_flags CLONE_VFORK)trace PTRACE_EVENT_VFORK;else if (args-exit_signal ! 
SIGCHLD)trace PTRACE_EVENT_CLONE;elsetrace PTRACE_EVENT_FORK;if (likely(!ptrace_event_enabled(current, trace)))trace 0;}/* 通过copy_process()函数创建一个新的进程 */p copy_process(NULL, trace, NUMA_NO_NODE, args);add_latent_entropy();if (IS_ERR(p))return PTR_ERR(p);/** Do this prior waking up the new thread - the thread pointer* might get invalid after that point, if the thread exits quickly.*/trace_sched_process_fork(current, p);pid get_task_pid(p, PIDTYPE_PID);nr pid_vnr(pid);if (clone_flags CLONE_PARENT_SETTID)put_user(nr, args-parent_tid);if (clone_flags CLONE_VFORK) {p-vfork_done vfork;init_completion(vfork);get_task_struct(p);}if (IS_ENABLED(CONFIG_LRU_GEN) !(clone_flags CLONE_VM)) {/* lock the task to synchronize with memcg migration */task_lock(p);lru_gen_add_mm(p-mm);task_unlock(p);}/* 唤醒新创建的进程 */wake_up_new_task(p);/* forking complete and child started to run, tell ptracer */if (unlikely(trace))ptrace_event_pid(trace, pid);if (clone_flags CLONE_VFORK) {if (!wait_for_vfork_done(p, vfork))ptrace_event_pid(PTRACE_EVENT_VFORK_DONE, pid);}put_pid(pid);return nr;
}从kernel_clone()函数定义可以看出新进程的创建是通过拷贝父进程来实现的通过copy_process()完成拷贝动作而新进程的调度运行是通过wake_up_new_task()函数进行处理的。
3.3 跟踪copy_process()函数实现
/** This creates a new process as a copy of the old one,* but does not actually start it yet.** It copies the registers, and all the appropriate* parts of the process environment (as per the clone* flags). The actual kick-off is left to the caller.*/
static __latent_entropy struct task_struct *copy_process(struct pid *pid,int trace,int node,struct kernel_clone_args *args)
{int pidfd -1, retval;struct task_struct *p;struct multiprocess_signals delayed;struct file *pidfile NULL;const u64 clone_flags args-flags;struct nsproxy *nsp current-nsproxy;/** Dont allow sharing the root directory with processes in a different* namespace*/if ((clone_flags (CLONE_NEWNS|CLONE_FS)) (CLONE_NEWNS|CLONE_FS))return ERR_PTR(-EINVAL);if ((clone_flags (CLONE_NEWUSER|CLONE_FS)) (CLONE_NEWUSER|CLONE_FS))return ERR_PTR(-EINVAL);/** Thread groups must share signals as well, and detached threads* can only be started up within the thread group.*/if ((clone_flags CLONE_THREAD) !(clone_flags CLONE_SIGHAND))return ERR_PTR(-EINVAL);/** Shared signal handlers imply shared VM. By way of the above,* thread groups also imply shared VM. Blocking this case allows* for various simplifications in other code.*/if ((clone_flags CLONE_SIGHAND) !(clone_flags CLONE_VM))return ERR_PTR(-EINVAL);/** Siblings of global init remain as zombies on exit since they are* not reaped by their parent (swapper). To solve this and to avoid* multi-rooted process trees, prevent global and container-inits* from creating siblings.*/if ((clone_flags CLONE_PARENT) current-signal-flags SIGNAL_UNKILLABLE)return ERR_PTR(-EINVAL);/** If the new process will be in a different pid or user namespace* do not allow it to share a thread group with the forking task.*/if (clone_flags CLONE_THREAD) {if ((clone_flags (CLONE_NEWUSER | CLONE_NEWPID)) ||(task_active_pid_ns(current) ! nsp-pid_ns_for_children))return ERR_PTR(-EINVAL);}/** If the new process will be in a different time namespace* do not allow it to share VM or a thread group with the forking task.*/if (clone_flags (CLONE_THREAD | CLONE_VM)) {if (nsp-time_ns ! 
nsp-time_ns_for_children)return ERR_PTR(-EINVAL);}if (clone_flags CLONE_PIDFD) {/** - CLONE_DETACHED is blocked so that we can potentially* reuse it later for CLONE_PIDFD.* - CLONE_THREAD is blocked until someone really needs it.*/if (clone_flags (CLONE_DETACHED | CLONE_THREAD))return ERR_PTR(-EINVAL);}/** Force any signals received before this point to be delivered* before the fork happens. Collect up signals sent to multiple* processes that happen during the fork and delay them so that* they appear to happen after the fork.*/sigemptyset(delayed.signal);INIT_HLIST_NODE(delayed.node);spin_lock_irq(current-sighand-siglock);if (!(clone_flags CLONE_THREAD))hlist_add_head(delayed.node, current-signal-multiprocess);recalc_sigpending();spin_unlock_irq(current-sighand-siglock);retval -ERESTARTNOINTR;if (task_sigpending(current))goto fork_out;retval -ENOMEM;/* 复制父进程的task_struct到新创建的进程新进程的内核栈也在这个函数中分配 */p dup_task_struct(current, node);if (!p)goto fork_out;p-flags ~PF_KTHREAD;if (args-kthread)p-flags | PF_KTHREAD;if (args-io_thread) {/** Mark us an IO worker, and block any signal that isnt* fatal or STOP*/p-flags | PF_IO_WORKER;siginitsetinv(p-blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));}p-set_child_tid (clone_flags CLONE_CHILD_SETTID) ? args-child_tid : NULL;/** Clear TID on mm_release()?*/p-clear_child_tid (clone_flags CLONE_CHILD_CLEARTID) ? args-child_tid : NULL;ftrace_graph_init_task(p);rt_mutex_init_task(p);lockdep_assert_irqs_enabled();
#ifdef CONFIG_PROVE_LOCKINGDEBUG_LOCKS_WARN_ON(!p-softirqs_enabled);
#endifretval copy_creds(p, clone_flags);if (retval 0)goto bad_fork_free;retval -EAGAIN;if (is_rlimit_overlimit(task_ucounts(p), UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC))) {if (p-real_cred-user ! INIT_USER !capable(CAP_SYS_RESOURCE) !capable(CAP_SYS_ADMIN))goto bad_fork_cleanup_count;}current-flags ~PF_NPROC_EXCEEDED;/** If multiple threads are within copy_process(), then this check* triggers too late. This doesnt hurt, the check is only there* to stop root fork bombs.*/retval -EAGAIN;if (data_race(nr_threads max_threads))goto bad_fork_cleanup_count;delayacct_tsk_init(p); /* Must remain after dup_task_struct() */p-flags ~(PF_SUPERPRIV | PF_WQ_WORKER | PF_IDLE | PF_NO_SETAFFINITY);p-flags | PF_FORKNOEXEC;INIT_LIST_HEAD(p-children);INIT_LIST_HEAD(p-sibling);rcu_copy_process(p);p-vfork_done NULL;spin_lock_init(p-alloc_lock);init_sigpending(p-pending);p-utime p-stime p-gtime 0;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIMEp-utimescaled p-stimescaled 0;
#endifprev_cputime_init(p-prev_cputime);#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GENseqcount_init(p-vtime.seqcount);p-vtime.starttime 0;p-vtime.state VTIME_INACTIVE;
#endif#ifdef CONFIG_IO_URINGp-io_uring NULL;
#endif#if defined(SPLIT_RSS_COUNTING)memset(p-rss_stat, 0, sizeof(p-rss_stat));
#endifp-default_timer_slack_ns current-timer_slack_ns;#ifdef CONFIG_PSIp-psi_flags 0;
#endiftask_io_accounting_init(p-ioac);acct_clear_integrals(p);posix_cputimers_init(p-posix_cputimers);p-io_context NULL;audit_set_context(p, NULL);cgroup_fork(p);if (args-kthread) {if (!set_kthread_struct(p))goto bad_fork_cleanup_delayacct;}
#ifdef CONFIG_NUMAp-mempolicy mpol_dup(p-mempolicy);if (IS_ERR(p-mempolicy)) {retval PTR_ERR(p-mempolicy);p-mempolicy NULL;goto bad_fork_cleanup_delayacct;}
#endif
#ifdef CONFIG_CPUSETSp-cpuset_mem_spread_rotor NUMA_NO_NODE;p-cpuset_slab_spread_rotor NUMA_NO_NODE;seqcount_spinlock_init(p-mems_allowed_seq, p-alloc_lock);
#endif
#ifdef CONFIG_TRACE_IRQFLAGSmemset(p-irqtrace, 0, sizeof(p-irqtrace));p-irqtrace.hardirq_disable_ip _THIS_IP_;p-irqtrace.softirq_enable_ip _THIS_IP_;p-softirqs_enabled 1;p-softirq_context 0;
#endifp-pagefault_disabled 0;#ifdef CONFIG_LOCKDEPlockdep_init_task(p);
#endif#ifdef CONFIG_DEBUG_MUTEXESp-blocked_on NULL; /* not blocked yet */
#endif
#ifdef CONFIG_BCACHEp-sequential_io 0;p-sequential_io_avg 0;
#endif
#ifdef CONFIG_BPF_SYSCALLRCU_INIT_POINTER(p-bpf_storage, NULL);p-bpf_ctx NULL;
#endif/* Perform scheduler related setup. Assign this task to a CPU. */retval sched_fork(clone_flags, p);if (retval)goto bad_fork_cleanup_policy;retval perf_event_init_task(p, clone_flags);if (retval)goto bad_fork_cleanup_policy;retval audit_alloc(p);if (retval)goto bad_fork_cleanup_perf;/* copy all the process information */shm_init_task(p);retval security_task_alloc(p, clone_flags);if (retval)goto bad_fork_cleanup_audit;retval copy_semundo(clone_flags, p);if (retval)goto bad_fork_cleanup_security;/* 将父进程的所有打开文件描述符表都复制到新创建的进程中 */retval copy_files(clone_flags, p);if (retval)goto bad_fork_cleanup_semundo;retval copy_fs(clone_flags, p);if (retval)goto bad_fork_cleanup_files;retval copy_sighand(clone_flags, p);if (retval)goto bad_fork_cleanup_fs;retval copy_signal(clone_flags, p);if (retval)goto bad_fork_cleanup_sighand;/* 将父进程的内存空间拷贝到新进程中其实就是为新进程创建页表把父进程的页表项拷贝到新进程中 */retval copy_mm(clone_flags, p);if (retval)goto bad_fork_cleanup_signal;retval copy_namespaces(clone_flags, p);if (retval)goto bad_fork_cleanup_mm;retval copy_io(clone_flags, p);if (retval)goto bad_fork_cleanup_namespaces;retval copy_thread(p, args);if (retval)goto bad_fork_cleanup_io;stackleak_task_init(p);if (pid ! init_struct_pid) {pid alloc_pid(p-nsproxy-pid_ns_for_children, args-set_tid,args-set_tid_size);if (IS_ERR(pid)) {retval PTR_ERR(pid);goto bad_fork_cleanup_thread;}}/** This has to happen after weve potentially unshared the file* descriptor table (so that the pidfd doesnt leak into the child* if the fd table isnt shared).*/if (clone_flags CLONE_PIDFD) {retval get_unused_fd_flags(O_RDWR | O_CLOEXEC);if (retval 0)goto bad_fork_free_pid;pidfd retval;pidfile anon_inode_getfile([pidfd], pidfd_fops, pid,O_RDWR | O_CLOEXEC);if (IS_ERR(pidfile)) {put_unused_fd(pidfd);retval PTR_ERR(pidfile);goto bad_fork_free_pid;}get_pid(pid); /* held by pidfile now */retval put_user(pidfd, args-pidfd);if (retval)goto bad_fork_put_pidfd;}#ifdef CONFIG_BLOCKp-plug NULL;
#endiffutex_init_task(p);/** sigaltstack should be cleared when sharing the same VM*/if ((clone_flags (CLONE_VM|CLONE_VFORK)) CLONE_VM)sas_ss_reset(p);/** Syscall tracing and stepping should be turned off in the* child regardless of CLONE_PTRACE.*/user_disable_single_step(p);clear_task_syscall_work(p, SYSCALL_TRACE);
#if defined(CONFIG_GENERIC_ENTRY) || defined(TIF_SYSCALL_EMU)clear_task_syscall_work(p, SYSCALL_EMU);
#endifclear_tsk_latency_tracing(p);/* ok, now we should be set up.. */p-pid pid_nr(pid);if (clone_flags CLONE_THREAD) {p-group_leader current-group_leader;p-tgid current-tgid;} else {p-group_leader p;p-tgid p-pid;}p-nr_dirtied 0;p-nr_dirtied_pause 128 (PAGE_SHIFT - 10);p-dirty_paused_when 0;p-pdeath_signal 0;INIT_LIST_HEAD(p-thread_group);p-task_works NULL;clear_posix_cputimers_work(p);#ifdef CONFIG_KRETPROBESp-kretprobe_instances.first NULL;
#endif
#ifdef CONFIG_RETHOOKp-rethooks.first NULL;
#endif/** Ensure that the cgroup subsystem policies allow the new process to be* forked. It should be noted that the new processs css_set can be changed* between here and cgroup_post_fork() if an organisation operation is in* progress.*/retval cgroup_can_fork(p, args);if (retval)goto bad_fork_put_pidfd;/** Now that the cgroups are pinned, re-clone the parent cgroup and put* the new task on the correct runqueue. All this *before* the task* becomes visible.** This isnt part of -can_fork() because while the re-cloning is* cgroup specific, it unconditionally needs to place the task on a* runqueue.*/sched_cgroup_fork(p, args);/** From this point on we must avoid any synchronous user-space* communication until we take the tasklist-lock. In particular, we do* not want user-space to be able to predict the process start-time by* stalling fork(2) after we recorded the start_time but before it is* visible to the system.*/p-start_time ktime_get_ns();p-start_boottime ktime_get_boottime_ns();/** Make it visible to the rest of the system, but dont wake it up yet.* Need tasklist lock for parent etc handling!*/write_lock_irq(tasklist_lock);/* CLONE_PARENT re-uses the old parent */if (clone_flags (CLONE_PARENT|CLONE_THREAD)) {p-real_parent current-real_parent;p-parent_exec_id current-parent_exec_id;if (clone_flags CLONE_THREAD)p-exit_signal -1;elsep-exit_signal current-group_leader-exit_signal;} else {p-real_parent current;p-parent_exec_id current-self_exec_id;p-exit_signal args-exit_signal;}klp_copy_process(p);sched_core_fork(p);spin_lock(current-sighand-siglock);rv_task_fork(p);rseq_fork(p, clone_flags);/* Dont start children in a dying pid namespace */if (unlikely(!(ns_of_pid(pid)-pid_allocated PIDNS_ADDING))) {retval -ENOMEM;goto bad_fork_cancel_cgroup;}/* Let kill terminate clone/fork in the middle */if (fatal_signal_pending(current)) {retval -EINTR;goto bad_fork_cancel_cgroup;}/* No more failure paths after this point. 
*//** Copy seccomp details explicitly here, in case they were changed* before holding sighand lock.*/copy_seccomp(p);init_task_pid_links(p);if (likely(p-pid)) {ptrace_init_task(p, (clone_flags CLONE_PTRACE) || trace);init_task_pid(p, PIDTYPE_PID, pid);if (thread_group_leader(p)) {init_task_pid(p, PIDTYPE_TGID, pid);init_task_pid(p, PIDTYPE_PGID, task_pgrp(current));init_task_pid(p, PIDTYPE_SID, task_session(current));if (is_child_reaper(pid)) {ns_of_pid(pid)-child_reaper p;p-signal-flags | SIGNAL_UNKILLABLE;}p-signal-shared_pending.signal delayed.signal;p-signal-tty tty_kref_get(current-signal-tty);/** Inherit has_child_subreaper flag under the same* tasklist_lock with adding child to the process tree* for propagate_has_child_subreaper optimization.*/p-signal-has_child_subreaper p-real_parent-signal-has_child_subreaper ||p-real_parent-signal-is_child_subreaper;list_add_tail(p-sibling, p-real_parent-children);list_add_tail_rcu(p-tasks, init_task.tasks);attach_pid(p, PIDTYPE_TGID);attach_pid(p, PIDTYPE_PGID);attach_pid(p, PIDTYPE_SID);__this_cpu_inc(process_counts);} else {current-signal-nr_threads;current-signal-quick_threads;atomic_inc(current-signal-live);refcount_inc(current-signal-sigcnt);task_join_group_stop(p);list_add_tail_rcu(p-thread_group,p-group_leader-thread_group);list_add_tail_rcu(p-thread_node,p-signal-thread_head);}attach_pid(p, PIDTYPE_PID);nr_threads;}total_forks;hlist_del_init(delayed.node);spin_unlock(current-sighand-siglock);syscall_tracepoint_update(p);write_unlock_irq(tasklist_lock);if (pidfile)fd_install(pidfd, pidfile);proc_fork_connector(p);sched_post_fork(p);cgroup_post_fork(p, args);perf_event_fork(p);trace_task_newtask(p, clone_flags);uprobe_copy_process(p, clone_flags);copy_oom_score_adj(clone_flags, p);return p;bad_fork_cancel_cgroup:sched_core_free(p);spin_unlock(current-sighand-siglock);write_unlock_irq(tasklist_lock);cgroup_cancel_fork(p, args);
bad_fork_put_pidfd:if (clone_flags CLONE_PIDFD) {fput(pidfile);put_unused_fd(pidfd);}
bad_fork_free_pid:if (pid ! init_struct_pid)free_pid(pid);
bad_fork_cleanup_thread:exit_thread(p);
bad_fork_cleanup_io:if (p-io_context)exit_io_context(p);
bad_fork_cleanup_namespaces:exit_task_namespaces(p);
bad_fork_cleanup_mm:if (p-mm) {mm_clear_owner(p-mm, p);mmput(p-mm);}
bad_fork_cleanup_signal:if (!(clone_flags CLONE_THREAD))free_signal_struct(p-signal);
bad_fork_cleanup_sighand:__cleanup_sighand(p-sighand);
bad_fork_cleanup_fs:exit_fs(p); /* blocking */
bad_fork_cleanup_files:exit_files(p); /* blocking */
bad_fork_cleanup_semundo:exit_sem(p);
bad_fork_cleanup_security:security_task_free(p);
bad_fork_cleanup_audit:audit_free(p);
bad_fork_cleanup_perf:perf_event_free_task(p);
bad_fork_cleanup_policy:lockdep_free_task(p);
#ifdef CONFIG_NUMAmpol_put(p-mempolicy);
#endif
bad_fork_cleanup_delayacct:delayacct_tsk_free(p);
bad_fork_cleanup_count:dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);exit_creds(p);
bad_fork_free:WRITE_ONCE(p-__state, TASK_DEAD);exit_task_stack_account(p);put_task_stack(p);delayed_free_task(p);
fork_out:spin_lock_irq(current-sighand-siglock);hlist_del_init(delayed.node);spin_unlock_irq(current-sighand-siglock);return ERR_PTR(retval);
}copy_process()函数内容较多博主只介绍跟内存空间相关的复制动作因此下面介绍copy_mm()的实现。
3.4 跟踪copy_mm()函数实现
static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
{struct mm_struct *mm, *oldmm;tsk-min_flt tsk-maj_flt 0;tsk-nvcsw tsk-nivcsw 0;
#ifdef CONFIG_DETECT_HUNG_TASKtsk-last_switch_count tsk-nvcsw tsk-nivcsw;tsk-last_switch_time 0;
#endiftsk-mm NULL;tsk-active_mm NULL;/** Are we cloning a kernel thread?** We need to steal a active VM for that..*/oldmm current-mm;if (!oldmm)return 0;/* 如果是线程的创建则不需要为新的线程task_struct创建新的mm_struct结构和父线程共享即可其实这也就是人们常说的多进程间是共享同一内存的原因或者是多进程之间通信简单 */if (clone_flags CLONE_VM) {mmget(oldmm);mm oldmm;} else { /* 如果是需要创建新的进程则需要去分配新的mm_struct */mm dup_mm(tsk, current-mm);if (!mm)return -ENOMEM;}tsk-mm mm;tsk-active_mm mm;return 0;
}对于多线程的copy_mm()到这里就结束了子线程共享父线程的地址空间但是对于创建新的进程来说还需要去创建新的mm_struct因此还需要跟踪dup_mm()的实现。
3.5 跟踪dup_mm()函数实现
/*** dup_mm() - duplicates an existing mm structure* tsk: the task_struct with which the new mm will be associated.* oldmm: the mm to duplicate.** Allocates a new mm structure and duplicates the provided oldmm structure* content into it.** Return: the duplicated mm or NULL on failure.*/
static struct mm_struct *dup_mm(struct task_struct *tsk,struct mm_struct *oldmm)
{struct mm_struct *mm;int err;/* 分配一个新的mm_struct */mm allocate_mm();if (!mm)goto fail_nomem;/* 拷贝父进程mm_struct的内容到子进程中 */memcpy(mm, oldmm, sizeof(*mm));/* 新进程的mm_struct初始化操作 */if (!mm_init(mm, tsk, mm-user_ns))goto fail_nomem;/* 拷贝父进程的所有vma到子进程子进程的页表也是在此创建 */err dup_mmap(mm, oldmm);if (err)goto free_pt;mm-hiwater_rss get_mm_rss(mm);mm-hiwater_vm mm-total_vm;if (mm-binfmt !try_module_get(mm-binfmt-module))goto free_pt;return mm;free_pt:/* dont put binfmt in mmput, we havent got module yet */mm-binfmt NULL;mm_init_owner(mm, NULL);mmput(mm);fail_nomem:return NULL;
}在dup_mm()函数内部通过调用dup_mmap()函数来将父进程的所有vma拷贝到新创建的子进程的mm_struct中。
3.6 跟踪dup_mmap()函数实现
#ifdef CONFIG_MMU
static __latent_entropy int dup_mmap(struct mm_struct *mm,struct mm_struct *oldmm)
{struct vm_area_struct *mpnt, *tmp;int retval;unsigned long charge 0;LIST_HEAD(uf);MA_STATE(old_mas, oldmm-mm_mt, 0, 0);MA_STATE(mas, mm-mm_mt, 0, 0);uprobe_start_dup_mmap();if (mmap_write_lock_killable(oldmm)) {retval -EINTR;goto fail_uprobe_end;}flush_cache_dup_mm(oldmm);uprobe_dup_mmap(oldmm, mm);/** Not linked in yet - no deadlock potential:*/mmap_write_lock_nested(mm, SINGLE_DEPTH_NESTING);/* No ordering required: file already has been exposed. */dup_mm_exe_file(mm, oldmm);mm-total_vm oldmm-total_vm;mm-data_vm oldmm-data_vm;mm-exec_vm oldmm-exec_vm;mm-stack_vm oldmm-stack_vm;retval ksm_fork(mm, oldmm);if (retval)goto out;khugepaged_fork(mm, oldmm);retval mas_expected_entries(mas, oldmm-map_count);if (retval)goto out;/* 遍历父进程的所有vma */mas_for_each(old_mas, mpnt, ULONG_MAX) {struct file *file;if (mpnt-vm_flags VM_DONTCOPY) {vm_stat_account(mm, mpnt-vm_flags, -vma_pages(mpnt));continue;}charge 0;/** Dont duplicate many vmas if weve been oom-killed (for* example)*/if (fatal_signal_pending(current)) {retval -EINTR;goto loop_out;}if (mpnt-vm_flags VM_ACCOUNT) {unsigned long len vma_pages(mpnt);if (security_vm_enough_memory_mm(oldmm, len)) /* sic */goto fail_nomem;charge len;}/* 为新进程创建vma并拷贝父进程的vma内容到新创建的vma中 */tmp vm_area_dup(mpnt);if (!tmp)goto fail_nomem;retval vma_dup_policy(mpnt, tmp);if (retval)goto fail_nomem_policy;tmp-vm_mm mm;retval dup_userfaultfd(tmp, uf);if (retval)goto fail_nomem_anon_vma_fork;if (tmp-vm_flags VM_WIPEONFORK) {/** VM_WIPEONFORK gets a clean slate in the child.* Dont prepare anon_vma until fault since we dont* copy page for current vma.*/tmp-anon_vma NULL;} else if (anon_vma_fork(tmp, mpnt))goto fail_nomem_anon_vma_fork;tmp-vm_flags ~(VM_LOCKED | VM_LOCKONFAULT);file tmp-vm_file;if (file) { /* 如果是文件页映射的vma则会对新创建的vma做如下操作 */struct address_space *mapping file-f_mapping;get_file(file);i_mmap_lock_write(mapping);if (tmp-vm_flags VM_SHARED)mapping_allow_writable(mapping);flush_dcache_mmap_lock(mapping);/* insert tmp into the share list, just 
after mpnt */vma_interval_tree_insert_after(tmp, mpnt,mapping-i_mmap);flush_dcache_mmap_unlock(mapping);i_mmap_unlock_write(mapping);}/** Copy/update hugetlb private vma information.*/if (is_vm_hugetlb_page(tmp))hugetlb_dup_vma_private(tmp);/* Link the vma into the MT */mas.index tmp-vm_start;mas.last tmp-vm_end - 1;mas_store(mas, tmp);if (mas_is_err(mas))goto fail_nomem_mas_store;mm-map_count;if (!(tmp-vm_flags VM_WIPEONFORK))/* 将父进程vma的页表信息拷贝到子进程中 */retval copy_page_range(tmp, mpnt);if (tmp-vm_ops tmp-vm_ops-open)tmp-vm_ops-open(tmp);if (retval)goto loop_out;}/* a new mm has just been created */retval arch_dup_mmap(oldmm, mm);
loop_out:mas_destroy(mas);
out:mmap_write_unlock(mm);flush_tlb_mm(oldmm);mmap_write_unlock(oldmm);dup_userfaultfd_complete(uf);
fail_uprobe_end:uprobe_end_dup_mmap();return retval;fail_nomem_mas_store:unlink_anon_vmas(tmp);
fail_nomem_anon_vma_fork:mpol_put(vma_policy(tmp));
fail_nomem_policy:vm_area_free(tmp);
fail_nomem:retval -ENOMEM;vm_unacct_memory(charge);goto loop_out;
}这里就是fork()系统调用的实现全部内容了细节并没有全部展开读者可自行阅读源码。