@@ -298,11 +298,7 @@ func (t *Task) Clone(args *linux.CloneArgs) (ThreadID, *SyscallControl, error) {
298298	}
299299
300300	if  userns  !=  creds .UserNamespace  {
301- 		if  err  :=  nt .SetUserNamespace (userns ); err  !=  nil  {
302- 			// This shouldn't be possible: userns was created from nt.creds, so 
303- 			// nt should have CAP_SYS_ADMIN in userns. 
304- 			panic ("Task.Clone: SetUserNamespace failed: "  +  err .Error ())
305- 		}
301+ 		nt .creds .Store (creds .ForkIntoUserNamespace (userns ))
306302	}
307303
308304	// This has to happen last, because e.g. ptraceClone may send a SIGSTOP to 
@@ -618,7 +614,6 @@ func (t *Task) Unshare(flags int32) error {
618614	if  flags & (linux .CLONE_VM | linux .CLONE_SIGHAND ) !=  0  {
619615		return  linuxerr .EINVAL 
620616	}
621- 	creds  :=  t .Credentials ()
622617	if  flags & linux .CLONE_THREAD  !=  0  {
623618		t .tg .signalHandlers .mu .Lock ()
624619		if  t .tg .tasksCount  !=  1  {
@@ -629,98 +624,137 @@ func (t *Task) Unshare(flags int32) error {
629624		// This isn't racy because we're the only living task, and therefore 
630625		// the only task capable of creating new ones, in our thread group. 
631626	}
627+ 
628+ 	// Prepare new execution context. 
629+ 	creds  :=  t .Credentials ()
630+ 	var  (
631+ 		newFSContext   * FSContext 
632+ 		newFDTable     * FDTable 
633+ 		newCreds       bool 
634+ 		newChildPIDNS  * PIDNamespace 
635+ 		newNetNS       * inet.Namespace 
636+ 		newUTSNS       * UTSNamespace 
637+ 		newIPCNS       * IPCNamespace 
638+ 		newMountNS     * vfs.MountNamespace 
639+ 	)
640+ 	defer  func () {
641+ 		if  newFSContext  !=  nil  {
642+ 			newFSContext .destroy (t )
643+ 		}
644+ 		if  newFDTable  !=  nil  {
645+ 			newFDTable .DecRef (t )
646+ 		}
647+ 		if  newNetNS  !=  nil  {
648+ 			newNetNS .DecRef (t )
649+ 		}
650+ 		if  newUTSNS  !=  nil  {
651+ 			newUTSNS .DecRef (t )
652+ 		}
653+ 		if  newIPCNS  !=  nil  {
654+ 			newIPCNS .DecRef (t )
655+ 		}
656+ 		if  newMountNS  !=  nil  {
657+ 			newMountNS .DecRef (t )
658+ 		}
659+ 	}()
660+ 	if  flags & linux .CLONE_FS  !=  0  ||  flags & linux .CLONE_NEWNS  !=  0  {
661+ 		newFSContext  =  t .FSContext ().Fork ()
662+ 	}
663+ 	if  flags & linux .CLONE_FILES  !=  0  {
664+ 		newFDTable  =  t .fdTable .Fork (t , MaxFdLimit )
665+ 	}
632666	if  flags & linux .CLONE_NEWUSER  !=  0  {
633667		if  t .IsChrooted () {
634668			return  linuxerr .EPERM 
635669		}
670+ 		var  err  error 
636671		newUserNS , err  :=  creds .NewChildUserNamespace ()
637672		if  err  !=  nil  {
638673			return  err 
639674		}
640- 		err  =  t .SetUserNamespace (newUserNS )
641- 		if  err  !=  nil  {
642- 			return  err 
643- 		}
644- 		// Need to reload creds, because t.SetUserNamespace() changed task credentials. 
645- 		creds  =  t .Credentials ()
675+ 		creds  =  t .Credentials ().ForkIntoUserNamespace (newUserNS )
676+ 		newCreds  =  true 
646677	}
647- 	haveCapSysAdmin  :=  t .HasCapability (linux .CAP_SYS_ADMIN )
678+ 	haveCapSysAdmin  :=  creds .HasCapability (linux .CAP_SYS_ADMIN )
648679	if  flags & linux .CLONE_NEWPID  !=  0  {
649680		if  ! haveCapSysAdmin  {
650681			return  linuxerr .EPERM 
651682		}
652- 		t . childPIDNamespace  =  t .tg .pidns .NewChild (t , t .k , t .UserNamespace () )
683+ 		newChildPIDNS  =  t .tg .pidns .NewChild (t , t .k , creds .UserNamespace )
653684	}
654685	if  flags & linux .CLONE_NEWNET  !=  0  {
655686		if  ! haveCapSysAdmin  {
656687			return  linuxerr .EPERM 
657688		}
658- 		netns  :=  t .NetworkNamespace ()
659- 		netns  =  inet .NewNamespace (netns , t .UserNamespace ())
660- 		netnsInode  :=  nsfs .NewInode (t , t .k .nsfsMount , netns )
661- 		netns .SetInode (netnsInode )
662- 		t .mu .Lock ()
663- 		oldNetns  :=  t .netns 
664- 		t .netns  =  netns 
665- 		t .mu .Unlock ()
666- 		oldNetns .DecRef (t )
689+ 		newNetNS  =  inet .NewNamespace (t .netns , creds .UserNamespace )
690+ 		newNetNS .SetInode (nsfs .NewInode (t , t .k .nsfsMount , newNetNS ))
667691	}
668- 
669- 	cu  :=  cleanup.Cleanup {}
670- 	// All cu actions has to be executed after releasing t.mu. 
671- 	defer  cu .Clean ()
672- 	t .mu .Lock ()
673- 	defer  t .mu .Unlock ()
674692	if  flags & linux .CLONE_NEWUTS  !=  0  {
675693		if  ! haveCapSysAdmin  {
676694			return  linuxerr .EPERM 
677695		}
678- 		// Note that this must happen after NewUserNamespace, so the 
679- 		// new user namespace is used if there is one. 
680- 		oldUTSNS  :=  t .utsns 
681- 		t .utsns  =  t .utsns .Clone (creds .UserNamespace )
682- 		t .utsns .SetInode (nsfs .NewInode (t , t .k .nsfsMount , t .utsns ))
683- 		cu .Add (func () { oldUTSNS .DecRef (t ) })
696+ 		newUTSNS  =  t .utsns .Clone (creds .UserNamespace )
697+ 		newUTSNS .SetInode (nsfs .NewInode (t , t .k .nsfsMount , newUTSNS ))
684698	}
685699	if  flags & linux .CLONE_NEWIPC  !=  0  {
686700		if  ! haveCapSysAdmin  {
687701			return  linuxerr .EPERM 
688702		}
689- 		// Note that "If CLONE_NEWIPC is set, then create the process in a new IPC 
690- 		// namespace" 
691- 		oldIPCNS  :=  t .ipcns 
692- 		t .ipcns  =  NewIPCNamespace (creds .UserNamespace )
693- 		t .ipcns .InitPosixQueues (t , t .k .VFS (), creds )
694- 		t .ipcns .SetInode (nsfs .NewInode (t , t .k .nsfsMount , t .ipcns ))
695- 		cu .Add (func () { oldIPCNS .DecRef (t ) })
696- 	}
697- 	if  flags & linux .CLONE_FILES  !=  0  {
698- 		oldFDTable  :=  t .fdTable 
699- 		t .fdTable  =  oldFDTable .Fork (t , MaxFdLimit )
700- 		cu .Add (func () { oldFDTable .DecRef (t ) })
701- 	}
702- 	if  flags & linux .CLONE_FS  !=  0  ||  flags & linux .CLONE_NEWNS  !=  0  {
703- 		oldFSContext  :=  t .FSContext ()
704- 		// unshareFromTask() lowers the old fs context's ref count, but its for us to 
705- 		// destroy it if there are no other references. 
706- 		if  oldFSContext .unshareFromTask (t , oldFSContext .Fork ()) {
707- 			// destroy() requires t.mu to not be held, hence the deferral. 
708- 			cu .Add (func () { oldFSContext .destroy (t ) })
709- 		}
703+ 		newIPCNS  =  NewIPCNamespace (creds .UserNamespace )
704+ 		newIPCNS .InitPosixQueues (t , t .k .VFS (), creds )
705+ 		newIPCNS .SetInode (nsfs .NewInode (t , t .k .nsfsMount , newIPCNS ))
710706	}
711707	if  flags & linux .CLONE_NEWNS  !=  0  {
712708		if  ! haveCapSysAdmin  {
713709			return  linuxerr .EPERM 
714710		}
715- 		oldMountNS  :=  t .mountNamespace 
716- 		fsContext  :=  t .FSContext ()
717- 		mntns , err  :=  t .k .vfs .CloneMountNamespace (t , creds .UserNamespace , oldMountNS , & fsContext .root , & fsContext .cwd , t .k )
711+ 		fsContext  :=  newFSContext 
712+ 		if  fsContext  ==  nil  {
713+ 			fsContext  =  t .FSContext ()
714+ 		}
715+ 		var  err  error 
716+ 		newMountNS , err  =  t .k .vfs .CloneMountNamespace (t , creds .UserNamespace , t .mountNamespace , & fsContext .root , & fsContext .cwd , t .k )
718717		if  err  !=  nil  {
719718			return  err 
720719		}
721- 		t .mountNamespace  =  mntns 
722- 		cu .Add (func () { oldMountNS .DecRef (t ) })
723720	}
721+ 
722+ 	// Switch to new execution context. Store replaced resources in new* so 
723+ 	// that they're cleaned up by the deferred function. 
724+ 	if  newCreds  {
725+ 		t .creds .Store (creds )
726+ 	}
727+ 	t .mu .Lock ()
728+ 	defer  t .mu .Unlock ()
729+ 	if  newFSContext  !=  nil  {
730+ 		oldFSContext  :=  t .FSContext ()
731+ 		// unshareFromTask() lowers the old fs context's ref count, but it's for us to 
732+ 		// destroy it if there are no other references. 
733+ 		if  oldFSContext .unshareFromTask (t , newFSContext ) {
734+ 			newFSContext  =  oldFSContext 
735+ 		} else  {
736+ 			newFSContext  =  nil 
737+ 		}
738+ 	}
739+ 	if  newFDTable  !=  nil  {
740+ 		t .fdTable , newFDTable  =  newFDTable , t .fdTable 
741+ 	}
742+ 	if  newChildPIDNS  !=  nil  {
743+ 		t .childPIDNamespace  =  newChildPIDNS 
744+ 	}
745+ 	if  newNetNS  !=  nil  {
746+ 		t .netns , newNetNS  =  newNetNS , t .netns 
747+ 	}
748+ 	if  newUTSNS  !=  nil  {
749+ 		t .utsns , newUTSNS  =  newUTSNS , t .utsns 
750+ 	}
751+ 	if  newIPCNS  !=  nil  {
752+ 		t .ipcns , newIPCNS  =  newIPCNS , t .ipcns 
753+ 	}
754+ 	if  newMountNS  !=  nil  {
755+ 		t .mountNamespace , newMountNS  =  newMountNS , t .mountNamespace 
756+ 	}
757+ 
724758	return  nil 
725759}
726760
0 commit comments