Automate Python Virtual Environment Setup on Linux Terminal
Company: Capital One
Role: Data Scientist
Category: Coding & Algorithms
Difficulty: Medium
Interview Round: Onsite
Quick Answer: This question evaluates proficiency in shell scripting and Linux terminal automation for Python virtual-environment setup, focusing on environment management, automation, and reproducibility skills relevant to data science workflows.
Constraints
- 1 <= len(commands) <= 100000
- Each command is one of: 'set -e', 'mkdir PATH', 'touch PATH', 'rm PATH', 'chmod +x PATH', 'virtualenv PATH', 'source PATH'
- PATH is relative (no leading '/'), uses '/' as separator, has no '.' or '..', and contains no spaces
- Each PATH length <= 200
- Initial file system contains only the root directory
Solution
from typing import List, Dict, Optional, Set, Tuple
class FSState:
def __init__(self):
# nodes: path -> 'dir' or 'file'
self.nodes: Dict[str, str] = {}
# file_exec: path -> bool (only for files)
self.file_exec: Dict[str, bool] = {}
# children: dir path -> set of immediate child paths
self.children: Dict[str, Set[str]] = {}
# environment
self.env: Dict[str, Optional[str]] = {"ACTIVE_VENV": None}
# initialize root directory
self.nodes['/'] = 'dir'
self.children['/'] = set()
def clone(self) -> 'FSState':
c = FSState()
c.nodes = dict(self.nodes)
c.file_exec = dict(self.file_exec)
c.children = {k: set(v) for k, v in self.children.items()}
c.env = dict(self.env)
return c
def is_idempotent_script(commands: List[str]) -> bool:
def canon(path: str) -> str:
s = path.strip()
# collapse multiple slashes
while '//' in s:
s = s.replace('//', '/')
s = s.strip('/')
return '/' if s == '' else '/' + s
def parent(path: str) -> str:
if path == '/':
return '/'
i = path.rfind('/')
return '/' if i == 0 else path[:i]
def join(base: str, name: str) -> str:
return '/' + name if base == '/' else base + '/' + name
def is_dir(st: FSState, p: str) -> bool:
return st.nodes.get(p) == 'dir'
def is_file(st: FSState, p: str) -> bool:
return st.nodes.get(p) == 'file'
def mkdir_p(st: FSState, p: str) -> Optional[str]:
# returns None on success, or error message
if p == '/':
return None
comps = p.strip('/').split('/')
cur = '/'
for comp in comps:
nxt = join(cur, comp)
if nxt in st.nodes:
if is_file(st, nxt):
return f"mkdir blocked by file at {nxt}"
# directory exists; continue
else:
# create directory
st.nodes[nxt] = 'dir'
st.children[nxt] = set()
st.children[cur].add(nxt)
cur = nxt
return None
def touch(st: FSState, p: str) -> Optional[str]:
par = parent(p)
if not is_dir(st, par):
return f"touch parent missing or not dir at {par}"
if p in st.nodes:
if is_dir(st, p):
return f"touch path is directory at {p}"
# file exists: no-op
return None
st.nodes[p] = 'file'
st.file_exec[p] = False
st.children[par].add(p)
return None
def rm_rf(st: FSState, p: str) -> Optional[str]:
if p not in st.nodes:
return None
if is_file(st, p):
par = parent(p)
if par in st.children and p in st.children[par]:
st.children[par].remove(p)
st.nodes.pop(p, None)
st.file_exec.pop(p, None)
return None
# directory removal (recursive). Do not allow removing root '/'
if p == '/':
return "cannot remove root"
# gather directories to remove
order: List[str] = []
stack: List[str] = [p]
while stack:
d = stack.pop()
order.append(d)
# iterate over a snapshot of children
for child in list(st.children.get(d, set())):
if is_dir(st, child):
stack.append(child)
else:
# remove file child
st.children[d].remove(child)
st.nodes.pop(child, None)
st.file_exec.pop(child, None)
# remove directories in reverse (deepest first)
for d in reversed(order):
# by now, d has only (possibly) dir children in st.children[d]
# remove reference from parent
par = parent(d)
if par in st.children and d in st.children[par]:
st.children[par].remove(d)
# remove directory node and its children set
st.children.pop(d, None)
st.nodes.pop(d, None)
return None
def chmod_x(st: FSState, p: str) -> Optional[str]:
if not is_file(st, p):
return f"chmod target not a file at {p}"
st.file_exec[p] = True
return None
def do_virtualenv(st: FSState, p: str) -> Optional[str]:
if is_file(st, p):
return f"virtualenv path is a file at {p}"
# ensure p dir
err = mkdir_p(st, p)
if err:
return err
# ensure p/bin dir
bin_dir = join(p, 'bin')
err = mkdir_p(st, bin_dir)
if err:
return err
# ensure p/bin/activate file
act = join(bin_dir, 'activate')
if act in st.nodes and is_dir(st, act):
return f"activate exists as directory at {act}"
if act not in st.nodes:
# create file
par = parent(act)
if not is_dir(st, par):
return f"internal error: missing bin dir at {par}"
st.nodes[act] = 'file'
st.file_exec[act] = False
st.children[par].add(act)
return None
def source(st: FSState, p: str) -> Optional[str]:
if not is_file(st, p):
return f"source target not a file at {p}"
if p.endswith('/bin/activate'):
root = p[: -len('/bin/activate')]
st.env['ACTIVE_VENV'] = root if root != '' else '/'
return None
def run(cmds: List[str], initial: Optional[FSState] = None) -> Tuple[FSState, Optional[int]]:
st = initial.clone() if initial is not None else FSState()
for idx, raw in enumerate(cmds, 1):
s = raw.strip()
if s == '' or s.startswith('#'):
continue
if s == 'set -e':
# always in effect
continue
parts = s.split()
op = parts[0]
if op == 'mkdir' and len(parts) == 2:
p = canon(parts[1])
err = mkdir_p(st, p)
elif op == 'touch' and len(parts) == 2:
p = canon(parts[1])
err = touch(st, p)
elif op == 'rm' and len(parts) == 2:
p = canon(parts[1])
err = rm_rf(st, p)
elif op == 'chmod' and len(parts) == 3 and parts[1] == '+x':
p = canon(parts[2])
err = chmod_x(st, p)
elif op == 'virtualenv' and len(parts) == 2:
p = canon(parts[1])
err = do_virtualenv(st, p)
elif op == 'source' and len(parts) == 2:
p = canon(parts[1])
err = source(st, p)
else:
err = f"unknown or malformed command: {s}"
if err is not None:
return st, idx
return st, None
# First run
st1, err1 = run(commands)
if err1 is not None:
return False
# Second run starting from the end state of first run
st2, err2 = run(commands, initial=st1)
if err2 is not None:
return False
# Compare final states (nodes, file_exec, and env)
return st1.nodes == st2.nodes and st1.file_exec == st2.file_exec and st1.env == st2.env
Explanation
Time complexity: O(n * p + R), where n is the number of commands, p is the maximum path depth (for mkdir/touch), and R is the total size of subtrees removed by rm across the run. The second run has the same bound.. Space complexity: O(u), where u is the number of file system nodes (files and directories) created..
Hints
- Model the file system with hash maps: path -> type (file/dir) and executable bit for files.
- Implement mkdir with recursive creation; error if a file blocks any directory component.
- Implement rm as recursive deletion; maintain parent->children sets for efficient subtree removal.
- virtualenv PATH should ensure PATH, PATH/bin, and PATH/bin/activate exist; treat conflicts as errors.
- Apply set -e: stop the run on the first error. Idempotent means both runs succeed and final states match (including ACTIVE_VENV).