Design Data Structure for Sparse Matrices Operations
Company: Pinterest
Role: Data Scientist
Category: Coding & Algorithms
Difficulty: Medium
Interview Round: Onsite
Quick Answer: This question evaluates a candidate's ability to design efficient data structures and implement algorithms for sparse matrix storage and operations, emphasizing space-efficient representations, matrix arithmetic, and computational complexity reasoning.
Constraints
- 1 <= dimA[0], dimA[1], dimB[0], dimB[1] <= 100000
- 0 <= len(A_entries) + len(B_entries) <= 200000
- Entries use 0-based indices: 0 <= i < rows, 0 <= j < cols
- Values are 32-bit signed integers; duplicates at the same (i,j) are summed; zeros are omitted
- For op == 'add': dimA == dimB
- For op == 'multiply': dimA[1] == dimB[0]
- Output for add/multiply: list of [i,j,val] sorted by i, then j
- Output for printA/printB: newline-separated 'i j v' lines in sorted order
- Do not mutate inputs; raise ValueError on invalid indices or incompatible dimensions
Solution
def sparse_matrix_ops(dimA, dimB, A_entries, B_entries, op):
def as_pair(x):
if isinstance(x, (list, tuple)) and len(x) == 2:
return int(x[0]), int(x[1])
raise ValueError("dim must be a length-2 list/tuple")
ra, ca = as_pair(dimA)
rb, cb = as_pair(dimB)
def normalize(entries, r, c):
rows = {}
if entries is None:
return rows
for e in entries:
if not (isinstance(e, (list, tuple)) and len(e) == 3):
raise ValueError("each entry must be [i,j,val]")
i, j, v = e
i = int(i); j = int(j); v = int(v)
if not (0 <= i < r and 0 <= j < c):
raise ValueError("index out of bounds")
if v == 0:
continue
row = rows.get(i)
if row is None:
row = {}
rows[i] = row
row[j] = row.get(j, 0) + v
if row[j] == 0:
del row[j]
return rows
Arows = normalize(A_entries, ra, ca)
Brows = normalize(B_entries, rb, cb)
def to_triplets(rows):
trip = []
for i in sorted(rows):
row = rows[i]
for j in sorted(row):
v = row[j]
if v != 0:
trip.append([i, j, v])
return trip
if op == "printA":
trip = to_triplets(Arows)
return "\n".join(f"{i} {j} {v}" for i, j, v in trip)
if op == "printB":
trip = to_triplets(Brows)
return "\n".join(f"{i} {j} {v}" for i, j, v in trip)
if op == "add":
if ra != rb or ca != cb:
raise ValueError("dimension mismatch for add")
res = {}
def add_from(src):
for i, row in src.items():
rrow = res.get(i)
if rrow is None:
rrow = {}
res[i] = rrow
for j, v in row.items():
rrow[j] = rrow.get(j, 0) + v
if rrow.get(j, 0) == 0:
rrow.pop(j, None)
add_from(Arows)
add_from(Brows)
return to_triplets(res)
if op == "multiply":
if ca != rb:
raise ValueError("dimension mismatch for multiply")
res = {}
for i, arow in Arows.items():
rrow = res.get(i)
if rrow is None:
rrow = {}
res[i] = rrow
for k, av in arow.items():
brow = Brows.get(k)
if not brow:
continue
for j, bv in brow.items():
rrow[j] = rrow.get(j, 0) + av * bv
if rrow.get(j, 0) == 0:
rrow.pop(j, None)
return to_triplets(res)
raise ValueError("unsupported op")