Implement log storage and querying
Company: Datadog
Role: Software Engineer
Category: Coding & Algorithms
Difficulty: Medium
Interview Round: Technical Screen
Quick Answer: This question evaluates a candidate's skill in designing and analyzing log storage and query systems, covering data layout, indexing strategies, time-range scans with optional filters, and time/space complexity reasoning.
Constraints
- 1 <= len(ops) <= 50000
- Append operations arrive with non-decreasing timestamp
- 0 <= timestamp <= 10^13
- severity ∈ {"INFO","WARN","ERROR"}
- 0 <= length(serviceId) <= 64
- 0 <= length(message) <= 512
- Queries use inclusive time range: start <= timestamp <= end
- orderBy ∈ {"timestamp_asc","timestamp_desc"}
- 0 <= limit <= 100000, 0 <= offset
- Return outputs only for count/get; append produces no output
Solution
from bisect import bisect_left, bisect_right
def process_logs(ops: list) -> list:
ts = []
severities = []
services = []
messages = []
outputs = []
for op in ops:
typ = op.get('op')
if typ == 'append':
ts.append(op['timestamp'])
severities.append(op['severity'])
services.append(op['serviceId'])
messages.append(op['message'])
elif typ == 'count' or typ == 'get':
start = op['start']
end = op['end']
if start > end or not ts:
l = 0
r = 0
else:
l = bisect_left(ts, start)
r = bisect_right(ts, end)
filters = op.get('filters') or {}
fsev = filters.get('severity')
fsvc = filters.get('serviceId')
if typ == 'count':
c = 0
for i in range(l, r):
if (fsev is None or severities[i] == fsev) and (fsvc is None or services[i] == fsvc):
c += 1
outputs.append(c)
else: # get
orderBy = op.get('orderBy', 'timestamp_asc')
reverse = (orderBy == 'timestamp_desc')
limit = op.get('limit', r - l)
offset = op.get('offset', 0)
if limit < 0:
limit = 0
if offset < 0:
offset = 0
res = []
skipped = 0
if reverse:
i = r - 1
while i >= l and len(res) < limit:
if (fsev is None or severities[i] == fsev) and (fsvc is None or services[i] == fsvc):
if skipped < offset:
skipped += 1
else:
res.append({'timestamp': ts[i], 'severity': severities[i], 'serviceId': services[i], 'message': messages[i]})
i -= 1
else:
i = l
while i < r and len(res) < limit:
if (fsev is None or severities[i] == fsev) and (fsvc is None or services[i] == fsvc):
if skipped < offset:
skipped += 1
else:
res.append({'timestamp': ts[i], 'severity': severities[i], 'serviceId': services[i], 'message': messages[i]})
i += 1
outputs.append(res)
else:
# Unknown operations are ignored per spec (not expected)
pass
return outputs