Skip to content

Commit 4298506

Browse files
author
aaron.liu
committed
update
1 parent a7ff0c5 commit 4298506

13 files changed

+768
-0
lines changed

Array/LC14 Longest Common Prefix.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
'''
2+
https://leetcode.com/problems/longest-common-prefix/description/?envType=company&envId=apple&favoriteSlug=apple-three-months
3+
4+
竖着一个个字符对比即可
5+
6+
followup: 如果多次query LCP 怎么处理? --> 用trie. 用strs里的字符串建立trie 然后来一个query就用trie做前缀匹配
7+
'''
8+
from typing import List
9+
10+
class Solution:
    # Time: O(S), where S is the total length of all strings in strs. Space: O(1).
    def longestCommonPrefix(self, strs: List[str]) -> str:
        """Return the longest prefix shared by every string in ``strs``.

        The strings are compared column by column against the first string;
        the scan stops at the first column where some string has ended or
        holds a different character.

        Args:
            strs: Strings to compare. May be empty.

        Returns:
            The common prefix, or "" when ``strs`` is empty or nothing is shared.
        """
        if not strs:
            return ""

        first = strs[0]  # the prefix can never be longer than the first string
        for col, expected in enumerate(first):
            # A string that is exhausted at this column, or that disagrees
            # with the first string here, ends the common prefix.
            if any(col == len(word) or word[col] != expected for word in strs):
                return first[:col]  # slice end is exclusive
        return first
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
'''
2+
Time complexity : O(max(N,M)), where N and M are the lengths of the input strings respectively. It's a one-pass solution.
3+
4+
Space complexity : O(max(N,M)).
5+
6+
Despite the fact that we did not keep arrays of revision numbers,
7+
we still need some additional space to store a substring of the input string for integer conversion.
8+
9+
In the worst case, the substring could be of the original string as well.
10+
'''
11+
from typing import List, Tuple
12+
13+
class Solution:
    def get_chunk(self, version: str, n: int, p: int) -> Tuple[int, int]:
        """Parse the revision that starts at index ``p`` of ``version``.

        Args:
            version: Dot-separated version string, e.g. "1.02.3".
            n: Length of ``version``.
            p: Index where the current revision starts.

        Returns:
            A ``(number, next_p)`` pair: the integer value of the revision and
            the index where the following revision starts. When ``p`` is
            already past the end, the revision counts as 0 and ``p`` is
            returned unchanged.

        Raises:
            ValueError: If the revision text is not a valid integer
                (for example an empty revision as in "1..2").
        """
        # The string is exhausted: missing revisions compare as 0.
        if p >= n:
            return 0, p

        # Locate the "." that ends this revision (or the end of the string).
        p_end = version.find(".", p, n)
        if p_end == -1:
            p_end = n

        # version[p:p_end] never contains the dot, so no end-of-string special
        # case is needed; a trailing "." simply ends the last revision.
        return int(version[p:p_end]), p_end + 1

    def compareVersion(self, version1: str, version2: str) -> int:
        """Compare two dot-separated version strings revision by revision.

        Leading zeros are ignored and missing trailing revisions count as 0,
        so "1.0" equals "1.0.0" and "1.01" equals "1.001".

        Returns:
            1 if ``version1`` is greater, -1 if it is smaller, 0 if equal.
        """
        p1, p2 = 0, 0
        n1, n2 = len(version1), len(version2)

        # Keep going while either string still has revisions left.
        while p1 < n1 or p2 < n2:
            num1, p1 = self.get_chunk(version1, n1, p1)
            num2, p2 = self.get_chunk(version2, n2, p2)
            if num1 != num2:
                return 1 if num1 > num2 else -1

        # Every revision matched: the versions are equal.
        return 0

Array/LC189 Rotate Array.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from typing import List
2+
3+
class Solution:
    def reverse(self, nums: list, start: int, end: int) -> None:
        """Reverse ``nums[start..end]`` (both ends inclusive) in place."""
        # Strict "<": when the pointers meet there is nothing left to swap.
        while start < end:
            nums[start], nums[end] = nums[end], nums[start]
            start += 1
            end -= 1

    def rotate(self, nums: List[int], k: int) -> None:
        """Rotate ``nums`` to the right by ``k`` steps, in place.

        Uses three reversals: the whole list, then the first ``k`` elements,
        then the remaining ``n - k`` elements. O(n) time, O(1) extra space.

        Args:
            nums: List to rotate; modified in place. May be empty.
            k: Number of steps; values of ``len(nums)`` or more wrap around.
        """
        n = len(nums)
        if n == 0:
            return  # nothing to rotate; also avoids a modulo by zero below

        k %= n  # k may exceed n
        if k == 0:
            return  # a full turn leaves the list unchanged

        self.reverse(nums, 0, n - 1)
        self.reverse(nums, 0, k - 1)
        self.reverse(nums, k, n - 1)
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
'''
2+
https://leetcode.com/problems/product-of-array-except-self/description/?envType=company&envId=apple&favoriteSlug=apple-six-months&status=TO_DO
3+
4+
前后缀分解问题
5+
前缀乘积用数组prefix表示 prefix[i]:前i-1个数的乘积(不包括第i个数) 容易得出:prefix[i] = prefix[i-1]*nums[i-1]
6+
后缀乘积也可以用数组表示 但是题目要求space O(1)复杂度 所以后缀乘积用一个变量suffix表示 然后on the fly地从后往前计算答案 同时更新suffix
7+
'''
8+
from typing import List
9+
10+
class Solution:
    def productExceptSelf(self, nums: List[int]) -> List[int]:
        """Return a list whose i-th entry is the product of all nums except nums[i].

        A first pass stores, for each index, the product of everything to its
        left. A second pass walks from the right with a running product of
        everything to the right and multiplies it in. No division is used and
        the only extra storage is the returned list.
        """
        size = len(nums)
        answer = [1] * size

        # answer[i] = product of nums[0..i-1]
        for i in range(1, size):
            answer[i] = answer[i - 1] * nums[i - 1]

        # Fold in the product of nums[i+1..] while scanning right to left.
        right_product = 1
        for i in reversed(range(size)):
            answer[i] = answer[i] * right_product
            right_product *= nums[i]  # extend the suffix for the next index

        return answer
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
'''
2+
双指针
3+
维护区间[i,j] 使得该区间内部没有重复字符 当出现重复字符时 朝j的方向移动i 当窗口内无重复字符时 再移动j
4+
'''
5+
from collections import defaultdict
6+
7+
class Solution:
    def lengthOfLongestSubstring(self, s: str) -> int:
        """Return the length of the longest substring of ``s`` with no repeated character.

        Sliding window: the window [left, right] is kept free of duplicates.
        Each new character is added on the right; while it occurs twice, the
        left edge moves forward until the duplicate has left the window.
        """
        if not s:
            return 0

        counts = defaultdict(int)  # character -> occurrences inside the window
        best = 0
        left = 0
        for right, ch in enumerate(s):
            counts[ch] += 1
            # The newly added character is the only possible duplicate.
            while counts[ch] > 1:
                counts[s[left]] -= 1
                left += 1
            best = max(best, right - left + 1)
        return best
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
'''
2+
given sorted array A of doubles. compute a new sorted array B where each element
3+
is obtained by applying the following function f(x) (x is an element of A)
4+
f(x) = ax^2 + bx + c, where a > 0
5+
output: array B of sorted doubles f(x)
6+
7+
parabola with a > 0, 开口向上的抛物线. 最小值=-b/(2a) 根据图像法: 在最小值左边单调减 在最小值右边单调增
8+
分成两部分算 得到两个array 然后转化成 merge two sorted array
9+
time complexity: O(n) -> 沿抛物线算两个f_x数组O(n), merge two sorted list O(n)
10+
'''
11+
12+
from typing import List
13+
def compute_sorted_fx(nums: List[float], a:float, b:float, c:float) -> List[float]:
    """Apply f(x) = a*x^2 + b*x + c to a sorted list and return the results sorted.

    With a > 0 the parabola opens upward and has its minimum at
    x = -b / (2a): f is decreasing to the left of that point and increasing
    to the right. The inputs are therefore split at the vertex into two
    monotonic runs, which are merged like two sorted lists in O(n) time.

    Args:
        nums: Input values, sorted in ascending order.
        a: Quadratic coefficient; must be strictly positive.
        b: Linear coefficient.
        c: Constant term.

    Returns:
        The values f(x) for every x in ``nums``, in ascending order.

    Raises:
        ValueError: If ``a`` is not strictly positive.
    """
    if a <= 0:
        raise ValueError("input a should be > 0")

    vertex_x = -b / (2 * a)
    falling, rising = [], []
    for x in nums:
        f_x = a * (x ** 2) + b * x + c
        if x <= vertex_x:
            falling.append(f_x)  # left of the vertex: values decrease
        else:
            rising.append(f_x)   # right of the vertex: values increase

    # Turn the decreasing run into an ascending one so both runs are sorted.
    falling.reverse()

    # Standard merge of two ascending lists.
    merged = []
    i = j = 0
    while i < len(falling) and j < len(rising):
        if falling[i] <= rising[j]:
            merged.append(falling[i])
            i += 1
        else:
            merged.append(rising[j])
            j += 1
    merged.extend(falling[i:])
    merged.extend(rising[j:])
    return merged
46+
47+
# Demo run: f(x) = 2x^2 - 20x + 1 has its vertex at x = 5, which lies inside the sample.
a, b, c = 2.0, -20.0, 1.0
nums = [-13, -10, -3, 1, 2, 3, 4, 5, 6, 7, 20, 50]

print(compute_sorted_fx(nums, a, b, c))

Array/Linkedin Meeting Point.py

Lines changed: 134 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
'''
2+
N robots on a line. Find a point such that the total distance traveled by all robots is minimized.
3+
4+
Input:
5+
The position pi of each robot i={1, 2, ..., N}, unsorted array
6+
Output:
7+
meeting point x* such that the total distance traveled by all individuals is minimized.
8+
9+
x* = argmin_x sum(|x - p_i|) -> the objective is convex because a sum of convex functions is also convex.
10+
形象得想 在数轴上 如果起初把meeting point定在median 然后再把meeting point往左或者右移动delta 可以发现 距离之和
11+
一定会增加delta. 所以median是最优meeting point
12+
'''
13+
14+
from typing import List
15+
16+
def find_median(nums: List[float]) -> float:
    """Return the median of ``nums`` via quickselect (the lower median for even lengths).

    Note: ``partition`` swaps elements, so ``nums`` is reordered in place.

    Args:
        nums: Values in any order.

    Returns:
        The median element, or 0.0 when ``nums`` is empty.
    """
    if not nums:
        return 0.0

    count = len(nums)
    # 1-based rank of the median: n/2 for even n, n//2 + 1 for odd n.
    rank = count // 2 + count % 2
    median_idx = partition(nums, rank, 0, count - 1)
    return nums[median_idx]
22+
23+
def partition(nums:List[float], k:int, start:int, end:int) -> int:
    """Quickselect: return the index that holds the k-th smallest value of nums[start..end].

    ``nums`` is rearranged in place using a two-pointer partition around the
    middle element; the search then continues in whichever side contains the
    requested rank.

    Args:
        nums: Values to search; reordered in place.
        k: 1-based rank, counted within nums[start..end].
        start: First index of the range (inclusive).
        end: Last index of the range (inclusive).

    Returns:
        The index at which the k-th smallest value ends up. When the range
        has at most one element, ``start`` is returned as is.
    """
    lo, hi = start, end
    while lo < hi:
        pivot = nums[(lo + hi) // 2]  # middle element as pivot
        i, j = lo - 1, hi + 1         # both pointers start just outside the range
        while i < j:
            # Advance i to the first value >= pivot.
            i += 1
            while not nums[i] >= pivot:
                i += 1
            # Retreat j to the first value <= pivot.
            j -= 1
            while not nums[j] <= pivot:
                j -= 1
            # Swap the misplaced pair; the pointers are not moved here.
            if i < j:
                nums[i], nums[j] = nums[j], nums[i]

        left_count = j - lo + 1  # size of the left part nums[lo..j]
        if k <= left_count:
            hi = j               # the answer is in the left part
        else:
            k -= left_count      # skip the left part and adjust the rank
            lo = j + 1
    return lo
45+
46+
# Demo run on an odd-length sample.
# Even-length alternative: nums = [4.1, 2.2, 2.4, 1.8, 5.4, 6.9]
nums = [4.1, 2.2, 2.4, 1.8, 5.4]
print(find_median(nums))
49+
50+
'''
51+
followup: 如果数组太大 无法放到一台机器上 如何分布式求解?
52+
利用p-percentile distributed calculation求解
53+
54+
步骤 1:数据分割
55+
将大数组分割成若干小块,每块数据可以放入单台机器进行处理。假设有 N 台机器,那么将数组分割成 N 块,每块由一个机器负责处理。
56+
57+
步骤 2: 初始候选选择
58+
随机选择一些数据点作为候选中位数。这些候选点可以从数据块中随机采样得到。
59+
60+
步骤 3: 分布式统计
61+
将这些候选中位数广播到所有机器上,并在每个机器上计算其数据中小于等于每个候选的个数。
62+
63+
步骤 4: 汇总统计结果
64+
协调节点汇总所有机器上的统计结果,计算全局范围内每个候选的累计个数。
65+
66+
步骤 5: 调整搜索范围
67+
根据累计个数和目标中位数的位置,调整候选的搜索范围。重复步骤 2 到 4, 直到搜索范围收敛。
68+
'''
69+
'''
70+
import random
71+
import numpy as np
72+
73+
# 将数据分割成若干块
74+
def split_data(data, num_chunks):
75+
return np.array_split(data, num_chunks)
76+
77+
# 生成初始候选中位数
78+
def initial_candidates(data_chunks, num_candidates):
79+
all_data = np.concatenate(data_chunks)
80+
return random.sample(list(all_data), num_candidates)
81+
82+
# 在每个机器上计算小于等于候选的个数
83+
def count_less_equal(data_chunk, candidates):
84+
return [np.sum(data_chunk <= candidate) for candidate in candidates]
85+
86+
# 汇总所有机器的统计结果
87+
def aggregate_counts(counts_per_machine):
88+
return np.sum(counts_per_machine, axis=0)
89+
90+
def find_median_distributed(data, num_machines, num_candidates):
91+
# 将数据分成若干块
92+
data_chunks = split_data(data, num_machines)
93+
94+
# 初始候选中位数
95+
candidates = initial_candidates(data_chunks, num_candidates)
96+
97+
# 目标中位数的位置
98+
median_position = len(data) // 2
99+
100+
while True:
101+
# 在每个机器上计算小于等于候选的个数
102+
counts_per_machine = [count_less_equal(chunk, candidates) for chunk in data_chunks]
103+
104+
# 汇总所有机器的统计结果
105+
total_counts = aggregate_counts(counts_per_machine)
106+
107+
# 找到累计个数刚好超过中位数位置的候选
108+
for i, count in enumerate(total_counts):
109+
if count >= median_position:
110+
current_median = candidates[i]
111+
break
112+
113+
# 检查是否满足中位数条件
114+
if total_counts[i] == median_position:
115+
return current_median
116+
117+
# 更新候选范围
118+
if total_counts[i] < median_position:
119+
lower_bound = candidates[i]
120+
else:
121+
upper_bound = candidates[i]
122+
123+
# 生成新的候选
124+
candidates = [random.uniform(lower_bound, upper_bound) for _ in range(num_candidates)]
125+
126+
# 示例数据
127+
data = np.random.randint(0, 100, size=1000)
128+
num_machines = 10
129+
num_candidates = 5
130+
131+
# 求解中位数
132+
median = find_median_distributed(data, num_machines, num_candidates)
133+
print("Estimated median is:", median)
134+
'''

0 commit comments

Comments
 (0)