📦 数据结构
组织、管理和存储数据的方式,从基础链表到 AI 时代的向量索引
🔗 链表
由节点组成的线性结构,每个节点包含数据和指向下一个节点的指针。
📚 栈
后进先出 (LIFO) 的数据结构,只允许在栈顶进行插入和删除操作。
📥 队列
先进先出 (FIFO) 的数据结构,允许在队尾插入元素,在队头删除元素。
🌳 二叉搜索树
每个节点的左子树所有节点值小于该节点,右子树所有节点值大于该节点。
⚖️ AVL 树/红黑树
自平衡二叉搜索树,通过旋转操作保持平衡,确保 O(log n) 的操作复杂度。
📐 堆
特殊的完全二叉树,分为最大堆和最小堆,优先队列的典型实现。
🕸️ 图
由顶点和边组成的非线性结构,用于表示复杂的关系网络。
💰 贪心算法
每一步选择当前最优解的算法思想,适用于找零、活动选择、分数背包等问题。
数组、链表、栈、队列
二叉树、B 树、红黑树
邻接矩阵、邻接表
哈希表、布隆过滤器
🔗 1. 链表 (Linked List)
数据结构
由节点组成的数据结构,每个节点包含数据和指向下一个节点的指针。
🔗 链表结构示意
[data|next] → [data|next] → [data|next] → null
null ← [prev|data|next] ↔ [prev|data|next] ↔ [prev|data|next] → null
✅ 优点
- 插入删除效率高
- 动态大小
- 不需要连续内存
- 实现简单
❌ 缺点
- 随机访问效率低
- 额外指针开销
- 缓存不友好
- 需要额外空间存储指针
代码实现
class ListNode {
constructor(val = 0, next = null) {
this.val = val;
this.next = next;
}
}
class LinkedList {
constructor() {
this.head = null;
this.size = 0;
}
append(val) {
const node = new ListNode(val);
if (!this.head) {
this.head = node;
} else {
let current = this.head;
while (current.next) {
current = current.next;
}
current.next = node;
}
this.size++;
}
delete(val) {
if (!this.head) return;
if (this.head.val === val) {
this.head = this.head.next;
this.size--;
return;
}
let current = this.head;
while (current.next) {
if (current.next.val === val) {
current.next = current.next.next;
this.size--;
return;
}
current = current.next;
}
}
reverse() {
let prev = null;
let current = this.head;
while (current) {
const next = current.next;
current.next = prev;
prev = current;
current = next;
}
this.head = prev;
}
print() {
const values = [];
let current = this.head;
while (current) {
values.push(current.val);
current = current.next;
}
console.log(values.join(' -> '));
}
}
// 测试
const list = new LinkedList();
list.append(1);
list.append(2);
list.append(3);
list.print(); // 1 -> 2 -> 3
list.reverse();
list.print(); // 3 -> 2 -> 1class ListNode:
def __init__(self, val=0, next=None):
self.val = val
self.next = next
class LinkedList:
def __init__(self):
self.head = None
self.size = 0
def append(self, val):
node = ListNode(val)
if not self.head:
self.head = node
else:
current = self.head
while current.next:
current = current.next
current.next = node
self.size += 1
def delete(self, val):
if not self.head:
return
if self.head.val == val:
self.head = self.head.next
self.size -= 1
return
current = self.head
while current.next:
if current.next.val == val:
current.next = current.next.next
self.size -= 1
return
current = current.next
def reverse(self):
prev = None
current = self.head
while current:
nxt = current.next
current.next = prev
prev = current
current = nxt
self.head = prev
def print(self):
values = []
current = self.head
while current:
values.append(str(current.val))
current = current.next
print(' -> '.join(values))
# 测试
lst = LinkedList()
lst.append(1)
lst.append(2)
lst.append(3)
lst.print() # 1 -> 2 -> 3
lst.reverse()
lst.print() # 3 -> 2 -> 1package main
import "fmt"
type ListNode struct {
val int
next *ListNode
}
type LinkedList struct {
head *ListNode
size int
}
func NewLinkedList() *LinkedList {
return &LinkedList{}
}
func (l *LinkedList) Append(val int) {
node := &ListNode{val: val}
if l.head == nil {
l.head = node
} else {
current := l.head
for current.next != nil {
current = current.next
}
current.next = node
}
l.size++
}
func (l *LinkedList) Delete(val int) {
if l.head == nil {
return
}
if l.head.val == val {
l.head = l.head.next
l.size--
return
}
current := l.head
for current.next != nil {
if current.next.val == val {
current.next = current.next.next
l.size--
return
}
current = current.next
}
}
func (l *LinkedList) Reverse() {
var prev *ListNode
current := l.head
for current != nil {
next := current.next
current.next = prev
prev = current
current = next
}
l.head = prev
}
func (l *LinkedList) Print() {
values := []string{}
current := l.head
for current != nil {
values = append(values, fmt.Sprintf("%d", current.val))
current = current.next
}
fmt.Println(strings.Join(values, " -> "))
}
func main() {
list := NewLinkedList()
list.Append(1)
list.Append(2)
list.Append(3)
list.Print() // 1 -> 2 -> 3
list.Reverse()
list.Print() // 3 -> 2 -> 1
}📚 2. 栈 (Stack)
数据结构
后进先出 (LIFO) 的数据结构,只允许在栈顶进行插入和删除操作。
代码实现
class Stack {
constructor() {
this.items = [];
}
push(element) {
this.items.push(element);
}
pop() {
if (this.items.length === 0) return null;
return this.items.pop();
}
peek() {
return this.items[this.items.length - 1];
}
isEmpty() {
return this.items.length === 0;
}
size() {
return this.items.length;
}
}
// 应用:有效括号
function isValidParentheses(s) {
const stack = new Stack();
const map = { ')': '(', '}': '{', ']': '[' };
for (const char of s) {
if (char in map) {
if (stack.pop() !== map[char]) return false;
} else {
stack.push(char);
}
}
return stack.isEmpty();
}
// 测试
console.log(isValidParentheses("()[]{}")); // true
console.log(isValidParentheses("([)]")); // falseclass Stack:
def __init__(self):
self.items = []
def push(self, element):
self.items.append(element)
def pop(self):
if len(self.items) == 0:
return None
return self.items.pop()
def peek(self):
if len(self.items) == 0:
return None
return self.items[-1]
def is_empty(self):
return len(self.items) == 0
def size(self):
return len(self.items)
# 应用:有效括号
def is_valid_parentheses(s):
stack = Stack()
mapping = {')': '(', '}': '{', ']': '['}
for char in s:
if char in mapping:
if stack.pop() != mapping[char]:
return False
else:
stack.push(char)
return stack.is_empty()
# 测试
print(is_valid_parentheses("()[]{}")) # True
print(is_valid_parentheses("([)]")) # Falsepackage main
import "fmt"
type Stack struct {
items []interface{}
}
func NewStack() *Stack {
return &Stack{items: []interface{}{}}
}
func (s *Stack) Push(element interface{}) {
s.items = append(s.items, element)
}
func (s *Stack) Pop() interface{} {
if len(s.items) == 0 {
return nil
}
top := s.items[len(s.items)-1]
s.items = s.items[:len(s.items)-1]
return top
}
func (s *Stack) Peek() interface{} {
if len(s.items) == 0 {
return nil
}
return s.items[len(s.items)-1]
}
func (s *Stack) IsEmpty() bool {
return len(s.items) == 0
}
func (s *Stack) Size() int {
return len(s.items)
}
func isValidParentheses(s string) bool {
stack := NewStack()
mapping := map[rune]rune{')': '(', '}': '{', ']': '['}
for _, char := range s {
if match, ok := mapping[char]; ok {
if stack.Pop() != match {
return false
}
} else {
stack.Push(char)
}
}
return stack.IsEmpty()
}
func main() {
fmt.Println(isValidParentheses("()[]{}")) // true
fmt.Println(isValidParentheses("([)]")) // false
}📥 3. 队列 (Queue)
数据结构
先进先出 (FIFO) 的数据结构,允许在队尾插入元素,在队头删除元素。
代码实现
class Queue {
constructor() {
this.items = [];
}
enqueue(element) {
this.items.push(element);
}
dequeue() {
if (this.items.length === 0) return null;
return this.items.shift();
}
front() {
return this.items[0];
}
isEmpty() {
return this.items.length === 0;
}
size() {
return this.items.length;
}
}
// 测试
const queue = new Queue();
queue.enqueue(1);
queue.enqueue(2);
queue.enqueue(3);
console.log(queue.dequeue()); // 1class Queue:
def __init__(self):
self.items = []
def enqueue(self, element):
self.items.append(element)
def dequeue(self):
if len(self.items) == 0:
return None
return self.items.pop(0)
def front(self):
if len(self.items) == 0:
return None
return self.items[0]
def is_empty(self):
return len(self.items) == 0
def size(self):
return len(self.items)
# 测试
queue = Queue()
queue.enqueue(1)
queue.enqueue(2)
queue.enqueue(3)
print(queue.dequeue()) # 1package main
import "fmt"
type Queue struct {
items []interface{}
}
func NewQueue() *Queue {
return &Queue{items: []interface{}{}}
}
func (q *Queue) Enqueue(element interface{}) {
q.items = append(q.items, element)
}
func (q *Queue) Dequeue() interface{} {
if len(q.items) == 0 {
return nil
}
front := q.items[0]
q.items = q.items[1:]
return front
}
func (q *Queue) Front() interface{} {
if len(q.items) == 0 {
return nil
}
return q.items[0]
}
func (q *Queue) IsEmpty() bool {
return len(q.items) == 0
}
func (q *Queue) Size() int {
return len(q.items)
}
func main() {
queue := NewQueue()
queue.Enqueue(1)
queue.Enqueue(2)
queue.Enqueue(3)
fmt.Println(queue.Dequeue()) // 1
}🌳 4. 二叉搜索树 (Binary Search Tree)
数据结构
二叉搜索树 (BST) 是一种特殊的二叉树,满足以下性质:每个节点的左子树所有节点值小于该节点值,右子树所有节点值大于该节点值。这使得 BST 支持高效的查找、插入和删除操作。
🌳 BST 结构示意
8
/ \
3 10
/ \ \
1 6 14
/ \
4 7
• 左子树 < 根节点
• 右子树 > 根节点
• 递归定义
• 中序遍历有序
✅ 优点
- 查找、插入、删除效率高(平均 O(log n))
- 中序遍历可得有序序列
- 实现相对简单
- 支持范围查询
❌ 缺点
- 最坏情况退化为链表(O(n))
- 需要额外空间存储指针
- 不平衡时性能下降
- 不适合频繁随机访问
代码实现
class TreeNode {
constructor(val) {
this.val = val;
this.left = null;
this.right = null;
}
}
class BinarySearchTree {
constructor() {
this.root = null;
}
insert(val) {
const newNode = new TreeNode(val);
if (!this.root) {
this.root = newNode;
return;
}
let current = this.root;
while (true) {
if (val < current.val) {
if (!current.left) {
current.left = newNode;
return;
}
current = current.left;
} else if (val > current.val) {
if (!current.right) {
current.right = newNode;
return;
}
current = current.right;
} else {
return; // 不插入重复值
}
}
}
search(val) {
let current = this.root;
while (current) {
if (val === current.val) return true;
if (val < current.val) {
current = current.left;
} else {
current = current.right;
}
}
return false;
}
delete(val) {
const deleteNode = (node, val) => {
if (!node) return null;
if (val < node.val) {
node.left = deleteNode(node.left, val);
} else if (val > node.val) {
node.right = deleteNode(node.right, val);
} else {
// 找到要删除的节点
if (!node.left) return node.right;
if (!node.right) return node.left;
// 有两个子节点,找右子树最小值
let minNode = node.right;
while (minNode.left) minNode = minNode.left;
node.val = minNode.val;
node.right = deleteNode(node.right, minNode.val);
}
return node;
};
this.root = deleteNode(this.root, val);
}
inorder(node = this.root, result = []) {
if (node) {
this.inorder(node.left, result);
result.push(node.val);
this.inorder(node.right, result);
}
return result;
}
preorder(node = this.root, result = []) {
if (node) {
result.push(node.val);
this.preorder(node.left, result);
this.preorder(node.right, result);
}
return result;
}
postorder(node = this.root, result = []) {
if (node) {
this.postorder(node.left, result);
this.postorder(node.right, result);
result.push(node.val);
}
return result;
}
}
// 测试
const bst = new BinarySearchTree();
bst.insert(8);
bst.insert(3);
bst.insert(10);
bst.insert(1);
bst.insert(6);
bst.insert(14);
console.log('中序遍历:', bst.inorder()); // [1, 3, 6, 8, 10, 14]
console.log('查找 6:', bst.search(6)); // true
console.log('查找 5:', bst.search(5)); // false
bst.delete(3);
console.log('删除 3 后:', bst.inorder()); // [1, 6, 8, 10, 14]class TreeNode:
def __init__(self, val=0):
self.val = val
self.left = None
self.right = None
class BinarySearchTree:
def __init__(self):
self.root = None
def insert(self, val):
if not self.root:
self.root = TreeNode(val)
return
current = self.root
while True:
if val < current.val:
if not current.left:
current.left = TreeNode(val)
return
current = current.left
elif val > current.val:
if not current.right:
current.right = TreeNode(val)
return
current = current.right
else:
return # 不插入重复值
def search(self, val):
current = self.root
while current:
if val == current.val:
return True
elif val < current.val:
current = current.left
else:
current = current.right
return False
def delete(self, val):
def delete_node(node, val):
if not node:
return None
if val < node.val:
node.left = delete_node(node.left, val)
elif val > node.val:
node.right = delete_node(node.right, val)
else:
# 找到要删除的节点
if not node.left:
return node.right
if not node.right:
return node.left
# 有两个子节点,找右子树最小值
min_node = node.right
while min_node.left:
min_node = min_node.left
node.val = min_node.val
node.right = delete_node(node.right, min_node.val)
return node
self.root = delete_node(self.root, val)
def inorder(self, node=None, result=None):
if result is None:
result = []
if node is None:
node = self.root
if node:
self.inorder(node.left, result)
result.append(node.val)
self.inorder(node.right, result)
return result
def preorder(self, node=None, result=None):
if result is None:
result = []
if node is None:
node = self.root
if node:
result.append(node.val)
self.preorder(node.left, result)
self.preorder(node.right, result)
return result
def postorder(self, node=None, result=None):
if result is None:
result = []
if node is None:
node = self.root
if node:
self.postorder(node.left, result)
self.postorder(node.right, result)
result.append(node.val)
return result
# 测试
bst = BinarySearchTree()
bst.insert(8)
bst.insert(3)
bst.insert(10)
bst.insert(1)
bst.insert(6)
bst.insert(14)
print('中序遍历:', bst.inorder()) # [1, 3, 6, 8, 10, 14]
print('查找 6:', bst.search(6)) # True
print('查找 5:', bst.search(5)) # False
bst.delete(3)
print('删除 3 后:', bst.inorder()) # [1, 6, 8, 10, 14]package main
import "fmt"
type TreeNode struct {
Val int
Left *TreeNode
Right *TreeNode
}
type BinarySearchTree struct {
Root *TreeNode
}
func NewBinarySearchTree() *BinarySearchTree {
return &BinarySearchTree{}
}
func (bst *BinarySearchTree) Insert(val int) {
if bst.Root == nil {
bst.Root = &TreeNode{Val: val}
return
}
current := bst.Root
for {
if val < current.Val {
if current.Left == nil {
current.Left = &TreeNode{Val: val}
return
}
current = current.Left
} else if val > current.Val {
if current.Right == nil {
current.Right = &TreeNode{Val: val}
return
}
current = current.Right
} else {
return // 不插入重复值
}
}
}
func (bst *BinarySearchTree) Search(val int) bool {
current := bst.Root
for current != nil {
if val == current.Val {
return true
} else if val < current.Val {
current = current.Left
} else {
current = current.Right
}
}
return false
}
func (bst *BinarySearchTree) Delete(val int) {
bst.Root = deleteNode(bst.Root, val)
}
func deleteNode(node *TreeNode, val int) *TreeNode {
if node == nil {
return nil
}
if val < node.Val {
node.Left = deleteNode(node.Left, val)
} else if val > node.Val {
node.Right = deleteNode(node.Right, val)
} else {
// 找到要删除的节点
if node.Left == nil {
return node.Right
}
if node.Right == nil {
return node.Left
}
// 有两个子节点,找右子树最小值
minNode := node.Right
for minNode.Left != nil {
minNode = minNode.Left
}
node.Val = minNode.Val
node.Right = deleteNode(node.Right, minNode.Val)
}
return node
}
func (bst *BinarySearchTree) Inorder() []int {
result := []int{}
var inorder func(*TreeNode)
inorder = func(node *TreeNode) {
if node != nil {
inorder(node.Left)
result = append(result, node.Val)
inorder(node.Right)
}
}
inorder(bst.Root)
return result
}
func (bst *BinarySearchTree) Preorder() []int {
result := []int{}
var preorder func(*TreeNode)
preorder = func(node *TreeNode) {
if node != nil {
result = append(result, node.Val)
preorder(node.Left)
preorder(node.Right)
}
}
preorder(bst.Root)
return result
}
func (bst *BinarySearchTree) Postorder() []int {
result := []int{}
var postorder func(*TreeNode)
postorder = func(node *TreeNode) {
if node != nil {
postorder(node.Left)
postorder(node.Right)
result = append(result, node.Val)
}
}
postorder(bst.Root)
return result
}
func main() {
bst := NewBinarySearchTree()
bst.Insert(8)
bst.Insert(3)
bst.Insert(10)
bst.Insert(1)
bst.Insert(6)
bst.Insert(14)
fmt.Println("中序遍历:", bst.Inorder()) // [1 3 6 8 10 14]
fmt.Println("查找 6:", bst.Search(6)) // true
fmt.Println("查找 5:", bst.Search(5)) // false
bst.Delete(3)
fmt.Println("删除 3 后:", bst.Inorder()) // [1 6 8 10 14]
}⚖️ 5. AVL 树/红黑树 (AVL Tree / Red-Black Tree)
数据结构
AVL 树 是最早的自平衡二叉搜索树,任何节点的左右子树高度差不超过 1。通过旋转操作保持平衡,确保 O(log n) 的操作复杂度。
红黑树 是一种近似平衡的二叉搜索树,通过节点颜色(红/黑)和特定规则保证从根到叶子的最长路径不超过最短路径的两倍。Java TreeMap、HashMap(JDK8+)、C++ STL map 都使用红黑树。
⚖️ 旋转操作示意
A B
/ / \
B → X A
/ \ / \
X Y Y Z
A B
\ / \
B → A Z
/ \ /
Y Z X
• 节点非红即黑
• 根节点为黑
• 红节点子节点必黑
• 根到叶路径黑节点数相同
✅ 优点
- 严格保证 O(log n) 复杂度
- AVL 树更平衡,查找更快
- 红黑树旋转少,插入删除更快
- 广泛应用于标准库
❌ 缺点
- 实现复杂
- AVL 树维护平衡开销大
- 红黑树不如 AVL 平衡
- 额外空间存储高度/颜色
代码实现
class AVLNode {
constructor(val) {
this.val = val;
this.left = null;
this.right = null;
this.height = 1;
}
}
class AVLTree {
constructor() {
this.root = null;
}
getHeight(node) {
return node ? node.height : 0;
}
getBalance(node) {
return node ? this.getHeight(node.left) - this.getHeight(node.right) : 0;
}
updateHeight(node) {
node.height = 1 + Math.max(this.getHeight(node.left), this.getHeight(node.right));
}
rotateRight(y) {
const x = y.left;
const T2 = x.right;
x.right = y;
y.left = T2;
this.updateHeight(y);
this.updateHeight(x);
return x;
}
rotateLeft(x) {
const y = x.right;
const T2 = y.left;
y.left = x;
x.right = T2;
this.updateHeight(x);
this.updateHeight(y);
return y;
}
insert(val) {
const insertNode = (node, val) => {
if (!node) return new AVLNode(val);
if (val < node.val) {
node.left = insertNode(node.left, val);
} else if (val > node.val) {
node.right = insertNode(node.right, val);
} else {
return node; // 不插入重复值
}
this.updateHeight(node);
const balance = this.getBalance(node);
// LL
if (balance > 1 && val < node.left.val) {
return this.rotateRight(node);
}
// RR
if (balance < -1 && val > node.right.val) {
return this.rotateLeft(node);
}
// LR
if (balance > 1 && val > node.left.val) {
node.left = this.rotateLeft(node.left);
return this.rotateRight(node);
}
// RL
if (balance < -1 && val < node.right.val) {
node.right = this.rotateRight(node.right);
return this.rotateLeft(node);
}
return node;
};
this.root = insertNode(this.root, val);
}
search(val) {
let current = this.root;
while (current) {
if (val === current.val) return true;
current = val < current.val ? current.left : current.right;
}
return false;
}
inorder(node = this.root, result = []) {
if (node) {
this.inorder(node.left, result);
result.push(node.val);
this.inorder(node.right, result);
}
return result;
}
}
// 测试
const avl = new AVLTree();
avl.insert(10);
avl.insert(20);
avl.insert(30); // RR 失衡,触发左旋
avl.insert(5);
avl.insert(15); // LR 失衡,触发左右旋
console.log('中序遍历:', avl.inorder()); // [5, 10, 15, 20, 30]
console.log('查找 15:', avl.search(15)); // trueclass AVLNode:
def __init__(self, val):
self.val = val
self.left = None
self.right = None
self.height = 1
class AVLTree:
def __init__(self):
self.root = None
def get_height(self, node):
return node.height if node else 0
def get_balance(self, node):
return self.get_height(node.left) - self.get_height(node.right) if node else 0
def update_height(self, node):
node.height = 1 + max(self.get_height(node.left), self.get_height(node.right))
def rotate_right(self, y):
x = y.left
T2 = x.right
x.right = y
y.left = T2
self.update_height(y)
self.update_height(x)
return x
def rotate_left(self, x):
y = x.right
T2 = y.left
y.left = x
x.right = T2
self.update_height(x)
self.update_height(y)
return y
def insert(self, val):
def insert_node(node, val):
if not node:
return AVLNode(val)
if val < node.val:
node.left = insert_node(node.left, val)
elif val > node.val:
node.right = insert_node(node.right, val)
else:
return node
self.update_height(node)
balance = self.get_balance(node)
# LL
if balance > 1 and val < node.left.val:
return self.rotate_right(node)
# RR
if balance < -1 and val > node.right.val:
return self.rotate_left(node)
# LR
if balance > 1 and val > node.left.val:
node.left = self.rotate_left(node.left)
return self.rotate_right(node)
# RL
if balance < -1 and val < node.right.val:
node.right = self.rotate_right(node.right)
return self.rotate_left(node)
return node
self.root = insert_node(self.root, val)
def search(self, val):
current = self.root
while current:
if val == current.val:
return True
current = current.left if val < current.val else current.right
return False
def inorder(self, node=None, result=None):
if result is None:
result = []
if node is None:
node = self.root
if node:
self.inorder(node.left, result)
result.append(node.val)
self.inorder(node.right, result)
return result
# 测试
avl = AVLTree()
avl.insert(10)
avl.insert(20)
avl.insert(30) # RR 失衡,触发左旋
avl.insert(5)
avl.insert(15) # LR 失衡,触发左右旋
print('中序遍历:', avl.inorder()) # [5, 10, 15, 20, 30]
print('查找 15:', avl.search(15)) # Truepackage main
import "fmt"
type AVLNode struct {
Val int
Left *AVLNode
Right *AVLNode
Height int
}
type AVLTree struct {
Root *AVLNode
}
func NewAVLTree() *AVLTree {
return &AVLTree{}
}
func (t *AVLTree) GetHeight(node *AVLNode) int {
if node == nil {
return 0
}
return node.Height
}
func (t *AVLTree) GetBalance(node *AVLNode) int {
if node == nil {
return 0
}
return t.GetHeight(node.Left) - t.GetHeight(node.Right)
}
func (t *AVLTree) UpdateHeight(node *AVLNode) {
node.Height = 1 + max(t.GetHeight(node.Left), t.GetHeight(node.Right))
}
func (t *AVLTree) RotateRight(y *AVLNode) *AVLNode {
x := y.Left
T2 := x.Right
x.Right = y
y.Left = T2
t.UpdateHeight(y)
t.UpdateHeight(x)
return x
}
func (t *AVLTree) RotateLeft(x *AVLNode) *AVLNode {
y := x.Right
T2 := y.Left
y.Left = x
x.Right = T2
t.UpdateHeight(x)
t.UpdateHeight(y)
return y
}
func (t *AVLTree) Insert(val int) {
t.Root = t.insertNode(t.Root, val)
}
func (t *AVLTree) insertNode(node *AVLNode, val int) *AVLNode {
if node == nil {
return &AVLNode{Val: val, Height: 1}
}
if val < node.Val {
node.Left = t.insertNode(node.Left, val)
} else if val > node.Val {
node.Right = t.insertNode(node.Right, val)
} else {
return node
}
t.UpdateHeight(node)
balance := t.GetBalance(node)
// LL
if balance > 1 && val < node.Left.Val {
return t.RotateRight(node)
}
// RR
if balance < -1 && val > node.Right.Val {
return t.RotateLeft(node)
}
// LR
if balance > 1 && val > node.Left.Val {
node.Left = t.RotateLeft(node.Left)
return t.RotateRight(node)
}
// RL
if balance < -1 && val < node.Right.Val {
node.Right = t.RotateRight(node.Right)
return t.RotateLeft(node)
}
return node
}
func (t *AVLTree) Search(val int) bool {
current := t.Root
for current != nil {
if val == current.Val {
return true
} else if val < current.Val {
current = current.Left
} else {
current = current.Right
}
}
return false
}
func (t *AVLTree) Inorder() []int {
result := []int{}
var inorder func(*AVLNode)
inorder = func(node *AVLNode) {
if node != nil {
inorder(node.Left)
result = append(result, node.Val)
inorder(node.Right)
}
}
inorder(t.Root)
return result
}
func max(a, b int) int {
if a > b {
return a
}
return b
}
func main() {
tree := NewAVLTree()
tree.Insert(10)
tree.Insert(20)
tree.Insert(30) // RR 失衡
tree.Insert(5)
tree.Insert(15) // LR 失衡
fmt.Println("中序遍历:", tree.Inorder()) // [5 10 15 20 30]
fmt.Println("查找 15:", tree.Search(15)) // true
}📐 6. 堆 (Heap)
数据结构
堆是一种特殊的完全二叉树,分为最大堆和最小堆。
最大堆:每个节点的值都大于或等于其子节点的值,根节点是最大值。
最小堆:每个节点的值都小于或等于其子节点的值,根节点是最小值。
堆通常用数组实现,对于索引 i 的节点:父节点为 (i-1)/2,左子节点为 2i+1,右子节点为 2i+2。
📐 堆结构示意
50
/ \
30 40
/ \ / \
10 20 35 5
[50, 30, 40, 10, 20, 35, 5]
索引关系:
i=0: 50 (根)
i=1: 30 (左子)
i=2: 40 (右子)
✅ 优点
- O(1) 访问最大/最小值
- 插入删除效率高
- 空间效率高(数组实现)
- 堆排序原地排序
❌ 缺点
- 查找任意元素 O(n)
- 只支持访问最大/最小值
- 不支持随机访问
- 删除任意元素复杂
代码实现
class MaxHeap {
constructor() {
this.heap = [];
}
insert(val) {
this.heap.push(val);
this.bubbleUp(this.heap.length - 1);
}
bubbleUp(index) {
while (index > 0) {
const parentIndex = Math.floor((index - 1) / 2);
if (this.heap[index] <= this.heap[parentIndex]) break;
[this.heap[index], this.heap[parentIndex]] =
[this.heap[parentIndex], this.heap[index]];
index = parentIndex;
}
}
extractMax() {
if (this.heap.length === 0) return null;
if (this.heap.length === 1) return this.heap.pop();
const max = this.heap[0];
this.heap[0] = this.heap.pop();
this.bubbleDown(0);
return max;
}
bubbleDown(index) {
const length = this.heap.length;
while (true) {
const leftChild = 2 * index + 1;
const rightChild = 2 * index + 2;
let largest = index;
if (leftChild < length && this.heap[leftChild] > this.heap[largest]) {
largest = leftChild;
}
if (rightChild < length && this.heap[rightChild] > this.heap[largest]) {
largest = rightChild;
}
if (largest === index) break;
[this.heap[index], this.heap[largest]] =
[this.heap[largest], this.heap[index]];
index = largest;
}
}
peek() {
return this.heap.length > 0 ? this.heap[0] : null;
}
size() {
return this.heap.length;
}
isEmpty() {
return this.heap.length === 0;
}
// 堆排序
static heapSort(arr) {
const heap = new MaxHeap();
arr.forEach(val => heap.insert(val));
const result = [];
while (!heap.isEmpty()) {
result.unshift(heap.extractMax());
}
return result;
}
}
// 测试
const heap = new MaxHeap();
heap.insert(10);
heap.insert(20);
heap.insert(15);
heap.insert(30);
heap.insert(5);
console.log('最大堆:', heap.heap); // [30, 20, 15, 10, 5]
console.log('最大值:', heap.extractMax()); // 30
console.log('堆排序:', MaxHeap.heapSort([3, 1, 4, 1, 5, 9, 2, 6])); // [1,1,2,3,4,5,6,9]class MaxHeap:
def __init__(self):
self.heap = []
def insert(self, val):
self.heap.append(val)
self._bubble_up(len(self.heap) - 1)
def _bubble_up(self, index):
while index > 0:
parent_index = (index - 1) // 2
if self.heap[index] <= self.heap[parent_index]:
break
self.heap[index], self.heap[parent_index] = \
self.heap[parent_index], self.heap[index]
index = parent_index
def extract_max(self):
if len(self.heap) == 0:
return None
if len(self.heap) == 1:
return self.heap.pop()
max_val = self.heap[0]
self.heap[0] = self.heap.pop()
self._bubble_down(0)
return max_val
def _bubble_down(self, index):
length = len(self.heap)
while True:
left_child = 2 * index + 1
right_child = 2 * index + 2
largest = index
if left_child < length and self.heap[left_child] > self.heap[largest]:
largest = left_child
if right_child < length and self.heap[right_child] > self.heap[largest]:
largest = right_child
if largest == index:
break
self.heap[index], self.heap[largest] = \
self.heap[largest], self.heap[index]
index = largest
def peek(self):
return self.heap[0] if self.heap else None
def size(self):
return len(self.heap)
def is_empty(self):
return len(self.heap) == 0
@staticmethod
def heap_sort(arr):
heap = MaxHeap()
for val in arr:
heap.insert(val)
result = []
while not heap.is_empty():
result.insert(0, heap.extract_max())
return result
# 测试
heap = MaxHeap()
heap.insert(10)
heap.insert(20)
heap.insert(15)
heap.insert(30)
heap.insert(5)
print('最大堆:', heap.heap) # [30, 20, 15, 10, 5]
print('最大值:', heap.extract_max()) # 30
print('堆排序:', MaxHeap.heap_sort([3, 1, 4, 1, 5, 9, 2, 6])) # [1,1,2,3,4,5,6,9]package main
import "fmt"
type MaxHeap struct {
heap []int
}
func NewMaxHeap() *MaxHeap {
return &MaxHeap{heap: []int{}}
}
func (h *MaxHeap) Insert(val int) {
h.heap = append(h.heap, val)
h.bubbleUp(len(h.heap) - 1)
}
func (h *MaxHeap) bubbleUp(index int) {
for index > 0 {
parentIndex := (index - 1) / 2
if h.heap[index] <= h.heap[parentIndex] {
break
}
h.heap[index], h.heap[parentIndex] = h.heap[parentIndex], h.heap[index]
index = parentIndex
}
}
func (h *MaxHeap) ExtractMax() int {
if len(h.heap) == 0 {
return -1
}
if len(h.heap) == 1 {
val := h.heap[0]
h.heap = h.heap[:0]
return val
}
maxVal := h.heap[0]
h.heap[0] = h.heap[len(h.heap)-1]
h.heap = h.heap[:len(h.heap)-1]
h.bubbleDown(0)
return maxVal
}
func (h *MaxHeap) bubbleDown(index int) {
length := len(h.heap)
for {
leftChild := 2*index + 1
rightChild := 2*index + 2
largest := index
if leftChild < length && h.heap[leftChild] > h.heap[largest] {
largest = leftChild
}
if rightChild < length && h.heap[rightChild] > h.heap[largest] {
largest = rightChild
}
if largest == index {
break
}
h.heap[index], h.heap[largest] = h.heap[largest], h.heap[index]
index = largest
}
}
func (h *MaxHeap) Peek() int {
if len(h.heap) == 0 {
return -1
}
return h.heap[0]
}
func (h *MaxHeap) Size() int {
return len(h.heap)
}
func (h *MaxHeap) IsEmpty() bool {
return len(h.heap) == 0
}
// 优先队列应用
type PriorityQueue struct {
heap *MaxHeap
}
func NewPriorityQueue() *PriorityQueue {
return &PriorityQueue{heap: NewMaxHeap()}
}
func (pq *PriorityQueue) Enqueue(val int) {
pq.heap.Insert(val)
}
func (pq *PriorityQueue) Dequeue() int {
return pq.heap.ExtractMax()
}
func (pq *PriorityQueue) IsEmpty() bool {
return pq.heap.IsEmpty()
}
// 堆排序
func HeapSort(arr []int) []int {
h := NewMaxHeap()
for _, val := range arr {
h.Insert(val)
}
result := make([]int, 0, len(arr))
for !h.IsEmpty() {
result = append([]int{h.ExtractMax()}, result...)
}
return result
}
func main() {
heap := NewMaxHeap()
heap.Insert(10)
heap.Insert(20)
heap.Insert(15)
heap.Insert(30)
heap.Insert(5)
fmt.Println("最大堆:", heap.heap) // [30 20 15 10 5]
fmt.Println("最大值:", heap.ExtractMax()) // 30
fmt.Println("堆排序:", HeapSort([]int{3, 1, 4, 1, 5, 9, 2, 6})) // [1 1 2 3 4 5 6 9]
// 优先队列示例
pq := NewPriorityQueue()
pq.Enqueue(100)
pq.Enqueue(50)
pq.Enqueue(200)
for !pq.IsEmpty() {
fmt.Print(pq.Dequeue(), " ") // 200 100 50
}
}🕸️ 7. 图 (Graph)
数据结构
图是由顶点(节点)和边组成的数据结构,用于表示对象之间的关系。
图的类型:
- 有向图:边有方向,如社交网络关注关系
- 无向图:边无方向,如好友关系
- 加权图:边有权重,如地图中的距离
存储方式:
- 邻接矩阵:二维数组,适合稠密图
- 邻接表:数组 + 链表,适合稀疏图
🕸️ 图结构示意
0 —— 1
| / |
| / |
2 —— 3 —— 4
0: [1, 2]
1: [0, 2, 3]
2: [0, 1, 3]
3: [1, 2, 4]
4: [3]
0 1 2 3 4
0 0 1 1 0 0
1 1 0 1 1 0
2 1 1 0 1 0
3 0 1 1 0 1
4 0 0 0 1 0
✅ 优点
- 能表示复杂关系
- 应用广泛(社交网络、地图导航)
- 邻接表空间效率高
- 支持多种遍历算法
❌ 缺点
- 邻接矩阵空间 O(V²)
- 某些算法复杂度高
- 实现相对复杂
- 需要处理环路
代码实现
class Graph {
constructor(isDirected = false) {
this.isDirected = isDirected;
this.adjList = new Map();
}
addVertex(vertex) {
if (!this.adjList.has(vertex)) {
this.adjList.set(vertex, []);
}
}
addEdge(vertex1, vertex2, weight = 1) {
this.addVertex(vertex1);
this.addVertex(vertex2);
this.adjList.get(vertex1).push({ vertex: vertex2, weight });
if (!this.isDirected) {
this.adjList.get(vertex2).push({ vertex: vertex1, weight });
}
}
bfs(start) {
const visited = new Set();
const queue = [start];
const result = [];
visited.add(start);
while (queue.length > 0) {
const vertex = queue.shift();
result.push(vertex);
for (const neighbor of this.adjList.get(vertex)) {
if (!visited.has(neighbor.vertex)) {
visited.add(neighbor.vertex);
queue.push(neighbor.vertex);
}
}
}
return result;
}
dfs(start) {
const visited = new Set();
const result = [];
const dfsHelper = (vertex) => {
visited.add(vertex);
result.push(vertex);
for (const neighbor of this.adjList.get(vertex)) {
if (!visited.has(neighbor.vertex)) {
dfsHelper(neighbor.vertex);
}
}
};
dfsHelper(start);
return result;
}
print() {
for (const [vertex, neighbors] of this.adjList) {
console.log(`${vertex}: ${neighbors.map(n => `${n.vertex}(${n.weight})`).join(', ')}`);
}
}
}
// 测试
const graph = new Graph(false); // 无向图
graph.addEdge(0, 1, 1);
graph.addEdge(0, 2, 1);
graph.addEdge(1, 2, 1);
graph.addEdge(1, 3, 1);
graph.addEdge(2, 3, 1);
graph.addEdge(3, 4, 1);
console.log('邻接表:');
graph.print();
console.log('BFS 从 0:', graph.bfs(0)); // [0, 1, 2, 3, 4]
console.log('DFS 从 0:', graph.dfs(0)); // [0, 1, 2, 3, 4]from collections import deque
class Graph:
def __init__(self, is_directed=False):
self.is_directed = is_directed
self.adj_list = {}
def add_vertex(self, vertex):
if vertex not in self.adj_list:
self.adj_list[vertex] = []
def add_edge(self, vertex1, vertex2, weight=1):
self.add_vertex(vertex1)
self.add_vertex(vertex2)
self.adj_list[vertex1].append((vertex2, weight))
if not self.is_directed:
self.adj_list[vertex2].append((vertex1, weight))
def bfs(self, start):
visited = set()
queue = deque([start])
result = []
visited.add(start)
while queue:
vertex = queue.popleft()
result.append(vertex)
for neighbor, _ in self.adj_list[vertex]:
if neighbor not in visited:
visited.add(neighbor)
queue.append(neighbor)
return result
def dfs(self, start):
visited = set()
result = []
def dfs_helper(vertex):
visited.add(vertex)
result.append(vertex)
for neighbor, _ in self.adj_list[vertex]:
if neighbor not in visited:
dfs_helper(neighbor)
dfs_helper(start)
return result
def print(self):
for vertex, neighbors in self.adj_list.items():
print(f"{vertex}: {', '.join(f'{n}({w})' for n, w in neighbors)}")
# 测试
graph = Graph(is_directed=False) # 无向图
graph.add_edge(0, 1, 1)
graph.add_edge(0, 2, 1)
graph.add_edge(1, 2, 1)
graph.add_edge(1, 3, 1)
graph.add_edge(2, 3, 1)
graph.add_edge(3, 4, 1)
print('邻接表:')
graph.print()
print('BFS 从 0:', graph.bfs(0)) # [0, 1, 2, 3, 4]
print('DFS 从 0:', graph.dfs(0)) # [0, 1, 2, 3, 4]package main
import "fmt"
type Edge struct {
Vertex int
Weight int
}
type Graph struct {
IsDirected bool
AdjList map[int][]Edge
}
func NewGraph(isDirected bool) *Graph {
return &Graph{
IsDirected: isDirected,
AdjList: make(map[int][]Edge),
}
}
func (g *Graph) AddVertex(vertex int) {
if _, exists := g.AdjList[vertex]; !exists {
g.AdjList[vertex] = []Edge{}
}
}
func (g *Graph) AddEdge(vertex1, vertex2, weight int) {
g.AddVertex(vertex1)
g.AddVertex(vertex2)
g.AdjList[vertex1] = append(g.AdjList[vertex1], Edge{Vertex: vertex2, Weight: weight})
if !g.IsDirected {
g.AdjList[vertex2] = append(g.AdjList[vertex2], Edge{Vertex: vertex1, Weight: weight})
}
}
func (g *Graph) BFS(start int) []int {
visited := make(map[int]bool)
queue := []int{start}
result := []int{}
visited[start] = true
for len(queue) > 0 {
vertex := queue[0]
queue = queue[1:]
result = append(result, vertex)
for _, neighbor := range g.AdjList[vertex] {
if !visited[neighbor.Vertex] {
visited[neighbor.Vertex] = true
queue = append(queue, neighbor.Vertex)
}
}
}
return result
}
func (g *Graph) DFS(start int) []int {
visited := make(map[int]bool)
result := []int{}
var dfsHelper func(int)
dfsHelper = func(vertex int) {
visited[vertex] = true
result = append(result, vertex)
for _, neighbor := range g.AdjList[vertex] {
if !visited[neighbor.Vertex] {
dfsHelper(neighbor.Vertex)
}
}
}
dfsHelper(start)
return result
}
func (g *Graph) Print() {
for vertex, neighbors := range g.AdjList {
fmt.Printf("%d: ", vertex)
for i, n := range neighbors {
if i > 0 {
fmt.Print(", ")
}
fmt.Printf("%d(%d)", n.Vertex, n.Weight)
}
fmt.Println()
}
}
func main() {
graph := NewGraph(false) // 无向图
graph.AddEdge(0, 1, 1)
graph.AddEdge(0, 2, 1)
graph.AddEdge(1, 2, 1)
graph.AddEdge(1, 3, 1)
graph.AddEdge(2, 3, 1)
graph.AddEdge(3, 4, 1)
fmt.Println("邻接表:")
graph.Print()
fmt.Println("BFS 从 0:", graph.BFS(0)) // [0 1 2 3 4]
fmt.Println("DFS 从 0:", graph.DFS(0)) // [0 1 2 3 4]
}🤖 AI 时代的数据结构
🔍 向量索引
用于高效检索高维向量的数据结构,是 AI 大模型、语义搜索、推荐系统的核心基础设施。
🌸 布隆过滤器
空间效率极高的概率型数据结构,用于判断元素是否存在于集合中。适用于大规模数据去重和缓存穿透防护。
🪜 跳表
基于链表的高效搜索数据结构,通过多层索引实现 O(log n) 的查找复杂度。Redis 的有序集合使用跳表实现。
📊 线段树
用于高效处理区间查询和更新操作的数据结构,广泛应用于计算几何和区间统计问题。
💾 LRU 缓存
最近最少使用缓存,结合哈希表和双向链表实现 O(1) 的访问和更新。广泛应用于缓存系统和内存管理。
📊 4. 线段树 (Segment Tree)
数据结构
线段树是一种二叉树数据结构,用于高效处理区间查询和更新操作。每个节点代表一个区间,叶子节点代表单个元素。
📊 线段树结构示意
[0,7]
/ \
[0,3] [4,7]
/ \ / \
[0,1] [2,3] [4,5] [6,7]
• 区间求和
• 区间最值
• 区间更新
• 区间染色
✅ 优点
- 区间查询高效
- 支持动态更新
- 实现相对简单
- 应用广泛
❌ 缺点
- 空间开销较大
- 只适用于可合并操作
- 常数因子较大
- 递归实现栈开销
代码实现
class SegmentTree {
constructor(arr) {
this.n = arr.length;
this.tree = new Array(4 * this.n).fill(0);
this.build(arr, 0, 0, this.n - 1);
}
build(arr, node, start, end) {
if (start === end) {
this.tree[node] = arr[start];
} else {
const mid = Math.floor((start + end) / 2);
const leftNode = 2 * node + 1;
const rightNode = 2 * node + 2;
this.build(arr, leftNode, start, mid);
this.build(arr, rightNode, mid + 1, end);
this.tree[node] = this.tree[leftNode] + this.tree[rightNode];
}
}
query(l, r) {
return this._query(0, 0, this.n - 1, l, r);
}
_query(node, start, end, l, r) {
if (r < start || end < l) return 0;
if (l <= start && end <= r) return this.tree[node];
const mid = Math.floor((start + end) / 2);
const leftSum = this._query(2 * node + 1, start, mid, l, r);
const rightSum = this._query(2 * node + 2, mid + 1, end, l, r);
return leftSum + rightSum;
}
update(idx, val) {
this._update(0, 0, this.n - 1, idx, val);
}
_update(node, start, end, idx, val) {
if (start === end) {
this.tree[node] = val;
} else {
const mid = Math.floor((start + end) / 2);
if (start <= idx && idx <= mid) {
this._update(2 * node + 1, start, mid, idx, val);
} else {
this._update(2 * node + 2, mid + 1, end, idx, val);
}
this.tree[node] = this.tree[2 * node + 1] + this.tree[2 * node + 2];
}
}
}
// 测试
const arr = [1, 3, 5, 7, 9, 11];
const st = new SegmentTree(arr);
console.log('区间和 [1,3]:', st.query(1, 3)); // 15
st.update(2, 10);
console.log('更新后 [1,3]:', st.query(1, 3)); // 20class SegmentTree:
def __init__(self, arr):
self.n = len(arr)
self.tree = [0] * (4 * self.n)
self._build(arr, 0, 0, self.n - 1)
def _build(self, arr, node, start, end):
if start == end:
self.tree[node] = arr[start]
else:
mid = (start + end) // 2
left_node = 2 * node + 1
right_node = 2 * node + 2
self._build(arr, left_node, start, mid)
self._build(arr, right_node, mid + 1, end)
self.tree[node] = self.tree[left_node] + self.tree[right_node]
def query(self, l, r):
return self._query(0, 0, self.n - 1, l, r)
def _query(self, node, start, end, l, r):
if r < start or end < l:
return 0
if l <= start and end <= r:
return self.tree[node]
mid = (start + end) // 2
left_sum = self._query(2 * node + 1, start, mid, l, r)
right_sum = self._query(2 * node + 2, mid + 1, end, l, r)
return left_sum + right_sum
def update(self, idx, val):
self._update(0, 0, self.n - 1, idx, val)
def _update(self, node, start, end, idx, val):
if start == end:
self.tree[node] = val
else:
mid = (start + end) // 2
if start <= idx <= mid:
self._update(2 * node + 1, start, mid, idx, val)
else:
self._update(2 * node + 2, mid + 1, end, idx, val)
self.tree[node] = self.tree[2 * node + 1] + self.tree[2 * node + 2]
# 测试
arr = [1, 3, 5, 7, 9, 11]
st = SegmentTree(arr)
print(f"区间和 [1,3]: {st.query(1, 3)}") # 15
st.update(2, 10)
print(f"更新后 [1,3]: {st.query(1, 3)}") # 20package main
import "fmt"
type SegmentTree struct {
n int
tree []int
}
func NewSegmentTree(arr []int) *SegmentTree {
n := len(arr)
st := &SegmentTree{
n: n,
tree: make([]int, 4*n),
}
st.build(arr, 0, 0, n-1)
return st
}
func (st *SegmentTree) build(arr []int, node, start, end int) {
if start == end {
st.tree[node] = arr[start]
} else {
mid := (start + end) / 2
leftNode := 2*node + 1
rightNode := 2*node + 2
st.build(arr, leftNode, start, mid)
st.build(arr, rightNode, mid+1, end)
st.tree[node] = st.tree[leftNode] + st.tree[rightNode]
}
}
func (st *SegmentTree) Query(l, r int) int {
return st.query(0, 0, st.n-1, l, r)
}
func (st *SegmentTree) query(node, start, end, l, r int) int {
if r < start || end < l {
return 0
}
if l <= start && end <= r {
return st.tree[node]
}
mid := (start + end) / 2
leftSum := st.query(2*node+1, start, mid, l, r)
rightSum := st.query(2*node+2, mid+1, end, l, r)
return leftSum + rightSum
}
func (st *SegmentTree) Update(idx, val int) {
st.update(0, 0, st.n-1, idx, val)
}
func (st *SegmentTree) update(node, start, end, idx, val int) {
if start == end {
st.tree[node] = val
} else {
mid := (start + end) / 2
if start <= idx && idx <= mid {
st.update(2*node+1, start, mid, idx, val)
} else {
st.update(2*node+2, mid+1, end, idx, val)
}
st.tree[node] = st.tree[2*node+1] + st.tree[2*node+2]
}
}
func main() {
arr := []int{1, 3, 5, 7, 9, 11}
st := NewSegmentTree(arr)
fmt.Printf("区间和 [1,3]: %d\n", st.Query(1, 3)) // 15
st.Update(2, 10)
fmt.Printf("更新后 [1,3]: %d\n", st.Query(1, 3)) // 20
}🔍 5. 向量索引 (Vector Index)
数据结构
向量索引用于高效检索高维向量的数据结构,通过计算向量之间的相似度(如余弦相似度)来找到最相似的向量。是 AI 大模型、语义搜索、推荐系统的核心基础设施。
🔍 向量检索流程
[0.1, 0.5, -0.3, ...]
余弦相似度 / 欧氏距离
最相似的 K 个向量
✅ 优点
- 支持语义搜索
- 适用于 AI 应用
- 实现简单
- 可扩展到近似搜索
❌ 缺点
- 精确搜索慢
- 高维计算开销大
- 需要向量嵌入
- 内存占用大
代码实现
class VectorIndex {
constructor() {
this.vectors = [];
this.metadata = [];
}
insert(id, vector, metadata = {}) {
this.vectors.push({ id, vector: this._normalize(vector) });
this.metadata.push({ id, metadata });
}
_normalize(vector) {
const norm = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
return norm > 0 ? vector.map(v => v / norm) : vector;
}
_dotProduct(a, b) {
return a.reduce((sum, val, i) => sum + val * b[i], 0);
}
search(queryVector, topK = 5) {
const normalized = this._normalize(queryVector);
const scores = this.vectors.map((item, idx) => ({
id: item.id,
score: this._dotProduct(normalized, item.vector),
metadata: this.metadata[idx].metadata
}));
scores.sort((a, b) => b.score - a.score);
return scores.slice(0, topK);
}
batchInsert(items) {
for (const { id, vector, metadata } of items) {
this.insert(id, vector, metadata);
}
}
}
// 测试
const index = new VectorIndex();
index.insert('doc1', [0.8, 0.2, 0.5], { text: 'AI 技术' });
index.insert('doc2', [0.7, 0.3, 0.4], { text: '机器学习' });
index.insert('doc3', [0.1, 0.9, 0.2], { text: '美食食谱' });
const results = index.search([0.75, 0.25, 0.45], 2);
console.log('搜索结果:', results);import numpy as np
class VectorIndex:
def __init__(self):
self.vectors = []
self.ids = []
self.metadata = []
def insert(self, id, vector, metadata=None):
normalized = self._normalize(vector)
self.vectors.append(normalized)
self.ids.append(id)
self.metadata.append(metadata or {})
def _normalize(self, vector):
arr = np.array(vector, dtype=np.float32)
norm = np.linalg.norm(arr)
return arr / norm if norm > 0 else arr
def _cosine_similarity(self, a, b):
return float(np.dot(a, b))
def search(self, query_vector, top_k=5):
if not self.vectors:
return []
query = self._normalize(query_vector)
scores = []
for i, vec in enumerate(self.vectors):
score = self._cosine_similarity(query, vec)
scores.append({
'id': self.ids[i],
'score': score,
'metadata': self.metadata[i]
})
scores.sort(key=lambda x: x['score'], reverse=True)
return scores[:top_k]
def batch_insert(self, items):
for item in items:
self.insert(item['id'], item['vector'], item.get('metadata'))
# 测试
index = VectorIndex()
index.insert('doc1', [0.8, 0.2, 0.5], {'text': 'AI 技术'})
index.insert('doc2', [0.7, 0.3, 0.4], {'text': '机器学习'})
index.insert('doc3', [0.1, 0.9, 0.2], {'text': '美食食谱'})
results = index.search([0.75, 0.25, 0.45], 2)
for r in results:
print(f"{r['id']}: {r['score']:.4f} - {r['metadata']}")package main
import (
"fmt"
"math"
"sort"
)
type VectorItem struct {
id string
vector []float64
metadata map[string]interface{}
}
type VectorIndex struct {
vectors []VectorItem
}
func NewVectorIndex() *VectorIndex {
return &VectorIndex{
vectors: make([]VectorItem, 0),
}
}
func (vi *VectorIndex) Insert(id string, vector []float64, metadata map[string]interface{}) {
normalized := vi.normalize(vector)
vi.vectors = append(vi.vectors, VectorItem{
id: id,
vector: normalized,
metadata: metadata,
})
}
func (vi *VectorIndex) normalize(vector []float64) []float64 {
norm := 0.0
for _, v := range vector {
norm += v * v
}
norm = math.Sqrt(norm)
if norm == 0 {
return vector
}
result := make([]float64, len(vector))
for i, v := range vector {
result[i] = v / norm
}
return result
}
func (vi *VectorIndex) dotProduct(a, b []float64) float64 {
sum := 0.0
for i := range a {
sum += a[i] * b[i]
}
return sum
}
type SearchResult struct {
ID string
Score float64
Metadata map[string]interface{}
}
func (vi *VectorIndex) Search(queryVector []float64, topK int) []SearchResult {
normalized := vi.normalize(queryVector)
scores := make([]SearchResult, len(vi.vectors))
for i, item := range vi.vectors {
scores[i] = SearchResult{
ID: item.id,
Score: vi.dotProduct(normalized, item.vector),
Metadata: item.metadata,
}
}
sort.Slice(scores, func(i, j int) bool {
return scores[i].Score > scores[j].Score
})
if topK > len(scores) {
topK = len(scores)
}
return scores[:topK]
}
func main() {
index := NewVectorIndex()
index.Insert("doc1", []float64{0.8, 0.2, 0.5}, map[string]interface{}{"text": "AI 技术"})
index.Insert("doc2", []float64{0.7, 0.3, 0.4}, map[string]interface{}{"text": "机器学习"})
index.Insert("doc3", []float64{0.1, 0.9, 0.2}, map[string]interface{}{"text": "美食食谱"})
results := index.Search([]float64{0.75, 0.25, 0.45}, 2)
for _, r := range results {
fmt.Printf("%s: %.4f - %v\n", r.ID, r.Score, r.Metadata)
}
}🌸 6. 布隆过滤器 (Bloom Filter)
数据结构
布隆过滤器是一种空间效率极高的概率型数据结构,用于判断元素是否存在于集合中。它可能产生假阳性(误判元素存在),但不会产生假阴性。
🌸 布隆过滤器工作原理
元素 → k 个哈希函数 → k 个位置置 1
元素 → k 个哈希函数 → 检查 k 个位置
全为 1: 可能存在
有 0: 一定不存在
✅ 优点
- 空间效率极高
- 查询速度快
- 支持大规模数据
- 无假阴性
❌ 缺点
- 可能假阳性
- 不支持删除
- 哈希函数选择敏感
- 无法获取元素列表
代码实现
class BloomFilter {
constructor(size = 1000, hashCount = 3) {
this.size = size;
this.hashCount = hashCount;
this.bitArray = new Array(size).fill(0);
}
_hash(value, seed) {
let hash = 0;
const str = String(value);
for (let i = 0; i < str.length; i++) {
hash = ((hash << 5) - hash) + str.charCodeAt(i);
hash += seed * i;
hash = hash & hash;
}
return Math.abs(hash) % this.size;
}
add(value) {
for (let i = 0; i < this.hashCount; i++) {
const index = this._hash(value, i);
this.bitArray[index] = 1;
}
}
contains(value) {
for (let i = 0; i < this.hashCount; i++) {
const index = this._hash(value, i);
if (this.bitArray[index] === 0) {
return false;
}
}
return true;
}
addBatch(values) {
values.forEach(v => this.add(v));
}
}
// 测试
const bf = new BloomFilter(1000, 3);
['apple', 'banana', 'orange'].forEach(f => bf.add(f));
console.log('apple:', bf.contains('apple')); // true
console.log('banana:', bf.contains('banana')); // true
console.log('grape:', bf.contains('grape')); // false
console.log('watermelon:', bf.contains('watermelon')); // 可能误判import hashlib
class BloomFilter:
def __init__(self, size=1000, hash_count=3):
self.size = size
self.hash_count = hash_count
self.bit_array = [0] * size
def _hash(self, value, seed):
key = f"{seed}:{value}".encode()
hash_val = int(hashlib.md5(key).hexdigest(), 16)
return hash_val % self.size
def add(self, value):
for i in range(self.hash_count):
index = self._hash(value, i)
self.bit_array[index] = 1
def contains(self, value):
for i in range(self.hash_count):
index = self._hash(value, i)
if self.bit_array[index] == 0:
return False
return True
def add_batch(self, values):
for v in values:
self.add(v)
# 测试
bf = BloomFilter(1000, 3)
for fruit in ['apple', 'banana', 'orange']:
bf.add(fruit)
print('apple:', bf.contains('apple')) # True
print('banana:', bf.contains('banana')) # True
print('grape:', bf.contains('grape')) # False
print('watermelon:', bf.contains('watermelon')) # 可能误判package main
import (
"crypto/md5"
"encoding/binary"
"fmt"
)
type BloomFilter struct {
size int
hashCount int
bitArray []bool
}
func NewBloomFilter(size, hashCount int) *BloomFilter {
return &BloomFilter{
size: size,
hashCount: hashCount,
bitArray: make([]bool, size),
}
}
func (bf *BloomFilter) hash(value string, seed uint32) int {
data := fmt.Sprintf("%d:%s", seed, value)
hash := md5.Sum([]byte(data))
hashVal := binary.LittleEndian.Uint32(hash[:4])
return int(hashVal) % bf.size
}
func (bf *BloomFilter) Add(value string) {
for i := 0; i < bf.hashCount; i++ {
index := bf.hash(value, uint32(i))
bf.bitArray[index] = true
}
}
func (bf *BloomFilter) Contains(value string) bool {
for i := 0; i < bf.hashCount; i++ {
index := bf.hash(value, uint32(i))
if !bf.bitArray[index] {
return false
}
}
return true
}
func main() {
bf := NewBloomFilter(1000, 3)
fruits := []string{"apple", "banana", "orange"}
for _, fruit := range fruits {
bf.Add(fruit)
}
fmt.Println("apple:", bf.Contains("apple")) // true
fmt.Println("banana:", bf.Contains("banana")) // true
fmt.Println("grape:", bf.Contains("grape")) // false
fmt.Println("watermelon:", bf.Contains("watermelon")) // 可能误判
}💰 贪心算法 (Greedy Algorithm)
算法思想
贪心算法是一种在每一步选择中都采取当前状态下最优选择的算法思想。它不从整体最优上加以考虑,而是通过局部最优解希望得到全局最优解。
💰 贪心算法示意
当前最优 → 下一步最优 → ... → 希望全局最优
• 贪心选择性质
• 最优子结构
• 无后效性
✅ 优点
- 算法简单直观
- 时间效率高
- 空间复杂度低
- 易于实现
❌ 缺点
- 不总是得到全局最优
- 需要证明正确性
- 适用范围有限
- 可能陷入局部最优
经典应用场景
用最少数量的硬币找出指定金额
物品可分割,按价值密度贪心选择
选择最多互不冲突的活动
构造最优前缀编码树
单源最短路径问题
最小生成树问题
代码实现
🪙 应用 1: 找零问题
假设有面值为 25、10、5、1 的硬币,找出最少数量的硬币组合。
// 找零问题 - 贪心算法
function makeChange(amount) {
const coins = [25, 10, 5, 1];
const result = {};
let count = 0;
for (const coin of coins) {
if (amount === 0) break;
const num = Math.floor(amount / coin);
if (num > 0) {
result[coin] = num;
count += num;
amount -= num * coin;
}
}
return { coins: result, total: count };
}
// 测试
console.log(makeChange(63)); // { coins: { 25: 2, 10: 1, 1: 3 }, total: 6 }
console.log(makeChange(99)); // { coins: { 25: 3, 10: 2, 1: 4 }, total: 9 }# 找零问题 - 贪心算法
def make_change(amount):
coins = [25, 10, 5, 1]
result = {}
count = 0
for coin in coins:
if amount == 0:
break
num = amount // coin
if num > 0:
result[coin] = num
count += num
amount -= num * coin
return {'coins': result, 'total': count}
# 测试
print(make_change(63)) # {'coins': {25: 2, 10: 1, 1: 3}, 'total': 6}
print(make_change(99)) # {'coins': {25: 3, 10: 2, 1: 4}, 'total': 9}package main
import "fmt"
type ChangeResult struct {
Coins map[int]int
Total int
}
func makeChange(amount int) ChangeResult {
coins := []int{25, 10, 5, 1}
result := ChangeResult{Coins: make(map[int]int), Total: 0}
for _, coin := range coins {
if amount == 0 {
break
}
num := amount / coin
if num > 0 {
result.Coins[coin] = num
result.Total += num
amount -= num * coin
}
}
return result
}
func main() {
r1 := makeChange(63)
fmt.Printf("63: %v, 总数:%d\n", r1.Coins, r1.Total)
r2 := makeChange(99)
fmt.Printf("99: %v, 总数:%d\n", r2.Coins, r2.Total)
}📅 应用 2: 活动选择问题
给定多个活动的开始和结束时间,选择最多的互不冲突的活动。
// 活动选择问题 - 贪心算法
function selectActivities(activities) {
// 按结束时间排序
activities.sort((a, b) => a.end - b.end);
const selected = [];
let lastEnd = -Infinity;
for (const activity of activities) {
if (activity.start >= lastEnd) {
selected.push(activity);
lastEnd = activity.end;
}
}
return selected;
}
// 测试
const activities = [
{ name: 'A', start: 1, end: 3 },
{ name: 'B', start: 2, end: 5 },
{ name: 'C', start: 4, end: 7 },
{ name: 'D', start: 6, end: 9 },
{ name: 'E', start: 8, end: 10 }
];
const result = selectActivities(activities);
console.log('选中的活动:', result.map(a => a.name)); // ['A', 'C', 'E']
console.log('数量:', result.length); // 3# 活动选择问题 - 贪心算法
def select_activities(activities):
# 按结束时间排序
activities.sort(key=lambda x: x[1])
selected = []
last_end = float('-inf')
for name, start, end in activities:
if start >= last_end:
selected.append((name, start, end))
last_end = end
return selected
# 测试
activities = [
('A', 1, 3),
('B', 2, 5),
('C', 4, 7),
('D', 6, 9),
('E', 8, 10)
]
result = select_activities(activities)
print('选中的活动:', [a[0] for a in result]) # ['A', 'C', 'E']
print('数量:', len(result)) # 3package main
import (
"fmt"
"sort"
)
type Activity struct {
Name string
Start int
End int
}
func selectActivities(activities []Activity) []Activity {
// 按结束时间排序
sort.Slice(activities, func(i, j int) bool {
return activities[i].End < activities[j].End
})
selected := []Activity{}
lastEnd := -1
for _, activity := range activities {
if activity.Start >= lastEnd {
selected = append(selected, activity)
lastEnd = activity.End
}
}
return selected
}
func main() {
activities := []Activity{
{"A", 1, 3},
{"B", 2, 5},
{"C", 4, 7},
{"D", 6, 9},
{"E", 8, 10},
}
result := selectActivities(activities)
fmt.Print("选中的活动:")
for _, a := range result {
fmt.Print(a.Name, " ")
}
fmt.Println()
fmt.Println("数量:", len(result))
}🎒 应用 3: 分数背包问题
物品可以分割,按价值密度(价值/重量)贪心选择,使背包总价值最大。
// 分数背包问题 - 贪心算法
function fractionalKnapsack(items, capacity) {
// 计算价值密度并排序
items.forEach(item => {
item.density = item.value / item.weight;
});
items.sort((a, b) => b.density - a.density);
let totalValue = 0;
let remaining = capacity;
const taken = [];
for (const item of items) {
if (remaining <= 0) break;
if (item.weight <= remaining) {
// 全拿
taken.push({ ...item, takeWeight: item.weight });
totalValue += item.value;
remaining -= item.weight;
} else {
// 拿一部分
const takeWeight = remaining;
const takeValue = item.density * takeWeight;
taken.push({ ...item, takeWeight, takeValue });
totalValue += takeValue;
remaining = 0;
}
}
return { totalValue, taken };
}
// 测试
const items = [
{ name: 'A', value: 60, weight: 10 },
{ name: 'B', value: 100, weight: 20 },
{ name: 'C', value: 120, weight: 30 }
];
const result = fractionalKnapsack(items, 50);
console.log('最大价值:', result.totalValue); // 240
console.log('选取:', result.taken.map(t => `${t.name}(${t.takeWeight}kg)`));# 分数背包问题 - 贪心算法
def fractional_knapsack(items, capacity):
# 计算价值密度并排序
for item in items:
item['density'] = item['value'] / item['weight']
items.sort(key=lambda x: x['density'], reverse=True)
total_value = 0
remaining = capacity
taken = []
for item in items:
if remaining <= 0:
break
if item['weight'] <= remaining:
# 全拿
taken.append({**item, 'take_weight': item['weight']})
total_value += item['value']
remaining -= item['weight']
else:
# 拿一部分
take_weight = remaining
take_value = item['density'] * take_weight
taken.append({**item, 'take_weight': take_weight, 'take_value': take_value})
total_value += take_value
remaining = 0
return {'total_value': total_value, 'taken': taken}
# 测试
items = [
{'name': 'A', 'value': 60, 'weight': 10},
{'name': 'B', 'value': 100, 'weight': 20},
{'name': 'C', 'value': 120, 'weight': 30}
]
result = fractional_knapsack(items, 50)
print('最大价值:', result['total_value']) # 240.0
print('选取:', [(t['name'], f"{t['take_weight']}kg") for t in result['taken']])package main
import (
"fmt"
"sort"
)
type Item struct {
Name string
Value int
Weight int
Density float64
}
type TakenItem struct {
Name string
TakeWeight int
TakeValue float64
}
func fractionalKnapsack(items []Item, capacity int) (float64, []TakenItem) {
// 计算价值密度
for i := range items {
items[i].Density = float64(items[i].Value) / float64(items[i].Weight)
}
// 按价值密度排序
sort.Slice(items, func(i, j int) bool {
return items[i].Density > items[j].Density
})
totalValue := 0.0
remaining := capacity
taken := []TakenItem{}
for _, item := range items {
if remaining <= 0 {
break
}
if item.Weight <= remaining {
taken = append(taken, TakenItem{
Name: item.Name,
TakeWeight: item.Weight,
TakeValue: float64(item.Value),
})
totalValue += float64(item.Value)
remaining -= item.Weight
} else {
takeWeight := remaining
takeValue := item.Density * float64(takeWeight)
taken = append(taken, TakenItem{
Name: item.Name,
TakeWeight: takeWeight,
TakeValue: takeValue,
})
totalValue += takeValue
remaining = 0
}
}
return totalValue, taken
}
func main() {
items := []Item{
{"A", 60, 10, 0},
{"B", 100, 20, 0},
{"C", 120, 30, 0},
}
totalValue, taken := fractionalKnapsack(items, 50)
fmt.Printf("最大价值:%.2f\n", totalValue)
fmt.Print("选取:")
for _, t := range taken {
fmt.Printf("%s(%dkg) ", t.Name, t.TakeWeight)
}
fmt.Println()
}- 频繁插入删除:链表(O(1) 插入删除)
- 后进先出场景:栈(括号匹配、函数调用)
- 先进先出场景:队列(任务调度、BFS)
- 快速查找/动态集合:二叉搜索树、AVL 树、红黑树
- 优先级队列/TopK:堆(堆排序、优先队列)
- 复杂关系网络:图(社交网络、路径规划)
- 字符串前缀:字典树(自动补全、拼写检查)
- 区间查询更新:线段树(区间求和、最值)
- 局部最优可解:贪心算法(找零、活动选择、分数背包)
- 高维向量检索:向量索引(AI 应用、语义搜索)
- 大规模去重:布隆过滤器(缓存穿透、URL 去重)