📦 数据结构

组织、管理和存储数据的方式,从基础链表到 AI 时代的向量索引

🔗 链表

由节点组成的线性结构,每个节点包含数据和指向下一个节点的指针。

📚

后进先出 (LIFO) 的数据结构,只允许在栈顶进行插入和删除操作。

📥 队列

先进先出 (FIFO) 的数据结构,允许在队尾插入元素,在队头删除元素。

🌳 二叉搜索树

每个节点的左子树所有节点值小于该节点,右子树所有节点值大于该节点。

⚖️ AVL 树/红黑树

自平衡二叉搜索树,通过旋转操作保持平衡,确保 O(log n) 的操作复杂度。

📐

特殊的完全二叉树,分为最大堆和最小堆,优先队列的典型实现。

🕸️

由顶点和边组成的非线性结构,用于表示复杂的关系网络。

💰 贪心算法

每一步选择当前最优解的算法思想,适用于找零、活动选择、分数背包等问题。

💡 数据结构核心概念
📊 线性结构

数组、链表、栈、队列

🌲 树形结构

二叉树、B 树、红黑树

🕸️ 图形结构

邻接矩阵、邻接表

🔑 哈希结构

哈希表、布隆过滤器

🔗 1. 链表 (Linked List)

数据结构

由节点组成的数据结构,每个节点包含数据和指向下一个节点的指针。

🔗 链表结构示意

单向链表
[data|next] → [data|next] → [data|next] → null
双向链表
null ← [prev|data|next] ↔ [prev|data|next] ↔ [prev|data|next] → null
访问
O(n)
搜索
O(n)
插入
O(1)
删除
O(1)

✅ 优点

  • 插入删除效率高
  • 动态大小
  • 不需要连续内存
  • 实现简单

❌ 缺点

  • 随机访问效率低
  • 额外指针开销
  • 缓存不友好
  • 需要额外空间存储指针

代码实现

class ListNode {
    constructor(val = 0, next = null) {
        this.val = val;
        this.next = next;
    }
}

class LinkedList {
    constructor() {
        this.head = null;
        this.size = 0;
    }

    append(val) {
        const node = new ListNode(val);
        if (!this.head) {
            this.head = node;
        } else {
            let current = this.head;
            while (current.next) {
                current = current.next;
            }
            current.next = node;
        }
        this.size++;
    }

    delete(val) {
        if (!this.head) return;
        if (this.head.val === val) {
            this.head = this.head.next;
            this.size--;
            return;
        }
        let current = this.head;
        while (current.next) {
            if (current.next.val === val) {
                current.next = current.next.next;
                this.size--;
                return;
            }
            current = current.next;
        }
    }

    reverse() {
        let prev = null;
        let current = this.head;
        while (current) {
            const next = current.next;
            current.next = prev;
            prev = current;
            current = next;
        }
        this.head = prev;
    }

    print() {
        const values = [];
        let current = this.head;
        while (current) {
            values.push(current.val);
            current = current.next;
        }
        console.log(values.join(' -> '));
    }
}

// 测试
const list = new LinkedList();
list.append(1);
list.append(2);
list.append(3);
list.print(); // 1 -> 2 -> 3
list.reverse();
list.print(); // 3 -> 2 -> 1
class ListNode:
    def __init__(self, val=0, next=None):
        self.val = val
        self.next = next

class LinkedList:
    def __init__(self):
        self.head = None
        self.size = 0

    def append(self, val):
        node = ListNode(val)
        if not self.head:
            self.head = node
        else:
            current = self.head
            while current.next:
                current = current.next
            current.next = node
        self.size += 1

    def delete(self, val):
        if not self.head:
            return
        if self.head.val == val:
            self.head = self.head.next
            self.size -= 1
            return
        current = self.head
        while current.next:
            if current.next.val == val:
                current.next = current.next.next
                self.size -= 1
                return
            current = current.next

    def reverse(self):
        prev = None
        current = self.head
        while current:
            nxt = current.next
            current.next = prev
            prev = current
            current = nxt
        self.head = prev

    def print(self):
        values = []
        current = self.head
        while current:
            values.append(str(current.val))
            current = current.next
        print(' -> '.join(values))

# 测试
lst = LinkedList()
lst.append(1)
lst.append(2)
lst.append(3)
lst.print()  # 1 -> 2 -> 3
lst.reverse()
lst.print()  # 3 -> 2 -> 1
package main

import "fmt"

type ListNode struct {
    val  int
    next *ListNode
}

type LinkedList struct {
    head *ListNode
    size int
}

func NewLinkedList() *LinkedList {
    return &LinkedList{}
}

func (l *LinkedList) Append(val int) {
    node := &ListNode{val: val}
    if l.head == nil {
        l.head = node
    } else {
        current := l.head
        for current.next != nil {
            current = current.next
        }
        current.next = node
    }
    l.size++
}

func (l *LinkedList) Delete(val int) {
    if l.head == nil {
        return
    }
    if l.head.val == val {
        l.head = l.head.next
        l.size--
        return
    }
    current := l.head
    for current.next != nil {
        if current.next.val == val {
            current.next = current.next.next
            l.size--
            return
        }
        current = current.next
    }
}

func (l *LinkedList) Reverse() {
    var prev *ListNode
    current := l.head
    for current != nil {
        next := current.next
        current.next = prev
        prev = current
        current = next
    }
    l.head = prev
}

func (l *LinkedList) Print() {
    values := []string{}
    current := l.head
    for current != nil {
        values = append(values, fmt.Sprintf("%d", current.val))
        current = current.next
    }
    fmt.Println(strings.Join(values, " -> "))
}

func main() {
    list := NewLinkedList()
    list.Append(1)
    list.Append(2)
    list.Append(3)
    list.Print() // 1 -> 2 -> 3
    list.Reverse()
    list.Print() // 3 -> 2 -> 1
}

📚 2. 栈 (Stack)

数据结构

后进先出 (LIFO) 的数据结构,只允许在栈顶进行插入和删除操作。

访问
O(n)
搜索
O(n)
插入
O(1)
删除
O(1)

代码实现

class Stack {
    constructor() {
        this.items = [];
    }

    push(element) {
        this.items.push(element);
    }

    pop() {
        if (this.items.length === 0) return null;
        return this.items.pop();
    }

    peek() {
        return this.items[this.items.length - 1];
    }

    isEmpty() {
        return this.items.length === 0;
    }

    size() {
        return this.items.length;
    }
}

// 应用:有效括号
function isValidParentheses(s) {
    const stack = new Stack();
    const map = { ')': '(', '}': '{', ']': '[' };
    for (const char of s) {
        if (char in map) {
            if (stack.pop() !== map[char]) return false;
        } else {
            stack.push(char);
        }
    }
    return stack.isEmpty();
}

// 测试
console.log(isValidParentheses("()[]{}")); // true
console.log(isValidParentheses("([)]")); // false
class Stack:
    def __init__(self):
        self.items = []

    def push(self, element):
        self.items.append(element)

    def pop(self):
        if len(self.items) == 0:
            return None
        return self.items.pop()

    def peek(self):
        if len(self.items) == 0:
            return None
        return self.items[-1]

    def is_empty(self):
        return len(self.items) == 0

    def size(self):
        return len(self.items)

# 应用:有效括号
def is_valid_parentheses(s):
    stack = Stack()
    mapping = {')': '(', '}': '{', ']': '['}
    for char in s:
        if char in mapping:
            if stack.pop() != mapping[char]:
                return False
        else:
            stack.push(char)
    return stack.is_empty()

# 测试
print(is_valid_parentheses("()[]{}"))  # True
print(is_valid_parentheses("([)]"))    # False
package main

import "fmt"

type Stack struct {
    items []interface{}
}

func NewStack() *Stack {
    return &Stack{items: []interface{}{}}
}

func (s *Stack) Push(element interface{}) {
    s.items = append(s.items, element)
}

func (s *Stack) Pop() interface{} {
    if len(s.items) == 0 {
        return nil
    }
    top := s.items[len(s.items)-1]
    s.items = s.items[:len(s.items)-1]
    return top
}

func (s *Stack) Peek() interface{} {
    if len(s.items) == 0 {
        return nil
    }
    return s.items[len(s.items)-1]
}

func (s *Stack) IsEmpty() bool {
    return len(s.items) == 0
}

func (s *Stack) Size() int {
    return len(s.items)
}

func isValidParentheses(s string) bool {
    stack := NewStack()
    mapping := map[rune]rune{')': '(', '}': '{', ']': '['}
    for _, char := range s {
        if match, ok := mapping[char]; ok {
            if stack.Pop() != match {
                return false
            }
        } else {
            stack.Push(char)
        }
    }
    return stack.IsEmpty()
}

func main() {
    fmt.Println(isValidParentheses("()[]{}")) // true
    fmt.Println(isValidParentheses("([)]"))   // false
}

📥 3. 队列 (Queue)

数据结构

先进先出 (FIFO) 的数据结构,允许在队尾插入元素,在队头删除元素。

访问
O(n)
搜索
O(n)
插入
O(1)
删除
O(1)

代码实现

class Queue {
    constructor() {
        this.items = [];
    }

    enqueue(element) {
        this.items.push(element);
    }

    dequeue() {
        if (this.items.length === 0) return null;
        return this.items.shift();
    }

    front() {
        return this.items[0];
    }

    isEmpty() {
        return this.items.length === 0;
    }

    size() {
        return this.items.length;
    }
}

// 测试
const queue = new Queue();
queue.enqueue(1);
queue.enqueue(2);
queue.enqueue(3);
console.log(queue.dequeue()); // 1
class Queue:
    def __init__(self):
        self.items = []

    def enqueue(self, element):
        self.items.append(element)

    def dequeue(self):
        if len(self.items) == 0:
            return None
        return self.items.pop(0)

    def front(self):
        if len(self.items) == 0:
            return None
        return self.items[0]

    def is_empty(self):
        return len(self.items) == 0

    def size(self):
        return len(self.items)

# 测试
queue = Queue()
queue.enqueue(1)
queue.enqueue(2)
queue.enqueue(3)
print(queue.dequeue())  # 1
package main

import "fmt"

type Queue struct {
    items []interface{}
}

func NewQueue() *Queue {
    return &Queue{items: []interface{}{}}
}

func (q *Queue) Enqueue(element interface{}) {
    q.items = append(q.items, element)
}

func (q *Queue) Dequeue() interface{} {
    if len(q.items) == 0 {
        return nil
    }
    front := q.items[0]
    q.items = q.items[1:]
    return front
}

func (q *Queue) Front() interface{} {
    if len(q.items) == 0 {
        return nil
    }
    return q.items[0]
}

func (q *Queue) IsEmpty() bool {
    return len(q.items) == 0
}

func (q *Queue) Size() int {
    return len(q.items)
}

func main() {
    queue := NewQueue()
    queue.Enqueue(1)
    queue.Enqueue(2)
    queue.Enqueue(3)
    fmt.Println(queue.Dequeue()) // 1
}

🌳 4. 二叉搜索树 (Binary Search Tree)

数据结构

二叉搜索树 (BST) 是一种特殊的二叉树,满足以下性质:每个节点的左子树所有节点值小于该节点值,右子树所有节点值大于该节点值。这使得 BST 支持高效的查找、插入和删除操作。

🌳 BST 结构示意

示例 BST
        8
      /     \
     3      10
    /  \       \
   1    6      14
       / \
      4   7
核心性质
• 左子树 < 根节点
• 右子树 > 根节点
• 递归定义
• 中序遍历有序
平均查找
O(log n)
平均插入
O(log n)
平均删除
O(log n)
最坏情况
O(n)

✅ 优点

  • 查找、插入、删除效率高(平均 O(log n))
  • 中序遍历可得有序序列
  • 实现相对简单
  • 支持范围查询

❌ 缺点

  • 最坏情况退化为链表(O(n))
  • 需要额外空间存储指针
  • 不平衡时性能下降
  • 不适合频繁随机访问

代码实现

class TreeNode {
    constructor(val) {
        this.val = val;
        this.left = null;
        this.right = null;
    }
}

class BinarySearchTree {
    constructor() {
        this.root = null;
    }

    insert(val) {
        const newNode = new TreeNode(val);
        if (!this.root) {
            this.root = newNode;
            return;
        }
        let current = this.root;
        while (true) {
            if (val < current.val) {
                if (!current.left) {
                    current.left = newNode;
                    return;
                }
                current = current.left;
            } else if (val > current.val) {
                if (!current.right) {
                    current.right = newNode;
                    return;
                }
                current = current.right;
            } else {
                return; // 不插入重复值
            }
        }
    }

    search(val) {
        let current = this.root;
        while (current) {
            if (val === current.val) return true;
            if (val < current.val) {
                current = current.left;
            } else {
                current = current.right;
            }
        }
        return false;
    }

    delete(val) {
        const deleteNode = (node, val) => {
            if (!node) return null;
            if (val < node.val) {
                node.left = deleteNode(node.left, val);
            } else if (val > node.val) {
                node.right = deleteNode(node.right, val);
            } else {
                // 找到要删除的节点
                if (!node.left) return node.right;
                if (!node.right) return node.left;
                // 有两个子节点,找右子树最小值
                let minNode = node.right;
                while (minNode.left) minNode = minNode.left;
                node.val = minNode.val;
                node.right = deleteNode(node.right, minNode.val);
            }
            return node;
        };
        this.root = deleteNode(this.root, val);
    }

    inorder(node = this.root, result = []) {
        if (node) {
            this.inorder(node.left, result);
            result.push(node.val);
            this.inorder(node.right, result);
        }
        return result;
    }

    preorder(node = this.root, result = []) {
        if (node) {
            result.push(node.val);
            this.preorder(node.left, result);
            this.preorder(node.right, result);
        }
        return result;
    }

    postorder(node = this.root, result = []) {
        if (node) {
            this.postorder(node.left, result);
            this.postorder(node.right, result);
            result.push(node.val);
        }
        return result;
    }
}

// 测试
const bst = new BinarySearchTree();
bst.insert(8);
bst.insert(3);
bst.insert(10);
bst.insert(1);
bst.insert(6);
bst.insert(14);
console.log('中序遍历:', bst.inorder()); // [1, 3, 6, 8, 10, 14]
console.log('查找 6:', bst.search(6)); // true
console.log('查找 5:', bst.search(5)); // false
bst.delete(3);
console.log('删除 3 后:', bst.inorder()); // [1, 6, 8, 10, 14]
class TreeNode:
    def __init__(self, val=0):
        self.val = val
        self.left = None
        self.right = None

class BinarySearchTree:
    def __init__(self):
        self.root = None

    def insert(self, val):
        if not self.root:
            self.root = TreeNode(val)
            return
        current = self.root
        while True:
            if val < current.val:
                if not current.left:
                    current.left = TreeNode(val)
                    return
                current = current.left
            elif val > current.val:
                if not current.right:
                    current.right = TreeNode(val)
                    return
                current = current.right
            else:
                return  # 不插入重复值

    def search(self, val):
        current = self.root
        while current:
            if val == current.val:
                return True
            elif val < current.val:
                current = current.left
            else:
                current = current.right
        return False

    def delete(self, val):
        def delete_node(node, val):
            if not node:
                return None
            if val < node.val:
                node.left = delete_node(node.left, val)
            elif val > node.val:
                node.right = delete_node(node.right, val)
            else:
                # 找到要删除的节点
                if not node.left:
                    return node.right
                if not node.right:
                    return node.left
                # 有两个子节点,找右子树最小值
                min_node = node.right
                while min_node.left:
                    min_node = min_node.left
                node.val = min_node.val
                node.right = delete_node(node.right, min_node.val)
            return node
        self.root = delete_node(self.root, val)

    def inorder(self, node=None, result=None):
        if result is None:
            result = []
        if node is None:
            node = self.root
        if node:
            self.inorder(node.left, result)
            result.append(node.val)
            self.inorder(node.right, result)
        return result

    def preorder(self, node=None, result=None):
        if result is None:
            result = []
        if node is None:
            node = self.root
        if node:
            result.append(node.val)
            self.preorder(node.left, result)
            self.preorder(node.right, result)
        return result

    def postorder(self, node=None, result=None):
        if result is None:
            result = []
        if node is None:
            node = self.root
        if node:
            self.postorder(node.left, result)
            self.postorder(node.right, result)
            result.append(node.val)
        return result

# 测试
bst = BinarySearchTree()
bst.insert(8)
bst.insert(3)
bst.insert(10)
bst.insert(1)
bst.insert(6)
bst.insert(14)
print('中序遍历:', bst.inorder())  # [1, 3, 6, 8, 10, 14]
print('查找 6:', bst.search(6))  # True
print('查找 5:', bst.search(5))  # False
bst.delete(3)
print('删除 3 后:', bst.inorder())  # [1, 6, 8, 10, 14]
package main

import "fmt"

type TreeNode struct {
    Val   int
    Left  *TreeNode
    Right *TreeNode
}

type BinarySearchTree struct {
    Root *TreeNode
}

func NewBinarySearchTree() *BinarySearchTree {
    return &BinarySearchTree{}
}

func (bst *BinarySearchTree) Insert(val int) {
    if bst.Root == nil {
        bst.Root = &TreeNode{Val: val}
        return
    }
    current := bst.Root
    for {
        if val < current.Val {
            if current.Left == nil {
                current.Left = &TreeNode{Val: val}
                return
            }
            current = current.Left
        } else if val > current.Val {
            if current.Right == nil {
                current.Right = &TreeNode{Val: val}
                return
            }
            current = current.Right
        } else {
            return // 不插入重复值
        }
    }
}

func (bst *BinarySearchTree) Search(val int) bool {
    current := bst.Root
    for current != nil {
        if val == current.Val {
            return true
        } else if val < current.Val {
            current = current.Left
        } else {
            current = current.Right
        }
    }
    return false
}

func (bst *BinarySearchTree) Delete(val int) {
    bst.Root = deleteNode(bst.Root, val)
}

func deleteNode(node *TreeNode, val int) *TreeNode {
    if node == nil {
        return nil
    }
    if val < node.Val {
        node.Left = deleteNode(node.Left, val)
    } else if val > node.Val {
        node.Right = deleteNode(node.Right, val)
    } else {
        // 找到要删除的节点
        if node.Left == nil {
            return node.Right
        }
        if node.Right == nil {
            return node.Left
        }
        // 有两个子节点,找右子树最小值
        minNode := node.Right
        for minNode.Left != nil {
            minNode = minNode.Left
        }
        node.Val = minNode.Val
        node.Right = deleteNode(node.Right, minNode.Val)
    }
    return node
}

func (bst *BinarySearchTree) Inorder() []int {
    result := []int{}
    var inorder func(*TreeNode)
    inorder = func(node *TreeNode) {
        if node != nil {
            inorder(node.Left)
            result = append(result, node.Val)
            inorder(node.Right)
        }
    }
    inorder(bst.Root)
    return result
}

func (bst *BinarySearchTree) Preorder() []int {
    result := []int{}
    var preorder func(*TreeNode)
    preorder = func(node *TreeNode) {
        if node != nil {
            result = append(result, node.Val)
            preorder(node.Left)
            preorder(node.Right)
        }
    }
    preorder(bst.Root)
    return result
}

func (bst *BinarySearchTree) Postorder() []int {
    result := []int{}
    var postorder func(*TreeNode)
    postorder = func(node *TreeNode) {
        if node != nil {
            postorder(node.Left)
            postorder(node.Right)
            result = append(result, node.Val)
        }
    }
    postorder(bst.Root)
    return result
}

func main() {
    bst := NewBinarySearchTree()
    bst.Insert(8)
    bst.Insert(3)
    bst.Insert(10)
    bst.Insert(1)
    bst.Insert(6)
    bst.Insert(14)
    fmt.Println("中序遍历:", bst.Inorder()) // [1 3 6 8 10 14]
    fmt.Println("查找 6:", bst.Search(6))  // true
    fmt.Println("查找 5:", bst.Search(5))  // false
    bst.Delete(3)
    fmt.Println("删除 3 后:", bst.Inorder()) // [1 6 8 10 14]
}

⚖️ 5. AVL 树/红黑树 (AVL Tree / Red-Black Tree)

数据结构

AVL 树 是最早的自平衡二叉搜索树,任何节点的左右子树高度差不超过 1。通过旋转操作保持平衡,确保 O(log n) 的操作复杂度。

红黑树 是一种近似平衡的二叉搜索树,通过节点颜色(红/黑)和特定规则保证从根到叶子的最长路径不超过最短路径的两倍。Java TreeMap、HashMap(JDK8+)、C++ STL map 都使用红黑树。

⚖️ 旋转操作示意

右旋 (LL 失衡)
    A            B
   /            /  \
  B    →      X   A
 /  \          /      \
X    Y        Y      Z
左旋 (RR 失衡)
  A              B
   \            /  \
   B   →     A    Z
  /  \      /      
 Y    Z    X      
红黑树规则
• 节点非红即黑
• 根节点为黑
• 红节点子节点必黑
• 根到叶路径黑节点数相同
AVL 查找
O(log n)
AVL 插入/删除
O(log n)
红黑树查找
O(log n)
红黑树插入/删除
O(log n)

✅ 优点

  • 严格保证 O(log n) 复杂度
  • AVL 树更平衡,查找更快
  • 红黑树旋转少,插入删除更快
  • 广泛应用于标准库

❌ 缺点

  • 实现复杂
  • AVL 树维护平衡开销大
  • 红黑树不如 AVL 平衡
  • 额外空间存储高度/颜色

代码实现

class AVLNode {
    constructor(val) {
        this.val = val;
        this.left = null;
        this.right = null;
        this.height = 1;
    }
}

class AVLTree {
    constructor() {
        this.root = null;
    }

    getHeight(node) {
        return node ? node.height : 0;
    }

    getBalance(node) {
        return node ? this.getHeight(node.left) - this.getHeight(node.right) : 0;
    }

    updateHeight(node) {
        node.height = 1 + Math.max(this.getHeight(node.left), this.getHeight(node.right));
    }

    rotateRight(y) {
        const x = y.left;
        const T2 = x.right;
        x.right = y;
        y.left = T2;
        this.updateHeight(y);
        this.updateHeight(x);
        return x;
    }

    rotateLeft(x) {
        const y = x.right;
        const T2 = y.left;
        y.left = x;
        x.right = T2;
        this.updateHeight(x);
        this.updateHeight(y);
        return y;
    }

    insert(val) {
        const insertNode = (node, val) => {
            if (!node) return new AVLNode(val);
            if (val < node.val) {
                node.left = insertNode(node.left, val);
            } else if (val > node.val) {
                node.right = insertNode(node.right, val);
            } else {
                return node; // 不插入重复值
            }
            this.updateHeight(node);
            const balance = this.getBalance(node);
            // LL
            if (balance > 1 && val < node.left.val) {
                return this.rotateRight(node);
            }
            // RR
            if (balance < -1 && val > node.right.val) {
                return this.rotateLeft(node);
            }
            // LR
            if (balance > 1 && val > node.left.val) {
                node.left = this.rotateLeft(node.left);
                return this.rotateRight(node);
            }
            // RL
            if (balance < -1 && val < node.right.val) {
                node.right = this.rotateRight(node.right);
                return this.rotateLeft(node);
            }
            return node;
        };
        this.root = insertNode(this.root, val);
    }

    search(val) {
        let current = this.root;
        while (current) {
            if (val === current.val) return true;
            current = val < current.val ? current.left : current.right;
        }
        return false;
    }

    inorder(node = this.root, result = []) {
        if (node) {
            this.inorder(node.left, result);
            result.push(node.val);
            this.inorder(node.right, result);
        }
        return result;
    }
}

// 测试
const avl = new AVLTree();
avl.insert(10);
avl.insert(20);
avl.insert(30); // RR 失衡,触发左旋
avl.insert(5);
avl.insert(15); // LR 失衡,触发左右旋
console.log('中序遍历:', avl.inorder()); // [5, 10, 15, 20, 30]
console.log('查找 15:', avl.search(15)); // true
class AVLNode:
    def __init__(self, val):
        self.val = val
        self.left = None
        self.right = None
        self.height = 1

class AVLTree:
    def __init__(self):
        self.root = None

    def get_height(self, node):
        return node.height if node else 0

    def get_balance(self, node):
        return self.get_height(node.left) - self.get_height(node.right) if node else 0

    def update_height(self, node):
        node.height = 1 + max(self.get_height(node.left), self.get_height(node.right))

    def rotate_right(self, y):
        x = y.left
        T2 = x.right
        x.right = y
        y.left = T2
        self.update_height(y)
        self.update_height(x)
        return x

    def rotate_left(self, x):
        y = x.right
        T2 = y.left
        y.left = x
        x.right = T2
        self.update_height(x)
        self.update_height(y)
        return y

    def insert(self, val):
        def insert_node(node, val):
            if not node:
                return AVLNode(val)
            if val < node.val:
                node.left = insert_node(node.left, val)
            elif val > node.val:
                node.right = insert_node(node.right, val)
            else:
                return node
            self.update_height(node)
            balance = self.get_balance(node)
            # LL
            if balance > 1 and val < node.left.val:
                return self.rotate_right(node)
            # RR
            if balance < -1 and val > node.right.val:
                return self.rotate_left(node)
            # LR
            if balance > 1 and val > node.left.val:
                node.left = self.rotate_left(node.left)
                return self.rotate_right(node)
            # RL
            if balance < -1 and val < node.right.val:
                node.right = self.rotate_right(node.right)
                return self.rotate_left(node)
            return node
        self.root = insert_node(self.root, val)

    def search(self, val):
        current = self.root
        while current:
            if val == current.val:
                return True
            current = current.left if val < current.val else current.right
        return False

    def inorder(self, node=None, result=None):
        if result is None:
            result = []
        if node is None:
            node = self.root
        if node:
            self.inorder(node.left, result)
            result.append(node.val)
            self.inorder(node.right, result)
        return result

# 测试
avl = AVLTree()
avl.insert(10)
avl.insert(20)
avl.insert(30)  # RR 失衡,触发左旋
avl.insert(5)
avl.insert(15)  # LR 失衡,触发左右旋
print('中序遍历:', avl.inorder())  # [5, 10, 15, 20, 30]
print('查找 15:', avl.search(15))  # True
package main

import "fmt"

type AVLNode struct {
    Val    int
    Left   *AVLNode
    Right  *AVLNode
    Height int
}

type AVLTree struct {
    Root *AVLNode
}

func NewAVLTree() *AVLTree {
    return &AVLTree{}
}

func (t *AVLTree) GetHeight(node *AVLNode) int {
    if node == nil {
        return 0
    }
    return node.Height
}

func (t *AVLTree) GetBalance(node *AVLNode) int {
    if node == nil {
        return 0
    }
    return t.GetHeight(node.Left) - t.GetHeight(node.Right)
}

func (t *AVLTree) UpdateHeight(node *AVLNode) {
    node.Height = 1 + max(t.GetHeight(node.Left), t.GetHeight(node.Right))
}

func (t *AVLTree) RotateRight(y *AVLNode) *AVLNode {
    x := y.Left
    T2 := x.Right
    x.Right = y
    y.Left = T2
    t.UpdateHeight(y)
    t.UpdateHeight(x)
    return x
}

func (t *AVLTree) RotateLeft(x *AVLNode) *AVLNode {
    y := x.Right
    T2 := y.Left
    y.Left = x
    x.Right = T2
    t.UpdateHeight(x)
    t.UpdateHeight(y)
    return y
}

func (t *AVLTree) Insert(val int) {
    t.Root = t.insertNode(t.Root, val)
}

func (t *AVLTree) insertNode(node *AVLNode, val int) *AVLNode {
    if node == nil {
        return &AVLNode{Val: val, Height: 1}
    }
    if val < node.Val {
        node.Left = t.insertNode(node.Left, val)
    } else if val > node.Val {
        node.Right = t.insertNode(node.Right, val)
    } else {
        return node
    }
    t.UpdateHeight(node)
    balance := t.GetBalance(node)
    // LL
    if balance > 1 && val < node.Left.Val {
        return t.RotateRight(node)
    }
    // RR
    if balance < -1 && val > node.Right.Val {
        return t.RotateLeft(node)
    }
    // LR
    if balance > 1 && val > node.Left.Val {
        node.Left = t.RotateLeft(node.Left)
        return t.RotateRight(node)
    }
    // RL
    if balance < -1 && val < node.Right.Val {
        node.Right = t.RotateRight(node.Right)
        return t.RotateLeft(node)
    }
    return node
}

func (t *AVLTree) Search(val int) bool {
    current := t.Root
    for current != nil {
        if val == current.Val {
            return true
        } else if val < current.Val {
            current = current.Left
        } else {
            current = current.Right
        }
    }
    return false
}

func (t *AVLTree) Inorder() []int {
    result := []int{}
    var inorder func(*AVLNode)
    inorder = func(node *AVLNode) {
        if node != nil {
            inorder(node.Left)
            result = append(result, node.Val)
            inorder(node.Right)
        }
    }
    inorder(t.Root)
    return result
}

func max(a, b int) int {
    if a > b {
        return a
    }
    return b
}

func main() {
    tree := NewAVLTree()
    tree.Insert(10)
    tree.Insert(20)
    tree.Insert(30) // RR 失衡
    tree.Insert(5)
    tree.Insert(15) // LR 失衡
    fmt.Println("中序遍历:", tree.Inorder()) // [5 10 15 20 30]
    fmt.Println("查找 15:", tree.Search(15)) // true
}

📐 6. 堆 (Heap)

数据结构

堆是一种特殊的完全二叉树,分为最大堆和最小堆。

最大堆:每个节点的值都大于或等于其子节点的值,根节点是最大值。

最小堆:每个节点的值都小于或等于其子节点的值,根节点是最小值。

堆通常用数组实现,对于索引 i 的节点:父节点为 (i-1)/2,左子节点为 2i+1,右子节点为 2i+2。

📐 堆结构示意

最大堆
        50
      /    \
     30     40
    /  \    /  \
   10  20  35  5
数组表示
[50, 30, 40, 10, 20, 35, 5]

索引关系:
i=0: 50 (根)
i=1: 30 (左子)
i=2: 40 (右子)
访问最大/最小
O(1)
插入
O(log n)
删除最大/最小
O(log n)
建堆
O(n)

✅ 优点

  • O(1) 访问最大/最小值
  • 插入删除效率高
  • 空间效率高(数组实现)
  • 堆排序原地排序

❌ 缺点

  • 查找任意元素 O(n)
  • 只支持访问最大/最小值
  • 不支持随机访问
  • 删除任意元素复杂

代码实现

class MaxHeap {
    constructor() {
        this.heap = [];
    }

    insert(val) {
        this.heap.push(val);
        this.bubbleUp(this.heap.length - 1);
    }

    bubbleUp(index) {
        while (index > 0) {
            const parentIndex = Math.floor((index - 1) / 2);
            if (this.heap[index] <= this.heap[parentIndex]) break;
            [this.heap[index], this.heap[parentIndex]] = 
                [this.heap[parentIndex], this.heap[index]];
            index = parentIndex;
        }
    }

    extractMax() {
        if (this.heap.length === 0) return null;
        if (this.heap.length === 1) return this.heap.pop();
        const max = this.heap[0];
        this.heap[0] = this.heap.pop();
        this.bubbleDown(0);
        return max;
    }

    bubbleDown(index) {
        const length = this.heap.length;
        while (true) {
            const leftChild = 2 * index + 1;
            const rightChild = 2 * index + 2;
            let largest = index;
            if (leftChild < length && this.heap[leftChild] > this.heap[largest]) {
                largest = leftChild;
            }
            if (rightChild < length && this.heap[rightChild] > this.heap[largest]) {
                largest = rightChild;
            }
            if (largest === index) break;
            [this.heap[index], this.heap[largest]] = 
                [this.heap[largest], this.heap[index]];
            index = largest;
        }
    }

    peek() {
        return this.heap.length > 0 ? this.heap[0] : null;
    }

    size() {
        return this.heap.length;
    }

    isEmpty() {
        return this.heap.length === 0;
    }

    // 堆排序
    static heapSort(arr) {
        const heap = new MaxHeap();
        arr.forEach(val => heap.insert(val));
        const result = [];
        while (!heap.isEmpty()) {
            result.unshift(heap.extractMax());
        }
        return result;
    }
}

// 测试
const heap = new MaxHeap();
heap.insert(10);
heap.insert(20);
heap.insert(15);
heap.insert(30);
heap.insert(5);
console.log('最大堆:', heap.heap); // [30, 20, 15, 10, 5]
console.log('最大值:', heap.extractMax()); // 30
console.log('堆排序:', MaxHeap.heapSort([3, 1, 4, 1, 5, 9, 2, 6])); // [1,1,2,3,4,5,6,9]
class MaxHeap:
    def __init__(self):
        self.heap = []

    def insert(self, val):
        self.heap.append(val)
        self._bubble_up(len(self.heap) - 1)

    def _bubble_up(self, index):
        while index > 0:
            parent_index = (index - 1) // 2
            if self.heap[index] <= self.heap[parent_index]:
                break
            self.heap[index], self.heap[parent_index] = \
                self.heap[parent_index], self.heap[index]
            index = parent_index

    def extract_max(self):
        if len(self.heap) == 0:
            return None
        if len(self.heap) == 1:
            return self.heap.pop()
        max_val = self.heap[0]
        self.heap[0] = self.heap.pop()
        self._bubble_down(0)
        return max_val

    def _bubble_down(self, index):
        length = len(self.heap)
        while True:
            left_child = 2 * index + 1
            right_child = 2 * index + 2
            largest = index
            if left_child < length and self.heap[left_child] > self.heap[largest]:
                largest = left_child
            if right_child < length and self.heap[right_child] > self.heap[largest]:
                largest = right_child
            if largest == index:
                break
            self.heap[index], self.heap[largest] = \
                self.heap[largest], self.heap[index]
            index = largest

    def peek(self):
        return self.heap[0] if self.heap else None

    def size(self):
        return len(self.heap)

    def is_empty(self):
        return len(self.heap) == 0

    @staticmethod
    def heap_sort(arr):
        heap = MaxHeap()
        for val in arr:
            heap.insert(val)
        result = []
        while not heap.is_empty():
            result.insert(0, heap.extract_max())
        return result

# 测试
heap = MaxHeap()
heap.insert(10)
heap.insert(20)
heap.insert(15)
heap.insert(30)
heap.insert(5)
print('最大堆:', heap.heap)  # [30, 20, 15, 10, 5]
print('最大值:', heap.extract_max())  # 30
print('堆排序:', MaxHeap.heap_sort([3, 1, 4, 1, 5, 9, 2, 6]))  # [1,1,2,3,4,5,6,9]
package main

import "fmt"

type MaxHeap struct {
    heap []int
}

func NewMaxHeap() *MaxHeap {
    return &MaxHeap{heap: []int{}}
}

func (h *MaxHeap) Insert(val int) {
    h.heap = append(h.heap, val)
    h.bubbleUp(len(h.heap) - 1)
}

func (h *MaxHeap) bubbleUp(index int) {
    for index > 0 {
        parentIndex := (index - 1) / 2
        if h.heap[index] <= h.heap[parentIndex] {
            break
        }
        h.heap[index], h.heap[parentIndex] = h.heap[parentIndex], h.heap[index]
        index = parentIndex
    }
}

func (h *MaxHeap) ExtractMax() int {
    if len(h.heap) == 0 {
        return -1
    }
    if len(h.heap) == 1 {
        val := h.heap[0]
        h.heap = h.heap[:0]
        return val
    }
    maxVal := h.heap[0]
    h.heap[0] = h.heap[len(h.heap)-1]
    h.heap = h.heap[:len(h.heap)-1]
    h.bubbleDown(0)
    return maxVal
}

func (h *MaxHeap) bubbleDown(index int) {
    length := len(h.heap)
    for {
        leftChild := 2*index + 1
        rightChild := 2*index + 2
        largest := index
        if leftChild < length && h.heap[leftChild] > h.heap[largest] {
            largest = leftChild
        }
        if rightChild < length && h.heap[rightChild] > h.heap[largest] {
            largest = rightChild
        }
        if largest == index {
            break
        }
        h.heap[index], h.heap[largest] = h.heap[largest], h.heap[index]
        index = largest
    }
}

func (h *MaxHeap) Peek() int {
    if len(h.heap) == 0 {
        return -1
    }
    return h.heap[0]
}

func (h *MaxHeap) Size() int {
    return len(h.heap)
}

func (h *MaxHeap) IsEmpty() bool {
    return len(h.heap) == 0
}

// 优先队列应用
type PriorityQueue struct {
    heap *MaxHeap
}

func NewPriorityQueue() *PriorityQueue {
    return &PriorityQueue{heap: NewMaxHeap()}
}

func (pq *PriorityQueue) Enqueue(val int) {
    pq.heap.Insert(val)
}

func (pq *PriorityQueue) Dequeue() int {
    return pq.heap.ExtractMax()
}

func (pq *PriorityQueue) IsEmpty() bool {
    return pq.heap.IsEmpty()
}

// 堆排序
func HeapSort(arr []int) []int {
    h := NewMaxHeap()
    for _, val := range arr {
        h.Insert(val)
    }
    result := make([]int, 0, len(arr))
    for !h.IsEmpty() {
        result = append([]int{h.ExtractMax()}, result...)
    }
    return result
}

func main() {
    heap := NewMaxHeap()
    heap.Insert(10)
    heap.Insert(20)
    heap.Insert(15)
    heap.Insert(30)
    heap.Insert(5)
    fmt.Println("最大堆:", heap.heap)        // [30 20 15 10 5]
    fmt.Println("最大值:", heap.ExtractMax()) // 30
    fmt.Println("堆排序:", HeapSort([]int{3, 1, 4, 1, 5, 9, 2, 6})) // [1 1 2 3 4 5 6 9]

    // 优先队列示例
    pq := NewPriorityQueue()
    pq.Enqueue(100)
    pq.Enqueue(50)
    pq.Enqueue(200)
    for !pq.IsEmpty() {
        fmt.Print(pq.Dequeue(), " ") // 200 100 50
    }
}

🕸️ 7. 图 (Graph)

数据结构

图是由顶点(节点)和边组成的数据结构,用于表示对象之间的关系。

图的类型

  • 有向图:边有方向,如社交网络关注关系
  • 无向图:边无方向,如好友关系
  • 加权图:边有权重,如地图中的距离

存储方式

  • 邻接矩阵:二维数组,适合稠密图
  • 邻接表:数组 + 链表,适合稀疏图

🕸️ 图结构示意

无向图
  0 —— 1
  |    /  |
  |  /    |
  2 —— 3 —— 4
邻接表
0: [1, 2]
1: [0, 2, 3]
2: [0, 1, 3]
3: [1, 2, 4]
4: [3]
邻接矩阵
  0 1 2 3 4
0 0 1 1 0 0
1 1 0 1 1 0
2 1 1 0 1 0
3 0 1 1 0 1
4 0 0 0 1 0
BFS 遍历
O(V + E)
DFS 遍历
O(V + E)
邻接矩阵查询
O(1)
邻接表查询
O(degree)

✅ 优点

  • 能表示复杂关系
  • 应用广泛(社交网络、地图导航)
  • 邻接表空间效率高
  • 支持多种遍历算法

❌ 缺点

  • 邻接矩阵空间 O(V²)
  • 某些算法复杂度高
  • 实现相对复杂
  • 需要处理环路

代码实现

class Graph {
    constructor(isDirected = false) {
        this.isDirected = isDirected;
        this.adjList = new Map();
    }

    addVertex(vertex) {
        if (!this.adjList.has(vertex)) {
            this.adjList.set(vertex, []);
        }
    }

    addEdge(vertex1, vertex2, weight = 1) {
        this.addVertex(vertex1);
        this.addVertex(vertex2);
        this.adjList.get(vertex1).push({ vertex: vertex2, weight });
        if (!this.isDirected) {
            this.adjList.get(vertex2).push({ vertex: vertex1, weight });
        }
    }

    bfs(start) {
        const visited = new Set();
        const queue = [start];
        const result = [];
        visited.add(start);
        while (queue.length > 0) {
            const vertex = queue.shift();
            result.push(vertex);
            for (const neighbor of this.adjList.get(vertex)) {
                if (!visited.has(neighbor.vertex)) {
                    visited.add(neighbor.vertex);
                    queue.push(neighbor.vertex);
                }
            }
        }
        return result;
    }

    dfs(start) {
        const visited = new Set();
        const result = [];
        const dfsHelper = (vertex) => {
            visited.add(vertex);
            result.push(vertex);
            for (const neighbor of this.adjList.get(vertex)) {
                if (!visited.has(neighbor.vertex)) {
                    dfsHelper(neighbor.vertex);
                }
            }
        };
        dfsHelper(start);
        return result;
    }

    print() {
        for (const [vertex, neighbors] of this.adjList) {
            console.log(`${vertex}: ${neighbors.map(n => `${n.vertex}(${n.weight})`).join(', ')}`);
        }
    }
}

// 测试
const graph = new Graph(false); // 无向图
graph.addEdge(0, 1, 1);
graph.addEdge(0, 2, 1);
graph.addEdge(1, 2, 1);
graph.addEdge(1, 3, 1);
graph.addEdge(2, 3, 1);
graph.addEdge(3, 4, 1);
console.log('邻接表:');
graph.print();
console.log('BFS 从 0:', graph.bfs(0)); // [0, 1, 2, 3, 4]
console.log('DFS 从 0:', graph.dfs(0)); // [0, 1, 2, 3, 4]
from collections import deque

class Graph:
    def __init__(self, is_directed=False):
        self.is_directed = is_directed
        self.adj_list = {}

    def add_vertex(self, vertex):
        if vertex not in self.adj_list:
            self.adj_list[vertex] = []

    def add_edge(self, vertex1, vertex2, weight=1):
        self.add_vertex(vertex1)
        self.add_vertex(vertex2)
        self.adj_list[vertex1].append((vertex2, weight))
        if not self.is_directed:
            self.adj_list[vertex2].append((vertex1, weight))

    def bfs(self, start):
        visited = set()
        queue = deque([start])
        result = []
        visited.add(start)
        while queue:
            vertex = queue.popleft()
            result.append(vertex)
            for neighbor, _ in self.adj_list[vertex]:
                if neighbor not in visited:
                    visited.add(neighbor)
                    queue.append(neighbor)
        return result

    def dfs(self, start):
        visited = set()
        result = []
        def dfs_helper(vertex):
            visited.add(vertex)
            result.append(vertex)
            for neighbor, _ in self.adj_list[vertex]:
                if neighbor not in visited:
                    dfs_helper(neighbor)
        dfs_helper(start)
        return result

    def print(self):
        for vertex, neighbors in self.adj_list.items():
            print(f"{vertex}: {', '.join(f'{n}({w})' for n, w in neighbors)}")

# 测试
graph = Graph(is_directed=False)  # 无向图
graph.add_edge(0, 1, 1)
graph.add_edge(0, 2, 1)
graph.add_edge(1, 2, 1)
graph.add_edge(1, 3, 1)
graph.add_edge(2, 3, 1)
graph.add_edge(3, 4, 1)
print('邻接表:')
graph.print()
print('BFS 从 0:', graph.bfs(0))  # [0, 1, 2, 3, 4]
print('DFS 从 0:', graph.dfs(0))  # [0, 1, 2, 3, 4]
package main

import "fmt"

type Edge struct {
    Vertex int
    Weight int
}

type Graph struct {
    IsDirected bool
    AdjList    map[int][]Edge
}

func NewGraph(isDirected bool) *Graph {
    return &Graph{
        IsDirected: isDirected,
        AdjList:    make(map[int][]Edge),
    }
}

func (g *Graph) AddVertex(vertex int) {
    if _, exists := g.AdjList[vertex]; !exists {
        g.AdjList[vertex] = []Edge{}
    }
}

func (g *Graph) AddEdge(vertex1, vertex2, weight int) {
    g.AddVertex(vertex1)
    g.AddVertex(vertex2)
    g.AdjList[vertex1] = append(g.AdjList[vertex1], Edge{Vertex: vertex2, Weight: weight})
    if !g.IsDirected {
        g.AdjList[vertex2] = append(g.AdjList[vertex2], Edge{Vertex: vertex1, Weight: weight})
    }
}

func (g *Graph) BFS(start int) []int {
    visited := make(map[int]bool)
    queue := []int{start}
    result := []int{}
    visited[start] = true
    for len(queue) > 0 {
        vertex := queue[0]
        queue = queue[1:]
        result = append(result, vertex)
        for _, neighbor := range g.AdjList[vertex] {
            if !visited[neighbor.Vertex] {
                visited[neighbor.Vertex] = true
                queue = append(queue, neighbor.Vertex)
            }
        }
    }
    return result
}

func (g *Graph) DFS(start int) []int {
    visited := make(map[int]bool)
    result := []int{}
    var dfsHelper func(int)
    dfsHelper = func(vertex int) {
        visited[vertex] = true
        result = append(result, vertex)
        for _, neighbor := range g.AdjList[vertex] {
            if !visited[neighbor.Vertex] {
                dfsHelper(neighbor.Vertex)
            }
        }
    }
    dfsHelper(start)
    return result
}

func (g *Graph) Print() {
    for vertex, neighbors := range g.AdjList {
        fmt.Printf("%d: ", vertex)
        for i, n := range neighbors {
            if i > 0 {
                fmt.Print(", ")
            }
            fmt.Printf("%d(%d)", n.Vertex, n.Weight)
        }
        fmt.Println()
    }
}

func main() {
    graph := NewGraph(false) // 无向图
    graph.AddEdge(0, 1, 1)
    graph.AddEdge(0, 2, 1)
    graph.AddEdge(1, 2, 1)
    graph.AddEdge(1, 3, 1)
    graph.AddEdge(2, 3, 1)
    graph.AddEdge(3, 4, 1)
    fmt.Println("邻接表:")
    graph.Print()
    fmt.Println("BFS 从 0:", graph.BFS(0)) // [0 1 2 3 4]
    fmt.Println("DFS 从 0:", graph.DFS(0)) // [0 1 2 3 4]
}

🤖 AI 时代的数据结构

🔍 向量索引

用于高效检索高维向量的数据结构,是 AI 大模型、语义搜索、推荐系统的核心基础设施。

🌸 布隆过滤器

空间效率极高的概率型数据结构,用于判断元素是否存在于集合中。适用于大规模数据去重和缓存穿透防护。

🪜 跳表

基于链表的高效搜索数据结构,通过多层索引实现 O(log n) 的查找复杂度。Redis 的有序集合使用跳表实现。

📊 线段树

用于高效处理区间查询和更新操作的数据结构,广泛应用于计算几何和区间统计问题。

💾 LRU 缓存

最近最少使用缓存,结合哈希表和双向链表实现 O(1) 的访问和更新。广泛应用于缓存系统和内存管理。

📊 4. 线段树 (Segment Tree)

数据结构

线段树是一种二叉树数据结构,用于高效处理区间查询和更新操作。每个节点代表一个区间,叶子节点代表单个元素。

📊 线段树结构示意

区间 [0,7]
      [0,7]
    /      \
  [0,3]    [4,7]
  /  \    /  \
[0,1] [2,3] [4,5] [6,7]
应用场景
• 区间求和
• 区间最值
• 区间更新
• 区间染色
构建
O(n)
查询
O(log n)
更新
O(log n)
空间
O(n)

✅ 优点

  • 区间查询高效
  • 支持动态更新
  • 实现相对简单
  • 应用广泛

❌ 缺点

  • 空间开销较大
  • 只适用于可合并操作
  • 常数因子较大
  • 递归实现栈开销

代码实现

class SegmentTree {
    constructor(arr) {
        this.n = arr.length;
        this.tree = new Array(4 * this.n).fill(0);
        this.build(arr, 0, 0, this.n - 1);
    }

    build(arr, node, start, end) {
        if (start === end) {
            this.tree[node] = arr[start];
        } else {
            const mid = Math.floor((start + end) / 2);
            const leftNode = 2 * node + 1;
            const rightNode = 2 * node + 2;
            this.build(arr, leftNode, start, mid);
            this.build(arr, rightNode, mid + 1, end);
            this.tree[node] = this.tree[leftNode] + this.tree[rightNode];
        }
    }

    query(l, r) {
        return this._query(0, 0, this.n - 1, l, r);
    }

    _query(node, start, end, l, r) {
        if (r < start || end < l) return 0;
        if (l <= start && end <= r) return this.tree[node];
        const mid = Math.floor((start + end) / 2);
        const leftSum = this._query(2 * node + 1, start, mid, l, r);
        const rightSum = this._query(2 * node + 2, mid + 1, end, l, r);
        return leftSum + rightSum;
    }

    update(idx, val) {
        this._update(0, 0, this.n - 1, idx, val);
    }

    _update(node, start, end, idx, val) {
        if (start === end) {
            this.tree[node] = val;
        } else {
            const mid = Math.floor((start + end) / 2);
            if (start <= idx && idx <= mid) {
                this._update(2 * node + 1, start, mid, idx, val);
            } else {
                this._update(2 * node + 2, mid + 1, end, idx, val);
            }
            this.tree[node] = this.tree[2 * node + 1] + this.tree[2 * node + 2];
        }
    }
}

// 测试
const arr = [1, 3, 5, 7, 9, 11];
const st = new SegmentTree(arr);
console.log('区间和 [1,3]:', st.query(1, 3)); // 15
st.update(2, 10);
console.log('更新后 [1,3]:', st.query(1, 3)); // 20
class SegmentTree:
    def __init__(self, arr):
        self.n = len(arr)
        self.tree = [0] * (4 * self.n)
        self._build(arr, 0, 0, self.n - 1)

    def _build(self, arr, node, start, end):
        if start == end:
            self.tree[node] = arr[start]
        else:
            mid = (start + end) // 2
            left_node = 2 * node + 1
            right_node = 2 * node + 2
            self._build(arr, left_node, start, mid)
            self._build(arr, right_node, mid + 1, end)
            self.tree[node] = self.tree[left_node] + self.tree[right_node]

    def query(self, l, r):
        return self._query(0, 0, self.n - 1, l, r)

    def _query(self, node, start, end, l, r):
        if r < start or end < l:
            return 0
        if l <= start and end <= r:
            return self.tree[node]
        mid = (start + end) // 2
        left_sum = self._query(2 * node + 1, start, mid, l, r)
        right_sum = self._query(2 * node + 2, mid + 1, end, l, r)
        return left_sum + right_sum

    def update(self, idx, val):
        self._update(0, 0, self.n - 1, idx, val)

    def _update(self, node, start, end, idx, val):
        if start == end:
            self.tree[node] = val
        else:
            mid = (start + end) // 2
            if start <= idx <= mid:
                self._update(2 * node + 1, start, mid, idx, val)
            else:
                self._update(2 * node + 2, mid + 1, end, idx, val)
            self.tree[node] = self.tree[2 * node + 1] + self.tree[2 * node + 2]

# 测试
arr = [1, 3, 5, 7, 9, 11]
st = SegmentTree(arr)
print(f"区间和 [1,3]: {st.query(1, 3)}")  # 15
st.update(2, 10)
print(f"更新后 [1,3]: {st.query(1, 3)}")  # 20
package main

import "fmt"

type SegmentTree struct {
    n    int
    tree []int
}

func NewSegmentTree(arr []int) *SegmentTree {
    n := len(arr)
    st := &SegmentTree{
        n:    n,
        tree: make([]int, 4*n),
    }
    st.build(arr, 0, 0, n-1)
    return st
}

func (st *SegmentTree) build(arr []int, node, start, end int) {
    if start == end {
        st.tree[node] = arr[start]
    } else {
        mid := (start + end) / 2
        leftNode := 2*node + 1
        rightNode := 2*node + 2
        st.build(arr, leftNode, start, mid)
        st.build(arr, rightNode, mid+1, end)
        st.tree[node] = st.tree[leftNode] + st.tree[rightNode]
    }
}

func (st *SegmentTree) Query(l, r int) int {
    return st.query(0, 0, st.n-1, l, r)
}

func (st *SegmentTree) query(node, start, end, l, r int) int {
    if r < start || end < l {
        return 0
    }
    if l <= start && end <= r {
        return st.tree[node]
    }
    mid := (start + end) / 2
    leftSum := st.query(2*node+1, start, mid, l, r)
    rightSum := st.query(2*node+2, mid+1, end, l, r)
    return leftSum + rightSum
}

func (st *SegmentTree) Update(idx, val int) {
    st.update(0, 0, st.n-1, idx, val)
}

func (st *SegmentTree) update(node, start, end, idx, val int) {
    if start == end {
        st.tree[node] = val
    } else {
        mid := (start + end) / 2
        if start <= idx && idx <= mid {
            st.update(2*node+1, start, mid, idx, val)
        } else {
            st.update(2*node+2, mid+1, end, idx, val)
        }
        st.tree[node] = st.tree[2*node+1] + st.tree[2*node+2]
    }
}

func main() {
    arr := []int{1, 3, 5, 7, 9, 11}
    st := NewSegmentTree(arr)
    fmt.Printf("区间和 [1,3]: %d\n", st.Query(1, 3))  // 15
    st.Update(2, 10)
    fmt.Printf("更新后 [1,3]: %d\n", st.Query(1, 3))  // 20
}

🔍 5. 向量索引 (Vector Index)

数据结构

向量索引用于高效检索高维向量的数据结构,通过计算向量之间的相似度(如余弦相似度)来找到最相似的向量。是 AI 大模型、语义搜索、推荐系统的核心基础设施。

🔍 向量检索流程

输入查询向量
[0.1, 0.5, -0.3, ...]
计算相似度
余弦相似度 / 欧氏距离
返回 TopK
最相似的 K 个向量
插入
O(1)
搜索
O(n × d)
空间
O(n × d)
维度
d

✅ 优点

  • 支持语义搜索
  • 适用于 AI 应用
  • 实现简单
  • 可扩展到近似搜索

❌ 缺点

  • 精确搜索慢
  • 高维计算开销大
  • 需要向量嵌入
  • 内存占用大

代码实现

class VectorIndex {
    constructor() {
        this.vectors = [];
        this.metadata = [];
    }

    insert(id, vector, metadata = {}) {
        this.vectors.push({ id, vector: this._normalize(vector) });
        this.metadata.push({ id, metadata });
    }

    _normalize(vector) {
        const norm = Math.sqrt(vector.reduce((sum, v) => sum + v * v, 0));
        return norm > 0 ? vector.map(v => v / norm) : vector;
    }

    _dotProduct(a, b) {
        return a.reduce((sum, val, i) => sum + val * b[i], 0);
    }

    search(queryVector, topK = 5) {
        const normalized = this._normalize(queryVector);
        const scores = this.vectors.map((item, idx) => ({
            id: item.id,
            score: this._dotProduct(normalized, item.vector),
            metadata: this.metadata[idx].metadata
        }));
        scores.sort((a, b) => b.score - a.score);
        return scores.slice(0, topK);
    }

    batchInsert(items) {
        for (const { id, vector, metadata } of items) {
            this.insert(id, vector, metadata);
        }
    }
}

// 测试
const index = new VectorIndex();
index.insert('doc1', [0.8, 0.2, 0.5], { text: 'AI 技术' });
index.insert('doc2', [0.7, 0.3, 0.4], { text: '机器学习' });
index.insert('doc3', [0.1, 0.9, 0.2], { text: '美食食谱' });

const results = index.search([0.75, 0.25, 0.45], 2);
console.log('搜索结果:', results);
import numpy as np

class VectorIndex:
    def __init__(self):
        self.vectors = []
        self.ids = []
        self.metadata = []

    def insert(self, id, vector, metadata=None):
        normalized = self._normalize(vector)
        self.vectors.append(normalized)
        self.ids.append(id)
        self.metadata.append(metadata or {})

    def _normalize(self, vector):
        arr = np.array(vector, dtype=np.float32)
        norm = np.linalg.norm(arr)
        return arr / norm if norm > 0 else arr

    def _cosine_similarity(self, a, b):
        return float(np.dot(a, b))

    def search(self, query_vector, top_k=5):
        if not self.vectors:
            return []
        query = self._normalize(query_vector)
        scores = []
        for i, vec in enumerate(self.vectors):
            score = self._cosine_similarity(query, vec)
            scores.append({
                'id': self.ids[i],
                'score': score,
                'metadata': self.metadata[i]
            })
        scores.sort(key=lambda x: x['score'], reverse=True)
        return scores[:top_k]

    def batch_insert(self, items):
        for item in items:
            self.insert(item['id'], item['vector'], item.get('metadata'))

# 测试
index = VectorIndex()
index.insert('doc1', [0.8, 0.2, 0.5], {'text': 'AI 技术'})
index.insert('doc2', [0.7, 0.3, 0.4], {'text': '机器学习'})
index.insert('doc3', [0.1, 0.9, 0.2], {'text': '美食食谱'})

results = index.search([0.75, 0.25, 0.45], 2)
for r in results:
    print(f"{r['id']}: {r['score']:.4f} - {r['metadata']}")
package main

import (
    "fmt"
    "math"
    "sort"
)

type VectorItem struct {
    id       string
    vector   []float64
    metadata map[string]interface{}
}

type VectorIndex struct {
    vectors []VectorItem
}

func NewVectorIndex() *VectorIndex {
    return &VectorIndex{
        vectors: make([]VectorItem, 0),
    }
}

func (vi *VectorIndex) Insert(id string, vector []float64, metadata map[string]interface{}) {
    normalized := vi.normalize(vector)
    vi.vectors = append(vi.vectors, VectorItem{
        id:       id,
        vector:   normalized,
        metadata: metadata,
    })
}

func (vi *VectorIndex) normalize(vector []float64) []float64 {
    norm := 0.0
    for _, v := range vector {
        norm += v * v
    }
    norm = math.Sqrt(norm)
    if norm == 0 {
        return vector
    }
    result := make([]float64, len(vector))
    for i, v := range vector {
        result[i] = v / norm
    }
    return result
}

func (vi *VectorIndex) dotProduct(a, b []float64) float64 {
    sum := 0.0
    for i := range a {
        sum += a[i] * b[i]
    }
    return sum
}

type SearchResult struct {
    ID       string
    Score    float64
    Metadata map[string]interface{}
}

func (vi *VectorIndex) Search(queryVector []float64, topK int) []SearchResult {
    normalized := vi.normalize(queryVector)
    scores := make([]SearchResult, len(vi.vectors))
    for i, item := range vi.vectors {
        scores[i] = SearchResult{
            ID:       item.id,
            Score:    vi.dotProduct(normalized, item.vector),
            Metadata: item.metadata,
        }
    }
    sort.Slice(scores, func(i, j int) bool {
        return scores[i].Score > scores[j].Score
    })
    if topK > len(scores) {
        topK = len(scores)
    }
    return scores[:topK]
}

func main() {
    index := NewVectorIndex()
    index.Insert("doc1", []float64{0.8, 0.2, 0.5}, map[string]interface{}{"text": "AI 技术"})
    index.Insert("doc2", []float64{0.7, 0.3, 0.4}, map[string]interface{}{"text": "机器学习"})
    index.Insert("doc3", []float64{0.1, 0.9, 0.2}, map[string]interface{}{"text": "美食食谱"})

    results := index.Search([]float64{0.75, 0.25, 0.45}, 2)
    for _, r := range results {
        fmt.Printf("%s: %.4f - %v\n", r.ID, r.Score, r.Metadata)
    }
}

🌸 6. 布隆过滤器 (Bloom Filter)

数据结构

布隆过滤器是一种空间效率极高的概率型数据结构,用于判断元素是否存在于集合中。它可能产生假阳性(误判元素存在),但不会产生假阴性。

🌸 布隆过滤器工作原理

添加元素
元素 → k 个哈希函数 → k 个位置置 1
查询元素
元素 → k 个哈希函数 → 检查 k 个位置
判断结果
全为 1: 可能存在
有 0: 一定不存在
添加
O(k)
查询
O(k)
空间
O(m)
哈希函数
k

✅ 优点

  • 空间效率极高
  • 查询速度快
  • 支持大规模数据
  • 无假阴性

❌ 缺点

  • 可能假阳性
  • 不支持删除
  • 哈希函数选择敏感
  • 无法获取元素列表

代码实现

class BloomFilter {
    constructor(size = 1000, hashCount = 3) {
        this.size = size;
        this.hashCount = hashCount;
        this.bitArray = new Array(size).fill(0);
    }

    _hash(value, seed) {
        let hash = 0;
        const str = String(value);
        for (let i = 0; i < str.length; i++) {
            hash = ((hash << 5) - hash) + str.charCodeAt(i);
            hash += seed * i;
            hash = hash & hash;
        }
        return Math.abs(hash) % this.size;
    }

    add(value) {
        for (let i = 0; i < this.hashCount; i++) {
            const index = this._hash(value, i);
            this.bitArray[index] = 1;
        }
    }

    contains(value) {
        for (let i = 0; i < this.hashCount; i++) {
            const index = this._hash(value, i);
            if (this.bitArray[index] === 0) {
                return false;
            }
        }
        return true;
    }

    addBatch(values) {
        values.forEach(v => this.add(v));
    }
}

// 测试
const bf = new BloomFilter(1000, 3);
['apple', 'banana', 'orange'].forEach(f => bf.add(f));

console.log('apple:', bf.contains('apple'));     // true
console.log('banana:', bf.contains('banana'));   // true
console.log('grape:', bf.contains('grape'));     // false
console.log('watermelon:', bf.contains('watermelon')); // 可能误判
import hashlib

class BloomFilter:
    def __init__(self, size=1000, hash_count=3):
        self.size = size
        self.hash_count = hash_count
        self.bit_array = [0] * size

    def _hash(self, value, seed):
        key = f"{seed}:{value}".encode()
        hash_val = int(hashlib.md5(key).hexdigest(), 16)
        return hash_val % self.size

    def add(self, value):
        for i in range(self.hash_count):
            index = self._hash(value, i)
            self.bit_array[index] = 1

    def contains(self, value):
        for i in range(self.hash_count):
            index = self._hash(value, i)
            if self.bit_array[index] == 0:
                return False
        return True

    def add_batch(self, values):
        for v in values:
            self.add(v)

# 测试
bf = BloomFilter(1000, 3)
for fruit in ['apple', 'banana', 'orange']:
    bf.add(fruit)

print('apple:', bf.contains('apple'))      # True
print('banana:', bf.contains('banana'))    # True
print('grape:', bf.contains('grape'))      # False
print('watermelon:', bf.contains('watermelon'))  # 可能误判
package main

import (
    "crypto/md5"
    "encoding/binary"
    "fmt"
)

type BloomFilter struct {
    size      int
    hashCount int
    bitArray  []bool
}

func NewBloomFilter(size, hashCount int) *BloomFilter {
    return &BloomFilter{
        size:      size,
        hashCount: hashCount,
        bitArray:  make([]bool, size),
    }
}

func (bf *BloomFilter) hash(value string, seed uint32) int {
    data := fmt.Sprintf("%d:%s", seed, value)
    hash := md5.Sum([]byte(data))
    hashVal := binary.LittleEndian.Uint32(hash[:4])
    return int(hashVal) % bf.size
}

func (bf *BloomFilter) Add(value string) {
    for i := 0; i < bf.hashCount; i++ {
        index := bf.hash(value, uint32(i))
        bf.bitArray[index] = true
    }
}

func (bf *BloomFilter) Contains(value string) bool {
    for i := 0; i < bf.hashCount; i++ {
        index := bf.hash(value, uint32(i))
        if !bf.bitArray[index] {
            return false
        }
    }
    return true
}

func main() {
    bf := NewBloomFilter(1000, 3)
    fruits := []string{"apple", "banana", "orange"}
    for _, fruit := range fruits {
        bf.Add(fruit)
    }

    fmt.Println("apple:", bf.Contains("apple"))      // true
    fmt.Println("banana:", bf.Contains("banana"))    // true
    fmt.Println("grape:", bf.Contains("grape"))      // false
    fmt.Println("watermelon:", bf.Contains("watermelon")) // 可能误判
}

💰 贪心算法 (Greedy Algorithm)

算法思想

贪心算法是一种在每一步选择中都采取当前状态下最优选择的算法思想。它不从整体最优上加以考虑,而是通过局部最优解希望得到全局最优解。

💰 贪心算法示意

贪心策略
当前最优 → 下一步最优 → ... → 希望全局最优
适用条件
• 贪心选择性质
• 最优子结构
• 无后效性
时间复杂度
O(n log n)
空间复杂度
O(1)
正确性
需证明
效率
高效

✅ 优点

  • 算法简单直观
  • 时间效率高
  • 空间复杂度低
  • 易于实现

❌ 缺点

  • 不总是得到全局最优
  • 需要证明正确性
  • 适用范围有限
  • 可能陷入局部最优

经典应用场景

🪙 找零问题

用最少数量的硬币找出指定金额

🎒 分数背包

物品可分割,按价值密度贪心选择

📅 活动选择

选择最多互不冲突的活动

🌲 Huffman 编码

构造最优前缀编码树

🛣️ Dijkstra 算法

单源最短路径问题

🔗 Kruskal 算法

最小生成树问题

代码实现

🪙 应用 1: 找零问题

假设有面值为 25、10、5、1 的硬币,找出最少数量的硬币组合。

// 找零问题 - 贪心算法
function makeChange(amount) {
    const coins = [25, 10, 5, 1];
    const result = {};
    let count = 0;
    
    for (const coin of coins) {
        if (amount === 0) break;
        const num = Math.floor(amount / coin);
        if (num > 0) {
            result[coin] = num;
            count += num;
            amount -= num * coin;
        }
    }
    
    return { coins: result, total: count };
}

// 测试
console.log(makeChange(63));  // { coins: { 25: 2, 10: 1, 1: 3 }, total: 6 }
console.log(makeChange(99));  // { coins: { 25: 3, 10: 2, 1: 4 }, total: 9 }
# 找零问题 - 贪心算法
def make_change(amount):
    coins = [25, 10, 5, 1]
    result = {}
    count = 0
    
    for coin in coins:
        if amount == 0:
            break
        num = amount // coin
        if num > 0:
            result[coin] = num
            count += num
            amount -= num * coin
    
    return {'coins': result, 'total': count}

# 测试
print(make_change(63))  # {'coins': {25: 2, 10: 1, 1: 3}, 'total': 6}
print(make_change(99))  # {'coins': {25: 3, 10: 2, 1: 4}, 'total': 9}
package main

import "fmt"

type ChangeResult struct {
    Coins map[int]int
    Total int
}

func makeChange(amount int) ChangeResult {
    coins := []int{25, 10, 5, 1}
    result := ChangeResult{Coins: make(map[int]int), Total: 0}
    
    for _, coin := range coins {
        if amount == 0 {
            break
        }
        num := amount / coin
        if num > 0 {
            result.Coins[coin] = num
            result.Total += num
            amount -= num * coin
        }
    }
    
    return result
}

func main() {
    r1 := makeChange(63)
    fmt.Printf("63: %v, 总数:%d\n", r1.Coins, r1.Total)
    
    r2 := makeChange(99)
    fmt.Printf("99: %v, 总数:%d\n", r2.Coins, r2.Total)
}

📅 应用 2: 活动选择问题

给定多个活动的开始和结束时间,选择最多的互不冲突的活动。

// 活动选择问题 - 贪心算法
function selectActivities(activities) {
    // 按结束时间排序
    activities.sort((a, b) => a.end - b.end);
    
    const selected = [];
    let lastEnd = -Infinity;
    
    for (const activity of activities) {
        if (activity.start >= lastEnd) {
            selected.push(activity);
            lastEnd = activity.end;
        }
    }
    
    return selected;
}

// 测试
const activities = [
    { name: 'A', start: 1, end: 3 },
    { name: 'B', start: 2, end: 5 },
    { name: 'C', start: 4, end: 7 },
    { name: 'D', start: 6, end: 9 },
    { name: 'E', start: 8, end: 10 }
];

const result = selectActivities(activities);
console.log('选中的活动:', result.map(a => a.name));  // ['A', 'C', 'E']
console.log('数量:', result.length);  // 3
# 活动选择问题 - 贪心算法
def select_activities(activities):
    # 按结束时间排序
    activities.sort(key=lambda x: x[1])
    
    selected = []
    last_end = float('-inf')
    
    for name, start, end in activities:
        if start >= last_end:
            selected.append((name, start, end))
            last_end = end
    
    return selected

# 测试
activities = [
    ('A', 1, 3),
    ('B', 2, 5),
    ('C', 4, 7),
    ('D', 6, 9),
    ('E', 8, 10)
]

result = select_activities(activities)
print('选中的活动:', [a[0] for a in result])  # ['A', 'C', 'E']
print('数量:', len(result))  # 3
package main

import (
    "fmt"
    "sort"
)

type Activity struct {
    Name  string
    Start int
    End   int
}

func selectActivities(activities []Activity) []Activity {
    // 按结束时间排序
    sort.Slice(activities, func(i, j int) bool {
        return activities[i].End < activities[j].End
    })
    
    selected := []Activity{}
    lastEnd := -1
    
    for _, activity := range activities {
        if activity.Start >= lastEnd {
            selected = append(selected, activity)
            lastEnd = activity.End
        }
    }
    
    return selected
}

func main() {
    activities := []Activity{
        {"A", 1, 3},
        {"B", 2, 5},
        {"C", 4, 7},
        {"D", 6, 9},
        {"E", 8, 10},
    }
    
    result := selectActivities(activities)
    fmt.Print("选中的活动:")
    for _, a := range result {
        fmt.Print(a.Name, " ")
    }
    fmt.Println()
    fmt.Println("数量:", len(result))
}

🎒 应用 3: 分数背包问题

物品可以分割,按价值密度(价值/重量)贪心选择,使背包总价值最大。

// 分数背包问题 - 贪心算法
function fractionalKnapsack(items, capacity) {
    // 计算价值密度并排序
    items.forEach(item => {
        item.density = item.value / item.weight;
    });
    items.sort((a, b) => b.density - a.density);
    
    let totalValue = 0;
    let remaining = capacity;
    const taken = [];
    
    for (const item of items) {
        if (remaining <= 0) break;
        
        if (item.weight <= remaining) {
            // 全拿
            taken.push({ ...item, takeWeight: item.weight });
            totalValue += item.value;
            remaining -= item.weight;
        } else {
            // 拿一部分
            const takeWeight = remaining;
            const takeValue = item.density * takeWeight;
            taken.push({ ...item, takeWeight, takeValue });
            totalValue += takeValue;
            remaining = 0;
        }
    }
    
    return { totalValue, taken };
}

// 测试
const items = [
    { name: 'A', value: 60, weight: 10 },
    { name: 'B', value: 100, weight: 20 },
    { name: 'C', value: 120, weight: 30 }
];

const result = fractionalKnapsack(items, 50);
console.log('最大价值:', result.totalValue);  // 240
console.log('选取:', result.taken.map(t => `${t.name}(${t.takeWeight}kg)`));
# 分数背包问题 - 贪心算法
def fractional_knapsack(items, capacity):
    # 计算价值密度并排序
    for item in items:
        item['density'] = item['value'] / item['weight']
    items.sort(key=lambda x: x['density'], reverse=True)
    
    total_value = 0
    remaining = capacity
    taken = []
    
    for item in items:
        if remaining <= 0:
            break
        
        if item['weight'] <= remaining:
            # 全拿
            taken.append({**item, 'take_weight': item['weight']})
            total_value += item['value']
            remaining -= item['weight']
        else:
            # 拿一部分
            take_weight = remaining
            take_value = item['density'] * take_weight
            taken.append({**item, 'take_weight': take_weight, 'take_value': take_value})
            total_value += take_value
            remaining = 0
    
    return {'total_value': total_value, 'taken': taken}

# 测试
items = [
    {'name': 'A', 'value': 60, 'weight': 10},
    {'name': 'B', 'value': 100, 'weight': 20},
    {'name': 'C', 'value': 120, 'weight': 30}
]

result = fractional_knapsack(items, 50)
print('最大价值:', result['total_value'])  # 240.0
print('选取:', [(t['name'], f"{t['take_weight']}kg") for t in result['taken']])
package main

import (
    "fmt"
    "sort"
)

type Item struct {
    Name     string
    Value    int
    Weight   int
    Density  float64
}

type TakenItem struct {
    Name      string
    TakeWeight int
    TakeValue float64
}

func fractionalKnapsack(items []Item, capacity int) (float64, []TakenItem) {
    // 计算价值密度
    for i := range items {
        items[i].Density = float64(items[i].Value) / float64(items[i].Weight)
    }
    
    // 按价值密度排序
    sort.Slice(items, func(i, j int) bool {
        return items[i].Density > items[j].Density
    })
    
    totalValue := 0.0
    remaining := capacity
    taken := []TakenItem{}
    
    for _, item := range items {
        if remaining <= 0 {
            break
        }
        
        if item.Weight <= remaining {
            taken = append(taken, TakenItem{
                Name: item.Name,
                TakeWeight: item.Weight,
                TakeValue: float64(item.Value),
            })
            totalValue += float64(item.Value)
            remaining -= item.Weight
        } else {
            takeWeight := remaining
            takeValue := item.Density * float64(takeWeight)
            taken = append(taken, TakenItem{
                Name: item.Name,
                TakeWeight: takeWeight,
                TakeValue: takeValue,
            })
            totalValue += takeValue
            remaining = 0
        }
    }
    
    return totalValue, taken
}

func main() {
    items := []Item{
        {"A", 60, 10, 0},
        {"B", 100, 20, 0},
        {"C", 120, 30, 0},
    }
    
    totalValue, taken := fractionalKnapsack(items, 50)
    fmt.Printf("最大价值:%.2f\n", totalValue)
    fmt.Print("选取:")
    for _, t := range taken {
        fmt.Printf("%s(%dkg) ", t.Name, t.TakeWeight)
    }
    fmt.Println()
}
🎯 数据结构选择指南
  • 频繁插入删除:链表(O(1) 插入删除)
  • 后进先出场景:栈(括号匹配、函数调用)
  • 先进先出场景:队列(任务调度、BFS)
  • 快速查找/动态集合:二叉搜索树、AVL 树、红黑树
  • 优先级队列/TopK:堆(堆排序、优先队列)
  • 复杂关系网络:图(社交网络、路径规划)
  • 字符串前缀:字典树(自动补全、拼写检查)
  • 区间查询更新:线段树(区间求和、最值)
  • 局部最优可解:贪心算法(找零、活动选择、分数背包)
  • 高维向量检索:向量索引(AI 应用、语义搜索)
  • 大规模去重:布隆过滤器(缓存穿透、URL 去重)