有限状态自动机 (Finite State Automaton, FSA) 是一种抽象的计算模型,由有限个状态组成,通过接收输入符号在状态之间转移。 它是正则语言的形式化描述,广泛应用于编译器、文本处理、网络协议等领域。
有限状态自动机的核心思想是: 通过有限的状态集合和状态转移规则来识别语言中的字符串。
O(|Q|) - 状态数
O(n) - 字符串长度
正则语言
一个确定有限自动机 (DFA) 是一个五元组:
M = (Q, Σ, δ, q₀, F)
其中:
• Q = 有限状态集合 {q₀, q₁, ..., qₙ}
• Σ = 输入字母表 (有限符号集)
• δ = 状态转移函数 Q × Σ → Q
• q₀ = 初始状态 (q₀ ∈ Q)
• F = 接受状态集合 (F ⊆ Q)
识别规则:以 0 结尾的二进制数
| 状态 | 输入 0 | 输入 1 |
|---|---|---|
| q₀ (初始/接受) | q₀ | q₁ |
| q₁ | q₀ | q₁ |
正则表达式可以转换为等价的 DFA,这是正则表达式引擎的核心原理。
class DFA {
constructor() {
this.states = new Set();
this.alphabet = new Set();
this.transitions = new Map();
this.startState = null;
this.acceptStates = new Set();
}
addState(state) {
this.states.add(state);
}
setStartState(state) {
this.startState = state;
}
addAcceptState(state) {
this.acceptStates.add(state);
}
addTransition(fromState, symbol, toState) {
const key = `${fromState}:${symbol}`;
this.transitions.set(key, toState);
this.alphabet.add(symbol);
}
// 检查字符串是否被接受
accepts(input) {
let currentState = this.startState;
for (const char of input) {
const key = `${currentState}:${char}`;
const nextState = this.transitions.get(key);
if (nextState === undefined) {
return false; // 无效转移
}
currentState = nextState;
}
return this.acceptStates.has(currentState);
}
// 获取转移函数
getTransition(state, symbol) {
return this.transitions.get(`${state}:${symbol}`);
}
// 克隆 DFA
clone() {
const newDFA = new DFA();
newDFA.states = new Set(this.states);
newDFA.alphabet = new Set(this.alphabet);
newDFA.transitions = new Map(this.transitions);
newDFA.startState = this.startState;
newDFA.acceptStates = new Set(this.acceptStates);
return newDFA;
}
}
// NFA 类(支持 ε 转移和多值转移)
class NFA {
constructor() {
this.states = new Set();
this.alphabet = new Set();
this.epsilonTransitions = new Map();
this.transitions = new Map();
this.startStates = new Set();
this.acceptStates = new Set();
}
addState(state) {
this.states.add(state);
}
addStartState(state) {
this.startStates.add(state);
}
addAcceptState(state) {
this.acceptStates.add(state);
}
addTransition(fromState, symbol, toState) {
const key = `${fromState}:${symbol}`;
if (!this.transitions.has(key)) {
this.transitions.set(key, new Set());
}
this.transitions.get(key).add(toState);
this.alphabet.add(symbol);
}
addEpsilonTransition(fromState, toState) {
if (!this.epsilonTransitions.has(fromState)) {
this.epsilonTransitions.set(fromState, new Set());
}
this.epsilonTransitions.get(fromState).add(toState);
}
// ε-闭包计算
epsilonClosure(states) {
const closure = new Set(states);
const stack = Array.from(states);
while (stack.length > 0) {
const state = stack.pop();
const epsilonNext = this.epsilonTransitions.get(state);
if (epsilonNext) {
for (const next of epsilonNext) {
if (!closure.has(next)) {
closure.add(next);
stack.push(next);
}
}
}
}
return closure;
}
// NFA 接受检查
accepts(input) {
let currentStates = this.epsilonClosure(this.startStates);
for (const char of input) {
const nextStates = new Set();
for (const state of currentStates) {
const key = `${state}:${char}`;
const trans = this.transitions.get(key);
if (trans) {
nextStates.add(...trans);
}
}
currentStates = this.epsilonClosure(nextStates);
if (currentStates.size === 0) {
return false;
}
}
// 检查是否有接受状态
for (const state of currentStates) {
if (this.acceptStates.has(state)) {
return true;
}
}
return false;
}
}
// 正则表达式到 NFA 的转换 (Thompson 构造法)
class RegexToNFA {
constructor() {
this.stateCounter = 0;
}
newState() {
return `q${this.stateCounter++}`;
}
// 基本正则表达式(单个字符)
basic(char) {
const start = this.newState();
const accept = this.newState();
const nfa = {
start: start,
accept: accept,
transitions: new Map(),
epsilonTransitions: new Map()
};
nfa.transitions.set(`${start}:${char}`, new Set([accept]));
return nfa;
}
// 并联:a|b
union(nfa1, nfa2) {
const start = this.newState();
const accept = this.newState();
const epsilon1 = new Set([nfa1.start, nfa2.start]);
const epsilon2 = new Set([accept]);
return {
start: start,
accept: accept,
transitions: new Map([...nfa1.transitions, ...nfa2.transitions]),
epsilonTransitions: new Map([
[start, epsilon1],
[nfa1.accept, epsilon2],
[nfa2.accept, epsilon2]
])
};
}
// 连接:ab
concat(nfa1, nfa2) {
return {
start: nfa1.start,
accept: nfa2.accept,
transitions: new Map([...nfa1.transitions, ...nfa2.transitions]),
epsilonTransitions: new Map([[nfa1.accept, new Set([nfa2.start])]])
};
}
// 克林闭包:a*
kleeneStar(nfa) {
const start = this.newState();
const accept = this.newState();
const epsilon1 = new Set([nfa.start, accept]);
const epsilon2 = new Set([accept]);
return {
start: start,
accept: accept,
transitions: new Map(nfa.transitions),
epsilonTransitions: new Map([
[start, epsilon1],
[nfa.accept, epsilon2]
])
};
}
}
// 使用示例
const dfa = new DFA();
dfa.addState('q0');
dfa.addState('q1');
dfa.setStartState('q0');
dfa.addAcceptState('q0');
dfa.addTransition('q0', '0', 'q0');
dfa.addTransition('q0', '1', 'q1');
dfa.addTransition('q1', '0', 'q0');
dfa.addTransition('q1', '1', 'q1');
// 测试
const testStrings = ['0', '1', '00', '01', '10', '11', '000', '1010'];
console.log('DFA 测试 (识别以0结尾的二进制数):');
testStrings.forEach(str => {
console.log(` "${str}": ${dfa.accepts(str) ? '接受' : '拒绝'}`);
});
from collections import defaultdict
class DFA:
def __init__(self):
self.states = set()
self.alphabet = set()
self.transitions = defaultdict(dict)
self.start_state = None
self.accept_states = set()
def add_state(self, state):
self.states.add(state)
def set_start_state(self, state):
self.start_state = state
def add_accept_state(self, state):
self.accept_states.add(state)
def add_transition(self, from_state, symbol, to_state):
self.transitions[from_state][symbol] = to_state
self.alphabet.add(symbol)
def accepts(self, input_str):
current_state = self.start_state
for char in input_str:
if char not in self.transitions[current_state]:
return False
current_state = self.transitions[current_state][char]
return current_state in self.accept_states
def get_transition(self, state, symbol):
return self.transitions[state].get(symbol)
class NFA:
def __init__(self):
self.states = set()
self.alphabet = set()
self.epsilon_transitions = defaultdict(set)
self.transitions = defaultdict(lambda: defaultdict(set))
self.start_states = set()
self.accept_states = set()
def add_state(self, state):
self.states.add(state)
def add_start_state(self, state):
self.start_states.add(state)
def add_accept_state(self, state):
self.accept_states.add(state)
def add_transition(self, from_state, symbol, to_state):
self.transitions[from_state][symbol].add(to_state)
self.alphabet.add(symbol)
def add_epsilon_transition(self, from_state, to_state):
self.epsilon_transitions[from_state].add(to_state)
def epsilon_closure(self, states):
closure = set(states)
stack = list(states)
while stack:
state = stack.pop()
for next_state in self.epsilon_transitions[state]:
if next_state not in closure:
closure.add(next_state)
stack.append(next_state)
return closure
def accepts(self, input_str):
current_states = self.epsilon_closure(self.start_states)
for char in input_str:
next_states = set()
for state in current_states:
next_states.update(self.transitions[state][char])
current_states = self.epsilon_closure(next_states)
if not current_states:
return False
return bool(current_states & self.accept_states)
# 正则表达式到 NFA 转换
class RegexToNFA:
def __init__(self):
self.state_counter = 0
def new_state(self):
state = f"q{self.state_counter}"
self.state_counter += 1
return state
def basic(self, char):
start = self.new_state()
accept = self.new_state()
nfa = {
'start': start,
'accept': accept,
'transitions': {start: {char: {accept}}},
'epsilon': {}
}
return nfa
def union(self, nfa1, nfa2):
start = self.new_state()
accept = self.new_state()
transitions = {**nfa1['transitions'], **nfa2['transitions']}
transitions[start] = {}
transitions[nfa1['accept']] = {}
transitions[nfa2['accept']] = {}
epsilon = {start: {nfa1['start'], nfa2['accept']}}
epsilon[nfa1['accept']] = {accept}
epsilon[nfa2['accept']] = {accept}
return {
'start': start,
'accept': accept,
'transitions': transitions,
'epsilon': epsilon
}
def concat(self, nfa1, nfa2):
transitions = {**nfa1['transitions'], **nfa2['transitions']}
transitions[nfa1['accept']] = {}
return {
'start': nfa1['start'],
'accept': nfa2['accept'],
'transitions': transitions,
'epsilon': {}
}
def kleene_star(self, nfa):
start = self.new_state()
accept = self.new_state()
transitions = dict(nfa['transitions'])
transitions[start] = {}
transitions[nfa['accept']] = {}
epsilon = {
start: {nfa['start'], accept},
nfa['accept']: {accept}
}
return {
'start': start,
'accept': accept,
'transitions': transitions,
'epsilon': epsilon
}
# 使用示例
if __name__ == "__main__":
# 构建 DFA:识别以 0 结尾的二进制数
dfa = DFA()
dfa.add_state('q0')
dfa.add_state('q1')
dfa.set_start_state('q0')
dfa.add_accept_state('q0')
dfa.add_transition('q0', '0', 'q0')
dfa.add_transition('q0', '1', 'q1')
dfa.add_transition('q1', '0', 'q0')
dfa.add_transition('q1', '1', 'q1')
# 测试
test_strings = ['0', '1', '00', '01', '10', '11', '000', '1010']
print('DFA 测试 (识别以0结尾的二进制数):')
for s in test_strings:
print(f' "{s}": {"接受" if dfa.accepts(s) else "拒绝"}')
package main
import (
"fmt"
)
type DFA struct {
states map[string]bool
alphabet map[string]bool
transitions map[string]map[string]string
startState string
acceptStates map[string]bool
}
func NewDFA() *DFA {
return &DFA{
states: make(map[string]bool),
alphabet: make(map[string]bool),
transitions: make(map[string]map[string]string),
acceptStates: make(map[string]bool),
}
}
func (dfa *DFA) AddState(state string) {
dfa.states[state] = true
dfa.transitions[state] = make(map[string]string)
}
func (dfa *DFA) SetStartState(state string) {
dfa.startState = state
}
func (dfa *DFA) AddAcceptState(state string) {
dfa.acceptStates[state] = true
}
func (dfa *DFA) AddTransition(fromState, symbol, toState string) {
dfa.states[fromState] = true
dfa.states[toState] = true
dfa.alphabet[symbol] = true
dfa.transitions[fromState][symbol] = toState
}
func (dfa *DFA) Accepts(input string) bool {
currentState := dfa.startState
for _, char := range input {
nextState, ok := dfa.transitions[currentState][string(char)]
if !ok {
return false
}
currentState = nextState
}
return dfa.acceptStates[currentState]
}
type NFA struct {
states map[string]bool
alphabet map[string]bool
epsilonTransitions map[string][]string
transitions map[string]map[string][]string
startStates map[string]bool
acceptStates map[string]bool
}
func NewNFA() *NFA {
return &NFA{
states: make(map[string]bool),
alphabet: make(map[string]bool),
epsilonTransitions: make(map[string][]string),
transitions: make(map[string]map[string][]string),
startStates: make(map[string]bool),
acceptStates: make(map[string]bool),
}
}
func (nfa *NFA) AddState(state string) {
nfa.states[state] = true
}
func (nfa *NFA) AddStartState(state string) {
nfa.startStates[state] = true
}
func (nfa *NFA) AddAcceptState(state string) {
nfa.acceptStates[state] = true
}
func (nfa *NFA) AddTransition(fromState, symbol, toState string) {
nfa.states[fromState] = true
nfa.states[toState] = true
nfa.alphabet[symbol] = true
if nfa.transitions[fromState] == nil {
nfa.transitions[fromState] = make(map[string][]string)
}
nfa.transitions[fromState][symbol] = append(nfa.transitions[fromState][symbol], toState)
}
func (nfa *NFA) AddEpsilonTransition(fromState, toState string) {
nfa.states[fromState] = true
nfa.states[toState] = true
nfa.epsilonTransitions[fromState] = append(nfa.epsilonTransitions[fromState], toState)
}
func (nfa *NFA) epsilonClosure(states map[string]bool) map[string]bool {
closure := make(map[string]bool)
var stack []string
for s := range states {
closure[s] = true
stack = append(stack, s)
}
for len(stack) > 0 {
state := stack[len(stack)-1]
stack = stack[:len(stack)-1]
for _, next := range nfa.epsilonTransitions[state] {
if !closure[next] {
closure[next] = true
stack = append(stack, next)
}
}
}
return closure
}
func (nfa *NFA) Accepts(input string) bool {
currentStates := nfa.epsilonClosure(nfa.startStates)
for _, char := range input {
nextStates := make(map[string]bool)
for state := range currentStates {
for _, next := range nfa.transitions[state][string(char)] {
nextStates[next] = true
}
}
currentStates = nfa.epsilonClosure(nextStates)
if len(currentStates) == 0 {
return false
}
}
for state := range currentStates {
if nfa.acceptStates[state] {
return true
}
}
return false
}
func main() {
// 构建 DFA:识别以 0 结尾的二进制数
dfa := NewDFA()
dfa.AddState("q0")
dfa.AddState("q1")
dfa.SetStartState("q0")
dfa.AddAcceptState("q0")
dfa.AddTransition("q0", "0", "q0")
dfa.AddTransition("q0", "1", "q1")
dfa.AddTransition("q1", "0", "q0")
dfa.AddTransition("q1", "1", "q1")
// 测试
testStrings := []string{"0", "1", "00", "01", "10", "11", "000", "1010"}
fmt.Println("DFA 测试 (识别以0结尾的二进制数):")
for _, s := range testStrings {
result := "拒绝"
if dfa.Accepts(s) {
result = "接受"
}
fmt.Printf(" \"%s\": %s\n", s, result)
}
}
大多数编程语言的正则表达式实现底层使用 NFA 或 DFA。
编译器的前端使用有限自动机进行词法分析,识别关键字、标识符等。
TCP/IP 协议栈的状态机实现网络连接管理。
grep、sed 等工具使用有限自动机进行模式匹配。
有限状态机用于实现游戏角色的行为逻辑。
💡 这些语言需要下推自动机 (PDA) 或更强大的计算模型来识别