Bash 正则表达式参考
Bash 中的正则表达式:=~ 运算符、POSIX 字符类、grep/sed/awk 模式。
=~ 运算符
# 匹配并通过 BASH_REMATCH 捕获
str="2024-03-15"
if [[ "$str" =~ ^([0-9]{4})-([0-9]{2})-([0-9]{2})$ ]]; then
echo "年: ${BASH_REMATCH[1]}" # 2024
echo "月: ${BASH_REMATCH[2]}" # 03
echo "日: ${BASH_REMATCH[3]}" # 15
fi
# 邮箱验证
email="user@example.com"
if [[ "$email" =~ ^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$ ]]; then
echo "格式正确"
fi
# 取反
[[ ! "$input" =~ ^[0-9]+$ ]] && echo "不是数字"
grep
grep 'pattern' file # 基本匹配
grep -E '[0-9]+' file # 扩展正则
grep -P '\d+' file # PCRE
grep -i 'pattern' file # 忽略大小写
grep -v 'pattern' file # 反向匹配
grep -n 'pattern' file # 显示行号
grep -r 'pattern' ./dir/ # 递归搜索
grep -o 'pattern' file # 只打印匹配部分
grep -E '^(ERROR|WARN)' app.log
sed
sed 's/foo/bar/g' file # 全部替换
sed -E 's/([0-9]{4})-([0-9]{2})/\2\/\1/' f # 捕获组交换
sed '/^$/d' file # 删除空行
sed '/pattern/d' file # 删除匹配行
sed -n '/start/,/end/p' file # 打印范围
POSIX 字符类
[:alpha:] # 字母 [a-zA-Z]
[:digit:] # 数字 [0-9]
[:alnum:] # 字母数字
[:space:] # 空白字符
[:upper:] # 大写字母
[:lower:] # 小写字母
[:punct:] # 标点符号
grep '[[:alpha:]]' file
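grep '^[[:digit:]]\+$' file # BRE 中的 \+ 是 GNU 扩展;可移植写法: grep -E '^[[:digit:]]+$' file
sed 's/[[:space:]]\+/ /g' file # BRE 中的 \+ 是 GNU 扩展;可移植写法: sed -E 's/[[:space:]]+/ /g' file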
POSIX 与 PCRE 对比
# BRE(grep 默认): . * ^ $ [] \ \(\) \{\}
# ERE(grep -E、awk): . * + ? ^ $ [] | () {}
# PCRE(grep -P): \d \w \s \b (?:) (?=) (?<=)
# 单词边界
grep -E '\bword\b' file # ERE(\b 为 GNU 扩展,POSIX ERE 未定义)
grep -P '\bword\b' file # PCRE
grep '\<word\>' file # BRE
# 非贪婪(仅 PCRE)
grep -oP '<.+?>' file