Shell 脚本中常用的字符串操作有哪些？如何进行字符串拼接、截取和替换？ - 面试题

Shell 脚本中常用的字符串操作包括字符串拼接、截取、替换、比较和长度计算等。

字符串定义

基本定义

bash
# Three ways to assign a string: double quotes (expansions are processed),
# single quotes (everything is literal), and a bare word (only safe when the
# value contains no whitespace or special characters).
str1="Hello World"
str2='Hello World'
str3=Hello

# Multi-line string, written with ANSI-C quoting: each \n becomes a newline.
str4=$'Line 1\nLine 2\nLine 3'

字符串拼接

简单拼接

bash
# Adjacent words with no whitespace between them are concatenated.
str1="Hello"
str2="World"
str3=$str1" "$str2
echo "$str3"  # Output: Hello World

# Interpolate both variables inside a single double-quoted string.
str4="${str1} ${str2}"
echo "$str4"  # Output: Hello World

# Join several literals. The quoted pieces must touch each other: whitespace
# between them would end the assignment and make the shell treat the next
# word as a command name.
str5="Hello"" ""World"
echo "$str5"  # Output: Hello World

动态拼接

bash
# Embed a variable in surrounding text.
name="John"
greeting="Hello, $name!"
echo "$greeting"  # Output: Hello, John!

# Embed the output of a command. The expansion is quoted when printed so
# that word splitting cannot collapse consecutive spaces in the output.
date_str="Current date: $(date)"
echo "$date_str"

# Join individual array elements with a separator.
arr=("apple" "banana" "cherry")
str="${arr[0]}, ${arr[1]}, ${arr[2]}"
echo "$str"  # Output: apple, banana, cherry

字符串长度

计算长度

bash
# Length via parameter expansion (no external process needed).
str="Hello World"
echo "${#str}"  # Output: 11

# Length via expr. The "STRING : REGEX" form prints the number of characters
# matched by the regex; the `expr length STRING` form is an extension that
# POSIX does not specify, so it is avoided here.
expr "$str" : '.*'  # Output: 11

# Length via awk: length($0) is the length of the current input line.
printf '%s\n' "$str" | awk '{ print length($0) }'  # Output: 11

字符串截取

基本截取

bash
str="Hello World"

# ${var:offset} - substring from offset (0-based) to the end.
printf '%s\n' "${str:0}"     # Output: Hello World
printf '%s\n' "${str:6}"     # Output: World

# ${var:offset:length} - at most `length` characters starting at offset.
printf '%s\n' "${str:0:5}"   # Output: Hello
printf '%s\n' "${str:6:5}"   # Output: World

# A negative offset counts from the end. The space before '-' is required,
# otherwise the expansion is parsed as ${var:-default}.
printf '%s\n' "${str: -5}"   # Output: World
printf '%s\n' "${str: -5:3}" # Output: Wor

删除子串

bash
str="Hello World"

# '#' removes the shortest prefix that matches the pattern.
echo "${str#He}"    # Output: llo World
# Quoted so the leading space of the result is not lost to word splitting.
echo "${str#*o}"    # Output: " World" (with the leading space)

# '##' removes the longest matching prefix.
echo "${str##*o}"   # Output: rld

# '%' removes the shortest suffix that matches the pattern.
echo "${str%ld}"    # Output: Hello Wor
echo "${str%o*}"    # Output: Hello W

# '%%' removes the longest matching suffix.
echo "${str%%o*}"   # Output: Hell

提取文件名和路径

bash
filepath="/path/to/file.txt"

# File name: drop the longest prefix ending in '/'.
filename=${filepath##*/}
echo "$filename"   # Output: file.txt

# Directory: drop the shortest suffix starting with '/'.
dirname=${filepath%/*}
echo "$dirname"    # Output: /path/to

# Extension: taken from the file name rather than the full path, so a dot in
# a directory name cannot be mistaken for the extension separator. A name
# without any dot has an empty extension.
if [[ "$filename" == *.* ]]; then
    extension=${filename##*.}
else
    extension=""
fi
echo "$extension"  # Output: txt

# File name without its last extension.
basename=${filename%.*}
echo "$basename"   # Output: file

字符串替换

基本替换

bash
str="Hello World"

# ${var/pattern/replacement} - replace the first match only.
printf '%s\n' "${str/World/Bash}"  # Output: Hello Bash

# ${var//pattern/replacement} - replace every match.
printf '%s\n' "${str//o/O}"        # Output: HellO WOrld

# An empty replacement deletes the match (first one, then all of them).
printf '%s\n' "${str/o/}"          # Output: Hell World
printf '%s\n' "${str//o/}"         # Output: Hell Wrld

前缀和后缀替换

bash
str="Hello World"

# '/#' anchors the pattern to the start of the string.
printf '%s\n' "${str/#Hello/Hi}"    # Output: Hi World

# '/%' anchors the pattern to the end of the string.
printf '%s\n' "${str/%World/Bash}"  # Output: Hello Bash

大小写转换

bash
str="Hello World"

# '^^' upper-cases every character.
echo "${str^^}"   # Output: HELLO WORLD

# ',,' lower-cases every character.
echo "${str,,}"   # Output: hello world

# '^' upper-cases only the first character; the rest is left untouched.
echo "${str^}"    # Output: Hello World
lower="hello world"
echo "${lower^}"  # Output: Hello world

字符串比较

基本比较

bash
str1="Hello"
str2="World"

# Equality with the POSIX test bracket.
if [ "$str1" = "$str2" ]; then printf '%s\n' "Strings are equal"; fi

# Inequality with the POSIX test bracket.
if [ "$str1" != "$str2" ]; then printf '%s\n' "Strings are not equal"; fi

# Equality with the Bash [[ ]] keyword.
if [[ "$str1" == "$str2" ]]; then printf '%s\n' "Strings are equal"; fi

模式匹配

bash
str="Hello World"

# Prefix check: glob pattern with a trailing '*'.
case "$str" in
    Hello*) echo "Starts with Hello" ;;
esac

# Suffix check: glob pattern with a leading '*'.
case "$str" in
    *World) echo "Ends with World" ;;
esac

# Substring check: '*' on both sides.
case "$str" in
    *lo*) echo "Contains 'lo'" ;;
esac

正则表达式匹配

bash
str="Hello123"

# The patterns are stored in variables and expanded unquoted on the right
# side of =~ so that they are interpreted as regular expressions.
letters_only='^[A-Za-z]+$'
letters_and_digits='^[A-Za-z0-9]+$'
letters_then_digits='([A-Za-z]+)([0-9]+)'

if [[ $str =~ $letters_only ]]; then
    echo "Only letters"
fi

if [[ $str =~ $letters_and_digits ]]; then
    echo "Letters and numbers"
fi

# After a successful match, the capture groups are in BASH_REMATCH[1..n].
if [[ $str =~ $letters_then_digits ]]; then
    echo "Letters: ${BASH_REMATCH[1]}"
    echo "Numbers: ${BASH_REMATCH[2]}"
fi

字符串分割

使用 IFS 分割

bash
str="apple,banana,cherry"

# Split on commas into an array. The IFS assignment is a prefix of the read
# command, so it applies to that command only.
IFS=',' read -r -a arr <<< "$str"
printf '%s\n' "${arr[0]}"  # Output: apple
printf '%s\n' "${arr[1]}"  # Output: banana
printf '%s\n' "${arr[2]}"  # Output: cherry

# Walk the array by index and print each element.
for (( i = 0; i < ${#arr[@]}; i++ )); do
    printf 'Item: %s\n' "${arr[i]}"
done

使用 cut 分割

bash
str="apple:banana:cherry"

# Select one ':'-delimited field at a time; the string is fed to cut through
# a here-string.
cut -d: -f1 <<< "$str"  # Output: apple
cut -d: -f2 <<< "$str"  # Output: banana
cut -d: -f3 <<< "$str"  # Output: cherry

字符串去空

去除空白字符

bash
str="  Hello World  "

# Leading whitespace: deleting everything from the first non-space character
# onward leaves just the leading run, which is then removed as a literal
# prefix (quoted so it is not treated as a pattern).
leading_ws="${str%%[![:space:]]*}"
str="${str#"$leading_ws"}"

# Trailing whitespace: the same idea mirrored - isolate what follows the last
# non-space character and remove it as a literal suffix.
trailing_ws="${str##*[![:space:]]}"
str="${str%"$trailing_ws"}"

echo "$str"  # Output: Hello World

使用 sed 去空

bash
str="  Hello World  "

# Strip whitespace at the start of the line.
sed 's/^[[:space:]]*//' <<< "$str"

# Strip whitespace at the end of the line.
sed 's/[[:space:]]*$//' <<< "$str"

# Strip every whitespace character, including the ones between words.
sed 's/[[:space:]]//g' <<< "$str"

实际应用示例

验证输入

bash
# Check whether a string looks like an e-mail address.
# Arguments: $1 - candidate address
# Outputs:   "Valid email" or "Invalid email" on stdout
# Returns:   0 when the address matches the pattern, 1 otherwise
validate_email() {
    local email="$1"
    # local-part "@" domain "." top-level label of at least two letters
    local pattern='^[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}$'

    # Guard clause: reject first, so the success path reads straight through.
    if ! [[ "$email" =~ $pattern ]]; then
        echo "Invalid email"
        return 1
    fi

    echo "Valid email"
    return 0
}

validate_email "user@example.com"

格式化输出

bash
# Print one aligned "Name / Age / City" record.
# Arguments: $1 - name (left-aligned, padded to 20 columns)
#            $2 - age  (integer, right-aligned in 3 columns)
#            $3 - city
# Outputs:   the formatted line on stdout
format_string() {
    local name=$1 age=$2 city=$3
    local -r layout="Name: %-20s Age: %3d City: %s\n"

    printf "$layout" "$name" "$age" "$city"
}

format_string "John Doe" 25 "New York"

批量重命名

bash
# Rename every *.txt file in the current directory to *.bak.
for file in *.txt; do
    # When nothing matches, the glob is left as the literal string "*.txt";
    # skip it instead of handing a non-existent name to mv.
    [[ -e "$file" || -L "$file" ]] || continue
    new_name="${file%.txt}.bak"
    # "--" stops option parsing, so a name starting with '-' is safe.
    mv -- "$file" "$new_name"
done

日志解析

bash
# Split a "[timestamp] [LEVEL] message" log line into its three fields.
log_line="[2024-01-01 10:00:00] [INFO] User logged in"

# Timestamp: drop the opening '[', then everything from the first ']' on.
time="${log_line#\[}"
time="${time%%\]*}"
echo "Time: $time"        # Output: Time: 2024-01-01 10:00:00

# Level: drop through the "] [" that separates the first two fields (the
# first '[' alone would leave the timestamp), then cut at the next ']'.
level="${log_line#*\] \[}"
level="${level%%\]*}"
echo "Level: $level"      # Output: Level: INFO

# Message: drop the two bracketed fields one after the other, so that a
# "] " inside the message text itself is left alone.
message="${log_line#*\] }"
message="${message#*\] }"
echo "Message: $message"  # Output: Message: User logged in

字符串操作最佳实践

始终使用引号: 防止空格和特殊字符问题
优先使用参数扩展 ${...} 而不是通过 $(...) 调用 sed、awk 等外部命令: 无需创建子进程，字符串操作更高效
检查字符串长度: 避免空字符串错误
使用正则表达式验证输入: 提高数据质量
使用 printf 格式化输出: 比 echo 更灵活
注意大小写敏感: 使用 ^^ 和 ,, 进行整串大小写转换（^ 和 , 只转换首字符，需要 Bash 4.0 及以上版本）
使用数组存储分割结果: 便于后续处理