您的位置:首页 > 数据库 > MySQL

线上删除mysql数据脚本

2016-01-29 22:01 369 查看
一、需求
线上有几个实例,经常磁盘告警,之前每次人工删除,效率实在不高。
二、解决方法
登录跳板机执行脚本删除,不过在删除前,需要导出binary log日志和库表的的数据,最后把备份的传数据传到hdfs。
三、脚本
读取配置文件

1.1 mysql.conf,哪些实例,哪些表名
MYSQL_HOME=/usr/local/mysql/bin/mysql
INCLUDE_MYSQL=(mysql4:5507 mysql5:5508 mysql6:5509 mysql7:5510 mysql7:5511)
EXCLUDE_DB_GA10=(dc_15 dc_17 dc_44 dc_49 dc_88 dc_183 dc_279 dc_490 dc_624 dc_643 dc_903 dc_906 dc_908 dc_954 dc_1099 dc_1100 dc_1167 dc_1214 dc_1463 dc_1464 dc_2444 dc_2445 dc_2695)
APPID_REG=[0-9A-Za-z]{32,33}
DELETE_TABLE_GA10=(dc_datacenter_cache dc_day_report_cache_basic dc_day_report_cache_income dc_day_report_cache_cumu dc_day_report_cache_channel ${APPID_REG_REG}_dc_distributed_everyday front_${APPID_REG}_dc_everyday2 ${APPID_REG}_dc_everyhour ${APPID_REG}_dc_everyday ${APPID_REG}_dc_distributed_everyday2 ${APPID_REG}_dc_distributedweek_everyweek2 ${APPID_REG}_dc_distributedmonth_everymonth2 ${APPID_REG}_dc_pay_distributed_everyday ${APPID_REG}_dc_custom_retain_by_day front_${APPID_REG}_dc_custom_retain_by_day ${APPID_REG}_dc_distributedmonth_everymonth2 ${APPID_REG}_dc_equipment_distributed_by_day ${APPID_REG}_dc_fp_distributed_everyday ${APPID_REG}_dc_event_by_day ${APPID_REG}_dc_event_attr_by_day ${APPID_REG}_dc_error_report_detail ${APPID_REG}_dc_error_report_dist_hour ${APPID_REG}_dc_player_30dayvalue front_${APPID_REG}_dc_player_30dayvalue ${APPID_REG}_dc_player_30day_arpu front_${APPID_REG}_dc_player_30day_arpu ${APPID_REG}_dc_whale_player_day ${APPID_REG}_dc_uid_retain_by_day front_${APPID_REG}_dc_uid_retain_by_day ${APPID_REG}_dc_task_everyday ${APPID_REG}_dc_level_everyday ${APPID_REG}_dc_app_ring ${APPID_REG}_dc_error_report_dist_sys ${APPID_REG}_dc_error_report_detail_sys ${APPID_REG}_dc_error_report_dist_user ${APPID_REG}_dc_error_report_detail_user ${APPID_REG}_dc_rollserver_player_by_day ${APPID_REG}_dc_rollserver_income_by_day ${APPID_REG}_dc_rollserver_retain front_${APPID_REG}_dc_rollserver_retain ${APPID_REG}_dc_tag_everyday ${APPID_REG}_dc_tag_level_outflow ${APPID_REG}_dc_tag_retain_by_day ${APPID_REG}_dc_everyweek ${APPID_REG}_dc_everymonth ${APPID_REG}_dc_distributed_everyweek ${APPID_REG}_dc_distributed_everyweek2 ${APPID_REG}_dc_distributed_everymonth ${APPID_REG}_dc_distributed_everymonth2 ${APPID_REG}_dc_custom_retain_by_week ${APPID_REG}_dc_custom_retain_by_month front_${APPID_REG}_dc_custom_retain_by_week front_${APPID_REG}_dc_custom_retain_by_month)

EXPIRE_LOGS_DAYS=21
1.2 backup.conf
JAVA_HOME=/usr/java/jdk1.7.0_25
LOCAL_FILES=(/home/yaolihong/yao/backup/bak.tar.gz)
BUSINESS_TYPE=ga
CUSTOM_SUB_DIR=mysql
2.backup_tools.sh脚本
主要将备份压缩好的数据发往到hdfs,这个会在clean_data0.sh和clean_data1.sh最后调用到
#!/bin/sh
#set -x

#check user
WHO_AM_I=`whoami`
if [ "hadoop" == $WHO_AM_I ]; then
echo "Error: you should not use hadoop user to back files, use another user instead please..."
exit 1
fi

shellLocation=`dirname $0`
shellLocation=`cd "$shellLocation" ; pwd`
. $shellLocation/conf/backup.conf
HADOOP_CMD="$shellLocation/bin/hadoop-1.0.4/bin/hadoop"

#check if empty
REQUIED_PARAMS=(JAVA_HOME LOCAL_FILES BUSINESS_TYPE CUSTOM_SUB_DIR)
for PARAM in ${REQUIED_PARAMS[@]}
do
if [ "X${!PARAM}" = "X" ]; then
echo "Error: ${PARAM} is not set..."
exit 1
fi
done
export JAVA_HOME

CURRENT_TIME=`date +%Y%m%d-%H_%M_%S`
CURRENT_LOG_FILE=$shellLocation/logs/backup.$CURRENT_TIME.log

#get time
CURRENT_YEAR=`date +%Y -d '1 hours ago'`
CURRENT_MONTH=`date +%m -d '1 hours ago'`
CURRENT_DAY=`date +%d -d '1 hours ago'`
LAST_HOUR=`date +%H -d '1 hours ago'`
HOUR_STR_FOR_HDFS="$CURRENT_YEAR/$CURRENT_MONTH/$CURRENT_DAY/$LAST_HOUR"

BACKUP_BASIC_PATH="/backup/$BUSINESS_TYPE/$CUSTOM_SUB_DIR"
BACKUP_DAY_PATH="$BACKUP_BASIC_PATH/$CURRENT_YEAR/$CURRENT_MONTH/$CURRENT_DAY"

# 判断是否有传入路径,有则以传入的路径为准
if [ $# = 1 ]; then
BACKUP_DAY_PATH="/backup/$BUSINESS_TYPE/$CUSTOM_SUB_DIR/$1"
fi

#check if hdfs dir exist and mkdir
$HADOOP_CMD fs -test -e $BACKUP_DAY_PATH
if [ $? -ne 0 ]; then
$HADOOP_CMD fs -mkdir $BACKUP_DAY_PATH >> $CURRENT_LOG_FILE 2>&1
fi
HOST_NAME=`/bin/hostname`

IS_ANY_FAILED="N"
for LOCAL_FILE in ${LOCAL_FILES[@]}
do
#check if local file exist
if [ ! -f $LOCAL_FILE ]; then
IS_ANY_FAILED="Y"
echo "Error: Local file not exist : $LOCAL_FILE" >> $CURRENT_LOG_FILE
continue
fi

#put local file to hdfs
LOCL_FILE_NAME=`/bin/basename $LOCAL_FILE`
HDFS_FILE_NAME=${LOCL_FILE_NAME}_${HOST_NAME}_${CURRENT_TIME}
$HADOOP_CMD fs -put $LOCAL_FILE $BACKUP_DAY_PATH/$HDFS_FILE_NAME >> $CURRENT_LOG_FILE 2>&1

#if put failed, we should not touch a done file
if [ $? -ne 0 ]; then
IS_ANY_FAILED="Y"
echo "Error: put file to hdfs failed : $LOCAL_FILE" >> $CURRENT_LOG_FILE
continue
fi
done

#delete log 10 days ago
find $shellLocation/logs/ -mtime +10 -delete

if [ $IS_ANY_FAILED = "Y" ]; then
exit 1
fi
#set +x


3.清除数据库脚本clean_data0.sh
#!/bin/bash

# 加载配置文件
source ./conf/mysql.conf
source ./conf/backup.conf
SHELLLOCATION=`dirname $0`
SHELLLOCATION=`cd "${SHELLLOCATION}" ; pwd`
HADOOP_CMD="${SHELLLOCATION}/bin/hadoop-1.0.4/bin/hadoop"

echo "选择要清理的数据库实例:"
select MYSQL in ${INCLUDE_MYSQL[@]};
do
break
done
echo "您选择要清理的实例是:$MYSQL"

MYSQL_HOST=`echo $MYSQL | awk -F':' '{print $1}'`
MYSQL_PORT=`echo $MYSQL | awk -F':' '{print $2}'`

read -p "数据库账号:" MYSQL_USER
read -s -p "数据库密码:" MYSQL_PWD

TODAY=`date +"%Y%m%d"`
if [ ! -d ./logs/${MYSQL_HOST}/${MYSQL_PORT} ]; then
mkdir -p ./logs/${MYSQL_HOST}/${MYSQL_PORT}
fi

# 定义一个查询数据库的方法
query_mysql()
{
mysql -u${MYSQL_USER} -p${MYSQL_PWD} -h${MYSQL_HOST} -P${MYSQL_PORT} --default-character-set=utf8 -N -e "$*" | sed "s/^//;s/$//"
}

# 定义一个打印日志的方法
printlog()
{
echo -e $*
echo -e $* >> ./logs/${MYSQL_HOST}/${MYSQL_PORT}/mysql.${TODAY}.log
}

# 定义一个打印删除记录日志的方法
print_delete_log()
{
echo -e $*
echo -e $* >> ./logs/${MYSQL_HOST}/${MYSQL_PORT}/delete.${TODAY}.log
}

DB_SET=`query_mysql "show databases" | egrep -E "dc_[0-9]|ga_[0-9]"`
FAIL_BAK_DB=()
for DB in ${DB_SET}; do
# 如果是游戏分析1.0的数据库并且该数据库不是需要过滤的数据库,则开始清理
if [[ "${DB}" =~ ^dc_[0-9]*$ && ! " ${EXCLUDE_DB_GA10[@]} " =~ " ${DB} " ]]; then
DIR=data/${MYSQL_HOST}/${MYSQL_PORT}/${DB}/${TODAY}
if [ ! -d ./backup/${DIR} ]; then
mkdir -p ./backup/${DIR}
printlog "开始扫描$DB"
WAIT_DELETE_TABLE=()
for TABLE in `query_mysql "use ${DB}; show tables"`; do
# 遍历需要清理的表
for INCLUDE_TABLE in ${DELETE_TABLE_GA10[@]}; do
# 把appid替换成正则表达式
INCLUDE_TABLE=`echo ${INCLUDE_TABLE} | sed 's/^/\^/;s/$/\$/'`
if [[ ${TABLE} =~ ${INCLUDE_TABLE} ]]; then
TOTAL_ROW=`query_mysql "select count(*) from ${DB}.${TABLE}"`
DELETE_ROW=0
DELETE_COLUMN=""
# 如果是日表
if [[ ! `query_mysql "desc ${DB}.${TABLE} StatiTime"` == "" ]]; then
DELETE_COLUMN="StatiTime"
DELETE_ROW=`query_mysql "select count(*) from ${DB}.${TABLE} where StatiTime < unix_timestamp(date_add(now(), interval -6 month))"`

#如果是周表或者月表
elif [[  ! `query_mysql "desc ${DB}.${TABLE} EndDate"` == "" ]]; then
DELETE_COLUMN="EndDate"
DELETE_ROW=`query_mysql "select count(*) from ${DB}.${TABLE} where EndDate < unix_timestamp(date_add(now(), interval -6 month))"`
fi

if [ ${DELETE_ROW} -gt 0 ]; then
WAIT_DELETE_TABLE+=(${TABLE}.${DELETE_COLUMN})
printlog "${DB}.${TABLE} \t ${TOTAL_ROW} \t ${DELETE_ROW}"
fi

TOTAL_ROW=0
DELETE_ROW=0
DELETE_COLUMN=""
break
fi
done
done

if [ ${#WAIT_DELETE_TABLE[@]} -gt 0 ]; then
read -p "是否清理历史数据:[Yes/No]" SURE
if [ ${SURE} = "Yes" ]; then
print_delete_log "开始清除${DB}"
for WAIT_DELETE in ${WAIT_DELETE_TABLE[@]}; do
DEL_TABLE=`echo ${WAIT_DELETE} | awk -F'.' '{print $1}'`
DEL_COLUMN=`echo ${WAIT_DELETE} | awk -F'.' '{print $2}'`
mysqldump -u${MYSQL_USER} -p${MYSQL_PWD} -h${MYSQL_HOST} -P${MYSQL_PORT} ${DB} ${DEL_TABLE} -w"${DEL_COLUMN} < unix_timestamp(date_add(now(), interval -6 month))" > ./backup/${DIR}/${DEL_TABLE}.sql
DEL_ROW=`query_mysql "set sql_log_bin=0; delete from ${DB}.${DEL_TABLE} where ${DEL_COLUMN} < unix_timestamp(date_add(now(), interval -6 month))"`
print_delete_log "${DEL_TABLE} \t ${DEL_ROW}"
done

echo "开始打包导出的sql文件!"
tar -zcf ./backup/bak.tar.gz ./backup/${DIR}/*.sql
echo "开始发送打包文件到hdfs,可能需要几分钟,请耐心等候!"
sh backup_tools.sh "${DIR}"
if [[ $? -ne 0 || `${HADOOP_CMD} fs -ls /backup/${BUSINESS_TYPE}/${CUSTOM_SUB_DIR}/${DIR}/ | grep bak.tar.gz` = "" ]]; then
print_delete_log "==========${DB}备份失败,请手动备份!=========="
FAIL_BAK_DB+=(${MYSQL}.${DB})
fi
fi
fi
print_delete_log "==========备份失败的数据库有:${FAIL_BAK_DB[@]}=========="
unset WAIT_DELETE_TABLE
fi
done
3.清除日志脚本clean_data1.sh
#!/bin/bash
# 加载配置文件
source ./conf/mysql.conf
source ./conf/backup.conf
SHELLLOCATION=`dirname $0`
SHELLLOCATION=`cd "${SHELLLOCATION}" ; pwd`
HADOOP_CMD="${SHELLLOCATION}/bin/hadoop-1.0.4/bin/hadoop"

echo "选择要清理的数据库实例:"
select MYSQL in ${INCLUDE_MYSQL[@]};
do
break
done
echo "您选择要清理的实例是:${MYSQL}"

MYSQL_HOST=`echo ${MYSQL} | awk -F':' '{print $1}'`
MYSQL_PORT=`echo ${MYSQL} | awk -F':' '{print $2}'`

read -p "主机账号:" USER
read -s -p "主机密码:" PWD

TARGET_DAY=`date -d"-${EXPIRE_LOGS_DAYS} day" +%Y-%m-%d`
MTIME=`expr ${EXPIRE_LOGS_DAYS} - 1`
TODAY=`date +%Y%m%d`
DIR=binlog/${MYSQL_HOST}/${MYSQL_PORT}/${TODAY}
if [ ! -d ./backup/${DIR} ]; then
mkdir -p ./backup/${DIR}
fi
rm -f ~/.ssh/known_hosts
for LOG in `expect << EOF
spawn ssh ${USER}@${MYSQL_HOST} "find /data1/mysql/log${MYSQL_PORT}/mysql-bin.* -mtime +${MTIME} | xargs ls --full-time | grep -v ${TARGET_DAY}"
expect "no)?"
send "yes\r"
expect "password:"
send "${PWD}\r"
set timeout -1
expect eof
EOF`; do
if [[ `echo ${LOG} | egrep -E "mysql-bin\.[0-9]{1,}"` != "" ]]; then
rm -f ~/.ssh/known_hosts
expect << EOF
spawn scp ${USER}@${MYSQL_HOST}:${LOG} backup/${DIR}
expect "no)?"
send "yes\r"
expect "password:"
send "${PWD}\r"
set timeout -1
expect eof
EOF
fi
done

echo "开始打包导出的sql文件!"
tar -zcf ./backup/bak.tar.gz ./backup/${DIR}/mysql-bin.*
echo "开始发送打包文件到hdfs,可能需要几分钟,请耐心等候!"
sh backup_tools.sh "${DIR}"
if [[ $? -ne 0 || `${HADOOP_CMD} fs -ls /backup/${BUSINESS_TYPE}/${CUSTOM_SUB_DIR}/${DIR}/ | grep bak.tar.gz` = "" ]]; then
echo "==========备份失败,请手动备份!=========="
else
read -p "数据库账号:" MYSQL_USER
read -s -p "数据库密码:" MYSQL_PWD
mysql -u${MYSQL_USER} -p${MYSQL_PWD} -h${MYSQL_HOST} -P${MYSQL_PORT} -e "PURGE BINARY LOGS BEFORE CURRENT_DATE -INTERVAL ${EXPIRE_LOGS_DAYS} DAY"
echo "备份成功"
fi


本文出自 “散人” 博客,请务必保留此出处http://zouqingyun.blog.51cto.com/782246/1739972
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: