awk处理文本的使用方法

2018-04-21  本文已影响0人  小王同学123321

刚开始工作的时候,经常需要处理服务器的日志,因此用awk比较多,下面是当时总结的awk使用方法。

cat file|awk -F'[: ]+' '{print $2}'
ifconfig  bond0 | awk  '{print $0}END{print " "}'   #在输出文本的后面再多输出一行
cat file|awk '{print $(NF-1)}'
cat file|awk 'BEGIN{print "======Grade======="}{print $0}END{print "------Tail------"}'
cat file|awk '{print $0,$3+$4+$5,int(($3+$4+$5)/3)}'  (输出每行第三列、第四列和第五列的和并且输出三个的平均值并且取整)
cat 10.txt|awk '{a=a+$1}END{print a}'
cat file|awk '{a+=$NF}END{print a}'
cat 500.log|awk '{a[$1]++}END{for(i in a){print i,a[i]}}' (循环的永远是下标)
cat file|awk -F'/' '{a[$3]++}END{for (i in a){print i,a[i]}}'|sort -nrk 2 (a[$3]++表示遇到一次加一次;用END来输出)
cat file|awk '{if(NR>3){print $0}}' (取file文件中第四行及之后的所有行)
cat file|awk '{if($3>=90){print $0}}'
cat file|awk '{a=NF-1;print a}'
cat file|awk 'BEGIN{a=2}{print a}'
cat file|awk '{for (i=1;i<=NF;i++){print $i}}' 或者 cat file|awk '{for(i=1;i<=NF-1;i++){print $i}}' (起始值;终止值;步长)
cat file|awk '{printf $1}' (printf最大的特点就是不换行,需要换行的话加“\n”) cat file|awk '{printf $1"\n"}'
cat file|awk '{for(i=1;i<=NF;i+=2){printf $i" "}print ""}' (输出奇数列 print "" 表示只输出一个换行符,意思就是换行)
cat file | awk '++a[$1]==1'(awk去重)
cat file | awk '!a[$1]++' (awk去重的另一种写法)
cat file | awk '{a[$1]+=$2}END{for (i in a){print i, a[i]}}'
cat file1 | awk '{a[NR]=$0}END{for(i=1;i<=NR-2;i+=1){print a[i]}}'  
seq 10 | xargs -n 2 (把一列变成两列)
cat test.log|sort|awk 'BEGIN{tmp = "";total = 0}{if(tmp==$1){total += $2}else{if (tmp!=""){print tmp " total is : " total}tmp = $1;total = $2}}END{print $1 " total is : " total}'
awk 'BEGIN{n=0}$5 !~ /HIT/{n+=$6}END{printf "%f\n", n/1024/1024}' upper.log
zcat 43.gz |egrep '(010104e3WB|010112e3W6|010519b3WF|060320f3WH|06053343W3|06053343WH|06053443W7)' > 18upper.log

awk 'BEGIN{while(("cat 17_url.sort") | getline x){split(x, Arra);n[Arra[2]]}}{if($2 in n){}else{print $2}}' 18_url.sort

awk '$5 !~ /HIT/{n[$8]+=$6}END{for(i in n){printf "%f\t%s\n", n[i]/1024/1024,i}}' 18upper.log | sort -nr

awk '$5 !~ /HIT/ && $4 ~ /39.155.184.47/{n+=$6}END{printf "%f\n", n/1024/1024}' 18upper.log
awk 'BEGIN{while("cat inlegal.ip"| getline x){split(x, arra);ip=arra[2];n[ip]}}$5 !~ /HIT/{if($4 in n){}else{size+=$6}}END{printf "%f\n", size/1024/1024}' upper.log
awk 'BEGIN{while(("head -30 18_url.sort"| getline x)){split(x, arra);n[arra[2]]}}{if($8 in n){print $0}}' 18edge.log |grep -v HIT |grep -v favicon.ico
awk 'BEGIN{while(("head -30 18_url.sort"| getline x)){split(x, arra);n[arra[2]]}}$5 ~ /MISS/{if($8 in n){m+=$6}}END{printf "%f\n", m/1024/1024}' 18edge.log
sort -n -k2 18_010112e3W6.log | awk 'BEGIN{tt=1484697627.866}{if($2<tt+60){total+=$6;ms+=$3}else{printf "%f Mbps\n", (total*8)/(ms/1000.0);total=0;ms=0;tt+=60}}'
awk '{for(i=1; i<=NF;i++){if(i==2){nn=strftime("%Y-%m-%d_%H:%M:%S", $i);printf " %s", nn}else{printf " %s", $i}};print}' 18_010112e3W6.log
awk '{if($2 >= 1484706900 && $2 < 1484707200){for(i=1;i<=NF;i++)if(i==6){printf "%f ", $i / 1024.0 / 1024}else{printf "%s ", $i}};print}' 18upper.log |grep -v HIT
awk '{if($2 >= 1484706900 && $2 < 1484707200){for(i=4;i<=NF;i++){printf "%s ", $i};print}}' 18upper.log |grep -v HIT | awk '{n[$0]++}END{for(i in n){print n[i],i}}'
awk '{if($2 >= 1484706900 && $2 < 1484707200){for(i=4;i<=NF;i++){if(i==6){continue}printf "%s ", $i}};print}' 18_010112e3W6.log |grep -v HIT | awk '{n[$1]++}END{for(i in n)print n[i],i}'
awk '{if($2 >= 1484706900 && $2 < 1484707200){for(i=4;i<=NF;i++){if(i==6 || i ==1 || i == 2 || i == 3){continue}else{printf "%s ", $i}}};print}' 18_010112e3W6.log |grep -v HIT
awk '{if($2 >= 1484706900 && $2 < 1484707200){for(i=4;i<=NF;i++){if(i==6){continue}else{printf "%s ", $i}}; printf "\n"}}' 18_010112e3W6.log |grep -v HIT | awk '{n[$0]++}END{for(i in n)print n[i],i}'
for i in `echo 5 6 7 8 9`; do echo -n "2017011${i}: ";zcat /data/proclog/log/squid/access/fc-access.log.010112e3W6.CHN-YL-e-3W6.2017011${i}* |awk '$0 ~/mp4.china.com.cn/{if($0 ~ /TCP_REFRESH_MISS/){refreshcount++;total++}else{total++}}END{printf "tcp_refresh_miss %d\ttotal: %d\n", refreshcount, total}'; done
zcat /data/proclog/log/squid/access/fc-access.log.06053343WH.CNC-ZB-4-3WH.20170118* |awk '$0 ~ /mp4.china.com.cn/{if($0 ~ /TCP_REFRESH_MISS/){refreshcount++;total++}else{total++}}END{printf "tcp_refresh_miss %d\ttotal: %d\n", refreshcount, total}'
for i in `ls -l /dev/sd* | awk '$0 !~ /[0-9]$/{print $NF;i++}END{print i > "/dev/stderr" }'`; do smartctl -H $i | awk '$0 ~ /[Hh]ealth/{printf "%s ", $0}'; echo $i; done(磁盘健康度)
ls -lh |grep -v total | awk '{cmd="cat "$NF;while(cmd|getline x){if(x ~ /Hostname/){split(x, arra, /:/);split(arra[2], arrb, /\(/);sub(/[[:blank:]]/,"",arrb[1]);print arrb[1]}}}'
ls -lh |grep -v total | awk 'function getInfo(line){split(line, arra, /:/);split(arra[2], arrb, /\(/);sub(/[[:blank:]]/,"",arrb[1]);printf "%s ", arrb[1]}{cmd="cat "$NF;while(cmd|getline x){if(x ~ /Hostname/){printf "%s ", $NF};if(x ~ /IP|IPMI|MrtgNote/){getInfo(x)}};printf "\n"}'
ls -lh |grep -v total | awk 'function getInfo(line){split(line, arra, /:/);split(arra[2], arrb, /\(/);sub(/[[:blank:]]/,"",arrb[1]);printf "%s\t", arrb[1]}{ipmistatus=0;cmd="cat "$NF;while(cmd|getline x){if(x ~ /Hostname/){printf "%s\t", $NF};if(x ~ /IP|IPMI|MrtgNote/){if(x ~ /IPMI/){ipmistatus=1};if(x ~ /MrtgNote/){if(ipmistatus==0){printf "\t\t\t\t\t"}}getInfo(x)}};printf "\n"}' > firstIPMI.csv
zcat 96277.gz | awk '{n[$2][$7]++}END{for(i in n){for(y in n[i]){print n[i][y], y}}}'
zcat 96277.gz | awk '{if($0 ~ /2017-01-27/){n[$7]++}else{if($0 ~ /2017-01-28/){y[$7]++}}}END{for(i in n){printf "2017-01-27 %d %s\n", n[i], i};for(i in y){printf "2017-01-28 %d %s\n", y[i], i}}'
awk '{n[$7]++; y[$7]+=$13}END{for(i in n){print n[i],y[i]/1024.0/1024, i}}' 27_23_55.28_00_30.log | sort -nr
awk '{n[$7]++; y[$7]+=$13}END{for(i in n){if(y[i] <= 1024*1024){print n[i],y[i]/1024"K", i}else{print n[i],y[i]/1024/1024"M", i}}}' 27_23_55.28_00_30.log | sort -nr
for i in `ls -l /dev/sd* | awk '$0 !~ /[0-9]$/{print $NF;i++}END{print i > "/dev/stderr" }'`; do smartctl -H $i | awk '$0 ~ /[Hh]ealth/{printf "%s ", $0}'; echo $i; done
echo "am oldby teacher welcome to oldboy training class" | xargs -n1 | awk 'length <=6{print}'
zcat /data/proclog/log/squid/access/*20161214* |grep -Ev "*.gif|*.xml|*.gif|*.ico" |grep -E 'http://v.hebtv.com' |awk '{num[$7]++}END{for(i in num) print num[i],i}' |sort -nr |grep -Ev 'apkich_args' |grep -E '\.flv|\.apk|\.mov|\.mp3|\.mp4|\.ts' |awk -F "." '$NF!~/%/{print }'|awk '$1>1{print $2}'|awk -F'?' '{print $1}' > top_url_$(uname -n)
ip=$(ifconfig bond0|awk -F "[: ]+" 'NR==2{print $4}') && echo "wget http://${ip}:8888/top_url_$(uname -n)" && python -m SimpleHTTPServer 8888
cat /data/proclog/log/squid/backup/010519d3W1_CHN-CA-d.billing0512020631.000944283.log |awk '$1~/rrs/&&NR%2==0{band+=($2*1.15+6*55*$5)}END{printf "%0.5f\n",band*8/300}'
cat 010773f3W5_CHN-GL-f.billing0526190637.000879172.log |grep xesimg.com |awk 'NR%2==0{sum+=($3*1000000000+$4)}END{print sum}' (偶数行代表的是in方向的)
cat 010773f3W5_CHN-GL-f.billing0526190637.000879172.log |grep xesimg.com |awk 'NR%2==1{sum+=($3*1000000000+$4)}END{print sum}' (奇数行代表的是out方向的)
NR,表示awk开始执行程序后所读取的数据的总行数。FNR,与NR功用类似,不同的是awk每打开一个新文件,FNR便重新从1开始计数
# awk '{print NR,$0}' file1 file2
1 a b c d
2 a b d c
3 a c b d
4 aa bb cc dd
5 aa bb dd cc
6 aa cc bb dd
# awk '{print FNR,$0}' file1 file2
1 a b c d
2 a b d c
3 a c b d
1 aa bb cc dd
2 aa bb dd cc
3 aa cc bb dd
上一篇下一篇

猜你喜欢

热点阅读