更新时间:2021年07月16日16时01分 来源:乐鱼电竞 浏览次数:

服务器每天会产生大量日志数据,并且日志文件可能存在于每个应用程序指定的data目录中,在不使用其它工具的情况下,将服务器中的日志文件规范的存放在HDFS中。通过编写简单的shell脚本,用于每天自动采集服务器上的日志文件,并将海量的日志上传至HDFS中。由于文件上传时会消耗大量的服务器资源,为了减轻服务器的压力,可以避开高峰期,通常会在凌晨进行上传文件的操作。下面按照步骤实现Shell定时日志采集功能。
1£®ÅäÖû·¾³±äÁ¿
首先在/export/data/logs目录下(如果目录不存在,则需要提前创建)使用vi命令创建upload2HDFS.sh脚本文件,在编写Shell脚本时,需要设置Java环境变量,即使我们当前虚拟机节点已经配置了Java环境变量,这样做是用来提高系统的可靠性,保障运行程序的机器在没有配置环境变量的情况下依然能够运行脚本。代码如下所示:
# Pin the JDK location inside the script itself so that it still runs on a
# machine whose Java environment variables have not been configured.
JAVA_HOME=/export/servers/jdk
JRE_HOME="${JAVA_HOME}/jre"
CLASSPATH=".:${JAVA_HOME}/lib:${JRE_HOME}/lib"
PATH="${JAVA_HOME}/bin:${PATH}"
export JAVA_HOME JRE_HOME CLASSPATH PATH
在配置完Java环境变量之后还需要配置Hadoop的环境变量,代码如下所示:
# Hadoop installation root; its bin/ and sbin/ directories are prepended to
# PATH so the `hadoop` command used later in the script can be found.
HADOOP_HOME=/export/servers/hadoop-2.7.4/
PATH="${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${PATH}"
export HADOOP_HOME PATH
2.准备日志存放目录和待上传文件
ΪÁËÈÿª·¢Õß±ãÓÚ¿ØÖÆÉÏ´«ÎļþµÄÁ÷³Ì£¬¿ÉÒÔÔڽű¾ÖÐÉèÖÃÒ»¸öÈÕÖ¾´æ·ÅĿ¼ºÍ´ýÉÏ´«ÎļþĿ¼£¬ÈôÉÏ´«¹ý³ÌÖз¢Éú´íÎóÖ»ÐèÒª²é¿´¸ÃĿ¼¾ÍÄÜÖªµÀÎļþµÄÉÏ´«½ø¶È¡£Ìí¼ÓÏàÓ¦´úÂëÈçÏÂËùʾ£º
# Directory in which the log files are stored.
log_src_dir="/export/data/logs/log/"
# Staging directory for the files that are waiting to be uploaded.
log_toupload_dir="/export/data/logs/toupload/"
为了保证后续脚本文件能够正常执行,还需要在启动脚本前手动创建好这两个目录。
3.设置日志文件上传的路径
设置上传的HDFS目标路径,命名格式以时间结尾,并且输出打印信息。添加代码如下所示:
# Date stamp of the previous day, formatted as YYYY_MM_DD. The `-d last-day`
# relative date is GNU date syntax.
date1=$(date -d last-day +%Y_%m_%d)
# Root HDFS path the log files are uploaded to; it ends with the date stamp.
hdfs_root_dir="/data/clickLog/${date1}/"
# Print the environment variable information.
echo "envs: hadoop_home: ${HADOOP_HOME}"
# Print the log directory that is scanned for files needing upload. The
# expansion stays inside the quotes so the path is never word-split or globbed.
echo "log_src_dir:${log_src_dir}"
4.实现文件上传
上传文件的过程就是遍历文件目录的过程,将文件首先移动到待上传目录,再从待上传目录中上传到HDFS中。添加代码如下所示:
# Move every rotated log file (access.log.*) from the log directory into the
# staging directory, renaming it on the way, and record each staged path in a
# "willDoing.<timestamp>" list file inside the staging directory.
#
# Arguments: $1 - directory holding the log files
#            $2 - staging directory for files waiting to be uploaded
# Outputs:   one "moving ..." line per file on stdout; errors on stderr
# Returns:   0 if every matching file was staged, 1 otherwise
stage_rotated_logs() {
  local src_dir=${1%/}
  local staging_dir=${2%/}
  local src_path file_name stamp staged_path
  local rc=0

  if [[ ! -d "$src_dir" || ! -d "$staging_dir" ]]; then
    printf 'stage_rotated_logs: missing directory "%s" or "%s"\n' \
      "$1" "$2" >&2
    return 1
  fi

  for src_path in "$src_dir"/access.log.*; do
    # When nothing matches, the pattern itself is left unexpanded; skip it.
    [[ -e "$src_path" ]] || continue

    file_name=${src_path##*/}
    stamp=$(date +%Y_%m_%d_%H_%M_%S)
    staged_path="${staging_dir}/xxxxx_click_log_${file_name}${stamp}"

    # Move the file into the staging directory and rename it.
    echo "moving $src_path to $staged_path"
    if ! mv -- "$src_path" "$staged_path"; then
      # Never list a file that did not actually reach the staging directory.
      printf 'stage_rotated_logs: failed to move %s\n' "$src_path" >&2
      rc=1
      continue
    fi

    # Append the staged file's path to the willDoing list file.
    printf '%s\n' "$staged_path" >>"${staging_dir}/willDoing.${stamp}"
  done

  return "$rc"
}

stage_rotated_logs "$log_src_dir" "$log_toupload_dir"
最后将文件从待上传目录传至HDFS中,具体代码如下所示:
# Upload the staged log files named in every pending "willDoing.*" list file
# to HDFS.
#
# The name of a list file shows how far its upload has progressed:
#   willDoing.<stamp>          pending, not processed yet
#   willDoing.<stamp>_COPY_    being uploaded, or at least one upload failed
#   willDoing.<stamp>_DONE_    every listed file was uploaded
#
# Arguments: $1 - staging directory that contains the list files
#            $2 - HDFS directory to upload into
# Outputs:   progress lines on stdout; errors on stderr
# Returns:   0 if every pending list was uploaded completely, 1 otherwise
upload_staged_logs() {
  local staging_dir=${1%/}
  local hdfs_dir=$2
  local list_path list_name copy_path staged_file
  local list_ok rc=0

  if [[ ! -d "$staging_dir" || -z "$hdfs_dir" ]]; then
    printf 'upload_staged_logs: bad staging dir "%s" or HDFS dir "%s"\n' \
      "$1" "$2" >&2
    return 1
  fi

  # Find the willDoing list files.
  for list_path in "$staging_dir"/willDoing.*; do
    # Skip the unexpanded pattern (nothing matched) and any list that is
    # already marked as in progress or finished.
    [[ -f "$list_path" ]] || continue
    list_name=${list_path##*/}
    [[ "$list_name" == *_COPY_* || "$list_name" == *_DONE_* ]] && continue

    # Print which list file is being processed.
    echo "toupload is in file:${list_name}"

    # Rename the list willDoing -> willDoing_COPY_ to mark it as in progress.
    copy_path="${list_path}_COPY_"
    if ! mv -- "$list_path" "$copy_path"; then
      rc=1
      continue
    fi

    list_ok=1
    # The target directory only has to be created once per list, not per file.
    hadoop fs -mkdir -p "$hdfs_dir" </dev/null || list_ok=0

    # Each line of willDoing_COPY_ is the path of one file waiting for upload.
    while IFS= read -r staged_file || [[ -n "$staged_file" ]]; do
      [[ -n "$staged_file" ]] || continue
      echo "puting...$staged_file to hdfs path.....$hdfs_dir"
      # stdin is redirected so the command cannot consume the rest of the
      # list that this loop is reading from.
      if ! hadoop fs -put "$staged_file" "$hdfs_dir" </dev/null; then
        printf 'upload_staged_logs: failed to upload %s\n' "$staged_file" >&2
        list_ok=0
      fi
    done <"$copy_path"

    if ((list_ok)); then
      mv -- "$copy_path" "${list_path}_DONE_" || rc=1
    else
      # Leave the _COPY_ marker in place so the incomplete upload stays
      # visible in the staging directory instead of being reported as done.
      rc=1
    fi
  done

  return "$rc"
}

upload_staged_logs "$log_toupload_dir" "$hdfs_root_dir"
如果要在每天凌晨12点(即0点整)执行一次,我们可以使用Linux Crontab表达式执行定时任务。
# Fields: minute hour day-of-month month day-of-week command.
# "0 0 * * *" runs the script once a day, at 00:00.
0 0 * * * /shell/upload2HDFS.sh
上述crontab表达式是由6个参数决定,分别为分、时、日、月、周、命令组成,其中/shell/upload2HDFS.sh为shell脚本的绝对路径。由于crontab表达式并非本书重点,若想要深入学习的读者可以自行查阅资料学习。
5£®Ö´ÐгÌÐòչʾÔËÐнá¹û
一般日志文件产生是由业务决定,例如每小时滚动一次或者日志文件大小达到1G时,就滚动一次,产生新的日志文件。为了避免每个日志文件过大导致上传效率低,可以采取在滚动后的文件名后添加一个标识的策略,例如access.log.x,x就是文件标识,它可以为序号、日期等自定义名称,该标识用于表示日志文件滚动过一次,滚动后的文件,新产生的数据将不再写入该文件中,当满足业务需求时,则文件可以被移动到待上传目录,如图1所示。

图1 滚动日志文件
从图1可以看出,为了模拟生产环境,在日志存放目录/export/data/logs/log/中,手动创建日志文件,access.log表示正在源源不断的产生日志的文件,access.log.1、access.log.2等表示已经滚动完毕的日志文件,即为待上传日志文件。
在upload2HDFS.sh文件路径下使用“sh upload2HDFS.sh”指令执行程序脚本,打印执行流程,如图2所示。

ͼ2 ÔËÐнű¾
从图2可以看出,首先将日志存放目录log中的日志文件移动到待上传toupload目录下,并根据业务需求重命名,然后脚本自动执行“hadoop put”上传命令,将待上传目录下的所有日志文件上传至HDFS中。通过HDFS Web界面可以看到,需要采集的日志文件已经按照日期分类,上传到HDFS中,如图3所示。

图3 日志采集文件
小提示:
Shell脚本语言并非本章节的重点,读者只需要掌握本节案例的业务和思想,以及可以读懂简单的Shell脚本语言即可。
什么是HDFS Shell?HDFS常用命令操作演示
怎样使用Spark Shell来读取HDFS文件?
shell 脚本如何进行调试?这些命令你知道吗?
乐鱼电竞Python+大数据开发高手班
北京校区