无标题文章

2018-08-30  本文已影响0人  万昆

Compile

/** 
./hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/src/main/java/org/apache/hadoop/mapreduce/JobContext.java
*/

// Add the method declaration below to JobContext.java, then add the overriding implementation to JobContextImpl.java
  /**
   * Get the boolean value for the property that specifies which classpath
   * takes precedence when tasks are launched. True - user's classes takes
   * precedence. False - system's classes takes precedence.
   * @return true if user's classes should take precedence
   */
  public boolean userClassesTakesPrecedence();

  /**
   * Get the boolean value for the property that specifies which classpath
   * takes precedence when tasks are launched. True - user's classes takes
   * precedence. False - system's classes takes precedence.
   * @return true if user's classes should take precedence
   */
  public boolean userClassesTakesPrecedence() {
    return conf.userClassesTakesPrecedence();
  }
    <hadoop.version>2.5.0-cdh5.2.0</hadoop.version>
    
    <repository>
      <snapshots>
          <enabled>true</enabled>
      </snapshots>
      <id>cloudera-repo-releases</id>
      <url>https://repository.cloudera.com/artifactory/repo/</url>
    </repository>

    <pluginRepository>
      <id>maven2-repository.cloudera</id>
      <name>Cloudera Maven Repository</name>
      <url>https://repository.cloudera.com/artifactory/repo/</url>
      <layout>default</layout>
    </pluginRepository>
pom.xml:
    <nodeVersion>v0.10.18</nodeVersion>
    <npmVersion>1.3.8</npmVersion>

package.json:
        "bower": "1.4.1",

Install

Official site
https://tez.apache.org/install.html
HDP documentation
http://docs.hortonworks.com/HDPDocuments/HDP2/HDP-2.1.7/bk_installing_manually_book/content/rpm-chap-tez_configure_tez.html

mkdir tez-0.8.5 && cd tez-0.8.5 && tar -zxvf ../tez-0.8.5.tar.gz
cd .. && mv tez-0.8.5 /usr/lib && ln -s /usr/lib/tez-0.8.5 /usr/lib/tez
su - hdfs -c 'hadoop dfs -put -f /opt/app/tez-0.8.5-minimal.tar.gz /metadata/libs/tez/tez-0.8.5-minimal.tar.gz'
hadoop dfs -ls /metadata/libs/tez
hadoop dfs -rm -r -f /tmp/wankun/jars/tez/
hadoop dfs -mkdir /tmp/wankun/jars/tez/
hadoop dfs -put lib/                                         /tmp/wankun/jars/tez/
hadoop dfs -put tez-api-0.5.4.jar                            /tmp/wankun/jars/tez/
hadoop dfs -put tez-common-0.5.4.jar                         /tmp/wankun/jars/tez/
hadoop dfs -put tez-dag-0.5.4.jar                            /tmp/wankun/jars/tez/
hadoop dfs -put tez-examples-0.5.4.jar                       /tmp/wankun/jars/tez/
hadoop dfs -put tez-mapreduce-0.5.4.jar                      /tmp/wankun/jars/tez/
hadoop dfs -put tez-mbeans-resource-calculator-0.5.4.jar     /tmp/wankun/jars/tez/
hadoop dfs -put tez-runtime-internals-0.5.4.jar              /tmp/wankun/jars/tez/
hadoop dfs -put tez-runtime-library-0.5.4.jar                /tmp/wankun/jars/tez/
hadoop dfs -put tez-tests-0.5.4.jar                          /tmp/wankun/jars/tez/
hadoop dfs -put tez-yarn-timeline-history-0.5.4.jar          /tmp/wankun/jars/tez/

hadoop dfs -chmod -R 777 /tmp/wankun/jars/tez/
export TEZ_HOME=/home/wankun/tez
export HADOOP_CLASSPATH=$HADOOP_CLASSPATH:${TEZ_HOME}/conf:${TEZ_HOME}/*:${TEZ_HOME}/lib/*

Upload the Tez jars and libs to the HDFS file system, then set tez.lib.uris in the configuration file to point to that HDFS directory.

<!--Fri Apr 25 16:29:38 2014-->
    <configuration>
    
    <property>
      <name>tez.am.java.opts</name>
      <value>-server -Xmx1535m -Djava.net.preferIPv4Stack=true -XX:+UseNUMA -XX:+UseParallelGC</value>
    </property>
    
    <property>
      <name>tez.am.env</name>
      <value>LD_LIBRARY_PATH=/var/bh/hadoop/lib/native:/usr/lib/hadoop/lib/native/`$JAVA_HOME/bin/java -d32 -version &amp;&gt; /dev/null;if [ $? -eq 0 ]; then echo Linux-i386-32; else echo Linux-amd64-64;fi`</value>
    </property>
    
    <property>
      <name>tez.am.shuffle-vertex-manager.max-src-fraction</name>
      <value>0.4</value>
    </property>
    
    <property>
      <name>tez.task.get-task.sleep.interval-ms.max</name>
      <value>200</value>
    </property>
    
    <property>
      <name>tez.staging-dir</name>
      <value>/tmp/${user.name}/staging</value>
    </property>
    
    <property>
      <name>tez.am.grouping.min-size</name>
      <value>16777216</value>
    </property>
    
    <property>
      <name>tez.runtime.intermediate-input.compress.codec</name>
      <value>org.apache.hadoop.io.compress.SnappyCodec</value>
    </property>
    
    <property>
      <name>tez.am.container.reuse.enabled</name>
      <value>true</value>
    </property>
    
    <property>
      <name>tez.yarn.ats.enabled</name>
      <value>true</value>
    </property>
    
    <property>
      <name>tez.am.log.level</name>
      <value>INFO</value>
    </property>
    
    <property>
      <name>tez.session.am.dag.submit.timeout.secs</name>
      <value>300</value>
    </property>
    
    <property>
      <name>tez.am.grouping.split-waves</name>
      <value>1.4</value>
    </property>
    
    <property>
      <name>tez.session.client.timeout.secs</name>
      <value>180</value>
    </property>
    
    <property>
      <name>tez.runtime.intermediate-output.compress.codec</name>
      <value>org.apache.hadoop.io.compress.SnappyCodec</value>
    </property>
    
    <property>
      <name>tez.am.shuffle-vertex-manager.min-src-fraction</name>
      <value>0.2</value>
    </property>
    
    <property>
      <name>tez.runtime.intermediate-output.should-compress</name>
      <value>true</value>
    </property>
    
    <property>
      <name>tez.am.am-rm.heartbeat.interval-ms.max</name>
      <value>250</value>
    </property>
    
    <property>
      <name>tez.lib.uris</name>
      <value>hdfs:///bh/warehouse/dmp/jars/tez/,hdfs:///bh/warehouse/dmp/jars/tez/lib/</value>
    </property>
    
    <property>
      <name>tez.am.container.reuse.non-local-fallback.enabled</name>
      <value>true</value>
    </property>
    
    <property>
      <name>tez.am.container.reuse.rack-fallback.enabled</name>
      <value>true</value>
    </property>
    
    <property>
      <name>tez.am.grouping.max-size</name>
      <value>1073741824</value>
    </property>
    
    <property>
      <name>tez.am.container.reuse.locality.delay-allocation-millis</name>
      <value>250</value>
    </property>
    
    <property>
      <name>tez.runtime.intermediate-input.is-compressed</name>
      <value>true</value>
    </property>
    
    <property>
      <name>tez.am.resource.memory.mb</name>
      <value>2048</value>
    </property>
    
    <property>
      <name>tez.am.container.session.delay-allocation-millis</name>
      <value>30000</value>
    </property>
    
  </configuration>

Test

MapReduce

hadoop dfs -rm -r -f /bh/warehouse/dmp/tmp/output/

hadoop jar tez-mapreduce-examples-0.4.1-incubating.jar orderedwordcount /bh/warehouse/dmp/tmp/input/47675.log /bh/warehouse/dmp/tmp/output/

Hive

Because I don't have permissions on the production environment, the deployment test on Hive failed.

This note simply records the deployment process.

Tips

Check out hadoop source branch

git branch -va   # view all branches, including remote ones
git checkout remotes/origin/branch-2.5.2
./dev-support/create-release.sh
上一篇 下一篇

猜你喜欢

热点阅读