环境准备
- JDK1.8
- Maven 3.6.1
- Scala 2.12.8
下载源码包
https://archive.apache.org/dist/spark/spark-2.4.2/
选择 spark-2.4.2.tgz
解压Spark 源码
tar -zxvf spark-2.4.2.tgz -C /home/hadoop/sourcecode
下载Scala安装包
https://www.scala-lang.org/download/
选择 scala-2.12.8.tgz
解压Scala
tar -zxvf scala-2.12.8.tgz
权限 chown -R root:root scala-2.12.8
下载Maven3.6.1
https://maven.apache.org/download.cgi 选择其中的 apache-maven-3.6.1-bin.tar.gz
解压
tar -zxvf apache-maven-3.6.1-bin.tar.gz
权限 chown -R hadoop:hadoop apache-maven-3.6.1
添加环境变量
# Environment for building Spark 2.4.2 against Hadoop 2.6.0-cdh5.7.0.
# Intended to be appended to ~/.bash_profile and then re-sourced.

# Installation roots of the tools referenced by PATH below.
export HADOOP_HOME=/home/hadoop/app/hadoop-2.6.0-cdh5.7.0
export HIVE_HOME=/home/hadoop/app/hive-1.1.0-cdh5.7.0
export JAVA_HOME=/usr/java/jdk1.8.0_144
#export JAVA_HOME=/usr/java/jdk1.7.0_45
#export MAVEN_HOME=/usr/maven/apache-maven-3.3.9
export MAVEN_HOME=/usr/maven/apache-maven-3.6.1
export SPARK_HOME=/home/hadoop/app/spark-2.4.2-bin-2.6.0-cdh5.7.0
export SCALA_HOME=/usr/scala/scala-2.12.8
export PROTOBUF_HOME=/usr/protobuf
export FINDBUGS_HOME=/home/hadoop/lib/findbugs-1.3.9

# JVM options for Maven. Only one assignment can take effect, so the older
# value is kept commented out instead of being silently overwritten.
#export MAVEN_OPTS="-Xms1024m -Xmx2048m"
export MAVEN_OPTS="-Xmx2g -XX:ReservedCodeCacheSize=512m"

# Single PATH update. The earlier notes prepended HADOOP/HIVE/SPARK piecemeal
# and wrote "{SPARK_HOME}/bin" without the leading "$", which put a literal
# "{SPARK_HOME}/bin" directory on PATH. Every directory is listed once here,
# in the same lookup order as before.
export PATH="${SPARK_HOME}/bin:${SCALA_HOME}/bin:${HIVE_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${FINDBUGS_HOME}/bin:${PROTOBUF_HOME}/bin:${MAVEN_HOME}/bin:${JAVA_HOME}/bin:${PATH}"
生效环境变量
source ~/.bash_profile
修改Spark源码里面的pom.xml 文件，在 <repositories> 节点中添加 cloudera 仓库（否则无法下载 2.6.0-cdh5.7.0 版本的 Hadoop 依赖）
<repository>
<id>cloudera</id>
<url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
</repository>
编译
[hadoop@hadoop001 spark-2.4.2]$ ./dev/make-distribution.sh --name 2.6.0-cdh5.7.0 --tgz -Phadoop-2.6 -Phive -Phive-thriftserver -Pyarn -Pkubernetes -Dhadoop.version=2.6.0-cdh5.7.0
注意事项
在编译前需要检查软件是否安装成功
echo $JAVA_HOME
echo $SCALA_HOME
echo $MAVEN_HOME
echo $HADOOP_HOME
编译结果
[INFO]
[INFO] Spark Project Parent POM ........................... SUCCESS [ 3.165 s]
[INFO] Spark Project Tags ................................. SUCCESS [ 10.574 s]
[INFO] Spark Project Sketch ............................... SUCCESS [ 17.362 s]
[INFO] Spark Project Local DB ............................. SUCCESS [ 13.102 s]
[INFO] Spark Project Networking ........................... SUCCESS [ 25.008 s]
[INFO] Spark Project Shuffle Streaming Service ............ SUCCESS [ 6.478 s]
[INFO] Spark Project Unsafe ............................... SUCCESS [ 24.633 s]
[INFO] Spark Project Launcher ............................. SUCCESS [01:15 min]
[INFO] Spark Project Core ................................. SUCCESS [04:14 min]
[INFO] Spark Project ML Local Library ..................... SUCCESS [ 39.451 s]
[INFO] Spark Project GraphX ............................... SUCCESS [ 37.242 s]
[INFO] Spark Project Streaming ............................ SUCCESS [01:15 min]
[INFO] Spark Project Catalyst ............................. SUCCESS [02:46 min]
[INFO] Spark Project SQL .................................. SUCCESS [03:53 min]
[INFO] Spark Project ML Library ........................... SUCCESS [02:44 min]
[INFO] Spark Project Tools ................................ SUCCESS [ 10.551 s]
[INFO] Spark Project Hive ................................. SUCCESS [01:28 min]
[INFO] Spark Project REPL ................................. SUCCESS [ 6.871 s]
[INFO] Spark Project YARN Shuffle Service ................. SUCCESS [01:00 min]
[INFO] Spark Project YARN ................................. SUCCESS [ 52.995 s]
[INFO] Spark Project Kubernetes ........................... SUCCESS [ 41.814 s]
[INFO] Spark Project Hive Thrift Server ................... SUCCESS [ 31.049 s]
[INFO] Spark Project Assembly ............................. SUCCESS [ 5.497 s]
[INFO] Spark Integration for Kafka 0.10 ................... SUCCESS [ 23.966 s]
[INFO] Kafka 0.10+ Source for Structured Streaming ........ SUCCESS [ 34.791 s]
[INFO] Spark Project Examples ............................. SUCCESS [ 23.326 s]
[INFO] Spark Integration for Kafka 0.10 Assembly .......... SUCCESS [ 6.101 s]
[INFO] Spark Avro ......................................... SUCCESS [ 26.999 s]
[INFO] ------------------------------------------------------------------------
[INFO] BUILD SUCCESS
[INFO] ------------------------------------------------------------------------
[INFO] Total time: 15:45 min (Wall Clock)
[hadoop@hadoop001 spark-2.4.2]$ ls spark-2.4.2-bin-2.6.0-cdh5.7.0.tgz
spark-2.4.2-bin-2.6.0-cdh5.7.0.tgz
[hadoop@hadoop001 spark-2.4.2]$ pwd
/home/hadoop/sourcecode/spark-2.4.2
[hadoop@hadoop001 spark-2.4.2]$