将 Windows 中的 HDFS Java API 项目打包，在 Linux 的 Hadoop 平台运行

📅 2026/6/27 5:15:12
将 Windows 中的 HDFS Java API 项目打包，在 Linux 的 Hadoop 平台运行
目录
运行前提
一、打包
方式 1：IDEA 标准 Artifacts 打包
1. 顶部菜单 File → Project Structure（快捷键 Ctrl+Alt+Shift+S）
2. 左侧 Artifacts → 右上角 + → JAR → From modules with dependencies（配置弹窗）
3. 顶部菜单 Build → Build Artifacts → 选中你的 jar → Build
4. 注意
方式 2：Maven 打包（最通用，企业首选）
（1）pom.xml 添加打包配置 + 主类
（2）执行打包
（3）注意
二、在 Linux 系统中的 Hadoop 平台执行
1、hadoop 已经启动，hadoop 命令可执行
2、安全模式已经退出
3、将打包的 jar 包上传到 Linux 平台中，并使用命令 chmod 755 添加执行权限
4、执行 jar 包
附 1：MkdirDemo 代码
附 2：hadoop jar 命令使用说明
附 3：打包时是否将依赖打包进入说明
1、Hadoop 相关依赖
关键改动说明
1. 依赖 scope 调整
2. 彻底移除 maven-assembly-plugin
3. shade 插件核心能力
4. 集群运行命令（适配 hadoop jar）
方式 1：使用 shaded 大包（内置 log4j 依赖，无需额外传参）
方式 2：纯净小包 + -libjars 动态加载第三方依赖（包体积更小）
配套避坑操作
附 4：打包时指定主类（hadoop jar xxx.jar 直接运行，不用写主类名）
原理
方案 1：maven-shade 插件（你现在使用的 pom，推荐）
方案 2：原生 maven-jar-plugin（无第三方依赖轻量包，不适合此项目）
关键注意事项

运行前提
1、Linux 中 hadoop 配置完成，jps 显示进程正确
2、Linux 中 hadoop 命令可以运行
3、Linux 中已经退出 hadoop 安全模式

一、打包
方式 1：IDEA 标准 Artifacts 打包
1. 顶部菜单 File → Project Structure（快捷键 Ctrl+Alt+Shift+S）
2. 左侧 Artifacts → 右上角 + → JAR → From modules with dependencies
配置弹窗：
Module：选择你的当前项目
Main class：选中你的 MkdirDemo（包含 main 方法的主类）
Extract to the target JAR：把依赖打进 jar，避免 Linux 缺包报错
Directory for META-INF：需要改为 resources 路径（src/main/resources）
Apply → OK
3. 顶部菜单 Build → Build Artifacts → 选中你的 jar → Build
生成 jar 路径：项目根目录/out/artifacts/xxx_jar/xxx.jar
4. 注意
Directory for META-INF 需要改为 resources 路径，否则打包 Jar 时 IDEA 会自动在 src/main/java 下生成 META-INF/MANIFEST.MF，再次打包时工具尝试重新生成，触发冲突。如下图所示（截图未随文本保留）。

方式 2：Maven 打包（最通用，企业首选）
（1）pom.xml 添加打包配置 + 主类
<build>
  <plugins>
    <!-- maven打包插件：指定主类，打包所有依赖 -->
    <plugin>
      <groupId>org.apache.maven.plugins</groupId>
      <artifactId>maven-assembly-plugin</artifactId>
      <version>3.3.0</version>
      <executions>
        <execution>
          <id>make-assembly</id>
          <phase>package</phase>
          <goals><goal>single</goal></goals>
        </execution>
      </executions>
      <configuration>
        <archive>
          <manifest>
            <!-- 填写你的主类全限定名 -->
            <mainClass>MkdirDemo</mainClass>
          </manifest>
        </archive>
        <descriptorRefs>
          <!-- 将所有依赖打入一个jar包 -->
          <descriptorRef>jar-with-dependencies</descriptorRef>
        </descriptorRefs>
      </configuration>
    </plugin>
  </plugins>
</build>
完整 pom.xml 文件为：
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>org.example</groupId>
  <artifactId>HDFS-helloworld</artifactId>
  <version>1.0-SNAPSHOT</version>
  <dependencies>
    <dependency><groupId>junit</groupId><artifactId>junit</artifactId><version>RELEASE</version></dependency>
    <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-hdfs</artifactId><version>3.1.3</version></dependency>
    <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-client</artifactId><version>3.1.3</version></dependency>
    <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-common</artifactId><version>3.1.3</version></dependency>
    <dependency><groupId>org.apache.logging.log4j</groupId><artifactId>log4j-core</artifactId><version>2.19.0</version></dependency>
  </dependencies>
  <build>
    <plugins>
      <!-- maven打包插件：指定主类，打包所有依赖 -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>3.3.0</version>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals><goal>single</goal></goals>
          </execution>
        </executions>
        <configuration>
          <archive>
            <manifest>
              <!-- 填写你的主类全限定名 -->
              <mainClass>MkdirDemo</mainClass>
            </manifest>
          </archive>
          <descriptorRefs>
            <!-- 将所有依赖打入一个jar包 -->
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
      </plugin>
    </plugins>
  </build>
  <properties>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
  </properties>
</project>
（2）执行打包
右侧 Maven 面板 → Lifecycle → package，双击执行，完成后显示 finished。
打包完成后，jar 包在：项目/target/xxx-jar-with-dependencies.jar
（3）注意
如果打包后再次打包报错 Process terminated：
原因分析：Process terminated 不是具体异常，代表 Maven 打包进程被系统 / IDE 强制终止。最常见诱因是内存溢出、JDK/Maven 版本不兼容、pom 插件配置错误、IDEA 运行器配置异常；尤其你使用 jar-with-dependencies 把依赖全打进包时，依赖多，极易内存爆掉。
解决方案：File → Invalidate Caches...
勾选 Clear file system cache and local history → Invalidate and Restart 即可。

方式 3：直接使用 mvn 命令
（1）IDEA 内置 Terminal 终端（推荐，不用切文件夹）
IDEA 底部打开 Terminal，直接执行清理 + 打包，一条命令运行：
mvn clean package
clean：删除旧 target 文件夹、旧 jar，彻底清空
package：编译代码、执行 assembly 插件打包
（2）Windows CMD/PowerShell 手动执行
CMD：先 cd 切换到你的项目根目录（pom.xml 所在文件夹），示例：
cd D:\javaweb\HDFS-helloworld
执行打包：
mvn clean package
（3）打包成功标志
终端输出 BUILD SUCCESS
jar 包生成路径：项目目录/target/HDFS-helloworld-1.0-SNAPSHOT-jar-with-dependencies.jar

二、在 Linux 系统中的 Hadoop 平台执行
1、hadoop 已经启动，hadoop 命令可执行
2、安全模式已经退出
3、将打包的 jar 包上传到 Linux 平台中，并使用命令 chmod 755 添加执行权限
4、执行 jar 包

附 1：MkdirDemo 代码
主要功能是在 / 目录下创建 bigdata 文件夹。
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

public class MkdirDemo {
    public static FileSystem getFileSystem() throws IOException, URISyntaxException, InterruptedException {
        Configuration conf = new Configuration();
        URI url = new URI("hdfs://192.168.17.128:9000");
        final FileSystem fileSystem = FileSystem.get(url, conf, "root");
        return fileSystem;
    }

    public static void list(String dir) throws Exception {
        FileSystem fileSystem = getFileSystem();
        FileStatus[] listStatus = fileSystem.listStatus(new Path(dir));
        for (FileStatus fileStatus : listStatus) {
            boolean isDir = fileStatus.isDirectory();
            String name = fileStatus.getPath().toString();
            System.out.println(isDir + " " + name);
        }
    }

    public static void mkdir(String path) throws Exception {
        final FileSystem fileSystem = getFileSystem();
        fileSystem.mkdirs(new Path(path));
        //遍历文件夹下的内容
        list("/");
    }

    public static void create(String path) throws Exception {
        final FileSystem fileSystem = getFileSystem();
        fileSystem.create(new Path(path));
        //遍历文件夹下的内容
        list("/");
    }

    public static void delete(String path) throws Exception {
        final FileSystem fileSystem = getFileSystem();
        fileSystem.delete(new Path(path), true);
        //遍历文件夹下的内容
        list("/");
    }

    public static void main(String[] args) throws Exception {
        mkdir("/bigdata");
        // delete("bigdata");
        // create("demo.txt");
    }
}

附 2：hadoop jar 命令使用说明
hadoop jar <jar包路径> <主类全名>
hadoop jar HDFS-helloworld.jar MkdirDemo
命令说明：
hadoop：调用 Hadoop 客户端脚本，加载 Hadoop 运行环境、自带 HDFS/MapReduce 所有依赖包。
jar：是 hadoop 子命令，专门用来执行打包好的 Java Jar 程序，底层会启动一个 JVM。
HDFS-helloworld.jar：你的本地 Jar 包文件路径（当前目录下的 HDFS-helloworld.jar），里面存放编写的 HDFS 操作代码（创建目录）。
MkdirDemo：是 Jar 包内程序入口主类全名；不带包名则代表主类在默认包。效果大致相当于把 Hadoop 类路径一并加入后执行 java -cp xxx.jar:$(hadoop classpath) MkdirDemo。

附 3：打包时是否将依赖打包进入说明
此处只针对场景为 HDFS 操作、MR 操作。
1、Hadoop 相关依赖
不需要打进 Jar 包（编译时保留、打包剔除）。集群执行用 hadoop jar 命令时，程序运行依赖的 Hadoop 核心包（hadoop-common、hadoop-hdfs、mapreduce、yarn 等）全部由集群服务自带，运行时自动加载，不用打入自己的业务 Jar。
Maven 中统一设置依赖作用域 provided（版本号应与集群 Hadoop 版本保持一致，本项目为 3.1.3）：
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-common</artifactId>
  <version>3.1.3</version>
  <scope>provided</scope>
</dependency>
<dependency>
  <groupId>org.apache.hadoop</groupId>
  <artifactId>hadoop-hdfs</artifactId>
  <version>3.1.3</version>
  <scope>provided</scope>
</dependency>
provided 含义：IDE 编译、写代码时提供依赖，打包时不会把这些 jar 打进你的包，完美规避大包冲突、内存溢出。
优势：解决之前 jar-with-dependencies 打包过大、进程中断、类版本冲突的问题。
同时删除原来添加的 maven-assembly-plugin 完整配置：
无第三方依赖：只用原生 maven-jar-plugin
有第三方依赖：改用 maven-shade-plugin，并过滤所有 hadoop 类，不使用 assembly
打包后用命令执行：
hadoop jar 你的包.jar 主类全路径
补充：shade 和 provided 搭配是安全的。shade 可以手动配置过滤规则，主动排除 org/apache/hadoop/**，配合 provided 双重保险，不会冲突：
<filter>
  <artifact>*:*</artifact>
  <excludes>
    <exclude>org/apache/hadoop/**</exclude>
    <exclude>org/apache/mapreduce/**</exclude>
    <exclude>org/apache/hdfs/**</exclude>
  </excludes>
</filter>
此时原 pom.xml 文件：
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>org.example</groupId>
  <artifactId>HDFS-helloworld</artifactId>
  <version>1.0-SNAPSHOT</version>
  <dependencies>
    <dependency><groupId>junit</groupId><artifactId>junit</artifactId><version>RELEASE</version></dependency>
    <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-hdfs</artifactId><version>3.1.3</version></dependency>
    <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-client</artifactId><version>3.1.3</version></dependency>
    <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-common</artifactId><version>3.1.3</version></dependency>
    <dependency><groupId>org.apache.logging.log4j</groupId><artifactId>log4j-core</artifactId><version>2.19.0</version></dependency>
  </dependencies>
  <build>
    <plugins>
      <!-- maven打包插件：指定主类，打包所有依赖 -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>3.3.0</version>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals><goal>single</goal></goals>
          </execution>
        </executions>
        <configuration>
          <archive>
            <manifest>
              <!-- 填写你的主类全限定名 -->
              <mainClass>MkdirDemo</mainClass>
            </manifest>
          </archive>
          <descriptorRefs>
            <!-- 将所有依赖打入一个jar包 -->
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
      </plugin>
    </plugins>
  </build>
  <properties>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
  </properties>
</project>
变为：
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>org.example</groupId>
  <artifactId>HDFS-helloworld</artifactId>
  <version>1.0-SNAPSHOT</version>
  <properties>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <hadoop.version>3.1.3</hadoop.version>
  </properties>
  <dependencies>
    <!-- 单元测试 -->
    <dependency><groupId>junit</groupId><artifactId>junit</artifactId><version>4.13.2</version><scope>test</scope></dependency>
    <!-- Hadoop全部核心依赖：scope=provided，编译用，打包剔除 -->
    <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-hdfs</artifactId><version>${hadoop.version}</version><scope>provided</scope></dependency>
    <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-client</artifactId><version>${hadoop.version}</version><scope>provided</scope></dependency>
    <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-common</artifactId><version>${hadoop.version}</version><scope>provided</scope></dependency>
    <!-- 第三方日志依赖：非Hadoop自带，会被shade打进包 -->
    <dependency><groupId>org.apache.logging.log4j</groupId><artifactId>log4j-core</artifactId><version>2.19.0</version></dependency>
  </dependencies>
  <build>
    <plugins>
      <!-- 基础编译打包插件 -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <version>3.3.0</version>
      </plugin>
      <!-- Shade插件：只打包第三方依赖，过滤所有Hadoop类，替代assembly -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <version>3.5.1</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals><goal>shade</goal></goals>
            <configuration>
              <!-- 过滤所有Hadoop集群自带类，不打进Jar，彻底避免冲突 -->
              <filters>
                <filter>
                  <artifact>*:*</artifact>
                  <excludes>
                    <exclude>org/apache/hadoop/**</exclude>
                    <exclude>org/apache/yarn/**</exclude>
                    <exclude>org/apache/mapreduce/**</exclude>
                    <exclude>org/apache/hdfs/**</exclude>
                    <exclude>org/apache/hadoop/hdfs/**</exclude>
                  </excludes>
                </filter>
              </filters>
              <!-- 自动生成MANIFEST，指定程序入口主类，无需手动创建META-INF -->
              <transformers>
                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                  <!-- 替换为你的完整主类名；如果有包名必须写全包名，例：com.bigdata.MkdirDemo -->
                  <mainClass>MkdirDemo</mainClass>
                </transformer>
              </transformers>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>

关键改动说明
1. 依赖 scope 调整
所有 hadoop-* 依赖增加 <scope>provided</scope>，Maven 打包时不会把这些依赖打入 jar。
junit 设置 <scope>test</scope>，仅测试阶段生效，打包不会带入测试类。
log4j-core 保留默认 compile，属于第三方日志包，shade 会将其打进 jar。
2. 彻底移除 maven-assembly-plugin
原 pom 中 Hadoop 依赖是默认的 compile 作用域，assembly 的 jar-with-dependencies 会把它们全部打进包（该描述符按 runtime 作用域收集依赖，provided 依赖本身不会被打入），和集群自带 Hadoop 类产生冲突，直接删除。
3. shade 插件核心能力
自动过滤所有 org/apache/hadoop/** 相关 class（双重保险，配合 provided）。
仅打包你的业务代码 + log4j 等第三方自定义依赖。
自动生成合法 MANIFEST.MF，不会出现 IDE “already exists in VFS” 报错。
打包后生成两个 jar：
HDFS-helloworld-1.0-SNAPSHOT.jar：纯净业务包
HDFS-helloworld-1.0-SNAPSHOT-shaded.jar：内置第三方依赖的可运行包
注意：上面的 pom 没有配置 shadedArtifactAttached，shade 默认会用合并后的包替换主构件，此时 target 下实际得到的是 HDFS-helloworld-1.0-SNAPSHOT.jar（已内置第三方依赖）和 original-HDFS-helloworld-1.0-SNAPSHOT.jar（纯净业务包）。若希望得到这里所写的 -shaded 后缀文件，需要在 shade 的 <configuration> 中加入 <shadedArtifactAttached>true</shadedArtifactAttached>。
4. 集群运行命令（适配 hadoop jar）
方式 1：使用 shaded 大包（内置 log4j 依赖，无需额外传参）
（bash）运行：
hadoop jar HDFS-helloworld-1.0-SNAPSHOT-shaded.jar
方式 2：纯净小包 + -libjars 动态加载第三方依赖（包体积更小）
（bash）运行：
hadoop jar HDFS-helloworld-1.0-SNAPSHOT.jar MkdirDemo -libjars log4j-core-2.19.0.jar
注意：-libjars 属于 Hadoop 通用选项，只有主类通过 ToolRunner / GenericOptionsParser 解析参数时才会生效，并且要紧跟在主类名之后、业务参数之前。本文的 MkdirDemo 是普通 main 方法，-libjars 会被当作普通参数传入而不起作用；这种情况下可以在执行前用 export HADOOP_CLASSPATH=log4j-core-2.19.0.jar 把第三方 jar 加入客户端类路径。
配套避坑操作
删除项目内手动创建的 src/main/java/META-INF 文件夹，避免 VFS 文件冲突。
自定义配置文件、自定义 MANIFEST 资源统一放到 src/main/resources/META-INF。
IDE 本地调试时如果报类找不到，临时把 Hadoop 依赖 scope 改为 compile，打包上线切回 provided。
打包时执行 mvn clean package，先清空旧 target 目录。

附 4：打包时指定主类（hadoop jar xxx.jar 直接运行，不用写主类名）
原理
hadoop jar 读取 Jar 内部 META-INF/MANIFEST.MF 里的 Main-Class 属性，如果配置了，命令行可以省略主类全名，直接传业务参数。
下面分两种场景：普通 maven-jar（无第三方依赖）、shade 打包（带第三方依赖，你当前 pom 方案）。
方案 1：maven-shade 插件（你现在使用的 pom，推荐）
shade 通过 ManifestResourceTransformer 自动写入 Main-Class，无需手动创建 META-INF 文件，完美规避 IDE VFS 文件重复报错。
关键配置片段（已整合到你之前优化后的 pom）：
<plugin>
<groupId>org.apache.maven.plugins</groupId> <artifactId>maven-shade-plugin</artifactId> <version>3.5.1</version>
<executions> <execution> <phase>package</phase> <goals><goal>shade</goal></goals>
<configuration>
<!-- 过滤hadoop依赖，防止冲突 -->
<filters> <filter> <artifact>*:*</artifact> <excludes> <exclude>org/apache/hadoop/**</exclude> <exclude>org/apache/yarn/**</exclude> <exclude>org/apache/mapreduce/**</exclude> </excludes> </filter> </filters>
<!-- 自动生成MANIFEST，配置默认主类 -->
<transformers> <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> <!-- 这里替换成你的完整主类名；有包必须写全包名，例如 com.test.MkdirDemo --> <mainClass>MkdirDemo</mainClass> </transformer> </transformers>
</configuration> </execution> </executions>
</plugin>
使用效果：打包后生成 xxx-shaded.jar，执行命令直接省略主类。
# 无需写 MkdirDemo，直接运行
hadoop jar HDFS-helloworld-1.0-SNAPSHOT-shaded.jar
注意：只有在 shade 的 <configuration> 中配置了 <shadedArtifactAttached>true</shadedArtifactAttached> 才会生成带 -shaded 后缀的文件；按本文 pom 的默认配置，内置依赖的可运行包就是 HDFS-helloworld-1.0-SNAPSHOT.jar，原始包被重命名为 original-HDFS-helloworld-1.0-SNAPSHOT.jar。

方案 2：原生 maven-jar-plugin（无第三方依赖轻量包，不适合此项目）
如果你的项目没有 log4j 等第三方包，只用基础打包插件，通过 archive/manifest 下的 mainClass 配置主类：
<plugin>
  <groupId>org.apache.maven.plugins</groupId>
  <artifactId>maven-jar-plugin</artifactId>
  <version>3.3.0</version>
  <configuration>
    <archive>
      <manifest>
        <!-- 自动生成Main-Class -->
        <mainClass>MkdirDemo</mainClass>
      </manifest>
    </archive>
  </configuration>
</plugin>
运行命令：
hadoop jar HDFS-helloworld-1.0-SNAPSHOT.jar

此时完整 pom.xml 文件为：
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>
  <groupId>org.example</groupId>
  <artifactId>HDFS-helloworld</artifactId>
  <version>1.0-SNAPSHOT</version>
  <properties>
    <maven.compiler.source>8</maven.compiler.source>
    <maven.compiler.target>8</maven.compiler.target>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <hadoop.version>3.1.3</hadoop.version>
  </properties>
  <dependencies>
    <dependency><groupId>junit</groupId><artifactId>junit</artifactId><version>4.13.2</version><scope>test</scope></dependency>
    <!-- Hadoop依赖全部provided，打包不打入 -->
    <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-hdfs</artifactId><version>${hadoop.version}</version><scope>provided</scope></dependency>
    <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-client</artifactId><version>${hadoop.version}</version><scope>provided</scope></dependency>
    <dependency><groupId>org.apache.hadoop</groupId><artifactId>hadoop-common</artifactId><version>${hadoop.version}</version><scope>provided</scope></dependency>
    <!-- 第三方日志依赖，shade会打入包内 -->
    <dependency><groupId>org.apache.logging.log4j</groupId><artifactId>log4j-core</artifactId><version>2.19.0</version></dependency>
  </dependencies>
  <build>
    <plugins>
      <!-- 基础打包插件 -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <version>3.3.0</version>
      </plugin>
      <!-- Shade插件：过滤hadoop依赖 + 配置默认主类 -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <version>3.5.1</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals><goal>shade</goal></goals>
            <configuration>
              <filters>
                <filter>
                  <artifact>*:*</artifact>
                  <excludes>
                    <exclude>org/apache/hadoop/**</exclude>
                    <exclude>org/apache/yarn/**</exclude>
                    <exclude>org/apache/mapreduce/**</exclude>
                  </excludes>
                </filter>
              </filters>
              <!-- 核心：设置Jar默认主类 -->
              <transformers>
                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                  <!-- 重点：替换为你项目的完整类名 -->
                  <mainClass>MkdirDemo</mainClass>
                </transformer>
              </transformers>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>

关键注意事项
主类必须写全限定名：如果你的类在包下，比如 com.bigdata.MkdirDemo，不能只写 MkdirDemo，否则运行报错找不到类。
禁止手动创建 src/main/java/META-INF/MANIFEST.MF：Maven/shade 会自动生成清单文件，手动创建会触发 “already exists in VFS” IDE 报错。
打包执行命令，运行：
mvn clean package
target 目录会生成两个包：
HDFS-helloworld-1.0-SNAPSHOT.jar：纯净业务包（只含编译后的业务类），无第三方依赖，不带自动主类，需要手动传主类名
HDFS-helloworld-1.0-SNAPSHOT-shaded.jar：内置第三方依赖，配置好默认主类，直接用 hadoop jar 运行
注意：以上两个文件名对应 shade 配置了 <shadedArtifactAttached>true</shadedArtifactAttached> 的情况；按上面 pom 的默认配置，带依赖和默认主类的可运行包是 HDFS-helloworld-1.0-SNAPSHOT.jar，纯净业务包是 original-HDFS-helloworld-1.0-SNAPSHOT.jar。