MapReduce之普通文件转SequenceFile

xiaoxiao2021-02-28  97

package com.uplooking.bigdata.mr.writablez.sequecefile;

import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Writer;
import org.apache.hadoop.io.SequenceFile.Writer.Option;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;

import java.io.File;
import java.io.IOException;

/**
 * Converts an ordinary local text file into a binary, serialized SequenceFile.
 *
 * <p>The whole input file is read into memory and written as a single record whose key is
 * the file content ({@link Text}) and whose value is {@link NullWritable}. The output is
 * written with block compression using {@link GzipCodec}.
 */
public class SequenceFileWriteOps {

    /**
     * Entry point.
     *
     * @param args {@code args[0]} is the local input file path; {@code args[1]} is the
     *             output path of the SequenceFile, resolved against the file system
     *             selected by the default {@link Configuration}
     * @throws Exception if the input cannot be read or the SequenceFile cannot be written
     */
    public static void main(String[] args) throws Exception {
        if (args == null || args.length < 2) {
            System.err.println("Parameter Errors ! Usage: <inputpath outputpath>");
            System.exit(-1);
        }
        String inputpath = args[0];
        Path outputpath = new Path(args[1]);
        Configuration conf = new Configuration();

        // Read the input BEFORE creating the writer: if the input is missing or unreadable
        // we fail without leaving an empty SequenceFile at the output path.
        // Decode explicitly as UTF-8 (the encoding Text stores) instead of relying on the
        // platform default charset. NOTE(review): assumes the input file is UTF-8 encoded.
        String content = FileUtils.readFileToString(new File(inputpath), "UTF-8");

        // A SequenceFile is produced through SequenceFile.Writer, which is configured
        // with a set of options describing where and how the file is written.
        // Destination path of the SequenceFile.
        Option outOption = SequenceFile.Writer.file(outputpath);
        // Key type of the records in the output file.
        Option keyOption = SequenceFile.Writer.keyClass(Text.class);
        // Value type of the records in the output file.
        Option valueOption = SequenceFile.Writer.valueClass(NullWritable.class);
        // Compression: block-level, with an explicit gzip codec
        // (SequenceFile.Writer.compression(CompressionType.BLOCK) alone would use the default codec).
        Option compression =
                SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new GzipCodec());
        Option[] ops = new Option[]{outOption, keyOption, valueOption, compression};

        // try-with-resources guarantees the writer is closed (and buffered data flushed)
        // even when append() throws.
        try (Writer writer = SequenceFile.createWriter(conf, ops)) {
            writer.append(new Text(content), NullWritable.get());
        }
    }
}
转载请注明原文地址: https://www.6miu.com/read-71158.html

最新回复(0)