package com.uplooking.bigdata.mr.writablez.sequecefile; import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.SequenceFile.Writer; import org.apache.hadoop.io.SequenceFile.Writer.Option; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.compress.GzipCodec; import java.io.File; import java.io.IOException; /** * 将一个普通的文本文件转变成为一个二进制序列化SequenceFile到hdfs */ public class SequenceFileWriteOps { public static void main(String[] args) throws Exception { if(args == null || args.length < 2) { System.err.println("Parameter Errors ! Usage: <inputpath outputpath>"); System.exit(-1); } String inputpath = args[0]; Path outputpath = new Path(args[1]); Configuration conf = new Configuration(); //需要我们写入一个SequenceFile的,则需要相应的工具进行操作---Writer /* * ops需要哪些懂得,就是要对进行压缩的文件,方式进行简要说明 * */ //上传路径 Option outOption = SequenceFile.Writer.file(outputpath); //上传后的文件的Key的类型 Option keyOption = SequenceFile.Writer.keyClass(Text.class); //上传后的文件的Value的类型 Option valueOption = SequenceFile.Writer.valueClass(NullWritable.class); //采取何种压缩编码格式 // Option compression = SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK); Option compression = SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK, new GzipCodec()); Option[] ops = new Option[]{outOption, keyOption, valueOption, compression}; Writer writer = SequenceFile.createWriter(conf, ops); String content = FileUtils.readFileToString(new File(inputpath));//指定要进行序列化的文件 writer.append(new Text(content), NullWritable.get()); writer.close(); } }
// Reprinted material — please credit the original source: https://www.6miu.com/read-71158.html