master

分支 (85)

管理

管理

master

2120880206hujinfeng

2120880128yanwenjing

2120880240zhanganming

2120880132qiwenhe

2120880238chenyuchang

2120880143liangzhiyuan

2120880242tangwenkai

2120880226linwending

2120880220yangzixing

2120880218chaipengyu

2120880118wangyichun

2120880201songyuxin

2120880113qiaoyan

2120880239luyuyu

2120880104fuwenxue

2120880223lichenrui

2120880219duanhaibin

2120880130zhangzhongji

2120880215zhaoliping

tit-bd-2023
/
4

package com;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;

/**
 * Helper for pulling the leading two comma-separated fields out of a text file stored on HDFS.
 */
public class ReadFileUtil {
    /** Path read by {@link #main(String[])} when no command-line argument is supplied. */
    private static final String DEFAULT_PATH = "hdfs://master:9000/out/view1/part-00000";

    /**
     * Reads a text file from HDFS and collects the first two comma-separated fields of every line.
     *
     * <p>The first and the last character of each line are dropped before the line is split on
     * {@code ","}. The characters are removed unconditionally; the original author's note says
     * every line is expected to be wrapped in double quotes. Lines that are too short to strip
     * (fewer than two characters) or that yield fewer than two fields after splitting are
     * skipped, so a blank or malformed line no longer cuts the result short.
     *
     * <p>The stream is decoded as UTF-8 (assumes the file was written as UTF-8; confirm against
     * the job that produces it).
     *
     * @param file URI of the file to read, e.g. {@code hdfs://host:port/path/to/file}
     * @return a mutable list holding, for each accepted line in file order, field 0 followed by
     *     field 1; empty if no line qualifies
     * @throws IOException if the file system cannot be obtained or the file cannot be opened or
     *     read
     */
    public static List<String> ReadFromHDFS(String file) throws IOException {
        List<String> fields = new ArrayList<>();
        Configuration conf = new Configuration();
        // Deliberately not closed: FileSystem.get may return a cached instance that other
        // callers in the same JVM are still using.
        FileSystem fs = FileSystem.get(URI.create(file), conf);

        try (FSDataInputStream in = fs.open(new Path(file));
             BufferedReader reader =
                     new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                // Need at least two characters to drop the leading and trailing one.
                if (line.length() < 2) {
                    continue;
                }
                String[] parts = line.substring(1, line.length() - 1).split(",");
                // Skip lines that do not carry both fields instead of failing on parts[1].
                if (parts.length < 2) {
                    continue;
                }
                fields.add(parts[0]);
                fields.add(parts[1]);
            }
        }
        return fields;
    }

    /**
     * Manual smoke test: prints every extracted field on its own line.
     *
     * @param args optional; {@code args[0]} overrides the default HDFS path
     * @throws IOException if reading from HDFS fails
     */
    public static void main(String[] args) throws IOException {
        String path = args.length > 0 ? args[0] : DEFAULT_PATH;
        for (String field : ReadFromHDFS(path)) {
            System.out.println(field);
        }
    }
}