Spark Java读取Windows目录下的文件

Spark Java读取Windows目录下的文件

1、准备文件 C:\java\test.txt
10001,jiang1,11,suzhou1
10002,jiang2,12,suzhou2
10003,jiang3,13,suzhou3
10004,jiang4,14,suzhou4
10005,jiang5,15,suzhou5

2、启动spark。
spark-shell

3、在spark中测试。

spark.read.textFile("file:///c:/java/test.txt").show()

4、在eclipse中java测试。(注意不要用eclipse自带的jre,重新指定自己安装的jdk对应的jre)。

package spark;

import java.util.Arrays;
import java.util.List;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;

public class SimpleApp {

  public static void main(String[] args) {

    String testFile = "file:///C:/java/test.txt";
    
    SparkConf conf = new SparkConf().setMaster("local").setAppName("AppSpark");

    JavaSparkContext sc = new JavaSparkContext(conf);
    
//		List<String> listName = Arrays.asList("w1", "w2", "w3", "w4", "w4");
//		JavaRDD<String> nameRdd = sc.parallelize(listName);
//		long dataNum = nameRdd.count();
//		System.out.println("统计:" + dataNum);

    
    JavaRDD<String> testData = sc.textFile(testFile);
    System.out.println("字母统计:" + testData.count());
    
    long numAs = testData.filter(new Function<String, Boolean>() {
      public Boolean call(String s) {
        return s.contains("jiang");
      }
    }).count();

    long numBs = testData.filter(new Function<String, Boolean>() {
      public Boolean call(String s) {
        return s.contains("suzhou");
      }
    }).count();

    System.out.println("jiang: " + numAs + ", suzhou: " + numBs);
  }

}

 

发表回复

您的电子邮箱地址不会被公开。