1、准备文件 C:\java\test.txt
10001,jiang1,11,suzhou1
10002,jiang2,12,suzhou2
10003,jiang3,13,suzhou3
10004,jiang4,14,suzhou4
10005,jiang5,15,suzhou5
2、启动spark。
spark-shell
3、在spark中测试。
spark.read.textFile("file:///c:/java/test.txt").show()
4、在eclipse中java测试。(注意不要用eclipse自带的jre,重新指定自己安装的jdk对应的jre)。
package spark; import java.util.Arrays; import java.util.List; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.api.java.function.Function; public class SimpleApp { public static void main(String[] args) { String testFile = "file:///C:/java/test.txt"; SparkConf conf = new SparkConf().setMaster("local").setAppName("AppSpark"); JavaSparkContext sc = new JavaSparkContext(conf); // List<String> listName = Arrays.asList("w1", "w2", "w3", "w4", "w4"); // JavaRDD<String> nameRdd = sc.parallelize(listName); // long dataNum = nameRdd.count(); // System.out.println("统计:" + dataNum); JavaRDD<String> testData = sc.textFile(testFile); System.out.println("字母统计:" + testData.count()); long numAs = testData.filter(new Function<String, Boolean>() { public Boolean call(String s) { return s.contains("jiang"); } }).count(); long numBs = testData.filter(new Function<String, Boolean>() { public Boolean call(String s) { return s.contains("suzhou"); } }).count(); System.out.println("jiang: " + numAs + ", suzhou: " + numBs); } }