1、准备文件 C:\java\test.txt
10001,jiang1,11,suzhou1
10002,jiang2,12,suzhou2
10003,jiang3,13,suzhou3
10004,jiang4,14,suzhou4
10005,jiang5,15,suzhou5
2、启动spark。
spark-shell
3、在spark中测试。
spark.read.textFile("file:///c:/java/test.txt").show()

4、在eclipse中java测试。(注意不要用eclipse自带的jre,重新指定自己安装的jdk对应的jre)。
package spark;
import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
public class SimpleApp {
public static void main(String[] args) {
String testFile = "file:///C:/java/test.txt";
SparkConf conf = new SparkConf().setMaster("local").setAppName("AppSpark");
JavaSparkContext sc = new JavaSparkContext(conf);
// List<String> listName = Arrays.asList("w1", "w2", "w3", "w4", "w4");
// JavaRDD<String> nameRdd = sc.parallelize(listName);
// long dataNum = nameRdd.count();
// System.out.println("统计:" + dataNum);
JavaRDD<String> testData = sc.textFile(testFile);
System.out.println("字母统计:" + testData.count());
long numAs = testData.filter(new Function<String, Boolean>() {
public Boolean call(String s) {
return s.contains("jiang");
}
}).count();
long numBs = testData.filter(new Function<String, Boolean>() {
public Boolean call(String s) {
return s.contains("suzhou");
}
}).count();
System.out.println("jiang: " + numAs + ", suzhou: " + numBs);
}
}