1、准备文件 C:\java\test.txt
10001,jiang1,11,suzhou1
10002,jiang2,12,suzhou2
10003,jiang3,13,suzhou3
10004,jiang4,14,suzhou4
10005,jiang5,15,suzhou5
2、启动spark。
spark-shell
3、在spark中测试。
spark.read.textFile("file:///c:/java/test.txt").show()

4、在eclipse中java测试。(注意不要用eclipse自带的jre,重新指定自己安装的jdk对应的jre)。
package spark;
import java.util.Arrays;
import java.util.List;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
/**
 * Minimal Spark smoke test: loads a local text file as an RDD, counts its
 * lines, and counts how many lines contain the substrings "jiang" and
 * "suzhou". Runs with a local master, so no cluster is required.
 */
public class SimpleApp {
  public static void main(String[] args) {
    String testFile = "file:///C:/java/test.txt";

    SparkConf conf = new SparkConf().setMaster("local").setAppName("AppSpark");
    // try-with-resources: JavaSparkContext implements Closeable, so the
    // context is stopped even if an action throws (the original leaked it).
    try (JavaSparkContext sc = new JavaSparkContext(conf)) {
      JavaRDD<String> testData = sc.textFile(testFile);
      // count() returns the number of lines, not characters — the original
      // message ("字母统计" = letter count) was misleading.
      System.out.println("行数统计:" + testData.count());

      // Lambdas instead of anonymous Function classes; Spark 2.x requires
      // Java 8+, so this is safe and much more readable.
      long numAs = testData.filter(s -> s.contains("jiang")).count();
      long numBs = testData.filter(s -> s.contains("suzhou")).count();
      System.out.println("jiang: " + numAs + ", suzhou: " + numBs);
    }
  }
}