// Preview the first five records of the raw dataset.
for (record <- data.take(5)) println(record)
"user_id","activity","timeStampAsLong","x","y","z"
"user_001","Jumping",1446047227606,"4.33079","-12.72175","-3.18118"
"user_001","Jumping",1446047227671,"0.575403","-0.727487","2.95007"
"user_001","Jumping",1446047227735,"-1.60885","3.52607","-0.1922"
"user_001","Jumping",1446047227799,"0.690364","-0.037722","1.72382"
// Load the activity-recognition training data into a DataFrame.
// The REPL output below shows Spark 2.x ("... 4 more fields"), where CSV
// support is built into DataFrameReader, so the external
// com.databricks.spark.csv package is unnecessary.
val dataDF = sqlContext.read
  .option("header", "true")      // use the first line of the file as the header row
  .option("inferSchema", "true") // automatically infer column data types
  .option("delimiter", ",")      // comma delimiter (Spark's default; stated for clarity)
  .csv("dbfs:///datasets/sds/ActivityRecognition/dataTraining.csv")
dataDF: org.apache.spark.sql.DataFrame = [user_id: string, activity: string ... 4 more fields]
ScaDaMaLe Course site and book