// A good habit: make sure the notebook is attached to an appropriate version of Spark.
// (We are actually using Spark 2.2 if we use the SparkSession object `spark` down the road.)
//
// The (major, minor) components are compared numerically. Concatenating the digits
// (`sc.version.replace(".", "").toInt`) is fragile: a four-component version such as
// "1.3.0.1" would read as 1301 and pass, and a suffixed version such as "2.4.0-cdh6.3.2"
// would throw NumberFormatException instead of reporting the message below.
require(
  // Split on every run of non-digits and keep only the leading major/minor numbers.
  sc.version.split("\\D+").filter(_.nonEmpty).take(2).map(_.toInt) match {
    case Array(major, minor) => major > 1 || (major == 1 && minor >= 4)
    case Array(major)        => major > 1
    case _                   => false // no numeric component at all: treat as unsupported
  },
  "Spark 1.4.0+ is required to run this notebook. Please attach it to a Spark 1.4.0+ cluster."
)
// This reads the TSV file and turns it into a DataFrame.
val powerPlantDF = spark.read // use `sqlContext.read` instead if you want to use an older Spark version > 1.3; see the 008_ notebook
  .format("csv") // use the CSV data source (the spark-csv package on older Spark versions)
  .option("header", "true") // use the first line of all files as the header
  .option("inferSchema", "true") // automatically infer data types
  .option("delimiter", "\t") // specify the delimiter as Tab, i.e. '\t'
  .load("/databricks-datasets/power-plant/data/Sheet1.tsv")
powerPlantDF: org.apache.spark.sql.DataFrame = [AT: double, V: double ... 3 more fields]
// Print the first 10 rows of the power-plant DataFrame as a text table.
powerPlantDF.show(10) // try putting 1000 here instead of 10
+-----+-----+-------+-----+------+
| AT| V| AP| RH| PE|
+-----+-----+-------+-----+------+
|14.96|41.76|1024.07|73.17|463.26|
|25.18|62.96|1020.04|59.08|444.37|
| 5.11| 39.4|1012.16|92.14|488.56|
|20.86|57.32|1010.24|76.64|446.48|
|10.82| 37.5|1009.23|96.62| 473.9|
|26.27|59.44|1012.23|58.77|443.67|
|15.89|43.96|1014.02|75.24|467.35|
| 9.48|44.71|1019.12|66.43|478.42|
|14.64| 45.0|1021.78|41.25|475.98|
|11.74|43.56|1015.14|70.72| 477.5|
+-----+-----+-------+-----+------+
only showing top 10 rows
// List the tables visible through the SQLContext (columns: database, tableName, isTemporary).
// With no arguments, show() prints only the top 20 rows and shortens long values with "...".
sqlContext.tables.show() // Ctrl+Enter to see available tables
+--------+--------------------+-----------+
|database| tableName|isTemporary|
+--------+--------------------+-----------+
| default| adult| false|
| default| business_csv_csv| false|
| default| checkin_table| false|
| default| diamonds| false|
| default| inventory| false|
| default|item_merchant_cat...| false|
| default| items_left_csv| false|
| default| logistic_detail| false|
| default| merchant_ratings| false|
| default| order_data| false|
| default| order_ids_left_csv| false|
| default| repeat_csv| false|
| default| review_2019_csv| false|
| default|sample_logistic_t...| false|
| default| sentimentlex_csv| false|
| default| simple_range| false|
| default| social_media_usage| false|
| default| tip_json| false|
| default| tips_csv_csv| false|
| default| users_csv| false|
+--------+--------------------+-----------+
only showing top 20 rows
// List tables through the SparkSession catalog, which also reports description and tableType.
// Passing `false` to show() turns off truncation, so long table names are printed in full.
spark.catalog.listTables.show(false)
+------------------------+--------+-----------+---------+-----------+
|name |database|description|tableType|isTemporary|
+------------------------+--------+-----------+---------+-----------+
|adult |default |null |EXTERNAL |false |
|business_csv_csv |default |null |EXTERNAL |false |
|checkin_table |default |null |MANAGED |false |
|diamonds |default |null |EXTERNAL |false |
|inventory |default |null |MANAGED |false |
|item_merchant_categories|default |null |MANAGED |false |
|items_left_csv |default |null |EXTERNAL |false |
|logistic_detail |default |null |MANAGED |false |
|merchant_ratings |default |null |MANAGED |false |
|order_data |default |null |MANAGED |false |
|order_ids_left_csv |default |null |EXTERNAL |false |
|repeat_csv |default |null |MANAGED |false |
|review_2019_csv |default |null |EXTERNAL |false |
|sample_logistic_table |default |null |EXTERNAL |false |
|sentimentlex_csv |default |null |EXTERNAL |false |
|simple_range |default |null |MANAGED |false |
|social_media_usage |default |null |MANAGED |false |
|tip_json |default |null |EXTERNAL |false |
|tips_csv_csv |default |null |EXTERNAL |false |
|users_csv |default |null |EXTERNAL |false |
+------------------------+--------+-----------+---------+-----------+
only showing top 20 rows
// List the databases known to the catalog (name, description, locationUri), untruncated.
spark.catalog.listDatabases.show(false)
+---------+---------------------+--------------------------------------+
|name |description |locationUri |
+---------+---------------------+--------------------------------------+
|db_ad_gcs| |dbfs:/user/hive/warehouse/db_ad_gcs.db|
|default |Default Hive database|dbfs:/user/hive/warehouse |
+---------+---------------------+--------------------------------------+
// List the tables visible through the SQLContext once more (top 20 rows, long names truncated).
sqlContext.tables.show()
+--------+--------------------+-----------+
|database| tableName|isTemporary|
+--------+--------------------+-----------+
| default| adult| false|
| default| business_csv_csv| false|
| default| checkin_table| false|
| default| diamonds| false|
| default| inventory| false|
| default|item_merchant_cat...| false|
| default| items_left_csv| false|
| default| logistic_detail| false|
| default| merchant_ratings| false|
| default| order_data| false|
| default| order_ids_left_csv| false|
| default| repeat_csv| false|
| default| review_2019_csv| false|
| default|sample_logistic_t...| false|
| default| sentimentlex_csv| false|
| default| simple_range| false|
| default| social_media_usage| false|
| default| tip_json| false|
| default| tips_csv_csv| false|
| default| users_csv| false|
+--------+--------------------+-----------+
only showing top 20 rows
SDS-2.x, Scalable Data Engineering Science
Last refresh: Never