G-SQL
Load Flights Data into Elastic Dataset
// Copy every row selected from pcatalog.flights_hdfs into pcatalog.gimel_flights_elastic.
gsql("insert into pcatalog.gimel_flights_elastic select * from pcatalog.flights_hdfs")
Cache Flights
// Cache the full contents of pcatalog.gimel_flights_elastic under the table name flights_elastic.
gsql("cache table flights_elastic select * from pcatalog.gimel_flights_elastic")
Read Data from Elastic
// Query flights_elastic and call show(10) on the result
// (presumably prints the first 10 rows of a Spark DataFrame — TODO confirm gsql's return type).
gsql("select * from flights_elastic").show(10)
Scala API for Catalog Provider-USER
Execute the steps in this section if you have chosen USER as the CatalogProvider, i.e. if you executed the following command:
// Set the gimel.catalog.provider property to USER.
gsql("set gimel.catalog.provider=USER")
Set options
// Dataset properties (as a JSON string) for the ELASTIC_SEARCH dataset
// pcatalog.gimel_flights_elastic, targeting the ES resource "flights/data".
// NOTE(review): the key "gimel.es.schema.mapping" appears twice inside "props" below
// (once with a geo_point mapping for "location", once with date mappings).
// How duplicate JSON keys are resolved depends on the parser (commonly the last one wins) —
// confirm which mapping is intended and remove or merge the other.
val datasetPropsJson = """{
"datasetType": "ELASTIC_SEARCH",
"fields": [],
"partitionFields": [],
"props": {
"es.mapping.date.rich":"true",
"es.nodes":"http://elasticsearch",
"es.port":"9200",
"es.resource":"flights/data",
"es.index.auto.create":"true",
"gimel.es.schema.mapping":"{\"location\": { \"type\": \"geo_point\" } }",
"gimel.es.index.partition.delimiter":"-",
"gimel.es.index.partition.isEnabled":"true",
"gimel.es.index.read.all.partitions.isEnabled":"true",
"gimel.es.index.partition.suffix":"20180205",
"gimel.es.schema.mapping":"{\"executionStartTime\": {\"format\": \"strict_date_optional_time||epoch_millis\", \"type\": \"date\" }, \"createdTime\": {\"format\": \"strict_date_optional_time||epoch_millis\", \"type\": \"date\"},\"endTime\": {\"format\": \"strict_date_optional_time||epoch_millis\", \"type\": \"date\"}}",
"gimel.storage.type":"ELASTIC_SEARCH",
"datasetName":"pcatalog.gimel_flights_elastic"
}
}"""
// Options map keyed by "<dataset name>.dataSetProperties", carrying the JSON above.
val options = Map("pcatalog.gimel_flights_elastic.dataSetProperties"->datasetPropsJson)
// Dataset properties (as a JSON string) for the HDFS dataset pcatalog.flights_hdfs:
// data format "csv", located at hdfs://namenode:8020/flights/data.
val datasetHivePropsJson = """{
"datasetType": "HDFS",
"fields": [],
"partitionFields": [],
"props": {
"gimel.hdfs.data.format":"csv",
"location":"hdfs://namenode:8020/flights/data",
"datasetName":"pcatalog.flights_hdfs"
}
}"""
// Options map keyed by "<dataset name>.dataSetProperties", carrying the JSON above.
val hiveOptions = Map("pcatalog.flights_hdfs.dataSetProperties"->datasetHivePropsJson)
Load Flights Data into Elastic Dataset
import com.paypal.gimel._
// Build a DataSet handle from `spark` (presumably the shell's SparkSession — TODO confirm).
val dataSet = DataSet(spark)
// Read the source flights data from pcatalog.flights_hdfs, passing the HDFS dataset properties.
val hivedf = dataSet.read("pcatalog.flights_hdfs",hiveOptions)
// Write that data to pcatalog.gimel_flights_elastic, passing the Elasticsearch dataset properties.
val df = dataSet.write("pcatalog.gimel_flights_elastic",hivedf,options)
// Call count on the value returned by write.
df.count
Read Data from Elastic
import com.paypal.gimel._
// Build a DataSet handle from `spark` (presumably the shell's SparkSession — TODO confirm).
val dataSet = DataSet(spark)
// Read pcatalog.gimel_flights_elastic, passing the Elasticsearch dataset properties.
val df = dataSet.read("pcatalog.gimel_flights_elastic",options)
// Call show(10) on the result (presumably prints the first 10 rows — TODO confirm).
df.show(10)
Scala API for Catalog Provider-HIVE
Execute the steps in this section if you have chosen HIVE as the CatalogProvider, i.e. if you executed the following command:
// Set the gimel.catalog.provider property to HIVE.
gsql("set gimel.catalog.provider=HIVE")
Load Flights Data into Elastic Dataset
import com.paypal.gimel._
// Build a DataSet handle from `spark` (presumably the shell's SparkSession — TODO confirm).
val dataSet = DataSet(spark)
// Read pcatalog.flights_hdfs; no options map is passed in this variant
// (presumably the dataset properties come from the HIVE catalog — TODO confirm).
val hivedf = dataSet.read("pcatalog.flights_hdfs")
// Write the data that was read to pcatalog.gimel_flights_elastic, again without an options map.
val df = dataSet.write("pcatalog.gimel_flights_elastic",hivedf)
// Call count on the value returned by write.
df.count
Read Data from Elastic
import com.paypal.gimel._
// Build a DataSet handle from `spark` (presumably the shell's SparkSession — TODO confirm).
val dataSet = DataSet(spark)
// Read pcatalog.gimel_flights_elastic; no options map is passed in this variant.
val df = dataSet.read("pcatalog.gimel_flights_elastic")
// Call show(10) on the result (presumably prints the first 10 rows — TODO confirm).
df.show(10)