G-SQL
Cache Airports Data from HDFS
// Builds a `cache table lkp_airport` statement and submits it through gsql
// (gsql is defined outside this snippet — presumably Gimel's SQL entry point).
//
// Inner query: selects iata, lat, lon, country, city, name from
// pcatalog.flights_lookup_airports_hdfs and numbers the rows within each iata as rnk.
// Outer query: keeps only rnk = 1 (one row per iata) and adds two derived columns:
//   location  - struct(lat, lon)
//   location1 - the string "lat,lon"
//
// NOTE(review): the window is ordered by the constant 1, so which of several rows
// sharing an iata receives rnk = 1 looks arbitrary — confirm that is acceptable.
// NOTE(review): `, *` also carries the helper column rnk (and the raw lat/lon)
// into the cached table.
val sql="""cache table lkp_airport
select
struct(lat,lon) as location
,concat(lat,",",lon) as location1
, *
from
(
select iata, lat, lon, country, city, name
, row_number() over (partition by iata order by 1 desc ) as rnk
from pcatalog.flights_lookup_airports_hdfs
) tbl
where rnk = 1
"""
gsql(sql)
Read Data from HDFS
// Selects every column of pcatalog.flights_hdfs through gsql and displays the result via show().
// NOTE(review): if gsql returns a Spark DataFrame, show() prints only the first rows — confirm.
gsql("select * from pcatalog.flights_hdfs").show()
Count Records from HDFS
// Runs the same select through gsql and calls count() on the result to get the number of records.
gsql("select * from pcatalog.flights_hdfs").count()
Scala API
Read Data from HDFS
import com.paypal.gimel._
// Reads pcatalog.flights_hdfs through the DataSet API, supplying the dataset
// definition inline, and counts the rows that were read.
// `spark` is not defined in this snippet — presumably the notebook's SparkSession.
val dataSet = DataSet(spark)
// Inline dataset definition: datasetType HDFS, csv data format, explicit HDFS location.
// NOTE(review): the val name says "Hive" although datasetType is "HDFS"; the name is
// kept unchanged in case other cells refer to it.
val datasetHivePropsJson = """{
"datasetType": "HDFS",
"fields": [],
"partitionFields": [],
"props": {
"gimel.hdfs.data.format":"csv",
"location":"hdfs://namenode:8020/flights/data",
"datasetName":"pcatalog.flights_hdfs"
}
}"""
// The JSON is passed to read() under the key "<dataset name>.dataSetProperties".
val options= Map("pcatalog.flights_hdfs.dataSetProperties"->datasetHivePropsJson)
val df = dataSet.read("pcatalog.flights_hdfs",options)
// Call count with explicit parentheses, matching the `.count()` style used in the
// G-SQL section; relying on auto-application of an empty-paren method is deprecated
// in Scala 2.13 and rejected by Scala 3.
df.count()
Count Records from HDFS
import com.paypal.gimel._
// Reads pcatalog.flights_hdfs by name only (no inline dataset properties are passed)
// and counts the rows.
// `spark` is not defined in this snippet — presumably the notebook's SparkSession.
val dataSet = DataSet(spark)
val df = dataSet.read("pcatalog.flights_hdfs")
// Explicit parentheses: auto-application of an empty-paren method is deprecated in
// Scala 2.13 and rejected by Scala 3.
df.count()