Download as txt, pdf, or txt
Download as txt, pdf, or txt
You are on page 1 of 1

# PySpark entry point and the aggregate function used by this job.
from pyspark.sql import SparkSession
from pyspark.sql.functions import avg

# BigQuery project and dataset identifiers; they are referenced by the
# BigQuery write step at the end of the script.
project_name = 'datos-gg-qa'
dataset_name = 'serverless_spark'

# Create (or reuse) the SparkSession for this job, registering the
# Spark BigQuery connector jar through the 'spark.jars' setting.
spark = (
    SparkSession.builder.appName("ETL")
    .config(
        'spark.jars',
        'gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.22.2.jar',
    )
    .getOrCreate()
)

# Cloud Storage location of the raw input CSV.
input_data = "gs://gg-bronce-qa/data.csv"

# Load the input CSV. The first row is treated as the header, and Spark is
# asked to infer the column types instead of reading everything as strings.
data_df = (
    spark.read.format("csv")
    .options(header=True, inferschema=True)
    .load(input_data)
)

# Print the resulting schema for inspection.
data_df.printSchema()

# Collapse rows that share a "name" into one row holding the mean of "age".
avg_df = (
    data_df
    .groupBy("name")
    .agg(avg("age"))
)

# Display the aggregated rows on the driver's stdout.
avg_df.show()

# Keep only the "name" column of the aggregated frame and remove duplicates.
# NOTE(review): the averaged age column is not selected here, so the outputs
# written from avg_table contain names only -- confirm this is intended.
avg_table = avg_df.select("name").distinct()

# Write to the silver ("plata") / transform layer, replacing any previous
# output at the same path.
output_data_plata = "gs://gg-silver-qa/datasets/"
avg_table.write.csv(
    output_data_plata + 'spark_dataframe_plata.csv',
    mode="overwrite",
)

# Write to the gold ("oro") / transform layer, replacing any previous
# output at the same path.
output_data_gold = "gs://gg-gold-qa/datasets/"
avg_table.write.csv(
    output_data_gold + 'spark_dataframe_gold.csv',
    mode="overwrite",
)

# Write to BigQuery (disabled).
# The statement below was commented out in the original script; it is kept
# fully commented so its wrapped continuation line no longer breaks parsing.
# avg_table.write.format('bigquery').mode("overwrite").option(
#     'table', project_name + ':' + dataset_name + '._avgage').save()

You might also like