Introduction

Using APIs gives us access to an incredible amount of data only available online. In this exercise, we want to extract New York City solar data. Such data can, for example, allow us to determine on average the most productive periods of the year for solar panel deployment.

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(httr)
library(jsonlite)
## 
## Attaching package: 'jsonlite'
## 
## The following object is masked from 'package:purrr':
## 
##     flatten
library(ggplot2)

Finding the Suitable Endpoint and Parameters to Query the API

#KEY for DATAGOV
key <- 'cNTIeRx2hcKoKzn01hb2QxPTwv2hPNQcISsaDOC1'
base <- "https://developer.nrel.gov"
endpoint <- 'api/solar/solar_resource/v1.json'
url <- modify_url(base, path = endpoint)

#The latitude of New York City, NY, USA is 40.730610, and the longitude is -73.935242. 
parameters_list <- list(lat = 41, lon = -74, api_key = key)

Extracting the New York Solar Resource Data

response <- GET(url, query = parameters_list)

if (http_error(response)){
  print('Something went wrong', call. = FALSE)
}

status_code(response)
## [1] 200
class(response)
## [1] "response"
content <- content(response, 'text')
content
## [1] "{\"version\":\"1.0.0\",\"warnings\":[],\"errors\":[],\"metadata\":{\"sources\":[\"Perez-SUNY/NREL, 2012\"]},\"inputs\":{\"lat\":\"41\",\"lon\":\"-74\"},\"outputs\":{\"avg_dni\":{\"annual\":3.83,\"monthly\":{\"jan\":3.32,\"feb\":3.96,\"mar\":4.12,\"apr\":4.17,\"may\":4.21,\"jun\":4.19,\"jul\":4.4,\"aug\":4.23,\"sep\":4.12,\"oct\":3.46,\"nov\":2.78,\"dec\":2.97}},\"avg_ghi\":{\"annual\":3.92,\"monthly\":{\"jan\":2.01,\"feb\":2.89,\"mar\":3.89,\"apr\":4.78,\"may\":5.5,\"jun\":5.79,\"jul\":5.87,\"aug\":5.18,\"sep\":4.28,\"oct\":3.03,\"nov\":2.06,\"dec\":1.73}},\"avg_lat_tilt\":{\"annual\":4.63,\"monthly\":{\"jan\":3.67,\"feb\":4.49,\"mar\":4.91,\"apr\":5.07,\"may\":5.23,\"jun\":5.26,\"jul\":5.47,\"aug\":5.3,\"sep\":5.08,\"oct\":4.28,\"nov\":3.42,\"dec\":3.32}}}}"

Parsing the JSON into an R Object

json_list <- fromJSON(content)
str(json_list)
## List of 6
##  $ version : chr "1.0.0"
##  $ warnings: list()
##  $ errors  : list()
##  $ metadata:List of 1
##   ..$ sources: chr "Perez-SUNY/NREL, 2012"
##  $ inputs  :List of 2
##   ..$ lat: chr "41"
##   ..$ lon: chr "-74"
##  $ outputs :List of 3
##   ..$ avg_dni     :List of 2
##   .. ..$ annual : num 3.83
##   .. ..$ monthly:List of 12
##   .. .. ..$ jan: num 3.32
##   .. .. ..$ feb: num 3.96
##   .. .. ..$ mar: num 4.12
##   .. .. ..$ apr: num 4.17
##   .. .. ..$ may: num 4.21
##   .. .. ..$ jun: num 4.19
##   .. .. ..$ jul: num 4.4
##   .. .. ..$ aug: num 4.23
##   .. .. ..$ sep: num 4.12
##   .. .. ..$ oct: num 3.46
##   .. .. ..$ nov: num 2.78
##   .. .. ..$ dec: num 2.97
##   ..$ avg_ghi     :List of 2
##   .. ..$ annual : num 3.92
##   .. ..$ monthly:List of 12
##   .. .. ..$ jan: num 2.01
##   .. .. ..$ feb: num 2.89
##   .. .. ..$ mar: num 3.89
##   .. .. ..$ apr: num 4.78
##   .. .. ..$ may: num 5.5
##   .. .. ..$ jun: num 5.79
##   .. .. ..$ jul: num 5.87
##   .. .. ..$ aug: num 5.18
##   .. .. ..$ sep: num 4.28
##   .. .. ..$ oct: num 3.03
##   .. .. ..$ nov: num 2.06
##   .. .. ..$ dec: num 1.73
##   ..$ avg_lat_tilt:List of 2
##   .. ..$ annual : num 4.63
##   .. ..$ monthly:List of 12
##   .. .. ..$ jan: num 3.67
##   .. .. ..$ feb: num 4.49
##   .. .. ..$ mar: num 4.91
##   .. .. ..$ apr: num 5.07
##   .. .. ..$ may: num 5.23
##   .. .. ..$ jun: num 5.26
##   .. .. ..$ jul: num 5.47
##   .. .. ..$ aug: num 5.3
##   .. .. ..$ sep: num 5.08
##   .. .. ..$ oct: num 4.28
##   .. .. ..$ nov: num 3.42
##   .. .. ..$ dec: num 3.32

Creating a Datarame from a Complex List by taking vectors and building dataframe.

output_list <- json_list$outputs
avg_dni_monthly <- output_list$avg_dni$monthly
avg_ghi_monthly <- output_list$avg_ghi$monthly
avg_lat_monthly <- output_list$avg_lat_tilt$monthly
dataframe <- tibble( 'month' = month.abb, 
                     'avg_dni' = avg_dni_monthly,
                     'avg_ghi' = avg_ghi_monthly,
                     'avg_lat' = avg_lat_monthly)
dataframe_1_numeric <- dataframe %>%
  mutate(across(starts_with('avg'), as.numeric))

dataframe_1_numeric
## # A tibble: 12 × 4
##    month avg_dni avg_ghi avg_lat
##    <chr>   <dbl>   <dbl>   <dbl>
##  1 Jan      3.32    2.01    3.67
##  2 Feb      3.96    2.89    4.49
##  3 Mar      4.12    3.89    4.91
##  4 Apr      4.17    4.78    5.07
##  5 May      4.21    5.5     5.23
##  6 Jun      4.19    5.79    5.26
##  7 Jul      4.4     5.87    5.47
##  8 Aug      4.23    5.18    5.3 
##  9 Sep      4.12    4.28    5.08
## 10 Oct      3.46    3.03    4.28
## 11 Nov      2.78    2.06    3.42
## 12 Dec      2.97    1.73    3.32

Creating a Datarame from a Complex List by unlist, restructuring into matrix and converting into dataframe.

u_list <- unlist(json_list$outputs)

u_matrix <- matrix(u_list, nrow=13)

dataframe <- as.data.frame(u_matrix)

dataframe_2 <- dataframe[-1, ] %>%
# extra work to do compared with the first approach.
  mutate(month = month.abb)
#   below line is not needed for this approach.
#   mutate(across(starts_with('avg'), as.numeric))

dataframe_2
##      V1   V2   V3 month
## 2  3.32 2.01 3.67   Jan
## 3  3.96 2.89 4.49   Feb
## 4  4.12 3.89 4.91   Mar
## 5  4.17 4.78 5.07   Apr
## 6  4.21 5.50 5.23   May
## 7  4.19 5.79 5.26   Jun
## 8  4.40 5.87 5.47   Jul
## 9  4.23 5.18 5.30   Aug
## 10 4.12 4.28 5.08   Sep
## 11 3.46 3.03 4.28   Oct
## 12 2.78 2.06 3.42   Nov
## 13 2.97 1.73 3.32   Dec

Putting It All together

Creating the custom nrel_api_json_get_df() function for future extracting data from The National Renewable Energy Laboratory (NREL) via API.

library(tibble)
library(dplyr)
library(httr)
library(jsonlite)



nrel_api_json_get_df <- function(endpoint, queries = list()){
  url <- modify_url('https://developer.nrel.gov', path = endpoint)
  response <- GET(url, query = queries)
  if (http_error(response)){
    print(status_code(response))
    stop('Something went wrong..', call. = FALSE)
  }
  if (http_type(response)!= 'application/json'){
    stop('API did not return json', call. = FALSE)
  }
  content <- content(response, 'text')
  raw_data <- fromJSON(content)
  
  u_list <- unlist(raw_data$outputs)
  u_matrix <- matrix(u_list, nrow = 13)
  data <- as.data.frame(u_matrix)
  data <- data[-1, ] %>% 
    mutate(month = month.abb) %>% 
     select('avg_dni' = 1, 
            'avg_ghi' = 2, 
            'avg_lat_tilt' = 3,
            'month')
  data <- data[,c(4,1,2,3)]  #reorder coloumn, so month shows first
  
  data
}
  
 
key <- 'cNTIeRx2hcKoKzn01hb2QxPTwv2hPNQcISsaDOC1'
parameters_list <- list(lat = 41, lon = -74, api_key = key)

data <- nrel_api_json_get_df('api/solar/solar_resource/v1.json', parameters_list)
data
##    month avg_dni avg_ghi avg_lat_tilt
## 2    Jan    3.32    2.01         3.67
## 3    Feb    3.96    2.89         4.49
## 4    Mar    4.12    3.89         4.91
## 5    Apr    4.17    4.78         5.07
## 6    May    4.21    5.50         5.23
## 7    Jun    4.19    5.79         5.26
## 8    Jul    4.40    5.87         5.47
## 9    Aug    4.23    5.18         5.30
## 10   Sep    4.12    4.28         5.08
## 11   Oct    3.46    3.03         4.28
## 12   Nov    2.78    2.06         3.42
## 13   Dec    2.97    1.73         3.32

Visualizing New York City Solar Resource Data

The first plot x-axis is ordered alphabetically, while the second is ordered chronologically from January to December. This operation allows ordering the labels in the plot as we wish.

 data %>%
  ggplot(aes(x = month, y = avg_dni, group = 1)) +
  geom_line() +
  geom_point() +
  theme_bw()

 data %>%
  mutate(month = factor(month, levels = month.abb)) %>% 
  ggplot(aes(x = month, y = avg_dni, group = 1)) +
  geom_line() +
  geom_point() +
  theme_bw()