Using APIs gives us access to an incredible amount of data only available online. In this exercise, we want to extract New York City solar data. Such data can, for example, allow us to determine on average the most productive periods of the year for solar panel deployment.
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(httr)
library(jsonlite)
##
## Attaching package: 'jsonlite'
##
## The following object is masked from 'package:purrr':
##
## flatten
library(ggplot2)
#KEY for DATAGOV
key <- 'cNTIeRx2hcKoKzn01hb2QxPTwv2hPNQcISsaDOC1'
base <- "https://developer.nrel.gov"
endpoint <- 'api/solar/solar_resource/v1.json'
url <- modify_url(base, path = endpoint)
#The latitude of New York City, NY, USA is 40.730610, and the longitude is -73.935242.
parameters_list <- list(lat = 41, lon = -74, api_key = key)
response <- GET(url, query = parameters_list)
if (http_error(response)){
print('Something went wrong', call. = FALSE)
}
status_code(response)
## [1] 200
class(response)
## [1] "response"
content <- content(response, 'text')
content
## [1] "{\"version\":\"1.0.0\",\"warnings\":[],\"errors\":[],\"metadata\":{\"sources\":[\"Perez-SUNY/NREL, 2012\"]},\"inputs\":{\"lat\":\"41\",\"lon\":\"-74\"},\"outputs\":{\"avg_dni\":{\"annual\":3.83,\"monthly\":{\"jan\":3.32,\"feb\":3.96,\"mar\":4.12,\"apr\":4.17,\"may\":4.21,\"jun\":4.19,\"jul\":4.4,\"aug\":4.23,\"sep\":4.12,\"oct\":3.46,\"nov\":2.78,\"dec\":2.97}},\"avg_ghi\":{\"annual\":3.92,\"monthly\":{\"jan\":2.01,\"feb\":2.89,\"mar\":3.89,\"apr\":4.78,\"may\":5.5,\"jun\":5.79,\"jul\":5.87,\"aug\":5.18,\"sep\":4.28,\"oct\":3.03,\"nov\":2.06,\"dec\":1.73}},\"avg_lat_tilt\":{\"annual\":4.63,\"monthly\":{\"jan\":3.67,\"feb\":4.49,\"mar\":4.91,\"apr\":5.07,\"may\":5.23,\"jun\":5.26,\"jul\":5.47,\"aug\":5.3,\"sep\":5.08,\"oct\":4.28,\"nov\":3.42,\"dec\":3.32}}}}"
json_list <- fromJSON(content)
str(json_list)
## List of 6
## $ version : chr "1.0.0"
## $ warnings: list()
## $ errors : list()
## $ metadata:List of 1
## ..$ sources: chr "Perez-SUNY/NREL, 2012"
## $ inputs :List of 2
## ..$ lat: chr "41"
## ..$ lon: chr "-74"
## $ outputs :List of 3
## ..$ avg_dni :List of 2
## .. ..$ annual : num 3.83
## .. ..$ monthly:List of 12
## .. .. ..$ jan: num 3.32
## .. .. ..$ feb: num 3.96
## .. .. ..$ mar: num 4.12
## .. .. ..$ apr: num 4.17
## .. .. ..$ may: num 4.21
## .. .. ..$ jun: num 4.19
## .. .. ..$ jul: num 4.4
## .. .. ..$ aug: num 4.23
## .. .. ..$ sep: num 4.12
## .. .. ..$ oct: num 3.46
## .. .. ..$ nov: num 2.78
## .. .. ..$ dec: num 2.97
## ..$ avg_ghi :List of 2
## .. ..$ annual : num 3.92
## .. ..$ monthly:List of 12
## .. .. ..$ jan: num 2.01
## .. .. ..$ feb: num 2.89
## .. .. ..$ mar: num 3.89
## .. .. ..$ apr: num 4.78
## .. .. ..$ may: num 5.5
## .. .. ..$ jun: num 5.79
## .. .. ..$ jul: num 5.87
## .. .. ..$ aug: num 5.18
## .. .. ..$ sep: num 4.28
## .. .. ..$ oct: num 3.03
## .. .. ..$ nov: num 2.06
## .. .. ..$ dec: num 1.73
## ..$ avg_lat_tilt:List of 2
## .. ..$ annual : num 4.63
## .. ..$ monthly:List of 12
## .. .. ..$ jan: num 3.67
## .. .. ..$ feb: num 4.49
## .. .. ..$ mar: num 4.91
## .. .. ..$ apr: num 5.07
## .. .. ..$ may: num 5.23
## .. .. ..$ jun: num 5.26
## .. .. ..$ jul: num 5.47
## .. .. ..$ aug: num 5.3
## .. .. ..$ sep: num 5.08
## .. .. ..$ oct: num 4.28
## .. .. ..$ nov: num 3.42
## .. .. ..$ dec: num 3.32
output_list <- json_list$outputs
avg_dni_monthly <- output_list$avg_dni$monthly
avg_ghi_monthly <- output_list$avg_ghi$monthly
avg_lat_monthly <- output_list$avg_lat_tilt$monthly
dataframe <- tibble( 'month' = month.abb,
'avg_dni' = avg_dni_monthly,
'avg_ghi' = avg_ghi_monthly,
'avg_lat' = avg_lat_monthly)
dataframe_1_numeric <- dataframe %>%
mutate(across(starts_with('avg'), as.numeric))
dataframe_1_numeric
## # A tibble: 12 × 4
## month avg_dni avg_ghi avg_lat
## <chr> <dbl> <dbl> <dbl>
## 1 Jan 3.32 2.01 3.67
## 2 Feb 3.96 2.89 4.49
## 3 Mar 4.12 3.89 4.91
## 4 Apr 4.17 4.78 5.07
## 5 May 4.21 5.5 5.23
## 6 Jun 4.19 5.79 5.26
## 7 Jul 4.4 5.87 5.47
## 8 Aug 4.23 5.18 5.3
## 9 Sep 4.12 4.28 5.08
## 10 Oct 3.46 3.03 4.28
## 11 Nov 2.78 2.06 3.42
## 12 Dec 2.97 1.73 3.32
u_list <- unlist(json_list$outputs)
u_matrix <- matrix(u_list, nrow=13)
dataframe <- as.data.frame(u_matrix)
dataframe_2 <- dataframe[-1, ] %>%
# extra work to do compared with the first approach.
mutate(month = month.abb)
# below line is not needed for this approach.
# mutate(across(starts_with('avg'), as.numeric))
dataframe_2
## V1 V2 V3 month
## 2 3.32 2.01 3.67 Jan
## 3 3.96 2.89 4.49 Feb
## 4 4.12 3.89 4.91 Mar
## 5 4.17 4.78 5.07 Apr
## 6 4.21 5.50 5.23 May
## 7 4.19 5.79 5.26 Jun
## 8 4.40 5.87 5.47 Jul
## 9 4.23 5.18 5.30 Aug
## 10 4.12 4.28 5.08 Sep
## 11 3.46 3.03 4.28 Oct
## 12 2.78 2.06 3.42 Nov
## 13 2.97 1.73 3.32 Dec
Creating the custom nrel_api_json_get_df()
function for
future extracting data from The National Renewable Energy Laboratory
(NREL) via API.
library(tibble)
library(dplyr)
library(httr)
library(jsonlite)
nrel_api_json_get_df <- function(endpoint, queries = list()){
url <- modify_url('https://developer.nrel.gov', path = endpoint)
response <- GET(url, query = queries)
if (http_error(response)){
print(status_code(response))
stop('Something went wrong..', call. = FALSE)
}
if (http_type(response)!= 'application/json'){
stop('API did not return json', call. = FALSE)
}
content <- content(response, 'text')
raw_data <- fromJSON(content)
u_list <- unlist(raw_data$outputs)
u_matrix <- matrix(u_list, nrow = 13)
data <- as.data.frame(u_matrix)
data <- data[-1, ] %>%
mutate(month = month.abb) %>%
select('avg_dni' = 1,
'avg_ghi' = 2,
'avg_lat_tilt' = 3,
'month')
data <- data[,c(4,1,2,3)] #reorder coloumn, so month shows first
data
}
key <- 'cNTIeRx2hcKoKzn01hb2QxPTwv2hPNQcISsaDOC1'
parameters_list <- list(lat = 41, lon = -74, api_key = key)
data <- nrel_api_json_get_df('api/solar/solar_resource/v1.json', parameters_list)
data
## month avg_dni avg_ghi avg_lat_tilt
## 2 Jan 3.32 2.01 3.67
## 3 Feb 3.96 2.89 4.49
## 4 Mar 4.12 3.89 4.91
## 5 Apr 4.17 4.78 5.07
## 6 May 4.21 5.50 5.23
## 7 Jun 4.19 5.79 5.26
## 8 Jul 4.40 5.87 5.47
## 9 Aug 4.23 5.18 5.30
## 10 Sep 4.12 4.28 5.08
## 11 Oct 3.46 3.03 4.28
## 12 Nov 2.78 2.06 3.42
## 13 Dec 2.97 1.73 3.32
The first plot x-axis is ordered alphabetically, while the second is ordered chronologically from January to December. This operation allows ordering the labels in the plot as we wish.
data %>%
ggplot(aes(x = month, y = avg_dni, group = 1)) +
geom_line() +
geom_point() +
theme_bw()
data %>%
mutate(month = factor(month, levels = month.abb)) %>%
ggplot(aes(x = month, y = avg_dni, group = 1)) +
geom_line() +
geom_point() +
theme_bw()