1 |
##' @title Find a Historical Climate Data station (advanced) |
|
2 |
##' |
|
3 |
##' @description Search for stations in the Historical Climate Data inventory name, available data, and/or distance to a target. |
|
4 |
##' |
|
5 |
##' @param name character; optional character string or a regular expression to be matched against known station names. See \code{\link{grep}} for details. |
|
6 |
##' @param ignore.case logical; by default the search for station names is not case-sensitive. |
|
7 |
##' @param glob logical; use wildcards in station name as detailed in \code{link{glob2rx}}. |
|
8 |
##' @param province character; optional character string to filter by a given province. Use full name or two-letter code, e.g. ON for Ontario. |
|
9 |
##' @param baseline vector; optional vector with a start and end year for a desired baseline. |
|
10 |
##' @param type character; type of data to search for. Only used if a baseline is specified. Defaults to "hourly". |
|
11 |
##' @param target numeric; optional numeric value of the target (reference) station, or a vector of length 2 containing latitude and longitude (in that order). |
|
12 |
##' @param dist numeric; vector with a range of distance from the target in km. Only used if a target is specified. (default is 0:100) |
|
13 |
##' @param sort Boolean; if TRUE (default), will sort the resultant table by distance from `target`. Only used if a target is specified. |
|
14 |
##' @param ... Additional arguments passed to \code{\link{grep}}. |
|
15 |
##' |
|
16 |
##' @return A \code{tbl_df}, containing details of any matching HCD stations. |
|
17 |
##' |
|
18 |
##' @importFrom geosphere distGeo |
|
19 |
##' @importFrom utils glob2rx |
|
20 |
##' |
|
21 |
##' @export |
|
22 |
##' |
|
23 |
##' @author Conor I. Anderson |
|
24 |
##' |
|
25 |
##' @examples |
|
26 |
##' # Find all stations with names beginning in "Reg". |
|
27 |
##' find_station_adv("Reg*", glob = TRUE) |
|
28 |
##' |
|
29 |
##' # Find stations named "Yellowknife", with hourly data available from 1971 to 2000. |
|
30 |
##' find_station_adv("Yellowknife", baseline = c(1971, 2000), type = "hourly") |
|
31 |
##' |
|
32 |
##' # Find all stations between 0 and 100 km from Station No. 5051. |
|
33 |
##' find_station_adv(target = 5051, dist = 0:100) |
|
34 |
##' |
|
35 | ||
36 |
`find_station_adv` <- function(name = NULL, ignore.case = TRUE, glob = FALSE, province = NULL, baseline = NULL, type = "daily", target = NULL, dist = 0:100, sort = TRUE, ...) { |
|
37 | ||
38 |
# If `name` is not NULL, filter by name |
|
39 |
if (!is.null(name)) { |
13x |
40 |
if (glob) { |
5x |
41 |
name <- glob2rx(name) |
2x |
42 |
} |
5x |
43 |
take <- grep(name, station_data$Name, ignore.case = ignore.case, ...) |
5x |
44 |
} else { |
13x |
45 |
take <- 1:nrow(station_data) |
8x |
46 |
} |
13x |
47 | ||
48 |
# Next, set the data we are interested in, if necessary |
|
49 |
if (!is.null(baseline)) { |
13x |
50 |
if (length(baseline) < 2 | baseline[1] > baseline[length(baseline)]) stop("error: check baseline format") |
2x |
51 |
if (type == "hourly") data_vars <- c("HourlyFirstYr", "HourlyLastYr") |
2x |
52 |
if (type == "daily") data_vars <- c("DailyFirstYr", "DailyLastYr") |
2x |
53 |
if (type == "monthly") data_vars <- c("MonthlyFirstYr", "MonthlyLastYr") |
2x |
54 |
} else { |
13x |
55 |
data_vars <- NULL |
11x |
56 |
} |
13x |
57 | ||
58 |
# Make a table with the info we want |
|
59 |
vars_wanted <- c("Name", "Province", "StationID", "LatitudeDD", "LongitudeDD", data_vars) |
13x |
60 |
df <- station_data[take, vars_wanted] |
13x |
61 | ||
62 |
# If `province` is not NULL, we will filter by province |
|
63 |
if (!is.null(province)) { |
13x |
64 |
# Identify all stations outside of our baseline |
|
65 |
if (nchar(province) == 2L) { |
3x |
66 |
province <- levels(as.factor(station_data$Province))[which(c("AB", "BC", "MB", "NB", "NL", "NT", "NS", "NU", "ON", "PE", "QC", "SK", "YT") == province)] |
2x |
67 |
if (length(province) == 0L) { |
2x |
68 |
stop("Incorrect province code provided.") |
1x |
69 |
} |
2x |
70 |
} |
3x |
71 |
province <- toupper(province) |
2x |
72 |
df <- df[df$Province == province,] |
2x |
73 |
if (nrow(df) == 0) { |
2x |
74 |
stop("No data found for that province. Did you spell it correctly?") |
1x |
75 |
} |
2x |
76 |
} |
13x |
77 | ||
78 |
# If `baseline` is not NULL, filter by available data |
|
79 |
if (!is.null(baseline)) { |
11x |
80 |
index = NULL |
2x |
81 |
# Identify all stations outside of our baseline |
|
82 |
for (i in 1:nrow(df)) { |
2x |
83 |
if (is.na(df[i,6]) | df[i,6] > baseline[1]) index <- c(index, i) |
8741x |
84 |
else if (is.na(df[i,7]) | df[i,7] < baseline[length(baseline)]) index <- c(index, i) |
8741x |
85 |
} |
2x |
86 |
# Delete those stations |
|
87 |
if (!is.null(index)) df <- df[-index,] |
2x |
88 |
} |
11x |
89 | ||
90 |
# If `target` is not NULL, filter by distance to target |
|
91 |
if (!is.null(target)) { |
11x |
92 |
if (length(target) == 1L) { |
4x |
93 |
p1 <- c(station_data$LongitudeDD[grep(paste0("\\b", as.character(target), "\\b"), station_data$StationID)], station_data$LatitudeDD[grep(paste0("\\b", as.character(target), "\\b"), station_data$StationID)]) |
3x |
94 |
} |
4x |
95 |
else if (length(target) == 2L) { |
4x |
96 |
p1 <- c(target[2], target[1]) |
1x |
97 |
} else stop("error: check target format") |
4x |
98 |
df$Dist <- rep(NA, nrow(df)) |
4x |
99 |
for (j in 1:nrow(df)) { |
4x |
100 |
df$Dist[j] <- (distGeo(p1, c(df$LongitudeDD[j], df$LatitudeDD[j]))/1000) |
34940x |
101 |
} |
4x |
102 |
df <- df[(!is.na(df$Dist) & (df$Dist >= min(dist)) & (df$Dist <= max(dist))),] |
4x |
103 |
if (sort == TRUE) df <- df[order(df$Dist),] |
4x |
104 |
} |
11x |
105 |
class(df) <- c("hcd_station_list", class(df)) |
11x |
106 |
df |
11x |
107 |
} |
1 |
##' @title Map canadaHCD stations on an interactive map |
|
2 |
##' |
|
3 |
##' @description Show the stations of interest on an interactive map using Leaflet. Zoom levels are guessed based on an RStudio plot window. |
|
4 |
##' |
|
5 |
##' @param station character; one or more station ID numbers to show on the map. |
|
6 |
##' @param zoom numeric; the level to zoom the map to. |
|
7 |
##' |
|
8 |
##' @importFrom leaflet "%>%" leaflet addTiles setView addMarkers |
|
9 |
##' @importFrom tibble tibble |
|
10 |
##' |
|
11 |
##' @export |
|
12 |
##' |
|
13 |
##' @author Conor I. Anderson |
|
14 |
##' |
|
15 |
##' @examples |
|
16 |
##' # Make a map of all the stations named "Yellowknife". |
|
17 |
##' map_stations(find_station_adv(name = "Yellowknife")) |
|
18 |
##' # Make a map of all stations within 50km of Toronto Station 5051. |
|
19 |
##' map_stations(find_station_adv(target = 5051, dist = 0:50)) |
|
20 | ||
21 |
map_stations <- function(station, zoom) { |
|
22 | ||
23 |
if (inherits(station, "data.frame")) { |
4x |
24 |
station <- station$StationID |
3x |
25 |
} |
4x |
26 | ||
27 |
station <- as.character(station) |
4x |
28 | ||
29 |
poi <- NULL |
4x |
30 |
for (i in station) { |
4x |
31 |
row <- which(as.character(station_data$StationID) == i) |
254x |
32 |
if (!is.na(station_data$LatitudeDD[row]) && !is.na(station_data$LongitudeDD)[row]) { |
254x |
33 |
poi <- c(poi, row) |
254x |
34 |
} |
254x |
35 |
} |
4x |
36 |
poi <- station_data[poi,] |
4x |
37 | ||
38 |
if (nrow(poi) == 1L){ |
4x |
39 |
lats <- poi$LatitudeDD |
1x |
40 |
lons <- poi$LongitudeDD |
1x |
41 |
latrng <- 0 |
1x |
42 |
} else { |
4x |
43 |
hilat <- ceiling(max(poi$LatitudeDD)) |
3x |
44 |
lolat <- floor(min(poi$LatitudeDD)) |
3x |
45 |
hilon <- ceiling(max(poi$LongitudeDD)) |
3x |
46 |
lolon <- floor(min(poi$LongitudeDD)) |
3x |
47 |
lats <- (hilat + lolat)/2 |
3x |
48 |
lons <- (hilon + lolon)/2 |
3x |
49 |
latrng <- (hilat - lolat) |
3x |
50 |
} |
4x |
51 | ||
52 |
if (missing(zoom)) { |
4x |
53 |
zoomlevels <- tibble(range = c(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16), zoom = c(10,8,7,7,7,6,6,6,5,5,5,5,5,5,5,5,4)) |
4x |
54 |
zoom <- zoomlevels$zoom[which(zoomlevels$range == min(as.integer(latrng), 16L))] |
4x |
55 |
} |
4x |
56 | ||
57 |
leaflet() %>% addTiles() %>% |
4x |
58 |
setView(lng = lons, lat = lats, zoom = zoom) %>% |
4x |
59 |
addMarkers(poi, lng = poi$LongitudeDD, lat = poi$LatitudeDD, |
4x |
60 |
popup = paste0(poi$StationID, " - ", poi$Name)) |
4x |
61 |
} |
1 |
##' @title Perform a quick audit for missing values at a given station |
|
2 |
##' |
|
3 |
##' @description Returns a tibble with the proportion of missing values at a station for a given year and variables. |
|
4 |
##' |
|
5 |
##' @param datain A \code{tbl_df} containing the data to process |
|
6 |
##' @param variables numeric or character; By default, all variables will be included. Pass a numeric vector to specify columns to include, or pass a character vector to try to match column names. |
|
7 |
##' @param by character; whether values should be reported annually (\code{by = year}), or monthly (\code{by = month}). |
|
8 |
##' @param reverse Boolean; if true, will show proportion present instead of proportion missing. |
|
9 |
##' |
|
10 |
##' @importFrom tibble tibble add_column |
|
11 |
##' @importFrom zoo as.yearmon |
|
12 |
##' |
|
13 |
##' @export |
|
14 |
##' |
|
15 |
##' @author Conor I. Anderson |
|
16 |
##' |
|
17 |
##' @examples |
|
18 |
##' |
|
19 |
##' city <- hcd_daily(5051, 1981:2010) |
|
20 |
##' quick_audit(city, "MaxTemp", by = "year", reverse = TRUE) |
|
21 |
##' quick_audit(city, "MaxTemp", by = "month") |
|
22 |
##' |
|
23 | ||
24 |
quick_audit <- function(datain, variables, by = "year", reverse = FALSE) { |
|
25 | ||
26 |
dat <- datain |
6x |
27 | ||
28 |
if (missing(variables)) { |
6x |
29 |
variables <- 2:ncol(dat) |
2x |
30 |
} else { |
|
31 |
if (inherits(variables, "character")) { |
4x |
32 |
for (var in seq_along(variables)) { |
4x |
33 |
variables[var] <- as.numeric(grep(variables[var], names(dat), ignore.case = TRUE)) |
8x |
34 |
} |
|
35 |
} |
|
36 |
variables <- as.numeric(variables) |
4x |
37 |
} |
|
38 | ||
39 |
if (reverse) { |
6x |
40 |
ctl = 1 |
2x |
41 |
} else { |
|
42 |
ctl = 0 |
4x |
43 |
} |
|
44 | ||
45 |
years <- min(format(dat$Date, format = "%Y")):max(format(dat$Date, format = "%Y")) |
6x |
46 | ||
47 |
if (by == "month") { |
6x |
48 |
months <- min(format(dat$Date, format = "%m")):max(format(dat$Date, format = "%m")) |
2x |
49 |
yearmons <- apply(expand.grid(sprintf("%02d", months), years), 1, function(x) paste(x[2], x[1], sep = "-")) |
2x |
50 | ||
51 |
out <- tibble(`Year-month` = yearmons) |
2x |
52 | ||
53 |
for (var in variables) { |
2x |
54 |
integrity <- NULL |
4x |
55 |
for (yearmon in yearmons) { |
4x |
56 |
colname <- colnames(dat)[var] |
432x |
57 |
yearmonly <- dat[format(dat$Date, format = "%Y-%m") == yearmon,] |
432x |
58 |
obs <- nrow(yearmonly) |
432x |
59 |
missing <- sum(is.na(yearmonly[[var]])) |
432x |
60 |
integrity <- c(integrity, abs(ctl-(missing/obs))) |
432x |
61 |
} |
|
62 |
out <- add_column(out, newcol = integrity) |
4x |
63 |
names(out)[which(names(out) == "newcol")] <- colname |
4x |
64 |
} |
|
65 |
out$`Year-month` <- as.yearmon(out$`Year-month`) |
2x |
66 |
out <- add_column(out, Year = format(out$`Year-month`, format = "%Y"), Month = format(out$`Year-month`, format = "%m"), .before = 1) |
2x |
67 |
} else { |
|
68 |
if (by == "year") { |
4x |
69 |
out <- tibble(Year = years) |
3x |
70 | ||
71 |
for (var in variables) { |
3x |
72 |
integrity <- NULL |
16x |
73 |
for (year in years) { |
16x |
74 |
colname <- colnames(dat)[var] |
60x |
75 |
yearly <- dat[format(dat$Date, format = "%Y") == year,] |
60x |
76 |
obs <- nrow(yearly) |
60x |
77 |
missing <- sum(is.na(yearly[[var]])) |
60x |
78 |
integrity <- c(integrity, abs(ctl-(missing/obs))) |
60x |
79 |
} |
|
80 |
out <- add_column(out, newcol = integrity) |
16x |
81 |
names(out)[which(names(out) == "newcol")] <- colname |
16x |
82 |
} |
|
83 |
} else { |
|
84 |
stop("You must choose by \"month\", or by \"year\".") |
1x |
85 |
} |
|
86 |
} |
|
87 | ||
88 |
return(out) |
5x |
89 |
} |
1 |
##' @title Find a Historical Climate Data station (GUI) |
|
2 |
##' |
|
3 |
##' @description A function to launch a shiny web app to search for Historical Climate Data stations. |
|
4 |
##' |
|
5 |
##' @param stations optional \code{tbl_df} of stations such as a search result from \code{find_station()}. |
|
6 |
##' |
|
7 |
##' @return none |
|
8 |
##' |
|
9 |
##' @author Conor I. Anderson |
|
10 |
##' |
|
11 |
##' @importFrom shiny br column em fluidPage fluidRow runApp selectInput shinyApp shinyUI titlePanel |
|
12 |
##' @importFrom DT datatable dataTableOutput renderDataTable |
|
13 |
##' |
|
14 |
##' @export |
|
15 |
##' |
|
16 |
##' @examples |
|
17 |
##' \dontrun{find_station_GUI()} |
|
18 | ||
19 |
find_station_GUI <- function(stations = NULL) { |
|
20 | ||
21 |
if (is.null(stations)) { |
! |
22 |
data <- station_data |
! |
23 |
} else { |
! |
24 |
data <- stations |
! |
25 |
} |
! |
26 | ||
27 |
app <- shinyApp( |
! |
28 |
shinyUI( |
! |
29 |
fluidPage( |
! |
30 |
titlePanel("canadaHCD data inventory"), |
! |
31 | ||
32 |
# Create a new Row in the UI for selectInputs |
|
33 |
fluidRow( |
! |
34 |
column(4, |
! |
35 |
selectInput("prov", |
! |
36 |
"Province:", |
! |
37 |
c("All", |
! |
38 |
unique(as.character(data$Province)))) |
! |
39 |
) |
! |
40 |
), |
! |
41 |
# Create a new row for the table. |
|
42 |
fluidRow( |
! |
43 |
dataTableOutput("table")), |
! |
44 |
fluidRow(br(em(comment(data)))) |
! |
45 |
) |
! |
46 |
), |
! |
47 |
server = function(input, output) { |
! |
48 |
output$table <- renderDataTable(datatable({ |
! |
49 |
if (input$prov != "All") { |
! |
50 |
data <- data[data$Province == input$prov,] |
! |
51 |
} |
! |
52 |
data |
! |
53 |
})) |
! |
54 |
}) |
! |
55 |
runApp(app) |
! |
56 |
} |