To keep things simple, the R code examples use the following packages:
library(httr2)
library(tidyverse)
library(jsonlite)
library(xml2)

Get data for a specific series
Make an HTTP GET request for data. The url must specify the dataflow via /agency,id,version/ and the dataKey via dimension values separated by '.'
Include the format=csvfilewithlabels query parameter to return the CSV format with codes and names.
# Url for dataKey 1.10001.10.50.M of dataflow ABS,CPI,2.0.0, asking for the
# CSV-with-labels format via the format query parameter.
data_url <- "https://data.api.abs.gov.au/rest/data/ABS,CPI,2.0.0/1.10001.10.50.M?format=csvfilewithlabels"
# Build the request from the url and perform it.
data_resp <- req_perform(request(data_url))

Convert the http response into a tibble:
# Parse the raw response body as CSV. Every column is read as character
# except OBS_VALUE, which is read as a double.
data_table <- read_csv(
  resp_body_raw(data_resp),
  col_types = cols(.default = "c", OBS_VALUE = "d")
)

This CSV format has columns for dimension ids preceding columns for dimension name values.
Filter to view just the name columns:
# Copy TIME_PERIOD and OBS_VALUE into columns named `Time Period` and
# `Observation Value`.
data_table$`Time Period` <- data_table$TIME_PERIOD
data_table$`Observation Value` <- data_table$OBS_VALUE
# Show every second column, starting from column 6.
data_table[, seq(from = 6, to = ncol(data_table), by = 2)]
Get a partial data response
If all data for a dataflow is desired /all can be specified for the dataKey.
The x-range: values=start-end header specifies the range of data observations to be returned.
# "all" is used as the dataKey, so the whole CPI dataflow is addressed.
data_url <- "https://data.api.abs.gov.au/rest/data/ABS,CPI,2.0.0/all?format=csvfilewithlabels"
# The x-range header asks for observations 0-99 only.
data_resp <- req_perform(
  req_headers(request(data_url), "x-range" = "values=0-99")
)
Notes:
- This example uses the format query parameter to return CSV formatted data.
Alternative data formats can also be returned using the accept header.
Get all available dataflows in the API
First perform a http GET request for all dataflows via "https://data.api.abs.gov.au/rest/dataflow?detail=allstubs":
df_url <- "https://data.api.abs.gov.au/rest/dataflow?detail=allstubs"
# Request the dataflow list with JSON accept and gzip accept-encoding headers.
df_resp <- req_perform(
  req_headers(
    request(df_url),
    "Accept" = "application/json",
    "Accept-Encoding" = "gzip"
  )
)

Convert the http response into a tibble:
# Parse the JSON body, take the data -> dataflows element and turn it into a
# tibble with one row per dataflow.
df_table <- df_resp |>
  resp_body_string() |>
  fromJSON() |>
  pluck("data", "dataflows") |>
  as_tibble() |>
  mutate(
    # Url of the structure call (with all references) for each dataflow.
    structure_link = paste0(
      "https://data.api.abs.gov.au/rest/dataflow/",
      agencyID, "/", id, "/", version,
      "?references=all"
    ),
    # Url of the data call (all data, CSV with labels) for each dataflow.
    data_link = paste0(
      "https://data.api.abs.gov.au/rest/data/",
      agencyID, ",", id, ",", version,
      "/all?format=csvfilewithlabels"
    )
  ) |>
  select(name, agencyID, id, version, structure_link, data_link)
Notes:
- The url does not specify /agency/id/version so defaults of /all/all/latest have been used.
"https://data.api.abs.gov.au/rest/dataflow/all/all/all?detail=allstubs" could be used to also see past versions of dataflows.
- The detail=allstubs query parameter is used to improve performance, meaning information such as linked datastructures has been omitted.
- The "application/json" accept header has been used to return the JSON structure format instead of the XML structure format.
- structure_link and data_link give, for each dataflow found, the related calls to get all of its structure information and all of its data.
Get structure information for a specific dataflow
Make a http GET request to get the structure information, this example uses the XML response:
structure_url <- "https://data.api.abs.gov.au/rest/dataflow/ABS/ABORIGINAL_ID_POP_PROJ/1.0?references=all"
# Request the structure with XML accept and gzip accept-encoding headers.
structure_resp <- req_perform(
  req_headers(
    request(structure_url),
    "Accept" = "application/xml",
    "Accept-Encoding" = "gzip"
  )
)

Parse the XML:
# Read the response body as text and parse it into an XML document.
structure_tree <- read_xml(resp_body_string(structure_resp))

Extract overview information from the included Dataflow structure:
# First Dataflow element anywhere below the document root.
dataflow <- xml_find_first(structure_tree, ".//structure:Dataflow")
# Text of its direct Name and Description children.
dataflow_name <- xml_text(xml_find_first(dataflow, "./common:Name"))
dataflow_description <- xml_text(
  xml_find_first(dataflow, "./common:Description")
)
# Identifier in the form agencyID/id/version.
dataflow_id <- paste(
  xml_attr(dataflow, "agencyID"),
  xml_attr(dataflow, "id"),
  xml_attr(dataflow, "version"),
  sep = "/"
)

Extract concept information used to define dimension meaning from the included ConceptScheme structures:
# All Concept elements inside the ConceptScheme structures.
concept <- structure_tree |>
  xml_find_all(".//structure:Concepts/structure:ConceptScheme/structure:Concept")
# Identifier per concept in the form agencyID/schemeID/schemeVersion/conceptID,
# where the first three parts come from the concept's parent ConceptScheme.
# Each node is processed on its own so it is paired with its own parent.
# vapply() always returns a character vector, including when no concepts are
# found (sapply() would return an empty list in that case).
concept_id <- vapply(
  concept,
  function(node) {
    scheme <- xml_parent(node)
    paste0(
      xml_attr(scheme, "agencyID"), "/",
      xml_attr(scheme, "id"), "/",
      xml_attr(scheme, "version"), "/",
      xml_attr(node, "id")
    )
  },
  character(1)
)
# Name text of each concept, in the same order as concept_id.
concept_name <- concept |>
  xml_find_first("./common:Name") |>
  xml_text()

Extract codelist information defining values a dimension can take from the included Codelist structures:
# All Codelist elements inside the Codelists structure.
codelist <- structure_tree |>
  xml_find_all(".//structure:Codelists/structure:Codelist")
# Identifier per codelist in the form agencyID/id/version.
codelist_id <- paste0(
  xml_attr(codelist, "agencyID"), "/",
  xml_attr(codelist, "id"), "/",
  xml_attr(codelist, "version")
)
# One named character vector per codelist: names are the code ids, values are
# the code names. lapply() is used so the result is always a list with one
# element per codelist; sapply() would collapse it into a matrix or a plain
# vector whenever every codelist holds the same number of codes.
codelist_val <- lapply(codelist, function(x) {
  codes <- xml_find_all(x, "./structure:Code")
  setNames(
    xml_text(xml_find_first(codes, "./common:Name")),
    xml_attr(codes, "id")
  )
})

Extract dataset dimension information from the DataStructure and get their related information from the concept and codelist sections:
# All Dimension and TimeDimension elements of the DimensionList.
dimension <- structure_tree |>
  xml_find_all(".//structure:DimensionList/structure:Dimension | .//structure:DimensionList/structure:TimeDimension")
# Reference to the concept each dimension is based on, rebuilt in the same
# agencyID/schemeID/schemeVersion/conceptID form used for concept_id.
dimension_concept <- dimension |>
  xml_find_first("./structure:ConceptIdentity/Ref")
dimension_concept_id <- paste0(
  xml_attr(dimension_concept, "agencyID"), "/",
  xml_attr(dimension_concept, "maintainableParentID"), "/",
  xml_attr(dimension_concept, "maintainableParentVersion"), "/",
  xml_attr(dimension_concept, "id")
)
# Reference to the codelist enumerating each dimension's values, rebuilt in
# the same agencyID/id/version form used for codelist_id.
dimension_codelist <- dimension |>
  xml_find_first("./structure:LocalRepresentation/structure:Enumeration/Ref")
dimension_codelist_id <- paste0(
  xml_attr(dimension_codelist, "agencyID"), "/",
  xml_attr(dimension_codelist, "id"), "/",
  xml_attr(dimension_codelist, "version")
)
dimension_id <- xml_attr(dimension, "id")
# Look up each dimension's name and codes by matching identifiers.
dimension_name <- concept_name[match(dimension_concept_id, concept_id)]
# Fixed: the codes live in codelist_val (there is no object named code_val).
dimension_codes <- codelist_val[match(dimension_codelist_id, codelist_id)]

Get the data count from the Content Constraint which also outlines which codes have data loaded for each dimension:
# Title text of the annotation with id "obs_count", converted to a number.
data_count <- as.numeric(
  xml_text(
    xml_find_first(
      structure_tree,
      ".//common:Annotation[@id='obs_count']/common:AnnotationTitle"
    )
  )
)

Return the key information in a tibble:
# Overview of the dataflow: identifier, name, description and data count.
dataflow_table <- tibble(
  id = dataflow_id,
  name = dataflow_name,
  description = dataflow_description,
  data_count = data_count
)
# One row per dimension: its id, its name and its codes.
dimension_table <- tibble(
  id = dimension_id,
  name = dimension_name,
  codes = dimension_codes
)

Notes:
- The Time dimension does not use a codelist and instead has a TextFormat type specified in its LocalRepresentation.