An important aim of the dataset package is to create native R objects where bibliographic metadata cannot be detached, thus ensuring Findability, Accessibility, Interoperability and Reusability in the long run. We provide an interface and methods to add metadata required by open data repositories according to the more general Dublin Core library metadata standard, or the more specific DataCite metadata standard.
print(get_bibentry(iris_dataset_2), "Bibtex")
#> @Misc{,
#> title = {Iris Dataset},
#> author = {Edgar Anderson},
#> identifier = {https://doi.org/10.5281/zenodo.10396807},
#> publisher = {American Iris Society},
#> year = {1935},
#> language = {en},
#> relation = {:unas},
#> format = {:unas},
#> rights = {:tba},
#> description = {The famous (Fisher's or Anderson's) iris data set.},
#> type = {DCMITYPE:Dataset},
#> datasource = {https://doi.org/10.1111/j.1469-1809.1936.tb02137.x},
#> coverage = {:unas},
#> subject = {Irises (plants)},
#> }
The creator property corresponds to dct:creator in Dublin Core and Creator in DataCite, the two most important metadata definitions for publishing datasets in repositories. They refer to the name of the entity that holds, archives, publishes, prints, distributes, releases, issues, or produces the dataset. This property will be used to formulate the citation.
iris_dataset_2 <- iris_dataset
# Add a new creator, overwriting the existing authorship information:
creator(iris_dataset_2, overwrite=TRUE) <- person("Jane", "Doe", role = "aut")
# Add a new creator, without overwriting existing authorship information:
creator(iris_dataset_2, overwrite=FALSE) <- person("John", "Doe", role = "ctb")
# The two new creators (one author, one contributor):
creator(iris_dataset_2)
#> [1] "Jane Doe [aut]" "John Doe [ctb]"
The publication year is usually one of the most important descriptive metadata elements in repositories and libraries:
The default value is :unas, which marks unassigned values:
Some metadata functions prevent accidental overwriting, except when the current value is one of the defaults, :unas (unassigned) or :tba (to be announced).
rights(iris_dataset_2) <- "CC0"
#> The dataset has already a rights field: :tba
rights(iris_dataset_2)
#> [1] ":tba"
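Overwriting the rights statement requires explicit approval with overwrite = TRUE, in the same way as for creator():
rights(iris_dataset_2, overwrite = TRUE) <- "CC0"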
DataCite currently allows the use of subproperties. For example, the Creative Commons Attribution 4.0 International license would be described as:
list ( schemeURI="https://spdx.org/licenses/",
rightsIdentifierScheme="SPDX",
rightsIdentifier="CC-BY-4.0",
rightsURI="https://creativecommons.org/licenses/by/4.0/")
#> $schemeURI
#> [1] "https://spdx.org/licenses/"
#>
#> $rightsIdentifierScheme
#> [1] "SPDX"
#>
#> $rightsIdentifier
#> [1] "CC-BY-4.0"
#>
#> $rightsURI
#> [1] "https://creativecommons.org/licenses/by/4.0/"
The use of subproperties will be implemented later.
The description is currently implemented as a character string.
However, DataCite 4.6 states that if Description is used, descriptionType is mandatory. This will be implemented later.
<descriptions>
<description xml:lang="en" descriptionType="Abstract">Example abstract</description>
</descriptions>
subject(iris_dataset)
#> $term
#> [1] "Irises (plants)"
#>
#> $subjectScheme
#> [1] "LCCH"
#>
#> $schemeURI
#> [1] "http://id.loc.gov/authorities/subjects"
#>
#> $valueURI
#> [1] "https://id.loc.gov/authorities/subjects/sh85068079"
#>
#> $classificationCode
#> NULL
#>
#> $prefix
#> [1] "lcch:"
#>
#> attr(,"class")
#> [1] "subject" "list"
<subjects>
<subject xml:lang="en" subjectScheme="Library of Congress Subject Headings (LCSH)" schemeURI="https://id.loc.gov/authorities/subjects.html" valueURI="https://id.loc.gov/authorities/subjects/sh2009009655.html">Climate change mitigation</subject>
<subject xml:lang="en" subjectScheme="ANZSRC Fields of Research" schemeURI="https://www.abs.gov.au/statistics/classifications/australian-and-new-zealand-standard-research-classification-anzsrc" classificationCode="370201">Climate change processes</subject>
</subjects>
subject_create(
term = "data sets",
subjectScheme = "Library of Congress Subject Headings (LCSH)",
schemeURI = "https://id.loc.gov/authorities/subjects.html",
valueURI = "http://id.loc.gov/authorities/subjects/sh2018002256"
)
#> $term
#> [1] "data sets"
#>
#> $subjectScheme
#> [1] "Library of Congress Subject Headings (LCSH)"
#>
#> $schemeURI
#> [1] "https://id.loc.gov/authorities/subjects.html"
#>
#> $valueURI
#> [1] "http://id.loc.gov/authorities/subjects/sh2018002256"
#>
#> $classificationCode
#> NULL
#>
#> $prefix
#> [1] ""
#>
#> attr(,"class")
#> [1] "subject" "list"
Get the metadata according to the DataCite definition:
print(as_datacite(iris_dataset), "Bibtex")
#> @Misc{,
#> title = {Iris Dataset},
#> author = {Edgar Anderson},
#> identifier = {https://doi.org/10.5281/zenodo.10396807},
#> publisher = {American Iris Society},
#> year = {1935},
#> date = {:tba},
#> language = {en},
#> alternateidentifier = {:unas},
#> relatedidentifier = {:unas},
#> format = {:unas},
#> version = {:unas},
#> rights = {:tba},
#> description = {The famous (Fisher's or Anderson's) iris data set.},
#> geolocation = {:unas},
#> fundingreference = {:unas},
#> }
And according to DCTERMS (Dublin Core):
print(as_dublincore(iris_dataset), "Bibtex")
#> @Misc{,
#> title = {Iris Dataset},
#> author = {Edgar Anderson},
#> identifier = {https://doi.org/10.5281/zenodo.10396807},
#> publisher = {American Iris Society},
#> year = {1935},
#> language = {en},
#> relation = {:unas},
#> format = {:unas},
#> rights = {:tba},
#> description = {The famous (Fisher's or Anderson's) iris data set.},
#> type = {DCMITYPE:Dataset},
#> datasource = {https://doi.org/10.1111/j.1469-1809.1936.tb02137.x},
#> coverage = {:unas},
#> }