CEDA CF Checker#
The CF Checker software tool is provided by CEDA (Centre for Environmental Data Analysis) to verify that netCDF files comply with the CF (Climate and Forecast) conventions.
The CF conventions have been adopted by a number of projects and groups as a primary standard. The conventions define metadata that provide a definitive description of what the data in each variable represents, and the spatial and temporal properties of the data.
1. Installation and Preparation#
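We recommend installing the CF Checker with pip. The following cell prepends the directory of the running Python interpreter to the PATH environment variable, so that command-line scripts installed next to it can be found.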
import sys
import os
# Prepend the directory that contains the running Python interpreter to PATH,
# so that executables located next to it are found when commands are started
# from this process.
interpreter_dir = os.path.dirname(sys.executable)
current_path = os.environ.get("PATH", "")
# os.pathsep replaces the hard-coded ":" (which is wrong on Windows). When PATH
# is unset or empty we must not append a dangling separator: on POSIX an empty
# PATH entry means "current directory".
newpath = (
    f"{interpreter_dir}{os.pathsep}{current_path}" if current_path else interpreter_dir
)
os.environ["PATH"] = newpath
2. Settings#
Specify the file or dataset to be tested in `testfile`.
The CF Checker uses the standard name tables as input. They will be downloaded to the working directory `working_dir` if you set the switch `download_tables=True`. Three tables are required, each of which is versioned with its own version number. You can specify the versions directly in the `table_dict` dictionary or set the switch `update_versions=True` so that the most recent versions are taken from the homepage.
# --- User settings -----------------------------------------------------------

# File(s) to check; the trailing "*" selects every file in the directory.
testfile = "/work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/*"

# Base directory for everything this notebook writes.
working_dir = "./"

# Switches: fetch the table files / look up the latest table versions online.
download_tables = False
update_versions = False

# One entry per required table: the pinned version number and the web page
# that advertises the current version of that table.
table_dict = {
    table: {"version": version, "page": page}
    for table, version, page in (
        (
            "cf-standard-name-table",
            76,
            "http://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html",
        ),
        (
            "area-type-table",
            9,
            "http://cfconventions.org/Data/area-type-table/current/build/area-type-table.html",
        ),
        (
            "standardized-region-list",
            4,
            "http://cfconventions.org/Data/standardized-region-list/standardized-region-list.current.html",
        ),
    )
}
3. Initialization#
If `update_versions` is `True`, we download each table's homepage with the `requests` package and parse it with `BeautifulSoup` to find the most recent version number. We then create download URLs with the fitting version numbers for the tables and, if `download_tables` is `True`, download them to the working directory.
import requests
from bs4 import BeautifulSoup
def get_recent_versions(page):
    """Return the version number advertised on a CF table web page.

    The page is downloaded and the first integer that directly follows the
    word "Version" in the page text is returned.

    Parameters
    ----------
    page : str
        URL of the HTML page to inspect.

    Returns
    -------
    int
        The version number found on the page.

    Raises
    ------
    requests.RequestException
        If the page cannot be fetched (connection problem, timeout, or an
        HTTP error status).
    ValueError
        If the page text contains no "Version <number>" pattern.
    """
    import re  # local import keeps this cell self-contained

    # Without a timeout a stalled server would block the notebook forever;
    # raise_for_status() stops us from parsing an HTTP error page.
    response = requests.get(page, timeout=30)
    response.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    parsed_html = BeautifulSoup(response.content, "html.parser")

    # Search the visible text instead of chaining str.split() calls, so that
    # markup or punctuation changes around the number fail loudly and clearly.
    match = re.search(r"Version\s*(\d+)", parsed_html.get_text())
    if match is None:
        raise ValueError(f"No version number found on {page}")
    return int(match.group(1))
# When requested, overwrite each pinned version number with the one currently
# advertised on the table's web page.
if update_versions:
    for table_name in table_dict:
        entry = table_dict[table_name]
        entry["version"] = get_recent_versions(entry["page"])
table_dict
{'cf-standard-name-table': {'version': 76,
'page': 'http://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html'},
'area-type-table': {'version': 9,
'page': 'http://cfconventions.org/Data/area-type-table/current/build/area-type-table.html'},
'standardized-region-list': {'version': 4,
'page': 'http://cfconventions.org/Data/standardized-region-list/standardized-region-list.current.html'}}
# Download-URL template for every table; "{0}" is filled with the table's
# version number.
url_templates = {
    "cf-standard-name-table": "http://cfconventions.org/Data/cf-standard-names/{0}/src/cf-standard-name-table.xml",
    "area-type-table": "http://cfconventions.org/Data/area-type-table/{0}/src/area-type-table.xml",
    "standardized-region-list": "http://cfconventions.org/Data/standardized-region-list/standardized-region-list.{0}.xml",
}
for table_name, template in url_templates.items():
    entry = table_dict[table_name]
    entry["url"] = template.format(entry["version"])
# Record where each table is stored locally and, if requested, download it.
for tablename, table in table_dict.items():
    # Files live in the "CF" sub-directory of working_dir and carry the table
    # version in their name.
    table["local_path"] = "{0}/CF/{1}-{2}.xml".format(
        working_dir, tablename, table["version"]
    )
    if download_tables:
        # Fail early on HTTP errors instead of saving an error page as XML;
        # the timeout prevents a stalled server from blocking the notebook.
        response = requests.get(table["url"], timeout=60)
        response.raise_for_status()
        # The target directory is not guaranteed to exist yet; without this
        # open() raises FileNotFoundError on a fresh working directory.
        os.makedirs(os.path.dirname(table["local_path"]), exist_ok=True)
        with open(table["local_path"], "wb") as file:
            file.write(response.content)
table_dict
{'cf-standard-name-table': {'version': 76,
'page': 'http://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html',
'url': 'http://cfconventions.org/Data/cf-standard-names/76/src/cf-standard-name-table.xml',
'local_path': './/CF/cf-standard-name-table-76.xml'},
'area-type-table': {'version': 9,
'page': 'http://cfconventions.org/Data/area-type-table/current/build/area-type-table.html',
'url': 'http://cfconventions.org/Data/area-type-table/9/src/area-type-table.xml',
'local_path': './/CF/area-type-table-9.xml'},
'standardized-region-list': {'version': 4,
'page': 'http://cfconventions.org/Data/standardized-region-list/standardized-region-list.current.html',
'url': 'http://cfconventions.org/Data/standardized-region-list/standardized-region-list.4.xml',
'local_path': './/CF/standardized-region-list-4.xml'}}
4. Application#
We run the CF checker (`cfchecks`) as a subprocess and capture all of its output.
import glob
import os
import shlex
import subprocess

# Build the argument list ourselves instead of handing a formatted string to a
# shell: values containing spaces or shell metacharacters can then neither
# break nor alter the command.
# To stay compatible with the former shell behaviour, testfile is split into
# words, "~" and environment variables are expanded, and wildcards are
# resolved here. A word that matches nothing is passed through unchanged.
input_files = []
for word in shlex.split(testfile):
    pattern = os.path.expandvars(os.path.expanduser(word))
    input_files.extend(sorted(glob.glob(pattern)) or [pattern])

# The three table URLs are passed with -a, -r and -s, followed by the files.
# stdout and stderr are captured as bytes in a.stdout / a.stderr.
a = subprocess.run(
    [
        "cfchecks",
        "-a",
        table_dict["area-type-table"]["url"],
        "-r",
        table_dict["standardized-region-list"]["url"],
        "-s",
        table_dict["cf-standard-name-table"]["url"],
        *input_files,
    ],
    capture_output=True,
)
5. Results#
We write the stdout into a file in the `working_dir`. Additionally, we search the stdout for three patterns to create a summary of the CF checker results.
def _values_after_colon(lines, marker):
    """Return the text after the first ":" of every line containing marker.

    Splitting only once keeps values that contain ":" themselves (for example
    URLs or unusual file names) intact. The returned strings are not stripped.
    """
    return [line.split(":", 1)[1] for line in lines if marker in line]


# Decode the captured output once and reuse the lines for all three searches.
stdout_lines = a.stdout.decode("utf-8").split("\n")

# One list entry per matching line, in order of appearance in the output.
files = _values_after_colon(stdout_lines, "CHECKING NetCDF FILE")
warnings = _values_after_colon(stdout_lines, "WARNINGS given")
errors = _values_after_colon(stdout_lines, "ERRORS detected")
import os
import shutil

# Start from an empty results directory inside working_dir. rmtree with
# ignore_errors=True does not complain when the directory is missing, and the
# directory is now created in the same place the log file is written to.
results_dir = os.path.join(working_dir, "cf-checker-results")
shutil.rmtree(results_dir, ignore_errors=True)
os.makedirs(results_dir, exist_ok=True)

if not files:
    raise RuntimeError(
        "No 'CHECKING NetCDF FILE' line found in the checker output; "
        "inspect a.stderr for the reason."
    )

# The complete checker output is stored in one log file named after the first
# checked file; strip() removes the blank left over from splitting at ":".
log_name = os.path.basename(files[0].strip())
with open(os.path.join(results_dir, log_name), "w", encoding="utf-8") as file:
    file.write(a.stdout.decode("utf-8"))
# Summary per checked file: the warning and error counts found at the same
# position in the parsed checker output.
result_dict = {
    path: {"warnings": warnings[pos], "errors": errors[pos]}
    for pos, path in enumerate(files)
}
print(result_dict)
{' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_201501-201912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_202001-202412.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_202501-202912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_203001-203412.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_203501-203912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_204001-204412.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_204501-204912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_205001-205412.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_205501-205912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_206001-206412.nc': {'warnings': ' 1', 'errors': ' 0'}, ' 
/work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_206501-206912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_207001-207412.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_207501-207912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_208001-208412.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_208501-208912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_209001-209412.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_209501-209912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_210001-210012.nc': {'warnings': ' 1', 'errors': ' 0'}}