CEDA CF Checker#

The CF Checker software tool is provided by CEDA (Centre for Environmental Data Analysis) to verify that netCDF files comply with the CF conventions.

The CF conventions have been adopted by a number of projects and groups as a primary standard. The conventions define metadata that provide a definitive description of what the data in each variable represents, and the spatial and temporal properties of the data.

1. Installation and Preparation#

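We recommend installing the CF Checker with pip (e.g. `pip install cfchecker`). The cell below prepends the directory of the running Python interpreter to PATH so that executables located next to the interpreter, such as `cfchecks`, can be found when we call them later.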

import sys
import os

# Prepend the directory that contains the running Python interpreter to PATH
# so that executables located next to the interpreter are found by the
# subprocesses started later on.
# os.path.dirname and os.pathsep replace the hand-rolled split/join and the
# hard-coded ":" so the entry is built correctly on every platform; a missing
# PATH variable falls back to os.defpath instead of raising KeyError.
newpath = os.pathsep.join(
    [os.path.dirname(sys.executable), os.environ.get("PATH", os.defpath)]
)
os.environ["PATH"] = newpath

2. Settings#

Specify the file or dataset to be tested in testfile.

The CF Checker uses the standard name tables as input. They will be downloaded to the working directory working_dir if you set the switch download_tables=True. Three tables are required, each of which carries its own version number. You can specify the version numbers directly in the table_dict dictionary or set the switch update_versions=True so that the most recent versions are looked up on the web pages listed in table_dict.

# --- Input data -----------------------------------------------------------
# File or glob pattern selecting the netCDF file(s) to be checked.
testfile = "/work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/*"

# --- Working area and switches --------------------------------------------
# Directory used as the base for locally stored tables and result files.
working_dir = "./"
# True: look up the current table versions online instead of using the
# numbers pinned below.
update_versions = False
# True: download the table XML files below working_dir.
download_tables = False

# --- Tables required by the checker ---------------------------------------
# One entry per table: the pinned version number and the web page that
# announces the current version of that table.
table_dict = {
    "cf-standard-name-table": dict(
        version=76,
        page="http://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html",
    ),
    "area-type-table": dict(
        version=9,
        page="http://cfconventions.org/Data/area-type-table/current/build/area-type-table.html",
    ),
    "standardized-region-list": dict(
        version=4,
        page="http://cfconventions.org/Data/standardized-region-list/standardized-region-list.current.html",
    ),
}

3. Initialization#

If update_versions is True, we download each table's web page with the requests package, parse it with BeautifulSoup and read the most recent version number from it. We then build the download URLs with the matching version numbers and, if download_tables is True, download the tables into the CF subdirectory of the working directory.

import requests
from bs4 import BeautifulSoup
def get_recent_versions(page):
    """Return the version number announced on a table's web page.

    The page is downloaded with ``requests`` and parsed with BeautifulSoup;
    the number following the first ``Version`` keyword in the page text is
    returned.

    Args:
        page: URL of the HTML page describing the table.

    Returns:
        The version number as an ``int``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        ValueError: If no ``Version <number>`` pattern is found on the page.
    """
    import re

    # A timeout keeps the call from blocking forever on an unresponsive host.
    response = requests.get(page, timeout=60)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    # Naming the parser explicitly avoids bs4's "no parser was explicitly
    # specified" warning and makes the result independent of which optional
    # parsers happen to be installed.
    parsed_html = BeautifulSoup(response.content, "html.parser")
    # Search the visible text only, and require digits after the keyword so
    # that an unrelated "Version" occurrence cannot break the int conversion.
    match = re.search(r"Version\s*(\d+)", parsed_html.get_text())
    if match is None:
        raise ValueError(f"No version number found on {page}")
    return int(match.group(1))
# When requested, overwrite every pinned version number with the one that
# get_recent_versions() reads from the table's web page.
if update_versions:
    for table in table_dict.values():
        table["version"] = get_recent_versions(table["page"])
table_dict
{'cf-standard-name-table': {'version': 76,
  'page': 'http://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html'},
 'area-type-table': {'version': 9,
  'page': 'http://cfconventions.org/Data/area-type-table/current/build/area-type-table.html'},
 'standardized-region-list': {'version': 4,
  'page': 'http://cfconventions.org/Data/standardized-region-list/standardized-region-list.current.html'}}
# Download URL pattern of each table; "{0}" is replaced by the table's
# version number.
url_templates = {
    "cf-standard-name-table": "http://cfconventions.org/Data/cf-standard-names/{0}/src/cf-standard-name-table.xml",
    "area-type-table": "http://cfconventions.org/Data/area-type-table/{0}/src/area-type-table.xml",
    "standardized-region-list": "http://cfconventions.org/Data/standardized-region-list/standardized-region-list.{0}.xml",
}
# Store the versioned URL next to the other per-table settings.
for table_key, url_template in url_templates.items():
    entry = table_dict[table_key]
    entry["url"] = url_template.format(entry["version"])
# Record for every table where its XML file lives below working_dir and, if
# download_tables is set, fetch the file to that location.
for tablename in table_dict:
    table_dict[tablename]["local_path"] = "{0}/CF/{1}-{2}.xml".format(
        working_dir, tablename, table_dict[tablename]["version"]
    )
    if download_tables:
        # open() does not create missing directories, so make sure the CF/
        # sub-directory exists before writing into it.
        os.makedirs(
            os.path.dirname(table_dict[tablename]["local_path"]), exist_ok=True
        )
        # A timeout keeps the download from blocking forever.
        response = requests.get(table_dict[tablename]["url"], timeout=60)
        # Abort on 4xx/5xx so that an HTML error page is never saved as the
        # table's XML file.
        response.raise_for_status()
        with open(table_dict[tablename]["local_path"], "wb") as file:
            file.write(response.content)
table_dict
{'cf-standard-name-table': {'version': 76,
  'page': 'http://cfconventions.org/Data/cf-standard-names/current/build/cf-standard-name-table.html',
  'url': 'http://cfconventions.org/Data/cf-standard-names/76/src/cf-standard-name-table.xml',
  'local_path': './/CF/cf-standard-name-table-76.xml'},
 'area-type-table': {'version': 9,
  'page': 'http://cfconventions.org/Data/area-type-table/current/build/area-type-table.html',
  'url': 'http://cfconventions.org/Data/area-type-table/9/src/area-type-table.xml',
  'local_path': './/CF/area-type-table-9.xml'},
 'standardized-region-list': {'version': 4,
  'page': 'http://cfconventions.org/Data/standardized-region-list/standardized-region-list.current.html',
  'url': 'http://cfconventions.org/Data/standardized-region-list/standardized-region-list.4.xml',
  'local_path': './/CF/standardized-region-list-4.xml'}}

4. Application#

We run the CF checker with subprocess in a shell and capture all output.

import subprocess

import glob

# Expand the wildcard in Python instead of relying on a shell. Passing the
# command as an argument list means paths containing spaces or shell
# metacharacters reach cfchecks unmodified and nothing is interpreted by a
# shell. The matches are sorted to get a stable order; if nothing matches,
# the pattern itself is passed on unchanged.
checked_paths = sorted(glob.glob(testfile)) or [testfile]

# Run cfchecks with the three table locations and capture stdout/stderr as
# bytes on the returned CompletedProcess.
a = subprocess.run(
    [
        "cfchecks",
        "-a",
        table_dict["area-type-table"]["url"],
        "-r",
        table_dict["standardized-region-list"]["url"],
        "-s",
        table_dict["cf-standard-name-table"]["url"],
        *checked_paths,
    ],
    capture_output=True,
)

5. Results#

We write the stdout into a file in the working_dir. Additionally, we grep for three patterns in the stdout to create a summary of the cfchecker results.

# Decode and split the captured output once instead of once per pattern.
stdout_lines = a.stdout.decode("utf-8").split("\n")


def _text_after_first_colon(lines, marker):
    """Return the text after the first ":" of every line containing *marker*.

    Splitting only at the first colon keeps values that themselves contain a
    colon (for example a file path) intact. Whitespace following the colon is
    kept as it is.
    """
    return [line.split(":", 1)[1] for line in lines if marker in line]


# One entry per matching output line, in output order.
files = _text_after_first_colon(stdout_lines, "CHECKING NetCDF FILE")
warnings = _text_after_first_colon(stdout_lines, "WARNINGS given")
errors = _text_after_first_colon(stdout_lines, "ERRORS detected")
import shutil

# Start from an empty results directory. It is created below working_dir,
# which is also where the report file is written; pure-Python calls replace
# the IPython shell magics so the cell also runs outside a notebook and does
# not print an error when the directory does not exist yet.
results_dir = os.path.join(working_dir, "cf-checker-results")
if os.path.isdir(results_dir):
    shutil.rmtree(results_dir)
os.makedirs(results_dir, exist_ok=True)

# Without any checked file there is no name for the report; surface the
# checker's stderr instead of failing with a bare IndexError.
if not files:
    raise RuntimeError(
        "No 'CHECKING NetCDF FILE' line found in the checker output; stderr was:\n"
        + a.stderr.decode("utf-8", errors="replace")
    )

# The complete stdout is stored under the base name of the first checked file.
with open(
    os.path.join(results_dir, files[0].split("/")[-1]), "w", encoding="utf-8"
) as file:
    file.write(a.stdout.decode("utf-8"))
rm: cannot remove ‘cf-checker-results’: No such file or directory
# Summary keyed by checked file: the warning and error values found at the
# same position in the parsed lists.
result_dict = {
    checked_file: {"warnings": warnings[pos], "errors": errors[pos]}
    for pos, checked_file in enumerate(files)
}
print(result_dict)
{' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_201501-201912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_202001-202412.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_202501-202912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_203001-203412.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_203501-203912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_204001-204412.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_204501-204912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_205001-205412.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_205501-205912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_206001-206412.nc': {'warnings': ' 1', 'errors': ' 0'}, ' 
/work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_206501-206912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_207001-207412.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_207501-207912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_208001-208412.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_208501-208912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_209001-209412.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_209501-209912.nc': {'warnings': ' 1', 'errors': ' 0'}, ' /work/ik1017/CMIP6//data/CMIP6/ScenarioMIP/DKRZ/MPI-ESM1-2-HR/ssp370/r1i1p1f1/Amon/tas/gn/v20190710/tas_Amon_MPI-ESM1-2-HR_ssp370_r1i1p1f1_gn_210001-210012.nc': {'warnings': ' 1', 'errors': ' 0'}}