Simple data enrichment with Wikidata

Found while tidying up the browser tabs…

…it was about how we can get more information (e.g. also an image) for a media station when we only have a GND ID…

The answer is simple: just ask Wikidata. Basically, it is a generalised version of the query from the article about Hugo and Wikidata.

# List every direct statement of the item identified by a GND ID (P227)
# as pairs of property label and value label.
SELECT ?propertyLabel ?valueLabel
WHERE {
  # Find the item by its GND ID; change the quoted value to look up another one.
  ?item wdt:P227 "10153740-2" .
  # Collect all predicate/object pairs of that item ...
  ?item ?wdt ?o .
  # ... and keep only predicates that are the direct-claim form of a property.
  ?property wikibase:directClaim ?wdt .
  # Label service in manual mode: fetch the property's label, preferring
  # English and falling back to German.
  SERVICE wikibase:label {
    bd:serviceParam wikibase:language "en,de" .
    ?property rdfs:label ?propertyLabel .
  }
  # When the value is itself an IRI, try to fetch its English label.
  OPTIONAL {
    FILTER(isIRI(?o))
    ?o rdfs:label ?enLabel .
    FILTER(LANG(?enLabel) = "en")
  }
  # Use the English label if one was found, otherwise the value as a string.
  BIND(COALESCE(?enLabel, STR(?o)) AS ?valueLabel)
}

The query can simply be entered into the query window of the Wikidata Query Service. To look up a different GND ID, simply change the value given for wdt:P227 (here "10153740-2"). Of course, you can also search by other identifiers; many are now available in Wikidata.

And to integrate it into a Jupyter Notebook or Python script, this function can be used:

import json
import urllib.error
import urllib.parse
import urllib.request

def query_by_gnd(gnd_id, *, endpoint="https://query.wikidata.org/sparql", timeout=60):
    """Fetch the statements of the Wikidata item that carries the given GND ID.

    Builds a SPARQL query selecting every direct claim of the item whose
    GND ID (property P227) equals ``gnd_id``, sends it to ``endpoint`` as
    ``?query=...&format=json`` and folds the result rows into a dict.

    Args:
        gnd_id: GND identifier to look up, e.g. ``"10153740-2"``.
        endpoint: URL of the SPARQL endpoint. Defaults to the Wikidata
            Query Service.
        timeout: Seconds to wait on the network before giving up;
            ``None`` waits indefinitely.

    Returns:
        A dict mapping each property label to its value label. A property
        that occurs once maps to a string; a property that occurs several
        times maps to a list of strings in result order. The dict is empty
        when the query returns no rows. ``None`` is returned when the
        request or the decoding of the response failed; in that case a
        message describing the error is printed.
    """
    # Escape the characters that would terminate or break the quoted SPARQL
    # string literal, so an unusual ID cannot change the shape of the query.
    literal = (
        str(gnd_id)
        .replace("\\", "\\\\")
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
    )

    query = """
    SELECT ?propertyLabel ?valueLabel ?valueURI
    WHERE {{
      ?item wdt:P227 "{}" .
      ?item ?wdt ?o .
      ?property wikibase:directClaim ?wdt .

      SERVICE wikibase:label {{
        bd:serviceParam wikibase:language "en,de" .
        ?property rdfs:label ?propertyLabel .
      }}

      OPTIONAL {{
        FILTER(isIRI(?o))
        ?o rdfs:label ?enLabel .
        FILTER(LANG(?enLabel) = "en")
      }}

      BIND(COALESCE(?enLabel, STR(?o)) AS ?valueLabel)

      BIND(IF(isIRI(?o), ?o, ?undefined) AS ?valueURI)
    }}
    """.format(literal)

    params = {
        'query': query,
        'format': 'json'
    }
    url = f"{endpoint}?{urllib.parse.urlencode(params)}"

    headers = {
        'Accept': 'application/sparql-results+json',
        # NOTE(review): Wikimedia's User-Agent policy asks clients to identify
        # themselves; the generic urllib default may be rejected. Extend this
        # with contact details before using the function at scale.
        'User-Agent': 'query_by_gnd/1.0 (Python urllib)',
    }

    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req, timeout=timeout) as response:
            data = response.read().decode('utf-8')
        results = json.loads(data)

        wikidata = {}
        for binding in results['results']['bindings']:
            key = binding['propertyLabel']['value']
            value = binding['valueLabel']['value']
            if key not in wikidata:
                # First value for this property: store it as a plain string.
                wikidata[key] = value
            elif isinstance(wikidata[key], list):
                wikidata[key].append(value)
            else:
                # Second value: promote the stored string to a list.
                wikidata[key] = [wikidata[key], value]
        return wikidata

    except urllib.error.URLError as e:
        print(f"Error accessing Wikidata endpoint: {e.reason}")
    except json.JSONDecodeError as e:
        # JSONDecodeError has no ``reason`` attribute; its text is in ``msg``.
        print(f"Error decoding JSON: {e.msg}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")

    # Reached only after one of the handlers above reported a failure.
    return None