Skip to content

Commit 7aa4f6c

Browse files
committed
add schema-org annotations to html item encoding
resolves #718
1 parent ef2c511 commit 7aa4f6c

File tree

2 files changed

+117
-1
lines changed

2 files changed

+117
-1
lines changed

pycsw/ogc/api/records.py

+112
Original file line numberDiff line numberDiff line change
@@ -897,6 +897,8 @@ def item(self, headers_, args, collection, item):
897897
if headers_['Content-Type'] == 'text/html':
898898
response['title'] = self.config['metadata']['identification']['title']
899899
response['collection'] = collection
900+
response['schema-org'] = record2json(record, self.config['server']['url'],
901+
collection, 'schema-org')
900902

901903
if 'json' in headers_['Content-Type']:
902904
headers_['Content-Type'] = 'application/geo+json'
@@ -1390,9 +1392,119 @@ def record2json(record, url, collection, mode='ogcapi-records'):
13901392

13911393
record_dict['properties']['start_datetime'] = start_date
13921394
record_dict['properties']['end_datetime'] = end_date
1395+
elif mode == 'schema-org':
1396+
schema_org = record_dict['properties'].copy()
1397+
schema_org.update({
1398+
'@context':"http://schema.org/",
1399+
'@type': f"schema:{type_iso2schema_org(record_dict['properties'].get('type',''))}",
1400+
'@id':(f"{url}/collections/{collection}/items/{record_dict['id']}")
1401+
})
1402+
schema_org.pop('title','')
1403+
schema_org.pop('type','')
1404+
schema_org['name'] = schema_org.pop('title', None)
1405+
if record.links:
1406+
print('has links')
1407+
schema_org['distribution'] = []
1408+
for link in jsonify_links(record.links):
1409+
schema_org['distribution'].append({
1410+
'@type':'schema:DataDownload',
1411+
'contentUrl':link.get('url',''),
1412+
'name': link.get('name',''),
1413+
'description': link.get('description',''),
1414+
'encodingFormat': link.get('type',link.get('protocol',''))
1415+
})
1416+
schema_org['keywords'] = []
1417+
for t in schema_org.pop('themes',[]):
1418+
for c in t.get('concepts',[]):
1419+
schema_org['keywords'].append(c.get('url') or c.get('id'))
1420+
schema_org['inLanguage'] = schema_org.pop('language', None)
1421+
schema_org['dateModified'] = schema_org.pop('updated', None)
1422+
schema_org['dateCreated'] = schema_org.pop('created', None)
1423+
schema_org['datePublished'] = schema_org.pop('published', None)
1424+
schema_org['encodingFormat'] = [f.get('name') for f in schema_org.pop('formats', [])]
1425+
for c in record_dict['properties'].get('contacts',{}):
1426+
role = role_iso2schema_org(next(iter(c.get('roles',[])), 'contact'))
1427+
if role not in schema_org.keys():
1428+
schema_org[role] = []
1429+
cbase = {
1430+
'url': next(iter(c.get('links',[])), None).get('href',{}).get('url',''),
1431+
'email': next(iter(c.get('emails',[])), None).get('value',''),
1432+
'address': next(iter(c.get('addresses',[])), None).get('value',''),
1433+
'telephone': next(iter(c.get('phones',[])), None).get('value','')
1434+
}
1435+
if 'name' in c.keys():
1436+
schema_org[role].append(cbase.update({
1437+
'@type':'schema:Person',
1438+
'familyName': c.get('name',''),
1439+
'affiliation': c.get('organization','')
1440+
}))
1441+
else:
1442+
schema_org[role].append(cbase.update({
1443+
'@type': 'schema:Organization',
1444+
'name': c.get('organization','')
1445+
}))
1446+
schema_org.pop('contacts',None)
1447+
record_dict = schema_org
13931448

13941449
return record_dict
13951450

1451+
def type_iso2schema_org(tp):
    """
    Map a resource type identifier to a schema.org type name

    The lookup is case-insensitive and only considers the last
    ``/``-separated segment of the value, so both bare codes
    (``dataset``) and URI-style values (``http://.../Dataset``) match.

    :param tp: resource type as a bare code or a URI; values that are
               not strings (e.g. `None`) are treated as unknown

    :returns: `str` of the schema.org type name, `Thing` when the
              value is missing or not recognized
    """

    # A missing or non-string type must not break the whole item
    # rendering; fall back to the most generic schema.org type.
    if not isinstance(tp, str):
        return 'Thing'

    # Drop surrounding whitespace and trailing slashes so that
    # 'http://example.org/type/dataset/' still resolves to 'dataset'.
    key = tp.strip().rstrip('/').split('/').pop().lower()

    tps = {
        "dataset": "Dataset",
        "nongeographicdataset": "Dataset",
        "service": "WebAPI",
        "series": "Series",
        "software": "SoftwareApplication",
        "model": "ProductModel",
        "document": "DigitalDocument",
        "image": "ImageObject",  # from dcmi
        "text": "DigitalDocument",
        "video": "VideoObject",
        "sound": "AudioObject",
        "party": "Organization",
        "place": "Place",
        "event": "Event",
        "journalarticle": "ScholarlyArticle",  # from Datacite
        "audiovisual": "AudioObject",
        "award": "Award",
        "book": "Book",
        "bookchapter": "Chapter",
        "collection": "Collection",
        "computationalnotebook": "SoftwareApplication",
        "conferencepaper": "ScholarlyArticle",
        "conferenceproceeding": "ScholarlyArticle",
        "datapaper": "ScholarlyArticle",
        "dissertation": "DigitalDocument",
        "instrument": "Sensor",
        "journal": "Periodical",
        "outputmanagementplan": "DigitalDocument",
        "peerreview": "Review",
        "preprint": "ScholarlyArticle",
        "project": "Project",
        "report": "DigitalDocument",
        "standard": "DigitalDocument",
        "studyregistration": "DigitalDocument",
        "workflow": "Workflow",
    }

    return tps.get(key, 'Thing')
1491+
1492+
def role_iso2schema_org(rl):
    """
    Map a contact role identifier to a schema.org property name

    The lookup is case-insensitive and only considers the last
    ``/``-separated segment of the value, so both bare codes
    (``pointOfContact``) and URI-style values match.

    :param rl: contact role as a bare code or a URI; values that are
               not strings (e.g. `None`) are treated as unknown

    :returns: `str` of the schema.org property name under which the
              contact is listed, `contributor` when the role is
              missing or not recognized
    """

    # A malformed role entry must not break the whole item rendering;
    # fall back to the generic 'contributor' property.
    if not isinstance(rl, str):
        return 'contributor'

    # Drop surrounding whitespace and trailing slashes so that a URI
    # ending in '/' still resolves to its final path segment.
    key = rl.strip().rstrip('/').split('/').pop().lower()

    rls = {
        "custodian": "maintainer",
        "funder": "funder",
        "resourceprovider": "provider",
        "author": "author",
        "processor": "contributor",
        "owner": "copyrightHolder",
        "originator": "creator",
        "distributor": "publisher",
        "publisher": "publisher",
        "user": "contributor",
        "pointofcontact": "contributor",
    }

    return rls.get(key, 'contributor')
13961508

13971509
def build_anytext(name, value):
13981510
"""

pycsw/ogc/api/templates/item.html

+5-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,11 @@
99
height: 350px;
1010
}
1111
</style>
12-
12+
{% if data['schema-org'] %}
13+
<script type="application/ld+json">
14+
{{ data['schema-org'] | to_json }}
15+
</script>
16+
{% endif %}
1317
{% endblock %}
1418

1519
{% block crumbs %}

0 commit comments

Comments
 (0)