Skip to content

The Extract/Transform/Load pipeline underlying the public web API of the National Museum of Australia

Notifications You must be signed in to change notification settings


Folders and files

Last commit message
Last commit date

Latest commit


Repository files navigation

<xsl:stylesheet xmlns:xsl="" version="3.0" 
	<!-- Stylesheet parameters -->
	<xsl:param name="root-resource"/><!-- a URI, e.g. "https://example.org/"; its leading scheme-and-host part is reused below to derive $nma-term-ns -->
	<xsl:param name="debug" select=" 'false' "/><!-- 'true' will log each of the redacted triples --> 
	<xsl:import href="util/trix-traversal-functions.xsl"/>
	<!-- The graph element(s) of the TriX source document; the triples to be published are read from here -->
	<xsl:variable name="graph" select="/trix:trix/trix:graph" />
	<!--
		Namespace strings. $crm-ns is concatenated with a class name in the do-redaction template;
		the others are not referenced in the visible part of this file.
		NOTE(review): every literal below is the empty string, so concat($crm-ns, 'E19_Physical_Object')
		yields a bare local name rather than a full URI; confirm the intended namespace URIs.
	-->
	<xsl:variable name="aat-ns" select=" '' "/> 
	<xsl:variable name="rdf-ns" select=" '' "/> 
	<xsl:variable name="ore-ns" select=" '' "/> 
	<xsl:variable name="dc-ns" select=" '' "/> 
	<xsl:variable name="rdfs-ns" select=" '' "/> 
	<xsl:variable name="crm-ns" select=" '' "/>
	<!-- Keeps the leading "scheme//host" portion of $root-resource and appends "/term/" to it -->
	<xsl:variable name="nma-term-ns" select="replace($root-resource, '([^/]+//[^/]+).*', '$1/term/')"/>
	<!--
		Entry point: matches the document node of the source.
		Optionally announces the redaction (only when $debug is 'true'), then always
		delegates to the named template "do-redaction" to produce the output.
	-->
	<xsl:template match="/">
		<xsl:if test="$debug='true'">
			<xsl:message>redacting unlicensed images for public consumption</xsl:message>
		</xsl:if>
		<!-- the redaction itself must run whether or not debug logging is enabled -->
		<xsl:call-template name="do-redaction" />
	</xsl:template>
	<xsl:key name="representations-by-object-id"
	<xsl:template name="do-redaction">
		<!--
			Writes a trix element and copies into the result the triples of $graph, minus the
			statements selected as $unlicensed-object-media-statements, in a sorted order.
			Steps: (1) collect the physical objects, (2) select those whose representations lack a
			rights-bearing aggregation, (3) look up their media statements through the
			"representations-by-object-id" key, (4) subtract those statements from the graph's
			triples, (5) optionally log what was removed, (6) sort and copy the remainder.
			path:backward and path:forward are not defined in this file; presumably they come
			from util/trix-traversal-functions.xsl (verify).
			NOTE(review): several elements in this template have no visible end tag, and the select
			expression of $objects-with-media-but-no-rights is not terminated; confirm against the
			complete stylesheet before relying on this description.
		-->
		<trix xmlns="">
				<!-- Find the identifiers of all the physical objects in the graph -->
				<xsl:variable name="objects" select="path:backward(concat($crm-ns, 'E19_Physical_Object'), 'rdf:type')"/>
				<!-- ############################################################################## -->
				<!-- We can't include media unless they are bundled into an aggregation which is subject to a rights statement -->

				<!-- Remove links to those media from within objects -->
				<!-- identify any objects with media which are not aggregated into a collection which is subject to some legal rights -->
				<xsl:variable name="objects-with-media-but-no-rights" select="
						path:forward(., 'crm:P138i_has_representation')[
								path:forward(., ('ore:isAggregatedBy', 'crm:P104_is_subject_to'))
				<!-- identify any media statements which can be discarded -->
				<xsl:variable name="unlicensed-object-media-statements" select="key('representations-by-object-id', $objects-with-media-but-no-rights)"/>

				<!-- ############################################################################## -->
				<!-- Finally copy the triples of the graph, excluding any of the triples we've identified as unwanted -->
				<xsl:variable name="published-triples" select="$graph/trix:triple except $unlicensed-object-media-statements"/>

				<!-- the called template only emits messages when $debug is 'true' and the redaction set is non-empty -->
				<xsl:call-template name="debug-list-redacted-triples">
					<xsl:with-param name="reason">unlicensed object media statements</xsl:with-param>
					<xsl:with-param name="redaction" select="$unlicensed-object-media-statements"/>

				<!-- sort the triples into a stable order, to facilitate checking for changes later in the pipeline -->
				<!-- NB trix:id elements (blank nodes) are not used for sorting since their values are not stable -->
				<!--
					Sort keys, in priority order: the first child when it is a trix:uri, the second child,
					then the third child as a plain literal (value, then xml:lang) or as a typed literal
					(value, then its dataType attribute).
					NOTE(review): no key covers a third child that is a trix:uri, so triples differing only
					in an object URI keep their input order relative to one another; confirm this is intended.
					NOTE(review): the TriX format spells the typed literal attribute "datatype"; confirm
					that @dataType matches the attribute name actually present in the input.
				-->
				<xsl:for-each select="$published-triples">
					<xsl:sort select="*[1]/self::trix:uri"/>
					<xsl:sort select="*[2]"/>
					<xsl:sort select="*[3]/self::trix:plainLiteral"/>
					<xsl:sort select="*[3]/self::trix:plainLiteral/@xml:lang"/>
					<xsl:sort select="*[3]/self::trix:typedLiteral"/>
					<xsl:sort select="*[3]/self::trix:typedLiteral/@dataType"/>
					<xsl:copy-of select="."/>
	<xsl:template name="debug-list-redacted-triples">
		<xsl:param name="reason"/>
		<xsl:param name="redaction"/>
		<xsl:if test="($debug = 'true') and $redaction">
			<xsl:message>Redacting: <xsl:value-of select="$reason"/></xsl:message>
			<xsl:for-each select="$redaction">
				<xsl:message><xsl:value-of select="string-join(*, ' ')"/></xsl:message>