More tweaks to the linked data loader

This commit is contained in:
Pieter Vander Vennet 2024-06-19 03:22:57 +02:00
parent 684932aebd
commit 734be4a702
5 changed files with 76 additions and 55 deletions

View file

@ -2,14 +2,14 @@ import Script from "./Script"
import LinkedDataLoader from "../src/Logic/Web/LinkedDataLoader"
import { writeFileSync } from "fs"
export default class DownloadLinkedDataList extends Script {
class DownloadLinkedDataList extends Script {
constructor() {
super("Downloads the localBusinesses from the given location. Usage: url [--no-proxy]")
}
async main([url, noProxy]: string[]): Promise<void> {
const useProxy = noProxy !== "--no-proxy"
const data = await LinkedDataLoader.fetchJsonLd(url, {}, useProxy)
const data = await LinkedDataLoader.fetchJsonLd(url, {}, useProxy ? "proxy" : "fetch-lod")
const path = "linked_data_" + url.replace(/[^a-zA-Z0-9_]/g, "_") + ".jsonld"
writeFileSync(path, JSON.stringify(data), "utf8")
console.log("Written", path)

View file

@ -17,7 +17,7 @@ class CompareWebsiteData extends Script {
if (fs.existsSync(filename)) {
return JSON.parse(fs.readFileSync(filename, "utf-8"))
}
const jsonLd = await LinkedDataLoader.fetchJsonLd(url, undefined, true)
const jsonLd = await LinkedDataLoader.fetchJsonLd(url, undefined, "proxy")
console.log("Got:", jsonLd)
fs.writeFileSync(filename, JSON.stringify(jsonLd))
return jsonLd

View file

@ -27,23 +27,23 @@ export default class LinkedDataLoader {
opening_hours: { "@id": "http://schema.org/openingHoursSpecification" },
openingHours: { "@id": "http://schema.org/openingHours", "@container": "@set" },
geo: { "@id": "http://schema.org/geo" },
alt_name: { "@id": "http://schema.org/alternateName" },
alt_name: { "@id": "http://schema.org/alternateName" }
}
private static COMPACTING_CONTEXT_OH = {
dayOfWeek: { "@id": "http://schema.org/dayOfWeek", "@container": "@set" },
closes: {
"@id": "http://schema.org/closes",
"@type": "http://www.w3.org/2001/XMLSchema#time",
"@type": "http://www.w3.org/2001/XMLSchema#time"
},
opens: {
"@id": "http://schema.org/opens",
"@type": "http://www.w3.org/2001/XMLSchema#time",
},
"@type": "http://www.w3.org/2001/XMLSchema#time"
}
}
private static formatters: Record<"phone" | "email" | "website", Validator> = {
phone: new PhoneValidator(),
email: new EmailValidator(),
website: new UrlValidator(undefined, undefined, true),
website: new UrlValidator(undefined, undefined, true)
}
private static ignoreKeys = [
"http://schema.org/logo",
@ -56,7 +56,7 @@ export default class LinkedDataLoader {
"http://schema.org/description",
"http://schema.org/hasMap",
"http://schema.org/priceRange",
"http://schema.org/contactPoint",
"http://schema.org/contactPoint"
]
private static shapeToPolygon(str: string): Polygon {
@ -69,8 +69,8 @@ export default class LinkedDataLoader {
.trim()
.split(" ")
.map((n) => Number(n))
),
],
)
]
}
}
@ -92,18 +92,18 @@ export default class LinkedDataLoader {
const context = {
lat: {
"@id": "http://schema.org/latitude",
"@type": "http://www.w3.org/2001/XMLSchema#double",
"@type": "http://www.w3.org/2001/XMLSchema#double"
},
lon: {
"@id": "http://schema.org/longitude",
"@type": "http://www.w3.org/2001/XMLSchema#double",
},
"@type": "http://www.w3.org/2001/XMLSchema#double"
}
}
const flattened = await jsonld.compact(geo, context)
return {
type: "Point",
coordinates: [Number(flattened.lon), Number(flattened.lat)],
coordinates: [Number(flattened.lon), Number(flattened.lat)]
}
}
@ -236,15 +236,30 @@ export default class LinkedDataLoader {
/**
 * Downloads linked data for `url` and returns it in compacted form
 * (via `LinkedDataLoader.compact`).
 *
 * @param url the address to load the linked data from
 * @param options passed through unchanged to `LinkedDataLoader.compact`
 * @param mode how the data is retrieved:
 *  - `"fetch-lod"` (default): download `url` directly and treat the response as JSON(-LD)
 *  - `"proxy"`: same, but the request goes through `Constants.linkedDataProxy`,
 *    where the `{url}` placeholder is replaced by the URI-encoded `url`
 *  - `"fetch-raw"`: download `url` as an HTML page and use the first
 *    `<script type="application/ld+json">` element found in it.
 *    NOTE(review): this mode relies on the browser-only `DOMParser`, so it
 *    presumably cannot be used from the node scripts - confirm.
 * @returns the compacted linked data
 * @throws Error in `"fetch-raw"` mode if the page does not contain a non-empty
 *   `application/ld+json` script; download and JSON parse errors propagate as-is
 */
static async fetchJsonLd(
    url: string,
    options?: JsonLdLoaderOptions,
    mode: "fetch-lod" | "fetch-raw" | "proxy" = "fetch-lod"
): Promise<object> {
    if (mode === "proxy") {
        url = Constants.linkedDataProxy.replace("{url}", encodeURIComponent(url))
    }
    if (mode !== "fetch-raw") {
        const data = await Utils.downloadJson(url)
        return await LinkedDataLoader.compact(data, options)
    }

    const htmlContent = await Utils.download(url)
    // The page is untrusted: parse it into a separate, inert document instead of
    // assigning it to `innerHTML` of an element owned by the live document, so that
    // inline event handlers (e.g. `<img onerror=...>`) cannot run.
    const parsed = new DOMParser().parseFromString(htmlContent, "text/html")
    const script = Array.from(parsed.getElementsByTagName("script")).find(
        (candidate) => candidate.type === "application/ld+json"
    )
    if (script === undefined || !script.textContent) {
        throw new Error('No <script type="application/ld+json"> found in ' + url)
    }
    const snippet = JSON.parse(script.textContent)
    // Relative IRIs inside the snippet are resolved against the page they came from
    snippet["@base"] = url
    return await LinkedDataLoader.compact(snippet, options)
}
/**
* Only returns different items
* @param externalData
@ -293,7 +308,7 @@ export default class LinkedDataLoader {
if (properties["latitude"] && properties["longitude"]) {
geometry = {
type: "Point",
coordinates: [Number(properties["longitude"]), Number(properties["latitude"])],
coordinates: [Number(properties["longitude"]), Number(properties["latitude"])]
}
delete properties["latitude"]
delete properties["longitude"]
@ -305,7 +320,7 @@ export default class LinkedDataLoader {
const geo: GeoJSON = {
type: "Feature",
properties,
geometry,
geometry
}
delete linkedData.geo
delete properties.shape
@ -423,7 +438,7 @@ export default class LinkedDataLoader {
"brede publiek",
"iedereen",
"bezoekers",
"iedereen - vooral bezoekers gemeentehuis of bibliotheek.",
"iedereen - vooral bezoekers gemeentehuis of bibliotheek."
].indexOf(audience.toLowerCase()) >= 0
) {
return "yes"
@ -506,7 +521,7 @@ export default class LinkedDataLoader {
mv: "http://schema.mobivoc.org/",
gr: "http://purl.org/goodrelations/v1#",
vp: "https://data.velopark.be/openvelopark/vocabulary#",
vpt: "https://data.velopark.be/openvelopark/terms#",
vpt: "https://data.velopark.be/openvelopark/terms#"
},
[url],
undefined,
@ -527,7 +542,7 @@ export default class LinkedDataLoader {
mv: "http://schema.mobivoc.org/",
gr: "http://purl.org/goodrelations/v1#",
vp: "https://data.velopark.be/openvelopark/vocabulary#",
vpt: "https://data.velopark.be/openvelopark/terms#",
vpt: "https://data.velopark.be/openvelopark/terms#"
},
[url],
"g",
@ -670,20 +685,20 @@ export default class LinkedDataLoader {
const withProxyUrl = Constants.linkedDataProxy.replace("{url}", encodeURIComponent(url))
const optionalPaths: Record<string, string | Record<string, string>> = {
"schema:interactionService": {
"schema:url": "website",
"schema:url": "website"
},
"mv:operatedBy": {
"gr:legalName": "operator",
"gr:legalName": "operator"
},
"schema:contactPoint": {
"schema:email": "email",
"schema:telephone": "phone",
"schema:telephone": "phone"
},
"schema:dateModified": "_last_edit_timestamp",
"schema:dateModified": "_last_edit_timestamp"
}
if (includeExtras) {
optionalPaths["schema:address"] = {
"schema:streetAddress": "addr",
"schema:streetAddress": "addr"
}
optionalPaths["schema:name"] = "name"
optionalPaths["schema:description"] = "description"
@ -701,19 +716,19 @@ export default class LinkedDataLoader {
"schema:geo": {
"schema:latitude": "latitude",
"schema:longitude": "longitude",
"schema:polygon": "shape",
"schema:polygon": "shape"
},
"schema:priceSpecification": {
"mv:freeOfCharge": "fee",
"schema:price": "charge",
},
"schema:price": "charge"
}
}
const extra = [
"schema:priceSpecification [ mv:dueForTime [ mv:timeStartValue ?chargeStart; mv:timeEndValue ?chargeEnd; mv:timeUnit ?timeUnit ] ]",
"vp:allows [vp:bicycleType <https://data.velopark.be/openvelopark/terms#CargoBicycle>; vp:bicyclesAmount ?capacityCargobike; vp:bicycleType ?cargoBikeType]",
"vp:allows [vp:bicycleType <https://data.velopark.be/openvelopark/terms#ElectricBicycle>; vp:bicyclesAmount ?capacityElectric; vp:bicycleType ?electricBikeType]",
"vp:allows [vp:bicycleType <https://data.velopark.be/openvelopark/terms#TandemBicycle>; vp:bicyclesAmount ?capacityTandem; vp:bicycleType ?tandemBikeType]",
"vp:allows [vp:bicycleType <https://data.velopark.be/openvelopark/terms#TandemBicycle>; vp:bicyclesAmount ?capacityTandem; vp:bicycleType ?tandemBikeType]"
]
const unpatched = await this.fetchEntry(

View file

@ -66,9 +66,8 @@
</script>
<div>
<div class:interactive={!readonly} class="flex w-full justify-between py-1 px-2">
<div class="flex flex-col">
<div>
<div class:interactive={!readonly} class="flex flex-col items-end py-1 px-2">
<div class="flex flex-col w-full">
{#if renderingExternal}
<TagRenderingAnswer
tags={new UIEventSource(mockPropertiesExternal)}
@ -83,7 +82,6 @@
{externalProperties[key]}
</div>
{/if}
</div>
{#if !readonly && ($isTesting || $isDebug || $showTags === "yes" || $showTags === "always" || $showTags === "full")}
<div class="subtle text-sm">
@ -103,7 +101,7 @@
{#if !readonly}
{#if currentStep === "init"}
<button
class="small"
class="w-fit"
on:click={() => apply(key)}
on:mouseover={() => (onOverwrite = true)}
on:focus={() => (onOverwrite = true)}

View file

@ -1833,8 +1833,15 @@ export default class SpecialVisualizations {
})()
)
}
return Stores.FromPromiseWithErr(
LinkedDataLoader.fetchJsonLd(url, { country }, useProxy)
return Stores.FromPromiseWithErr((async () => {
try {
return await LinkedDataLoader.fetchJsonLd(url, { country }, useProxy ? "proxy" : "fetch-lod")
} catch (e) {
console.log("Could not get with proxy/download LOD, attempting to download directly. Error for ",url,"is",e)
return await LinkedDataLoader.fetchJsonLd(url, { country }, "fetch-raw")
}
})()
)
})
@ -1850,7 +1857,8 @@ export default class SpecialVisualizations {
layer,
externalData,
sourceUrl,
readonly
readonly,
collapsed: isClosed
}),
undefined,
url.map((url) => !!url)