diff --git a/doc/release-notes/8424-signposting.md b/doc/release-notes/8424-signposting.md new file mode 100644 index 00000000000..7fc47ee4b82 --- /dev/null +++ b/doc/release-notes/8424-signposting.md @@ -0,0 +1,32 @@ +# Signposting for Dataverse + +This branch adds [Signposting](https://signposting.org/) support to Dataverse. + +There are 2 Signposting profile levels, level 1 and level 2. In this implementation, + * level 1 links are shown in the +HTTP `Link` header, which can be fetched by `curl -I https://domain/link-to-article`. + * The level 2 linkset can be fetched by visiting the dedicated linkset page for + that artifact. The link can be seen in level 1 links with key name `rel="linkset"`. + +The configuration is stored as a JSON string in the `Bundle.properties` file; the key name is +`signposting.configuration.SignpostingConf`. Please see the sample configuration below, with an explanation of each of the +config items. + +```json +{ + "useDefaultFileType": true, + "defaultFileTypeValue": "https://schema.org/Dataset", + "maxItems": 5, + "maxAuthors": 5 +} +``` + + * `useDefaultFileType` and `defaultFileTypeValue` are used in combination to provide an extra `Dataset` type to Dataverse + datasets. `AboutPage` is required by `Signposting` and is hence always present; a second type + can be configured to better reflect the actual scholarly type of the dataset. + * `maxItems` sets the max number of items/files which will be shown in the `level 1` profile. Datasets with + too many files will not show any file link in the `level 1` profile. They will be shown in the `level 2` linkset only. + * `maxAuthors`: as with `maxItems`, this sets the max number of authors to be shown in the `level 1` profile. +If the number of authors exceeds this value, no author links will be shown in the `level 1` profile. + +Note: Authors without an author link are neither counted nor shown in any profile/linkset. 
\ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 61720efafb2..2811d754234 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -143,6 +143,7 @@ import edu.harvard.iq.dataverse.search.SearchServiceBean; import edu.harvard.iq.dataverse.search.SearchUtil; import edu.harvard.iq.dataverse.search.SolrClientService; +import edu.harvard.iq.dataverse.util.SignpostingResources; import java.util.Comparator; import org.apache.solr.client.solrj.SolrQuery; import org.apache.solr.client.solrj.impl.HttpSolrClient; @@ -5872,4 +5873,15 @@ public boolean downloadingRestrictedFiles() { } return false; } + + /** + * Builds the Signposting link values for the working dataset version from the SignpostingConf setting (a null setting is passed through to SignpostingResources). + * @return the string produced by SignpostingResources.getLinks(), or null when the working version is not released + */ + public String getSignpostingLinkHeader() { + if (!workingVersion.isReleased()) + return null; + SignpostingResources sr = new SignpostingResources(systemConfig, workingVersion, settingsService.getValueForKey(SettingsServiceBean.Key.SignpostingConf)); + return sr.getLinks(); + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java index d2c3f68dba2..d0571c50fac 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/AbstractApiBean.java @@ -850,6 +850,19 @@ protected static Response error( Status sts, String msg ) { .add( "message", msg ).build() ).type(MediaType.APPLICATION_JSON_TYPE).build(); } + + /** + * Wraps a Signposting linkset array in an HTTP OK response. + * + * @param bld array builder holding the linkset entries; it is added under the "linkset" key of a new JSON object + * @return HTTP OK response of type application/json whose body is that object + */ + protected Response okLinkset( JsonArrayBuilder bld ) { + return Response.ok( Json.createObjectBuilder() + .add("linkset", bld).build() ) + .type(MediaType.APPLICATION_JSON) + .build(); + } } class LazyRef { diff --git 
a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 0d8f60119db..e4a1cdae03c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -86,6 +86,8 @@ import edu.harvard.iq.dataverse.util.json.JSONLDUtil; import edu.harvard.iq.dataverse.util.json.JsonLDTerm; import edu.harvard.iq.dataverse.util.json.JsonParseException; +import edu.harvard.iq.dataverse.util.json.JsonPrinter; +import edu.harvard.iq.dataverse.util.SignpostingResources; import edu.harvard.iq.dataverse.search.IndexServiceBean; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.*; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @@ -530,7 +532,29 @@ public Response getVersionMetadataBlock( @PathParam("id") String datasetId, return notFound("metadata block named " + blockName + " not found"); }); } - + + /** + * Returns the Signposting linkset (JSON) for one version of a dataset. + * @param datasetId dataset identifier, resolved by findDatasetOrDie + * @param versionId version to describe; ":draft" is rejected with a bad-request response + * @param uriInfo request URI info, passed through to getDatasetVersionOrDie + * @param headers request headers, passed through to getDatasetVersionOrDie + * @return the linkset response, a bad-request response for the draft version, or a not-found response when the configuration is missing/empty or the version has no id + */ + @GET + @Path("{id}/versions/{versionId}/linkset") + public Response getLinkset( @PathParam("id") String datasetId, @PathParam("versionId") String versionId, @Context UriInfo uriInfo, @Context HttpHeaders headers) { + if ( ":draft".equals(versionId) ) { + return badRequest("Signposting is not supported on the :draft version"); + } + return response( req -> { + DatasetVersion dsv = getDatasetVersionOrDie(req, versionId, findDatasetOrDie(datasetId), uriInfo, headers); + String signpostingConf = settingsService.getValueForKey(SettingsServiceBean.Key.SignpostingConf, BundleUtil.getStringFromBundle("signposting.configuration.SignpostingConf")); + if (signpostingConf == null || signpostingConf.isEmpty()) return notFound("Configuration key for signposting is empty 
[SignpostingConf]"); + if (dsv.getId() == null) return notFound("Dataset not found: Id is empty"); + return okLinkset(JsonPrinter.jsonLinkset(new SignpostingResources(systemConfig, dsv, signpostingConf))); + }); + } @GET @Path("{id}/modifyRegistration") public Response updateDatasetTargetURL(@PathParam("id") String id ) { diff --git a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java index efa944cf633..4f8deaa12c0 100644 --- a/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/settings/SettingsServiceBean.java @@ -198,6 +198,11 @@ public enum Key { */ MetricsUrl, + /** + * JSON configuration for Signposting; when unset, callers fall back to the bundle key signposting.configuration.SignpostingConf + */ + SignpostingConf, + /** * Number of minutes before a metrics query can be rerun. Otherwise a cached value is returned. * Previous month dates always return cache. Only applies to new internal caching system (not miniverse). diff --git a/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java b/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java new file mode 100644 index 00000000000..df0e5d0a256 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/util/SignpostingResources.java @@ -0,0 +1,316 @@ +package edu.harvard.iq.dataverse.util; + +/* + Eko Indarto, DANS + Vic Ding, DANS + + This file prepares the resources used in Signposting + + It requires corresponding configuration to function well. + The configuration key used is SignpostingConf. + + useDefaultFileType is an on/off switch at linkset creation time, it controls whether the default type is + used, which is always Dataset + + The configuration can be modified during run time by the administrator. 
+ */ + +import edu.harvard.iq.dataverse.*; +import edu.harvard.iq.dataverse.dataaccess.StorageIO; +import edu.harvard.iq.dataverse.dataaccess.SwiftAccessIO; +import edu.harvard.iq.dataverse.dataset.DatasetUtil; +import edu.harvard.iq.dataverse.license.License; + +import javax.json.Json; +import javax.json.JsonArrayBuilder; +import javax.json.JsonObjectBuilder; +import javax.json.JsonObject; +import javax.json.JsonReader; +import java.io.IOException; +import java.io.StringReader; +import java.util.LinkedList; +import java.util.List; +import java.util.Objects; +import java.util.logging.Logger; + +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; + +public class SignpostingResources { + private static final Logger logger = Logger.getLogger(SignpostingResources.class.getCanonicalName()); + SystemConfig systemConfig; + DatasetVersion workingDatasetVersion; + Boolean useDefaultFileType; + String defaultFileTypeValue; + int maxAuthors; + int maxItems; + + public SignpostingResources(SystemConfig systemConfig, DatasetVersion workingDatasetVersion, String jsonSetting) { + this.systemConfig = systemConfig; + this.workingDatasetVersion = workingDatasetVersion; + if (jsonSetting == null || jsonSetting.trim().isEmpty()) { + jsonSetting = BundleUtil.getStringFromBundle("signposting.configuration.SignpostingConf"); + } + JsonReader jsonReader = Json.createReader(new StringReader(jsonSetting)); + JsonObject spJsonSetting = jsonReader.readObject(); + jsonReader.close(); + useDefaultFileType = spJsonSetting.getBoolean("useDefaultFileType", true); + defaultFileTypeValue = spJsonSetting.getString("defaultFileTypeValue", "https://schema.org/Dataset"); + maxAuthors = spJsonSetting.getInt("maxAuthors", 5); + maxItems = spJsonSetting.getInt("maxItems", 5); + } + + /** + * Builds the author links of the link header value. + * For example: + * if an author resolves to the URL https://example.org/author/1 the result contains + * &lt;https://example.org/author/1&gt;;rel="author" + * + * @param datasetAuthors list of all DatasetAuthor objects of the version + * @return the comma-separated author links; an empty string when no author has a URL or when more than maxAuthors authors have one + */ + 
private String getAuthorsAsString(List<DatasetAuthor> datasetAuthors) { + String singleAuthorString; + String identifierSchema = ""; + int visibleAuthorCounter = 0; + // only authors that yield a non-empty URL count towards maxAuthors + for (DatasetAuthor da : datasetAuthors) { + logger.fine(String.format( + "idtype: %s; idvalue: %s, affiliation: %s; identifierUrl: %s", + da.getIdType(), + da.getIdValue(), + da.getAffiliation(), + da.getIdentifierAsUrl() + )); + + String authorURL = ""; + authorURL = getAuthorUrl(da); + if (authorURL != null && !Objects.equals(authorURL, "")) { + visibleAuthorCounter++; + // return empty only once the number of visible authors exceeds the max allowed + if (visibleAuthorCounter > maxAuthors) return ""; + singleAuthorString = "<" + authorURL + ">;rel=\"author\""; + if (Objects.equals(identifierSchema, "")) { + identifierSchema = singleAuthorString; + } else { + identifierSchema = String.join(",", identifierSchema, singleAuthorString); + } + } + } + + logger.fine(String.format("identifierSchema: %s", identifierSchema)); + return identifierSchema; + } + + /** + * Assembles the link values (author, cite-as, item, describedby, type, license, linkset) for the working dataset version + * + * @return the link values joined with ", " + */ + public String getLinks() { + List<String> valueList = new LinkedList<>(); + Dataset ds = workingDatasetVersion.getDataset(); + + String identifierSchema = getAuthorsAsString(workingDatasetVersion.getDatasetAuthors()); + if (identifierSchema != null && !identifierSchema.equals("")) { + valueList.add(identifierSchema); + } + + if (!Objects.equals(ds.getPersistentURL(), "")) { + String citeAs = "<" + ds.getPersistentURL() + ">;rel=\"cite-as\""; + valueList.add(citeAs); + } + + List<FileMetadata> fms = workingDatasetVersion.getFileMetadatas(); + String items = getItems(fms); + if (items != null && !Objects.equals(items, "")) { + valueList.add(items); + } + + String describedby = "<" + ds.getGlobalId().toURL().toString() + ">;rel=\"describedby\"" + ";type=\"" + "application/vnd.citationstyles.csl+json\""; + describedby += ",<" + 
systemConfig.getDataverseSiteUrl() + "/api/datasets/export?exporter=schema.org&persistentId=" + + ds.getProtocol() + ":" + ds.getAuthority() + "/" + ds.getIdentifier() + ">;rel=\"describedby\"" + ";type=\"application/ld+json\""; + valueList.add(describedby); + + String type = "<https://schema.org/AboutPage>;rel=\"type\""; + if (useDefaultFileType) { + type = "<https://schema.org/AboutPage>;rel=\"type\",<" + defaultFileTypeValue + ">;rel=\"type\""; + } + valueList.add(type); + + String licenseString = "<" + DatasetUtil.getLicenseURI(workingDatasetVersion) + ">;rel=\"license\""; + valueList.add(licenseString); + + String linkset = "<" + systemConfig.getDataverseSiteUrl() + "/api/datasets/:persistentId/versions/" + + workingDatasetVersion.getVersionNumber() + "." + workingDatasetVersion.getMinorVersionNumber() + + "/linkset?persistentId=" + ds.getProtocol() + ":" + ds.getAuthority() + "/" + ds.getIdentifier() + "> ; rel=\"linkset\";type=\"application/linkset+json\""; + valueList.add(linkset); + logger.fine(String.format("valueList is: %s", valueList)); + + return String.join(", ", valueList); + } + // Link target for one author: the raw id value when present, otherwise getIdentifierAsUrl(). NOTE(review): preferring the raw value over the URL form looks inverted - confirm. + private String getAuthorUrl(DatasetAuthor da) { + String authorURL = ""; + if (da.getIdValue() != null && !da.getIdValue().trim().isEmpty()) { + authorURL = da.getIdValue(); + } else { + authorURL = da.getIdentifierAsUrl(); + } + return authorURL; + } + // Linkset author entries, one {"href": url} object per author with a usable URL; null when there is none. + private JsonArrayBuilder getJsonAuthors(List<DatasetAuthor> datasetAuthors) { + JsonArrayBuilder authors = Json.createArrayBuilder(); + boolean returnNull = true; + String authorURL = ""; + for (DatasetAuthor da : datasetAuthors) { + authorURL = getAuthorUrl(da); + if (authorURL != null && !authorURL.isEmpty()) { + authors.add(jsonObjectBuilder().add("href", authorURL)); + returnNull = false; + } + } + return returnNull ? 
null : authors; + } + // Item links for the link header, comma-separated; null when the version has more than maxItems files. + private String getItems(List<FileMetadata> fms) { + if (fms.size() > maxItems) { + logger.fine(String.format("maxItem is %s and fms size is %s", maxItems, fms.size())); + return null; + } + + String result = ""; + for (FileMetadata fm : fms) { + DataFile df = fm.getDataFile(); + if (Objects.equals(result, "")) { + result = "<" + getPublicDownloadUrl(df) + ">;rel=\"item\";type=\"" + df.getContentType() + "\""; + } else { + result = String.join(",", result, "<" + getPublicDownloadUrl(df) + ">;rel=\"item\";type=\"" + df.getContentType() + "\""); + } + } + return result; + } + // Linkset item entries, one {"href", "type"} object per file (no maxItems limit here). + private JsonArrayBuilder getJsonItems(List<FileMetadata> fms) { + JsonArrayBuilder items = Json.createArrayBuilder(); + for (FileMetadata fm : fms) { + DataFile df = fm.getDataFile(); + items.add(jsonObjectBuilder().add("href", getPublicDownloadUrl(df)).add("type", df.getContentType())); + } + + return items; + } + // JSON linkset: one context object anchored at the landing page, followed by one per file. + public JsonArrayBuilder getJsonLinkset() { + Dataset ds = workingDatasetVersion.getDataset(); + GlobalId gid = new GlobalId(ds); + String landingPage = systemConfig.getDataverseSiteUrl() + "/dataset.xhtml?persistentId=" + ds.getProtocol() + ":" + ds.getAuthority() + "/" + ds.getIdentifier(); + JsonArrayBuilder authors = getJsonAuthors(workingDatasetVersion.getDatasetAuthors()); + + List<FileMetadata> fms = workingDatasetVersion.getFileMetadatas(); + JsonArrayBuilder items = getJsonItems(fms); + + License license = workingDatasetVersion.getTermsOfUseAndAccess().getLicense(); + String licenseString = license == null ? null : license.getUri().toString(); + + JsonArrayBuilder mediaTypes = Json.createArrayBuilder(); + mediaTypes.add( + jsonObjectBuilder().add( + "href", + gid.toURL().toString() + ).add( + "type", + "application/vnd.citationstyles.csl+json" + ) + ); + + mediaTypes.add( + jsonObjectBuilder().add( + "href", + systemConfig.getDataverseSiteUrl() + "/api/datasets/export?exporter=schema.org&persistentId=" + ds.getProtocol() + ":" + ds.getAuthority() + "/" + ds.getIdentifier() + ).add( + "type", + "application/ld+json" 
+ ) + ); + JsonArrayBuilder linksetJsonObj = Json.createArrayBuilder(); + + JsonObjectBuilder mandatory; + if (useDefaultFileType) { + mandatory = jsonObjectBuilder() + .add("anchor", landingPage) + .add("cite-as", Json.createArrayBuilder().add(jsonObjectBuilder().add("href", ds.getPersistentURL()))) + .add("type", Json.createArrayBuilder() + .add(jsonObjectBuilder().add("href", "https://schema.org/AboutPage")) + .add(jsonObjectBuilder().add("href", defaultFileTypeValue)) + ); + } else { + mandatory = jsonObjectBuilder() + .add("anchor", landingPage) + .add("cite-as", Json.createArrayBuilder().add(jsonObjectBuilder().add("href", ds.getPersistentURL()))) + .add("type", Json.createArrayBuilder() + .add(jsonObjectBuilder().add("href", "https://schema.org/AboutPage")) + ); + } + + if (authors != null) { + mandatory.add("author", authors); + } + if (licenseString != null && !Objects.equals(licenseString, "")) { + mandatory.add("license", Json.createArrayBuilder().add(jsonObjectBuilder().add("href", licenseString))); + } + if (!mediaTypes.toString().trim().isEmpty()) { + mandatory.add("describedby", mediaTypes); + } + if (items != null) { + mandatory.add("item", items); + } + linksetJsonObj.add(mandatory); + + // one context object per file, pointing back to the landing page via "collection" + for (FileMetadata fm : fms) { + DataFile df = fm.getDataFile(); + JsonObjectBuilder itemAnchor = jsonObjectBuilder().add("anchor", getPublicDownloadUrl(df)); + itemAnchor.add("collection", Json.createArrayBuilder().add(jsonObjectBuilder() + .add("href", landingPage))); + linksetJsonObj.add(itemAnchor); + } + + return linksetJsonObj; + } + + // Download URL for a file: the Swift remote/temporary URL when it is stored in Swift, otherwise FileUtil.getPublicDownloadUrl for this site. + private String getPublicDownloadUrl(DataFile dataFile) { + StorageIO storageIO = null; + try { + storageIO = dataFile.getStorageIO(); + } catch (IOException e) { + logger.warning(String.format("Error getting storageID from file; original error message is: %s", e.getLocalizedMessage())); + } + + if (storageIO instanceof SwiftAccessIO) { + String fileDownloadUrl; + SwiftAccessIO swiftIO = 
(SwiftAccessIO) storageIO; + try { + swiftIO.open(); + } catch (IOException e) { + logger.warning(String.format("Error opening the swiftIO; original error message is: %s", e.getLocalizedMessage())); + } + + //if its a public install, lets just give users the permanent URL! + if (systemConfig.isPublicInstall()) { + fileDownloadUrl = swiftIO.getRemoteUrl(); + } else { + //TODO: if a user has access to this file, they should be given the swift url + // perhaps even we could use this as the "private url" + fileDownloadUrl = swiftIO.getTemporarySwiftUrl(); + } + // close the stream + swiftIO.closeInputStream(); + return fileDownloadUrl; + + } + + return FileUtil.getPublicDownloadUrl(systemConfig.getDataverseSiteUrl(), null, dataFile.getId()); + } +} diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java index 39c84562a09..253a14af552 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonPrinter.java @@ -22,6 +22,7 @@ import edu.harvard.iq.dataverse.privateurl.PrivateUrl; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import edu.harvard.iq.dataverse.util.DatasetFieldWalker; +import edu.harvard.iq.dataverse.util.SignpostingResources; import edu.harvard.iq.dataverse.util.StringUtil; import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; @@ -904,4 +905,33 @@ public static JsonObjectBuilder mapToObject(Map in) { in.keySet().forEach( k->b.add(k, in.get(k)) ); return b; } + + + /** + * Returns the JSON linkset built by the given SignpostingResources (delegates to getJsonLinkset()) + * + * @param sr SignpostingResources prepared for the dataset version to describe + * @return array builder holding the linkset entries + */ + public static JsonArrayBuilder jsonLinkset(SignpostingResources sr) { + return sr.getJsonLinkset(); + } + + /** + * Builds a flat summary object for a dataset. NOTE(review): the "author" member is filled with the dataset's persistent URL, which looks unintended - confirm before relying on it. + * @param ds the designated Dataset + * @return object builder with anchor, cite-as, type, author, protocol, authority, publisher, publicationDate and storageIdentifier members + */ + public static JsonObjectBuilder 
jsonLinkset(Dataset ds) { + return jsonObjectBuilder() + .add("anchor", ds.getPersistentURL()) + .add("cite-as", Json.createArrayBuilder().add(jsonObjectBuilder().add("href", ds.getPersistentURL()))) + .add("type", Json.createArrayBuilder().add(jsonObjectBuilder().add("href", "https://schema.org/AboutPage"))) + .add("author", ds.getPersistentURL()) + .add("protocol", ds.getProtocol()) + .add("authority", ds.getAuthority()) + .add("publisher", BrandingUtil.getInstallationBrandName()) + .add("publicationDate", ds.getPublicationDateFormattedYYYYMMDD()) + .add("storageIdentifier", ds.getStorageIdentifier()); + } } diff --git a/src/main/java/propertyFiles/Bundle.properties b/src/main/java/propertyFiles/Bundle.properties index 8abca8ff3fd..84acf975db6 100644 --- a/src/main/java/propertyFiles/Bundle.properties +++ b/src/main/java/propertyFiles/Bundle.properties @@ -2753,3 +2753,6 @@ publishDatasetCommand.pidNotReserved=Cannot publish dataset because its persiste # APIs api.errors.invalidApiToken=Invalid API token. + +# Signposting configuration +signposting.configuration.SignpostingConf={"useDefaultFileType": true,"defaultFileTypeValue": "https://schema.org/Dataset","maxItems": 5,"maxAuthors": 5} diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index e42b61ef88f..bda0acf27fe 100644 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -88,6 +88,11 @@ + + + + +