Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
612 changes: 29 additions & 583 deletions README.md

Large diffs are not rendered by default.

32 changes: 16 additions & 16 deletions src/main/java/org/grobid/core/engines/SoftwareParser.java
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ public List<SoftwareEntity> processText(String text, boolean disambiguate) throw
entities = SoftwareContextClassifier.getInstance(softwareConfiguration).classifyDocumentContexts(entities);

} catch (Exception e) {
throw new GrobidException("An exception occured while running Grobid.", e);
throw new GrobidException("An exception occurred while running Grobid.", e);
}

return entities;
Expand Down Expand Up @@ -760,7 +760,7 @@ public Pair<List<SoftwareEntity>, Document> processPDF(File file,
}
} catch (Exception e) {
throw new GrobidException(
"An exception occured while running consolidation on bibliographical references.", e);
"An exception occurred while running consolidation on bibliographical references.", e);
}

// propagate the bib. ref. to the entities corresponding to the same software name without bib. ref.
Expand Down Expand Up @@ -825,12 +825,12 @@ public Pair<List<SoftwareEntity>, Document> processPDF(File file,
*/
private List<SoftwareEntity> processLayoutTokenSequence(
List<LayoutToken> layoutTokens,
List<SoftwareEntity> entities,
boolean disambiguate,
boolean addParagraphContext,
boolean fromPDF,
boolean fromXML,
List<PDFAnnotation> pdfAnnotations
List<SoftwareEntity> entities,
boolean disambiguate,
boolean addParagraphContext,
boolean fromPDF,
boolean fromXML,
List<PDFAnnotation> pdfAnnotations
) {
List<LayoutTokenization> layoutTokenizations = new ArrayList<LayoutTokenization>();
layoutTokenizations.add(new LayoutTokenization(layoutTokens));
Expand Down Expand Up @@ -1475,7 +1475,7 @@ public boolean accept(File dir, String name) {
String pathTEI = outputDirectory + "/" + file.getName().substring(0, file.getName().length() - 4) + ".training.tei.xml";
createTraining(file.getAbsolutePath(), pathTEI, n);
} catch (final Exception exp) {
logger.error("An error occured while processing the following pdf: "
logger.error("An error occurred while processing the following pdf: "
+ file.getPath() + ": " + exp);
}
if (ind != -1)
Expand All @@ -1484,7 +1484,7 @@ public boolean accept(File dir, String name) {

return refFiles.length;
} catch (final Exception exp) {
throw new GrobidException("An exception occured while running Grobid batch.", exp);
throw new GrobidException("An exception occurred while running Grobid batch.", exp);
}
}

Expand Down Expand Up @@ -1805,7 +1805,7 @@ public String addFeatures(List<LayoutToken> tokens,
isSoftwarePattern = false;
}
} catch (Exception e) {
throw new GrobidException("An exception occured while running Grobid.", e);
throw new GrobidException("An exception occurred while running Grobid.", e);
}
return result.toString();
}
Expand Down Expand Up @@ -2108,7 +2108,7 @@ public List<SoftwareComponent> extractSoftwareComponents(String text,

// conservative check, minimal well-formedness of the content for URL
if (clusterLabel.equals(SoftwareTaggingLabels.SOFTWARE_URL)) {
if (SoftwareAnalyzer.DELIMITERS.indexOf(clusterContent) != -1 ||
if (SoftwareAnalyzer.DELIMITERS.contains(clusterContent) ||
SoftwareLexicon.getInstance().isEnglishStopword(clusterContent) ||
FeatureFactory.getInstance().test_number(clusterContent) ||
clusterContent.replace("\n", "").equals("//")) {
Expand Down Expand Up @@ -2473,7 +2473,7 @@ public Pair<List<SoftwareEntity>, List<BibDataSet>> processXML(File file,
//tei = restoreDomParserAttributeBug(tei);

} catch (final Exception exp) {
logger.error("An error occured while processing the following XML file: "
logger.error("An error occurred while processing the following XML file: "
+ file.getPath(), exp);
}

Expand All @@ -2498,7 +2498,7 @@ public Pair<List<SoftwareEntity>, List<BibDataSet>> processTEI(File file,
//tei = restoreDomParserAttributeBug(tei);

} catch (final Exception exp) {
logger.error("An error occured while processing the following XML file: "
logger.error("An error occurred while processing the following XML file: "
+ file.getPath(), exp);
}

Expand Down Expand Up @@ -2532,7 +2532,7 @@ public String processXML(File file) throws Exception {
tei = FileUtils.readFileToString(new File(newFilePath), UTF_8);

} catch (final Exception exp) {
logger.error("An error occured while processing the following XML file: " + file.getAbsolutePath(), exp);
logger.error("An error occurred while processing the following XML file: " + file.getAbsolutePath(), exp);
} finally {
if (newFilePath != null) {
File newFile = new File(newFilePath);
Expand Down Expand Up @@ -2824,7 +2824,7 @@ public Pair<List<SoftwareEntity>, List<BibDataSet>> processTEIDocument(org.w3c.d
}
} catch (Exception e) {
throw new GrobidException(
"An exception occured while running consolidation on bibliographical references.", e);
"An exception occurred while running consolidation on bibliographical references.", e);
}

// propagate the bib. ref. to the entities corresponding to the same software name without bib. ref.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ public static File uploadFile(String urll, String path, String name) throws Exce
return outFile;
}
catch (Exception e) {
throw new Exception("An exception occured while downloading " + urll, e);
throw new Exception("An exception occurred while downloading " + urll, e);
}
}

Expand Down
120 changes: 118 additions & 2 deletions src/main/java/org/grobid/service/controller/SoftwareController.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@
import com.google.inject.Inject;
import com.google.inject.Singleton;
import jakarta.ws.rs.*;
import jakarta.ws.rs.client.Client;
import jakarta.ws.rs.core.MediaType;
import jakarta.ws.rs.core.Response;
import org.glassfish.jersey.media.multipart.FormDataParam;
import org.grobid.core.utilities.GrobidProperties;
import org.grobid.core.utilities.SoftwareConfiguration;
import org.grobid.core.utilities.Versioner;
import org.grobid.service.configuration.SoftwareServiceConfiguration;
Expand All @@ -15,6 +15,8 @@
import org.slf4j.LoggerFactory;

import java.io.InputStream;
import java.util.Collections;
import java.util.Map;

/**
* RESTful service for GROBID Software extension.
Expand All @@ -35,9 +37,11 @@ public class SoftwareController implements SoftwarePaths {
private static final String INPUT = "input";

private SoftwareConfiguration configuration;
private final SoftwareServiceConfiguration serviceConfiguration;
private final Client httpClient;

@Inject
public SoftwareController(SoftwareServiceConfiguration serviceConfiguration) {
public SoftwareController(SoftwareServiceConfiguration serviceConfiguration, Client httpClient) {
/*try {
ObjectMapper mapper = new ObjectMapper(new YAMLFactory());
this.configuration = mapper.readValue(new File("resources/config/config.yml"), SoftwareConfiguration.class);
Expand All @@ -46,6 +50,8 @@ public SoftwareController(SoftwareServiceConfiguration serviceConfiguration) {
this.configuration = null;
}*/
this.configuration = serviceConfiguration.getSoftwareConfiguration();
this.serviceConfiguration = serviceConfiguration;
this.httpClient = httpClient;
}

@Path(PATH_IS_ALIVE)
Expand Down Expand Up @@ -138,4 +144,114 @@ public ServiceInfo getVersion() {
return new ServiceInfo(Versioner.getVersion(), Versioner.getRevision());
}

/**
 * Exposes the concept service base URL that this service computes from its
 * configured entity-fishing host and port.
 *
 * @return a 200 response whose entity is a single-entry map keyed by
 *         {@code conceptBaseUrl}
 */
@GET
@Path(PATH_CONFIG_CONCEPT_BASE_URL)
@Produces(MediaType.APPLICATION_JSON)
public Response getConceptServiceBaseUrl() {
    return Response
            .ok(Collections.singletonMap("conceptBaseUrl", buildConceptBaseUrl()))
            .build();
}

/**
 * Proxies a concept lookup to the concept service whose base URL is computed
 * by {@link #buildConceptBaseUrl()}.
 *
 * <p>The identifier and the optional language are supplied by the caller, so
 * they are passed to the client as URI template values rather than being
 * concatenated into the URL. This percent-encodes them, which keeps an
 * identifier from adding path segments and keeps {@code lang} from adding
 * extra query parameters to the upstream request.
 *
 * @param identifier concept identifier taken from the request path
 * @param lang       optional language code; omitted from the upstream request
 *                   when {@code null} or empty
 * @return a 200 response carrying the upstream JSON body, or a 502 response
 *         with an {@code error} entry when the upstream call fails for any reason
 */
@Path(PATH_KB_CONCEPT)
@Produces(MediaType.APPLICATION_JSON)
@GET
public Response proxyKbConcept(@PathParam("identifier") String identifier, @QueryParam("lang") String lang) {
    String base = buildConceptBaseUrl();
    // Used for logging only; refined to the full upstream URI once it has been built.
    String target = base;
    try {
        // path() inserts the '/' separator between base and segment when needed.
        jakarta.ws.rs.client.WebTarget upstream = httpClient.target(base)
                .path("{identifier}")
                .resolveTemplate("identifier", identifier);
        if (lang != null && !lang.isEmpty()) {
            // Going through a template also keeps '{' / '}' in the value from being
            // interpreted as a URI template by the client.
            upstream = upstream.queryParam("lang", "{lang}").resolveTemplate("lang", lang);
        }
        target = upstream.getUri().toString();

        String json = upstream.request(MediaType.APPLICATION_JSON_TYPE).get(String.class);
        return Response.ok(json, MediaType.APPLICATION_JSON_TYPE).build();
    } catch (Exception e) {
        LOGGER.error("Error proxying concept lookup to {}", target, e);
        return Response.status(Response.Status.BAD_GATEWAY)
                .entity(Collections.singletonMap("error", "Failed to fetch concept from upstream"))
                .build();
    }
}

/**
 * Builds the concept service base URL from the configured entity-fishing
 * host and port.
 *
 * @return the base URL, without a trailing slash, ending in
 *         {@code /service/kb/concept}
 */
private String buildConceptBaseUrl() {
    String host = serviceConfiguration != null ? serviceConfiguration.getEntityFishingHost() : null;
    String port = serviceConfiguration != null ? serviceConfiguration.getEntityFishingPort() : null;
    return composeConceptBaseUrl(host, port);
}

/**
 * Composes the concept service base URL from a raw host and port setting.
 *
 * <ul>
 *   <li>A missing, blank or authority-less host falls back to the public endpoint.</li>
 *   <li>An explicit {@code http://} or {@code https://} prefix (any case) decides the
 *       scheme; otherwise port 443 or 8443 selects {@code https} and anything else
 *       selects {@code http}.</li>
 *   <li>The port is appended only when the host does not already carry one and the
 *       port is not the default for the scheme.</li>
 *   <li>The path is forced to start with {@code /nerd}; an existing path that does
 *       not start with it is kept after the {@code /nerd} prefix.</li>
 * </ul>
 *
 * @param host configured host; may include a scheme, a port and a path; may be {@code null}
 * @param port configured port as text; may be {@code null} or blank
 * @return the base URL ending in {@code /service/kb/concept}
 */
private static String composeConceptBaseUrl(String host, String port) {
    final String publicEndpoint = "https://cloud.science-miner.com/nerd/service/kb/concept";

    // Trim before testing for emptiness so a whitespace-only setting counts as unset.
    String address = host == null ? "" : host.trim();
    String portValue = port == null ? "" : port.trim();
    if (address.isEmpty()) {
        return publicEndpoint;
    }

    // Case-insensitive prefix tests that do not depend on the default locale.
    String scheme;
    String remainder;
    if (address.regionMatches(true, 0, "https://", 0, 8)) {
        scheme = "https";
        remainder = address.substring(8);
    } else if (address.regionMatches(true, 0, "http://", 0, 7)) {
        scheme = "http";
        remainder = address.substring(7);
    } else {
        scheme = ("443".equals(portValue) || "8443".equals(portValue)) ? "https" : "http";
        remainder = address;
    }

    // The remainder may still carry a path, e.g. 'traces1.inria.fr/nerd'.
    int slash = remainder.indexOf('/');
    String hostPart = slash >= 0 ? remainder.substring(0, slash) : remainder;
    String pathPart = slash >= 0 ? remainder.substring(slash) : ""; // keeps the leading '/'
    if (hostPart.isEmpty()) {
        // e.g. "http://" or "/nerd": there is no authority to build a URL from.
        return publicEndpoint;
    }

    // A ':' only marks a port when it comes after the closing bracket of an
    // IPv6 literal such as "[::1]" (or when there is no bracket at all).
    boolean hostHasPort = hostPart.lastIndexOf(':') > hostPart.lastIndexOf(']');
    if (!hostHasPort && !portValue.isEmpty()) {
        boolean defaultForScheme = ("https".equals(scheme) && "443".equals(portValue))
                || ("http".equals(scheme) && "80".equals(portValue));
        if (!defaultForScheme) {
            hostPart = hostPart + ":" + portValue;
        }
    }

    // Ensure the path starts with '/nerd'. A non-empty pathPart always starts with '/'.
    if (pathPart.isEmpty()) {
        pathPart = "/nerd";
    } else if (!pathPart.matches("(?i)^/nerd(/.*)?$")) {
        pathPart = "/nerd" + pathPart;
    }

    // Drop trailing slashes so the suffix below never produces '//'.
    while (pathPart.endsWith("/")) {
        pathPart = pathPart.substring(0, pathPart.length() - 1);
    }

    return scheme + "://" + hostPart + pathPart + "/service/kb/concept";
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -54,4 +54,10 @@ public interface SoftwarePaths {
public static final String PATH_SOFTWARE_CONTEXT = "characterizeSoftwareContext";

public static final String PATH_VERSION = "version";

// Path exposing the concept service base URL derived from the configuration
public static final String PATH_CONFIG_CONCEPT_BASE_URL = "config/conceptBaseUrl";

// Path for proxying concept lookups through this backend
public static final String PATH_KB_CONCEPT = "kb/concept/{identifier}";
}
2 changes: 1 addition & 1 deletion src/main/java/org/grobid/trainer/ExportCorpusJson.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public void convert() {
p.parse(inputPath, handler);

} catch (Exception e) {
throw new GrobidException("An exception occured while training GROBID.", e);
throw new GrobidException("An exception occurred while training GROBID.", e);
} finally {
try {
if (writer != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public void endElement(java.lang.String uri,
}
} catch (Exception e) {
// e.printStackTrace();
throw new GrobidException("An exception occured while running Grobid.", e);
throw new GrobidException("An exception occurred while running Grobid.", e);
}
}

Expand Down Expand Up @@ -133,7 +133,7 @@ public void startElement(String namespaceURI,
}
} catch (Exception e) {
// e.printStackTrace();
throw new GrobidException("An exception occured while running Grobid.", e);
throw new GrobidException("An exception occurred while running Grobid.", e);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ public void endElement(java.lang.String uri,
allLabeledSoftwareMarkers.add(labeledSoftwareMarkers);
}
} catch (Exception e) {
throw new GrobidException("An exception occured while running Grobid.", e);
throw new GrobidException("An exception occurred while running Grobid.", e);
}
}

Expand Down Expand Up @@ -192,7 +192,7 @@ public void startElement(String namespaceURI,
}
} catch (Exception e) {
// e.printStackTrace();
throw new GrobidException("An exception occured while running Grobid.", e);
throw new GrobidException("An exception occurred while running Grobid.", e);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/main/java/org/grobid/trainer/SoftwareExtendedEval.java
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ public int createCRFPPData(final File corpusDir,
}
}
} catch (Exception e) {
throw new GrobidException("An exception occured while training GROBID.", e);
throw new GrobidException("An exception occurred while training GROBID.", e);
} finally {
try {
if (writerTraining != null)
Expand Down
12 changes: 6 additions & 6 deletions src/main/java/org/grobid/trainer/SoftwareTrainer.java
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ public int createCRFPPData(final File corpusDir,
}
}
} catch (Exception e) {
throw new GrobidException("An exception occured while training GROBID.", e);
throw new GrobidException("An exception occurred while training GROBID.", e);
} finally {
try {
if (writerTraining != null)
Expand Down Expand Up @@ -582,7 +582,7 @@ else if (currentAnnotation.getPageNumber() > token.getPage())
}
}*/
} catch (Exception e) {
throw new GrobidException("An exception occured while training GROBID.", e);
throw new GrobidException("An exception occurred while training GROBID.", e);
} finally {
try {
if (writerTraining != null)
Expand Down Expand Up @@ -722,7 +722,7 @@ else if (currentAnnotation.getPageNumber() > token.getPage())
}
crfWriter.write("\n");
} catch (Exception e) {
throw new GrobidException("An exception occured while training Grobid.", e);
throw new GrobidException("An exception occurred while training Grobid.", e);
} finally {
try {
if (crfWriter != null)
Expand Down Expand Up @@ -815,7 +815,7 @@ static public void addFeatures(List<Pair<String, String>> texts,
isSoftwarePattern = false;
}
} catch (Exception e) {
throw new GrobidException("An exception occured while running Grobid.", e);
throw new GrobidException("An exception occurred while running Grobid.", e);
}
}

Expand Down Expand Up @@ -928,7 +928,7 @@ public int selectNegativeExamples(File negativeCorpusFile, double max, File outp
writer.write(serialize(document, null));
}
} catch (Exception e) {
throw new GrobidException("An exception occured while selecting negative examples.", e);
throw new GrobidException("An exception occurred while selecting negative examples.", e);
} finally {
try {
if (writer != null)
Expand Down Expand Up @@ -1035,7 +1035,7 @@ public int randomNegativeExamples(File negativeCorpusFile, double max, File outp
writer.write(serialize(document, null));
}
} catch (Exception e) {
throw new GrobidException("An exception occured while selecting negative examples.", e);
throw new GrobidException("An exception occurred while selecting negative examples.", e);
} finally {
try {
if (writer != null)
Expand Down
Loading