
Commit 83c0ff4

scampi committed

Added additional configuration on the DGS SPARQL query to allow setting a limit.
Corrected the query rewriting when a language or a datatype tag was used.

Signed-off-by: scampi <stephane.campinas@deri.org>
1 parent 15a2d59 commit 83c0ff4

File tree

14 files changed, +844 -539 lines changed


.gitignore

+4 -1

@@ -1 +1,4 @@
-target
+target
+.classpath
+.settings
+.project

analytics-commons/src/main/java/org/sindice/core/analytics/commons/summary/AnalyticsVocab.java

+7 -1

@@ -88,7 +88,8 @@ public enum AnalyticsVocab {
   /*
    * Analytics Graphs Names
    */
-  public static final String GRAPH_SUMMARY_GRAPH = "http://sindice.com/analytics";
+  private static final String DEFAULT_GSG = "http://sindice.com/analytics";
+  public static String GRAPH_SUMMARY_GRAPH = DEFAULT_GSG;
   public static final String DOMAIN_ANALYTICS_GRAPH = "http://sindice.com/analytics/domain";
   public static final String EXTENDED_ANALYTICS_GRAPH = "http://sindice.com/analytics/extended"; // TODO: where is this used ?

@@ -115,9 +116,14 @@ public static void setDomainUriPrefix(String dup) {
     DOMAIN_URI_PREFIX = dup;
   }

+  public static void setGraphSummaryGraph(String gsg) {
+    GRAPH_SUMMARY_GRAPH = gsg;
+  }
+
   public static void resetToDefaults() {
     DOMAIN_URI_PREFIX = DEFAULT_DUP;
     DATASET_LABEL_DEF = DatasetLabel.SECOND_LEVEL_DOMAIN;
+    GRAPH_SUMMARY_GRAPH = DEFAULT_GSG;
   }

   @Override
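
For illustration, a minimal usage sketch of the new setter based only on the members shown in this diff; the graph IRI and the class name of the sketch are made-up examples, not taken from the commit:

import org.sindice.core.analytics.commons.summary.AnalyticsVocab;

public class SummaryGraphConfigSketch {
  public static void main(String[] args) {
    // Point the vocabulary at a non-default Data Graph Summary graph (example IRI)
    AnalyticsVocab.setGraphSummaryGraph("http://example.org/my-summary-graph");
    // resetToDefaults() now also restores GRAPH_SUMMARY_GRAPH to DEFAULT_GSG
    AnalyticsVocab.resetToDefaults();
  }
}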

recommendation-servlet/src/main/java/org/sindice/analytics/queryProcessor/DGSQueryProcessor.java

+24 -1

@@ -26,6 +26,8 @@

 import org.apache.commons.lang.NotImplementedException;
 import org.openrdf.query.MalformedQueryException;
+import org.openrdf.sindice.query.parser.sparql.ast.ASTConstraint;
+import org.openrdf.sindice.query.parser.sparql.ast.ASTLimit;
 import org.openrdf.sindice.query.parser.sparql.ast.ASTQueryContainer;
 import org.openrdf.sindice.query.parser.sparql.ast.ParseException;
 import org.openrdf.sindice.query.parser.sparql.ast.SyntaxTreeBuilder;

@@ -50,10 +52,31 @@ public class DGSQueryProcessor
   private POFMetadata pofMetadata; // The Point Of Focus metadata

   @Override
-  public String getDGSQuery()
+  public String getDGSQueryWithLimit(int limit, ASTConstraint... contraints)
+  throws DGSException {
+    final ASTLimit astLimit;
+
+    if (limit != 0) {
+      if (ast.getQuery().getLimit() == null) {
+        astLimit = new ASTLimit(SyntaxTreeBuilder.LIMIT);
+        ast.getQuery().jjtAppendChild(astLimit);
+      } else {
+        astLimit = ast.getQuery().getLimit();
+      }
+      astLimit.setValue(limit);
+    }
+    return this.getDGSQuery(contraints);
+  }
+
+  @Override
+  public String getDGSQuery(ASTConstraint... contraints)
   throws DGSException {
     if (dgsQuery == null) {
       try {
+        if (contraints != null && contraints.length != 0) {
+          // TODO: add possible constraints to the query
+          ast.getQuery().getWhereClause();
+        }
         dgsQuery = AST2TextTranslator.translate(ast);
       } catch (VisitorException e) {
         throw new DGSException(e);
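
A rough usage sketch of the new method; the query string and the sketch class name are placeholders (in the servlet the query comes from the assisted editor and the processor is driven by SparqlRecommender):

import org.sindice.analytics.queryProcessor.DGSQueryProcessor;
import org.sindice.analytics.queryProcessor.QueryProcessor;

public class DGSLimitSketch {
  public static void main(String[] args) throws Exception {
    final QueryProcessor qp = new DGSQueryProcessor();
    qp.load("SELECT * WHERE { ?s ?p ?o }"); // placeholder editor query
    // With limit == 0 the translated query is left untouched; otherwise an
    // existing LIMIT clause is reused or a new one is appended to the AST.
    final String dgsQuery = qp.getDGSQueryWithLimit(100);
    System.out.println(dgsQuery);
  }
}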

recommendation-servlet/src/main/java/org/sindice/analytics/queryProcessor/PofFilterProcessor.java

+73

@@ -0,0 +1,73 @@
+package org.sindice.analytics.queryProcessor;
+
+import java.util.List;
+
+import org.openrdf.sindice.query.parser.sparql.ast.ASTBasicGraphPattern;
+import org.openrdf.sindice.query.parser.sparql.ast.ASTConstraint;
+import org.openrdf.sindice.query.parser.sparql.ast.ASTGraphPatternGroup;
+import org.openrdf.sindice.query.parser.sparql.ast.ASTQueryContainer;
+import org.openrdf.sindice.query.parser.sparql.ast.ASTRDFLiteral;
+import org.openrdf.sindice.query.parser.sparql.ast.ASTRegexExpression;
+import org.openrdf.sindice.query.parser.sparql.ast.ASTStr;
+import org.openrdf.sindice.query.parser.sparql.ast.ASTString;
+import org.openrdf.sindice.query.parser.sparql.ast.Node;
+import org.openrdf.sindice.query.parser.sparql.ast.SyntaxTreeBuilder;
+import org.openrdf.sindice.query.parser.sparql.ast.SyntaxTreeBuilderTreeConstants;
+import org.sindice.analytics.queryProcessor.QueryProcessor.POFMetadata;
+
+public class PofFilterProcessor {
+
+  private PofFilterProcessor() {
+  }
+
+  public static void process(ASTQueryContainer ast, POFMetadata meta) {
+    final ASTGraphPatternGroup gpg = ast.getQuery().getWhereClause().getGraphPatternGroup();
+
+    final List<Object> keyword = meta.pofNode.getMetadata() == null ? null : meta.pofNode
+      .getMetadata(SyntaxTreeBuilder.Keyword);
+    final List<Object> prefix = meta.pofNode.getMetadata() == null ? null : meta.pofNode
+      .getMetadata(SyntaxTreeBuilder.Prefix);
+    final List<Object> qname = meta.pofNode.getMetadata() == null ? null : meta.pofNode
+      .getMetadata(SyntaxTreeBuilder.Qname);
+
+    if (keyword != null) {
+      final ASTBasicGraphPattern bgp = addRegexFilter(meta.pofNode, keyword.get(0).toString(), true);
+      gpg.jjtAppendChild(bgp);
+    } else if (prefix != null) {
+      final ASTBasicGraphPattern bgp = addRegexFilter(meta.pofNode, "^" + prefix.get(0).toString().substring(1), true);
+      gpg.jjtAppendChild(bgp);
+    } else if (qname != null) {
+      final ASTBasicGraphPattern bgp = addRegexFilter(meta.pofNode, "^" + qname.get(0).toString(), true);
+      gpg.jjtAppendChild(bgp);
+    }
+  }
+
+  private static ASTBasicGraphPattern addRegexFilter(Node pof, String regex, boolean caseInsensitive) {
+    final ASTBasicGraphPattern bgp = new ASTBasicGraphPattern(SyntaxTreeBuilderTreeConstants.JJTBASICGRAPHPATTERN);
+    final ASTConstraint cst = new ASTConstraint(SyntaxTreeBuilderTreeConstants.JJTCONSTRAINT);
+    final ASTRegexExpression astRegex = new ASTRegexExpression(SyntaxTreeBuilderTreeConstants.JJTREGEXEXPRESSION);
+
+    // variable to test
+    final ASTStr str = new ASTStr(SyntaxTreeBuilderTreeConstants.JJTSTR);
+    str.jjtAppendChild(pof);
+    astRegex.jjtAppendChild(str);
+    // regular expression
+    final ASTString strRegex = new ASTString(SyntaxTreeBuilderTreeConstants.JJTSTRING);
+    strRegex.setValue(regex);
+    final ASTRDFLiteral rdfLiteral = new ASTRDFLiteral(SyntaxTreeBuilderTreeConstants.JJTRDFLITERAL);
+    rdfLiteral.jjtAppendChild(strRegex);
+    astRegex.jjtAppendChild(rdfLiteral);
+    // case insensitive or not
+    if (caseInsensitive) {
+      final ASTRDFLiteral ci = new ASTRDFLiteral(SyntaxTreeBuilderTreeConstants.JJTRDFLITERAL);
+      final ASTString cistr = new ASTString(SyntaxTreeBuilderTreeConstants.JJTSTRING);
+      cistr.setValue("i");
+      ci.jjtAppendChild(cistr);
+      astRegex.jjtAppendChild(ci);
+    }
+    cst.jjtAppendChild(astRegex);
+    bgp.jjtAppendChild(cst);
+    return bgp;
+  }
+
+}
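
In effect, process appends a basic graph pattern holding a regex constraint on the point-of-focus variable, so keyword and prefix matching no longer has to be applied to the results in Java (see the SparqlRecommender change below). A hand-written sketch of the intended constraint; the exact serialization is produced by AST2TextTranslator and is an assumption here:

// For a prefix such as <foaf typed at the point of focus, the appended pattern
// roughly corresponds to the SPARQL fragment:
//   { FILTER ( regex(str(?POF), "^foaf", "i") ) }
// where ?POF stands for the point-of-focus variable held in meta.pofNode.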

recommendation-servlet/src/main/java/org/sindice/analytics/queryProcessor/QueryProcessor.java

+11 -1

@@ -25,6 +25,7 @@
 import java.util.List;

 import org.openrdf.query.MalformedQueryException;
+import org.openrdf.sindice.query.parser.sparql.ast.ASTConstraint;
 import org.openrdf.sindice.query.parser.sparql.ast.ParseException;
 import org.openrdf.sindice.query.parser.sparql.ast.SimpleNode;
 import org.openrdf.sindice.query.parser.sparql.ast.SyntaxTreeBuilder;

@@ -90,7 +91,16 @@ public void load(String query, List<String> varsToProject)
   * @return
   * @throws VisitorException
   */
-  public String getDGSQuery()
+  public String getDGSQuery(ASTConstraint... contraints)
+  throws DGSException;
+
+  /**
+   * Return the Data Graph Summary query from the one passed in {@link AbstractQueryProcessor#load(String)}.
+   * Only valid after the call to load. Add a limit clause to the query.
+   * @return
+   * @throws VisitorException
+   */
+  public String getDGSQueryWithLimit(int limit, ASTConstraint... contraints)
   throws DGSException;

  /**

recommendation-servlet/src/main/java/org/sindice/analytics/queryProcessor/RDFTagRemover.java

+39

@@ -0,0 +1,39 @@
+package org.sindice.analytics.queryProcessor;
+
+import org.openrdf.sindice.query.parser.sparql.ASTVisitorBase;
+import org.openrdf.sindice.query.parser.sparql.ast.ASTIRI;
+import org.openrdf.sindice.query.parser.sparql.ast.ASTQueryContainer;
+import org.openrdf.sindice.query.parser.sparql.ast.ASTRDFLiteral;
+import org.openrdf.sindice.query.parser.sparql.ast.VisitorException;
+
+public class RDFTagRemover {
+
+  private RDFTagRemover() {
+  }
+
+  public static void remove(ASTQueryContainer ast)
+  throws VisitorException {
+    RDFTagVisitor tag = new RDFTagVisitor();
+    tag.visit(ast, null);
+  }
+
+  private static class RDFTagVisitor extends ASTVisitorBase {
+
+    @Override
+    public Object visit(ASTRDFLiteral node, Object data)
+    throws VisitorException {
+      final ASTIRI datatype;
+
+      // Remove language tag
+      node.setLang(null);
+      // Remove datatype tag
+      datatype = node.getDatatype();
+      if (datatype != null) {
+        node.removeChild(datatype);
+      }
+      return super.visit(node, data);
+    }
+
+  }
+
+}
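
The effect on the rewritten query can be sketched by hand; the literals below are illustrative, and the assumption is that the Data Graph Summary is matched against plain literals, which is why the language and datatype tags get stripped:

// before: ?s rdfs:label "pomme"@fr .   ?s ?p "42"^^xsd:int .
// after:  ?s rdfs:label "pomme" .      ?s ?p "42" .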

recommendation-servlet/src/main/java/org/sindice/analytics/queryProcessor/SparqlToDGSQuery.java

+4

@@ -49,6 +49,8 @@ public static POFMetadata process(ASTQueryContainer ast, List<String> varsToProj
     ASTVarGenerator.reset();
     // Retrieve the POF metadata
     final POFMetadata meta = PofNodesMetadata.retrieve(ast);
+    // Remove RDF tags
+    RDFTagRemover.remove(ast);
     // expand each TP into simple one: denormalize syntax sugar constructions
     DeNormalizeAST.process(ast);

@@ -68,6 +70,8 @@ public static POFMetadata process(ASTQueryContainer ast, List<String> varsToProj
     // TODO: Optimize the query by removing unnecessary parts, e.g., optional, unions
     // 3. Map to the DGS query
     SparqlTranslationProcessor.process(ast);
+    // Add DGS filters
+    PofFilterProcessor.process(ast, meta);
     return meta;
   }
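
Taken together, the two hunks slot the new classes into the existing rewriting pipeline; a comment-only sketch of the resulting order (steps outside the shown hunks are assumed unchanged):

// SparqlToDGSQuery.process(ast, ...), after this commit:
//   PofNodesMetadata.retrieve(ast)          // capture POF keyword/prefix/qname metadata
//   RDFTagRemover.remove(ast)               // new: strip language and datatype tags
//   DeNormalizeAST.process(ast)             // expand syntactic sugar into simple triple patterns
//   ...                                     // unchanged intermediate steps (not shown in the diff)
//   SparqlTranslationProcessor.process(ast) // 3. map to the DGS query
//   PofFilterProcessor.process(ast, meta)   // new: push POF regex filters into the query
//   return meta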

recommendation-servlet/src/main/java/org/sindice/analytics/servlet/AssistedSparqlEditorListener.java

+10 -2

@@ -52,8 +52,10 @@ public class AssistedSparqlEditorListener
   public static final String BACKEND = "backend";
   public static final String BACKEND_ARGS = "backendArgs";
   public static final String PAGINATION = "pagination";
+  public static final String LIMIT = "limit";
   public static final String DOMAIN_URI_PREFIX = "domainUriPrefix";
   public static final String DATASET_LABEL_DEF = "datasetLabelDef";
+  public static final String GRAPH_SUMMARY_GRAPH = "graphSummaryGraph";

   public static final String RANKING_CONFIGURATION = "rankingConfig";
   private static final String DEFAULT_RANKING = "default-ranking.yaml";

@@ -77,13 +79,19 @@ public void contextInitialized(ServletContextEvent sce) {
     final String domainUriPrefix = getParameterWithLogging(config, RECOMMENDER_WRAPPER + "." + DOMAIN_URI_PREFIX, AnalyticsVocab.DOMAIN_URI_PREFIX);
     context.setAttribute(RECOMMENDER_WRAPPER + DOMAIN_URI_PREFIX, domainUriPrefix);

+    final String gsg = getParameterWithLogging(config, RECOMMENDER_WRAPPER + "." + GRAPH_SUMMARY_GRAPH, AnalyticsVocab.GRAPH_SUMMARY_GRAPH);
+    context.setAttribute(RECOMMENDER_WRAPPER + GRAPH_SUMMARY_GRAPH, gsg);
+
     final String backend = getParameterWithLogging(config, RECOMMENDER_WRAPPER + "." + BACKEND, BackendType.HTTP.toString());
     context.setAttribute(RECOMMENDER_WRAPPER + BACKEND, backend);
     final String[] backendArgs = getParametersWithLogging(config, RECOMMENDER_WRAPPER + "." + BACKEND_ARGS, new String[] { "http://sparql.sindice.com/sparql" });
     context.setAttribute(RECOMMENDER_WRAPPER + BACKEND_ARGS, backendArgs);

-    final String limit = getParameterWithLogging(config, RECOMMENDER_WRAPPER + "." + PAGINATION, Integer.toString(SesameBackend.LIMIT));
-    context.setAttribute(RECOMMENDER_WRAPPER + PAGINATION, Integer.valueOf(limit));
+    final String pagination = getParameterWithLogging(config, RECOMMENDER_WRAPPER + "." + PAGINATION, Integer.toString(SesameBackend.LIMIT));
+    context.setAttribute(RECOMMENDER_WRAPPER + PAGINATION, Integer.valueOf(pagination));
+
+    final String limit = getParameterWithLogging(config, RECOMMENDER_WRAPPER + "." + LIMIT, "0"); // No limit by default
+    context.setAttribute(RECOMMENDER_WRAPPER + LIMIT, Integer.valueOf(limit));

     final String[] classAttributes = getParametersWithLogging(config, RECOMMENDER_WRAPPER + "." + CLASS_ATTRIBUTES, new String[] { AnalyticsClassAttributes.DEFAULT_CLASS_ATTRIBUTE });
     context.setAttribute(RECOMMENDER_WRAPPER + CLASS_ATTRIBUTES, classAttributes);

recommendation-servlet/src/main/java/org/sindice/analytics/servlet/AssistedSparqlEditorServlet.java

+9 -4

@@ -71,6 +71,7 @@ public class AssistedSparqlEditorServlet
   private final List<LabelsRanking> labelsRankings = new ArrayList<LabelsRanking>();
   private SesameBackend<Label, Context> dgsBackend = null;
   private int pagination;
+  private int limit;

   @Override
   public void init(ServletConfig config)

@@ -84,12 +85,16 @@ public void init(ServletConfig config)
     final String[] backendArgs = (String[]) config.getServletContext().getAttribute(AssistedSparqlEditorListener.RECOMMENDER_WRAPPER + AssistedSparqlEditorListener.BACKEND_ARGS);
     // The path to the ranking configuration
     final String rankingConfigPath = (String) config.getServletContext().getAttribute(AssistedSparqlEditorListener.RECOMMENDER_WRAPPER + AssistedSparqlEditorListener.RANKING_CONFIGURATION);
-    // The pagination limit
+    // The pagination value
     pagination = (Integer) config.getServletContext().getAttribute(AssistedSparqlEditorListener.RECOMMENDER_WRAPPER + AssistedSparqlEditorListener.PAGINATION);
+    // The Limit of results to be retrieved
+    limit = (Integer) config.getServletContext().getAttribute(AssistedSparqlEditorListener.RECOMMENDER_WRAPPER + AssistedSparqlEditorListener.LIMIT);
     // The ClassAttributes
     final String[] classAttributes = (String[]) config.getServletContext().getAttribute(AssistedSparqlEditorListener.RECOMMENDER_WRAPPER + AssistedSparqlEditorListener.CLASS_ATTRIBUTES);
     // Set the domain URI prefix
     AnalyticsVocab.setDomainUriPrefix((String) config.getServletContext().getAttribute(AssistedSparqlEditorListener.RECOMMENDER_WRAPPER + AssistedSparqlEditorListener.DOMAIN_URI_PREFIX));
+    // Set the graph summary graph
+    AnalyticsVocab.setGraphSummaryGraph((String) config.getServletContext().getAttribute(AssistedSparqlEditorListener.RECOMMENDER_WRAPPER + AssistedSparqlEditorListener.GRAPH_SUMMARY_GRAPH));
     // Set the dataset label definition
     AnalyticsVocab.setDatasetLabelDefinition(DatasetLabel.valueOf((String) config.getServletContext().getAttribute(AssistedSparqlEditorListener.RECOMMENDER_WRAPPER + AssistedSparqlEditorListener.DATASET_LABEL_DEF)));

@@ -105,9 +110,9 @@ public void init(ServletConfig config)
     dgsBackend = SesameBackendFactory.getDgsBackend(backend, qrp, backendArgs);
     dgsBackend.initConnection();

-    logger.info("RankingConfiguration={} Backend={} BackendArgs={} ClassAttributes={} Pagination={} DomainUriPrefix={} DatasetLabelDef={}",
+    logger.info("RankingConfiguration={} Backend={} BackendArgs={} ClassAttributes={} Pagination={} DomainUriPrefix={} DatasetLabelDef={} GraphSummaryGraph={} LIMIT={}",
       new Object[] { rankingConfigPath, backend, Arrays.toString(backendArgs), Arrays.toString(classAttributes),
-        pagination, AnalyticsVocab.DOMAIN_URI_PREFIX, AnalyticsVocab.DATASET_LABEL_DEF});
+        pagination, AnalyticsVocab.DOMAIN_URI_PREFIX, AnalyticsVocab.DATASET_LABEL_DEF, AnalyticsVocab.GRAPH_SUMMARY_GRAPH, limit});
   } catch (Exception e) {
     logger.error("Failed to start the DGS backend", e);
   }

@@ -189,7 +194,7 @@ private String computeResponse(HttpServletRequest request)
     if (request.getParameter(Protocol.QUERY_PARAM_NAME) != null) {
       final String query = URLDecoder.decode(request.getParameter(Protocol.QUERY_PARAM_NAME), "UTF-8");
       // Get recommendation
-      response = (String) SparqlRecommender.run(dgsBackend, responseWriter, query, this.labelsRankings, pagination);
+      response = (String) SparqlRecommender.run(dgsBackend, responseWriter, query, this.labelsRankings, pagination, limit);
     }
   }
   return response;

recommendation-servlet/src/main/java/org/sindice/analytics/servlet/SparqlRecommender.java

+10 -12

@@ -59,13 +59,15 @@ private SparqlRecommender() {}
    * @param query
    * @param rankings
    * @param pagination
+   * @param limit
    * @return
    */
   public static <C> C run(SesameBackend<Label, DGSQueryResultProcessor.Context> dgsBackend,
                           ResponseWriter<C> response,
                           String query,
                           List<LabelsRanking> rankings,
-                          int pagination) {
+                          int pagination,
+                          int limit) {
     // TODO: Support queries with multiple FROM clauses
     RecommendationType recommendationType = RecommendationType.NONE;

@@ -85,7 +87,6 @@ public static <C> C run(SesameBackend<Label, DGSQueryResultProcessor.Context> dg
      */
     final QueryProcessor qp = new DGSQueryProcessor();
     qp.load(query);
-    final String dgsQuery = qp.getDGSQuery();

     meta = qp.getPofASTMetadata();
     final List<Object> keyword = meta.pofNode.getMetadata() == null ? null : meta.pofNode

@@ -96,6 +97,12 @@ public static <C> C run(SesameBackend<Label, DGSQueryResultProcessor.Context> dg
       .getMetadata(SyntaxTreeBuilder.Qname);
     recommendationType = qp.getRecommendationType();

+    final String dgsQuery;
+    if (keyword != null || prefix != null | qname != null) {
+      dgsQuery = qp.getDGSQuery();
+    } else {
+      dgsQuery = qp.getDGSQueryWithLimit(limit);
+    }
     logger.debug("RecommendationType: {}\nDGS query: [{}]", recommendationType, dgsQuery);
     if (!recommendationType.equals(RecommendationType.NONE)) {
       /*

@@ -108,16 +115,7 @@ public static <C> C run(SesameBackend<Label, DGSQueryResultProcessor.Context> dg
     while (qrp.hasNext()) {
       final Label label = qrp.next();

-      // filter If the POF contains keyword or Prefix metadata
-      // toString(), but the Object is an instance of String, so it is OK
-      if (keyword != null && !label.getLabel().toLowerCase().contains(keyword.get(0).toString())) { // case insensitive: keyword is lowercased
-        continue;
-      }
-      if (prefix != null &&
-          !label.getLabel().toLowerCase().startsWith(prefix.get(0).toString().substring(1))) { // case insensitive: prefix is lowercased
-        // the first character is the opening <
-        continue;
-      }
+      // QName filtering
       if (qname != null) {
         final String value = qname.get(0).toString();
         if (!label.getLabel().startsWith(value)) {

recommendation-servlet/src/main/resources/default-config.xml

+10

@@ -3,6 +3,16 @@

 <recommender>
   <classAttributes>http://www.w3.org/1999/02/22-rdf-syntax-ns#type</classAttributes>
+  <classAttributes>http://opengraphprotocol.org/schema/type</classAttributes>
+  <classAttributes>http://opengraph.org/schema/type</classAttributes>
+  <classAttributes>http://ogp.me/ns#type</classAttributes>
+  <classAttributes>http://purl.org/dc/elements/1.1/type</classAttributes>
+  <classAttributes>http://purl.org/stuff/rev#type</classAttributes>
+  <classAttributes>http://purl.org/dc/terms/type</classAttributes>
+  <classAttributes>http://dbpedia.org/property/type</classAttributes>
+  <classAttributes>http://dbpedia.org/ontology/type</classAttributes>
+  <classAttributes>http://dbpedia.org/ontology/Organisation/type</classAttributes>
+  <classAttributes>http://xmlns.com/foaf/0.1/type</classAttributes>

   <backend>HTTP</backend>
   <backendArgs>http://path/to/summary/repository</backendArgs>

0 commit comments
