Skip to content

Commit

Permalink
Updated RA Challenge script.
Browse files Browse the repository at this point in the history
  • Loading branch information
LorenzBuehmann committed May 23, 2014
1 parent e1a2f42 commit 172400b
Showing 1 changed file with 41 additions and 9 deletions.
50 changes: 41 additions & 9 deletions scripts/src/main/java/org/dllearner/scripts/RAChallenge.java
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,15 @@
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.jena.riot.Lang;
import org.apache.jena.riot.RDFDataMgr;
import org.dllearner.algorithms.celoe.CELOE;
import org.dllearner.algorithms.celoe.OEHeuristicRuntime;
import org.dllearner.algorithms.elcopy.ELLearningAlgorithm;
Expand Down Expand Up @@ -50,6 +53,9 @@

import uk.ac.manchester.cs.owl.owlapi.mansyntaxrenderer.ManchesterOWLSyntaxOWLObjectRendererImpl;

import com.hp.hpl.jena.graph.Triple;
import com.hp.hpl.jena.query.Dataset;
import com.hp.hpl.jena.query.DatasetFactory;
import com.hp.hpl.jena.query.QueryExecution;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QuerySolution;
Expand All @@ -59,9 +65,8 @@
import com.hp.hpl.jena.rdf.model.ModelFactory;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.rdf.model.Statement;
import com.hp.hpl.jena.rdf.model.StmtIterator;
import com.hp.hpl.jena.vocabulary.OWL;
import com.hp.hpl.jena.vocabulary.RDF;
import com.hp.hpl.jena.sparql.core.Quad;
import com.hp.hpl.jena.sparql.util.ModelUtils;

/**
* @author Lorenz Buehmann
Expand Down Expand Up @@ -96,16 +101,32 @@ public static void main(String[] args) throws Exception{
File[] files = dataDir.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {
return name.endsWith(".nt") || name.endsWith(".ttl") || name.endsWith(".rdf") || name.endsWith(".owl");
return name.endsWith(".nt") || name.endsWith(".ttl") || name.endsWith(".rdf") || name.endsWith(".owl") || name.endsWith(".nq");
}
});
System.out.println("loading data...");
Model model = ModelFactory.createDefaultModel();
for (File file : files) {
model.read(new FileInputStream(file), null, "TURTLE");
if(file.getName().endsWith(".nq")){
Dataset ds = DatasetFactory.createMem() ;
RDFDataMgr.read(ds, new FileInputStream(file), Lang.NQUADS) ;
// Model subModel = ds.getNamedModel("http://bio2rdf.org/bio2rdf.dataset:bio2rdf-ra.challenge-R3");
// model.add(subModel);
Iterator<Quad> find = ds.asDatasetGraph().find();
Triple t;
while(find.hasNext()){
t = find.next().asTriple();
Statement st = model.asStatement(t);
model.add(st);
}
} else {
model.read(new FileInputStream(file), null, "TURTLE");
}

}
System.out.println("done. loaded " + model.size() + " triples.");

analyzeData(model);
// analyzeData(model);

//get the positive and negative examples via SPARQL
//<http://bio2rdf.org/ra.challenge:1877000> <http://bio2rdf.org/ra.challenge_vocabulary:non-responder> "true"^^<http://www.w3.org/2001/XMLSchema#boolean> .
Expand All @@ -132,7 +153,12 @@ public boolean accept(File dir, String name) {
System.out.println("#neg examples: " + negExamples.size());

//remove triples with property non-responder
System.out.println("Pruning data...");
model.remove(model.listStatements(null, model.createProperty("http://bio2rdf.org/ra.challenge_vocabulary:non-responder"), (RDFNode)null));
model.remove(model.listStatements(null, model.createProperty("http://rdfs.org/ns/void#inDataset"), (RDFNode)null));
model.remove(model.listStatements(null, model.createProperty("http://purl.org/dc/terms/description"), (RDFNode)null));
model.remove(model.listStatements(null, model.createProperty("http://www.w3.org/2000/01/rdf-schema#label"), (RDFNode)null));
System.out.println("done. remaining " + model.size() + " triples.");

//enrich with additional data
enrich(model);
Expand Down Expand Up @@ -177,7 +203,7 @@ public boolean accept(File dir, String name) {
} else {
OEHeuristicRuntime heuristic = new OEHeuristicRuntime();
heuristic.setExpansionPenaltyFactor(0.1);
la = new CELOE(lp, rc);
la = new CELOE(lp, baseReasoner);
((CELOE) la).setHeuristic(heuristic);
((CELOE) la).setMaxExecutionTimeInSeconds(100);
((CELOE) la).setNoisePercentage(50);
Expand All @@ -188,7 +214,7 @@ public boolean accept(File dir, String name) {
RhoDRDown op = new RhoDRDown();
op.setUseHasValueConstructor(true);
op.setUseObjectValueNegation(true);
op.setReasoner(rc);
op.setReasoner(baseReasoner);
op.init();
// ((CELOE) la).setOperator(op);
}
Expand Down Expand Up @@ -239,6 +265,12 @@ private static void analyzeData(Model model){
qe = QueryExecutionFactory.create(query, model);
rs = qe.execSelect();
System.out.println(ResultSetFormatter.asText(rs));

query = "SELECT * WHERE {<http://bio2rdf.org/ra.challenge:825000> ?p ?o. "
+ "OPTIONAL{?o ?p1 ?o1.OPTIONAL{?o1 ?p2 ?o2. OPTIONAL{?o2 ?p3 ?o3.OPTIONAL{?o3 ?p4 ?o4.}}}}}";
qe = QueryExecutionFactory.create(query, model);
rs = qe.execSelect();
System.out.println(ResultSetFormatter.asText(rs));
}

private static void computeLGG(Model model, SortedSet<Individual> posExamples){
Expand Down Expand Up @@ -293,7 +325,7 @@ private static void enrich(Model model) throws MalformedURLException, FileNotFou

drugbankData.write(new FileOutputStream("drugbank.ttl"), "TURTLE", null);
model.add(drugbankData);

System.out.println("done. remaining " + model.size() + " triples.");

}
}

0 comments on commit 172400b

Please sign in to comment.