From 53f0e1363af00bc58b80f056aabf8d5de21f4900 Mon Sep 17 00:00:00 2001
From: Kris Geusebroek
Date: Tue, 3 Jul 2012 11:31:12 +0200
Subject: [PATCH 01/11] Creating nodes and halfway relationships

---
 .../graphs/neo/ByteBufferOutputFormat.java    |  54 +++++
 .../graphs/neo/ByteBufferScheme.java          |  50 ++++
 .../graphs/neo/ByteBufferTestJob.java         | 229 ++++++++++++++++++
 .../nl/waredingen/graphs/neo/NeoGraphJob.java | 223 +++++++++++++++++
 4 files changed, 556 insertions(+)
 create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/ByteBufferOutputFormat.java
 create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/ByteBufferScheme.java
 create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/ByteBufferTestJob.java
 create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/NeoGraphJob.java

diff --git a/job/src/main/java/nl/waredingen/graphs/neo/ByteBufferOutputFormat.java b/job/src/main/java/nl/waredingen/graphs/neo/ByteBufferOutputFormat.java
new file mode 100644
index 0000000..a600ef6
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/ByteBufferOutputFormat.java
@@ -0,0 +1,54 @@
+package nl.waredingen.graphs.neo;
+
+import java.io.DataOutputStream;
+import java.io.IOException;
+
+import org.apache.hadoop.fs.FSDataOutputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.FileOutputFormat;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordWriter;
+import org.apache.hadoop.mapred.Reporter;
+import org.apache.hadoop.util.Progressable;
+
+public class ByteBufferOutputFormat<K, V> extends FileOutputFormat<K, V> {
+
+	protected static class ByteRecordWriter<K, V> implements RecordWriter<K, V> {
+		private DataOutputStream out;
+
+		public ByteRecordWriter(DataOutputStream out) {
+			this.out = out;
+		}
+
+		public void write(K key, V value) throws IOException {
+			boolean nullValue = value == null || value instanceof NullWritable;
+			if (!nullValue) {
+				BytesWritable bw = (BytesWritable) value;
+				out.write(bw.get(), 0, bw.getSize());
+			}
+		}
+
+		@Override
+		public void close(Reporter reporter) throws IOException {
+			out.close();
+		}
+
+	}
+
+	@Override
+	public RecordWriter<K, V> getRecordWriter(
+			FileSystem ignored, JobConf job, String name, Progressable progress)
+			throws IOException {
+		Path path = FileOutputFormat.getTaskOutputPath(job, name);
+
+		// create the file in the file system
+		FileSystem fs = path.getFileSystem(job);
+		FSDataOutputStream fileOut = fs.create(path, progress);
+
+		// create our record writer with the new file
+		return new ByteRecordWriter<K, V>(new DataOutputStream(fileOut));
+	}
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/ByteBufferScheme.java b/job/src/main/java/nl/waredingen/graphs/neo/ByteBufferScheme.java
new file mode 100644
index 0000000..5ed10fe
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/ByteBufferScheme.java
@@ -0,0 +1,50 @@
+package nl.waredingen.graphs.neo;
+
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.OutputCollector;
+
+import cascading.scheme.Scheme;
+import cascading.tap.Tap;
+import cascading.tuple.Fields;
+import cascading.tuple.Tuple;
+import cascading.tuple.TupleEntry;
+
+@SuppressWarnings("serial")
+public class ByteBufferScheme extends Scheme {
+
+	@Override
+	public void 
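/*
 * sink() below takes the first tuple field as a java.nio.ByteBuffer and
 * hands its backing array to Hadoop as a BytesWritable under a
 * NullWritable key, so ByteBufferOutputFormat writes nothing but the raw
 * record bytes. This relies on a contract the record creators in this
 * patch keep: every buffer is allocated at exactly the record size and
 * written in full, e.g. (illustration only, not code from this patch):
 *
 *   ByteBuffer bb = ByteBuffer.allocate(9);          // node record size
 *   bb.put(inUse).putInt(nextRel).putInt(nextProp);  // fills all 9 bytes
 *   bb.array();                                      // == the record bytes
 *
 * A partially written buffer would leak zero padding into the store file.
 */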
sink(TupleEntry tupleEntry, OutputCollector outputCollector) + throws IOException { + Fields sinkFields = getSinkFields(); + Tuple result = sinkFields != null ? tupleEntry.selectTuple(sinkFields) : tupleEntry.getTuple(); + + ByteBuffer bb = (ByteBuffer) result.getObject(0); + byte[] ba = bb.array(); + BytesWritable bw = new BytesWritable(); + bw.set(ba, 0, ba.length); + outputCollector.collect(NullWritable.get(), bw); + } + + @Override + public void sinkInit(Tap tap, JobConf jobconf) throws IOException { + jobconf.setOutputFormat(ByteBufferOutputFormat.class); + } + + @Override + public Tuple source(Object obj, Object obj1) { + // TODO Auto-generated method stub + return null; + } + + @Override + public void sourceInit(Tap tap, JobConf jobconf) throws IOException { + // TODO Auto-generated method stub + + } + +} diff --git a/job/src/main/java/nl/waredingen/graphs/neo/ByteBufferTestJob.java b/job/src/main/java/nl/waredingen/graphs/neo/ByteBufferTestJob.java new file mode 100644 index 0000000..69e3799 --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/ByteBufferTestJob.java @@ -0,0 +1,229 @@ +package nl.waredingen.graphs.neo; + +import java.nio.Buffer; +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Properties; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.neo4j.kernel.impl.nioneo.store.NodeRecord; +import org.neo4j.kernel.impl.nioneo.store.Record; + +import cascading.flow.Flow; +import cascading.flow.FlowConnector; +import cascading.flow.FlowProcess; +import cascading.operation.BaseOperation; +import cascading.operation.BufferCall; +import cascading.operation.Function; +import cascading.operation.FunctionCall; +import cascading.operation.aggregator.First; +import cascading.pipe.CoGroup; +import cascading.pipe.Each; +import cascading.pipe.Every; +import cascading.pipe.GroupBy; +import cascading.pipe.Pipe; +import cascading.pipe.assembly.Unique; +import cascading.pipe.cogroup.InnerJoin; +import cascading.scheme.Scheme; +import cascading.scheme.TextDelimited; +import cascading.scheme.TextLine; +import cascading.tap.GlobHfs; +import cascading.tap.Hfs; +import cascading.tap.SinkMode; +import cascading.tap.Tap; +import cascading.tuple.Fields; +import cascading.tuple.Tuple; +import cascading.tuple.TupleEntry; + +public class ByteBufferTestJob { + + public static final long NUMBER_OF_PROPERTIES_PER_NODE = 1L; + private static final Log LOG = LogFactory.getLog(ByteBufferTestJob.class); + + public static int runJob(String nodesFile, String edgesFile, String output) { + + Scheme nodesScheme = new TextDelimited(new Fields("id", "name", "rownum"), "\t"); + Tap nodeSource = new GlobHfs(nodesScheme, nodesFile); + + Scheme edgesScheme = new TextDelimited(new Fields("from", "to", "rownum"), "\t"); + Tap edgeSource = new GlobHfs(edgesScheme, edgesFile); + + Map sourceMap = new HashMap(2); + sourceMap.put("nodes", nodeSource); + sourceMap.put("edges", edgeSource); + + Scheme graphNodesScheme = new ByteBufferScheme(); + Tap nodesSink = new Hfs(graphNodesScheme, output + "/neostore.nodestore.db", SinkMode.REPLACE); + + Scheme graphEdgesScheme = new TextLine(new Fields("edge")); + Tap edgesSink = new Hfs(graphEdgesScheme, output + "/neostore.relationshipstore.db", SinkMode.REPLACE); + + Map sinkMap = new HashMap(2); + sinkMap.put("graphnodes", nodesSink); + sinkMap.put("graphedges", edgesSink); + + // Pipe graph = new Pipe("graph.db"); + + Pipe nodesPipe = new 
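/*
 * Pipe topology assembled below (sketch):
 *
 *   nodes --+--> fromjoin (CoGroup on id = from) --+--> GroupBy(id; relnum desc) --> First --> node records
 *           |                                      |
 *   edges --+--> tojoin   (CoGroup on id = to)   --+--> GroupBy(id; relnum desc) --> Unique --> RelationshipRownumBuffer --> edge records
 *
 * Every node is joined against both endpoints of its edges; taking First
 * after the descending sort on relnum gives each node the highest
 * relationship number touching it, which becomes the head of that node's
 * relationship chain in the store.
 */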
Pipe("nodes");
+		Pipe edgesPipe = new Pipe("edges");
+
+		Pipe fromjoin = new CoGroup("fromjoin", nodesPipe, new Fields("id"), edgesPipe, new Fields("from"), new Fields(
+				"id", "name", "rownum", "from", "to", "relnum"), new InnerJoin());
+		Pipe tojoin = new CoGroup("tojoin", nodesPipe, new Fields("id"), edgesPipe, new Fields("to"), new Fields("id",
+				"name", "rownum", "from", "to", "relnum"), new InnerJoin());
+
+		Pipe graphNodesJoinPipe = new GroupBy(Pipe.pipes(fromjoin, tojoin), new Fields("id"), new Fields("relnum"),
+				true);
+
+		Pipe graphNodesPipe = new Every(graphNodesJoinPipe, new First(), Fields.RESULTS);
+		graphNodesPipe = new GroupBy("graphnodes", graphNodesPipe, new Fields("rownum"));
+		graphNodesPipe = new Each(graphNodesPipe, new NodeRecordCreator(), Fields.RESULTS);
+
+		Pipe graphEdgesJoinPipe = new GroupBy(Pipe.pipes(fromjoin, tojoin), new Fields("id"), new Fields("relnum"),
+				true);
+
+		graphEdgesJoinPipe = new Unique(graphEdgesJoinPipe, new Fields("id", "from", "to", "relnum"));
+		Pipe graphEdgesPipe = new Every(graphEdgesJoinPipe, new RelationshipRownumBuffer(), Fields.RESULTS);
+
+
+		// TODO make sure it's sorted on relnum
+		// graphEdgesPipe = new GroupBy("graphedges", graphEdgesPipe, new
+		// Fields("relnum"));
+		// TODO for now it's more helpful to sort on id,relnum for visual checking
+		graphEdgesPipe = new GroupBy("graphedges", graphEdgesPipe, new Fields("id", "relnum"));
+		//
+
+		Properties properties = new Properties();
+		FlowConnector.setApplicationJarClass(properties, ByteBufferTestJob.class);
+
+		FlowConnector flowConnector = new FlowConnector(properties);
+		Flow flow = flowConnector.connect(sourceMap, sinkMap, graphNodesPipe, graphEdgesPipe);
+		flow.writeDOT("flow.dot");
+
+		flow.complete();
+
+		return 0;
+	}
+
+	@SuppressWarnings({ "serial", "rawtypes" })
+	private static class NodeRecordCreator extends BaseOperation implements Function {
+		public NodeRecordCreator() {
+			super(new Fields("node"));
+		}
+
+		@Override
+		public void operate(FlowProcess flow, FunctionCall call) {
+			TupleEntry arguments = call.getArguments();
+			long relnum = arguments.getLong("relnum");
+			long id = arguments.getLong("rownum");
+			if (id == 0L) {
+				call.getOutputCollector().add(
+						new Tuple(getNodeAsBuffer(id, Record.NO_NEXT_RELATIONSHIP.intValue(),
+								Record.NO_NEXT_PROPERTY.intValue())));
+			}
+			call.getOutputCollector().add(
+					new Tuple(getNodeAsBuffer(id + 1L, relnum, id * NUMBER_OF_PROPERTIES_PER_NODE)));
+
+		}
+
+		private Buffer getNodeAsBuffer(long id, long relnum, long prop) {
+			ByteBuffer buffer = ByteBuffer.allocate(9);
+
+			NodeRecord nr = new NodeRecord(id, relnum, prop);
+			nr.setInUse(true);
+			nr.setCreated();
+
+			LOG.debug(nr.toString());
+
+			long nextRel = nr.getNextRel();
+			long nextProp = nr.getNextProp();
+
+			short relModifier = nextRel == Record.NO_NEXT_RELATIONSHIP.intValue() ? 0
+					: (short) ((nextRel & 0x700000000L) >> 31);
+			short propModifier = nextProp == Record.NO_NEXT_PROPERTY.intValue() ? 0
+					: (short) ((nextProp & 0xF00000000L) >> 28);
+
+			// [    ,   x] in use bit
+			// [    ,xxx ] higher bits for rel id
+			// [xxxx,    ] higher bits for prop id
+			short inUseUnsignedByte = (nr.inUse() ? 
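/*
 * Worked example of the packing below, with illustrative values
 * nextRel = 0x123456789L and nextProp = NO_NEXT_PROPERTY:
 *
 *   relModifier  = (0x123456789L & 0x700000000L) >> 31 = 2
 *                  (bits 32-34 of the rel id move into header bits 1-3)
 *   propModifier = 0
 *   header byte  = inUse(1) | 2 | 0 = 0x03
 *
 * followed by the low 32 bits of nextRel and of nextProp: 9 bytes total,
 * the Neo4j 1.x node store record [pppp,rrr,i][rel int][prop int].
 */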
Record.IN_USE : Record.NOT_IN_USE).byteValue(); + inUseUnsignedByte = (short) (inUseUnsignedByte | relModifier | propModifier); + + buffer.put((byte) inUseUnsignedByte).putInt((int) nextRel).putInt((int) nextProp); + return buffer; + + } + } + + private static class RelationshipRownumContext { + long id = -1L, from = -1L, to = -1L, relnum = -1L, prev = -1L, next = -1L; + + public RelationshipRownumContext() { + } + + public Tuple asTuple() { + return new Tuple(id, from, to, relnum, prev, next); + } + + @Override + public String toString() { + return "" + id + ";" + from + ":" + to + ":" + relnum + ":" + prev + ":" + next; + } + } + + @SuppressWarnings({ "serial" }) + private static class RelationshipRownumBuffer extends BaseOperation implements + cascading.operation.Buffer { + public RelationshipRownumBuffer() { + super(new Fields("id", "from", "to", "relnum", "prev", "next")); + } + + @Override + public void operate(FlowProcess flow, BufferCall call) { + RelationshipRownumContext context = new RelationshipRownumContext(); + + // get all the current argument values for this grouping + Iterator arguments = call.getArgumentsIterator(); + + while (arguments.hasNext()) { + + TupleEntry entry = arguments.next(); + long id = entry.getLong("id"); + long from = entry.getLong("from"); + long to = entry.getLong("to"); + long relnum = entry.getLong("relnum"); + if (context.id == -1L) { + // first call, so set current fields + context.id = id; + context.from = from; + context.to = to; + context.relnum = relnum; + context.prev = -1L; // don't know yet + context.next = -1L; // first call, relationships ordered descending, so last rel, so no next available + } else if (context.prev == -1L) { + // not the first so current relationship will become prev in + // context and context can be emitted and refilled with + // current + context.prev = relnum; + call.getOutputCollector().add(context.asTuple()); + long next = context.relnum; + context.id = id; + context.from = from; + context.to = to; + context.relnum = relnum; + context.prev = -1L; // don't know yet + context.next = next; + } + + } + // write out last context + call.getOutputCollector().add(context.asTuple()); + + } + } +} diff --git a/job/src/main/java/nl/waredingen/graphs/neo/NeoGraphJob.java b/job/src/main/java/nl/waredingen/graphs/neo/NeoGraphJob.java new file mode 100644 index 0000000..68f81a5 --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/NeoGraphJob.java @@ -0,0 +1,223 @@ +package nl.waredingen.graphs.neo; + +import java.nio.Buffer; +import java.nio.ByteBuffer; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.Properties; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.neo4j.kernel.impl.nioneo.store.NodeRecord; +import org.neo4j.kernel.impl.nioneo.store.Record; + +import cascading.flow.Flow; +import cascading.flow.FlowConnector; +import cascading.flow.FlowProcess; +import cascading.operation.BaseOperation; +import cascading.operation.BufferCall; +import cascading.operation.Function; +import cascading.operation.FunctionCall; +import cascading.operation.aggregator.First; +import cascading.pipe.CoGroup; +import cascading.pipe.Each; +import cascading.pipe.Every; +import cascading.pipe.GroupBy; +import cascading.pipe.Pipe; +import cascading.pipe.assembly.Unique; +import cascading.pipe.cogroup.InnerJoin; +import cascading.scheme.Scheme; +import cascading.scheme.TextDelimited; +import cascading.scheme.TextLine; +import 
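/*
 * RelationshipRownumBuffer above turns each node's relationship rows,
 * which arrive sorted descending on relnum, into a doubly linked chain.
 * For one node with relnums 7, 4, 2 (illustrative) it emits:
 *
 *   relnum=7: prev=4,  next=-1   (head: no newer relationship)
 *   relnum=4: prev=2,  next=7
 *   relnum=2: prev=-1, next=4    (tail, flushed after the loop)
 *
 * "prev" is filled in from the following, smaller relnum and "next" from
 * the previously seen, larger one.
 */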
cascading.tap.GlobHfs; +import cascading.tap.Hfs; +import cascading.tap.SinkMode; +import cascading.tap.Tap; +import cascading.tuple.Fields; +import cascading.tuple.Tuple; +import cascading.tuple.TupleEntry; + +public class NeoGraphJob { + + public static final long NUMBER_OF_PROPERTIES_PER_NODE = 1L; + private static final Log LOG = LogFactory.getLog(NeoGraphJob.class); + + public static int runJob(String nodesFile, String edgesFile, String output) { + + Scheme nodesScheme = new TextDelimited(new Fields("id", "name", "rownum"), "\t"); + Tap nodeSource = new GlobHfs(nodesScheme, nodesFile); + + Scheme edgesScheme = new TextDelimited(new Fields("from", "to", "rownum"), "\t"); + Tap edgeSource = new GlobHfs(edgesScheme, edgesFile); + + Map sourceMap = new HashMap(2); + sourceMap.put("nodes", nodeSource); + sourceMap.put("edges", edgeSource); + + Scheme graphNodesScheme = new ByteBufferScheme(); + Tap nodesSink = new Hfs(graphNodesScheme, output + "/neostore.nodestore.db", SinkMode.REPLACE); + + Scheme graphEdgesScheme = new TextLine(new Fields("edge")); + Tap edgesSink = new Hfs(graphEdgesScheme, output + "/neostore.relationshipstore.db", SinkMode.REPLACE); + + Map sinkMap = new HashMap(2); + sinkMap.put("graphnodes", nodesSink); + sinkMap.put("graphedges", edgesSink); + + // Pipe graph = new Pipe("graph.db"); + + Pipe nodesPipe = new Pipe("nodes"); + Pipe edgesPipe = new Pipe("edges"); + + Pipe fromjoin = new CoGroup("fromjoin", nodesPipe, new Fields("id"), edgesPipe, new Fields("from"), new Fields( + "id", "name", "rownum", "from", "to", "relnum"), new InnerJoin()); + Pipe tojoin = new CoGroup("tojoin", nodesPipe, new Fields("id"), edgesPipe, new Fields("to"), new Fields("id", + "name", "rownum", "from", "to", "relnum"), new InnerJoin()); + + Pipe graphNodesJoinPipe = new GroupBy(Pipe.pipes(fromjoin, tojoin), new Fields("id"), new Fields("relnum"), + true); + + Pipe graphNodesPipe = new Every(graphNodesJoinPipe, new First(), Fields.RESULTS); + graphNodesPipe = new GroupBy("graphnodes", graphNodesPipe, new Fields("rownum")); + graphNodesPipe = new Each(graphNodesPipe, new NodeRecordCreator(), Fields.RESULTS); + + Pipe graphEdgesJoinPipe = new GroupBy(Pipe.pipes(fromjoin, tojoin), new Fields("id"), new Fields("relnum"), + true); + + graphEdgesJoinPipe = new Unique(graphEdgesJoinPipe, new Fields("id", "from", "to", "relnum")); + Pipe graphEdgesPipe = new Every(graphEdgesJoinPipe, new RelationshipRownumBuffer(), Fields.RESULTS); + // graphEdgesPipe = new GroupBy("graphedges", graphEdgesPipe, new + // Fields("relnum")); + graphEdgesPipe = new GroupBy("graphedges", graphEdgesPipe, new Fields("id", "relnum")); + // + + Properties properties = new Properties(); + FlowConnector.setApplicationJarClass(properties, NeoGraphJob.class); + + FlowConnector flowConnector = new FlowConnector(properties); + Flow flow = flowConnector.connect(sourceMap, sinkMap, graphNodesPipe, graphEdgesPipe); + flow.writeDOT("flow.dot"); + + flow.complete(); + + return 0; + } + + @SuppressWarnings({ "serial", "rawtypes" }) + private static class NodeRecordCreator extends BaseOperation implements Function { + public NodeRecordCreator() { + super(new Fields("node")); + } + + @Override + public void operate(FlowProcess flow, FunctionCall call) { + TupleEntry arguments = call.getArguments(); + long relnum = arguments.getLong("relnum"); + long id = arguments.getLong("rownum"); + if (id == 0L) { + call.getOutputCollector().add( + new Tuple(getNodeAsBuffer(id, Record.NO_NEXT_RELATIONSHIP.intValue(), + 
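/*
 * Node ids are shifted by one here: when the row with rownum 0 arrives,
 * an empty record for node 0 is emitted first, because a fresh Neo4j 1.x
 * database pre-creates node 0 as its reference node. Every imported node
 * therefore lands at store id = rownum + 1.
 */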
Record.NO_NEXT_PROPERTY.intValue()))); + } + call.getOutputCollector().add( + new Tuple(getNodeAsBuffer(id + 1L, relnum, id * NUMBER_OF_PROPERTIES_PER_NODE))); + + } + + private Buffer getNodeAsBuffer(long id, long relnum, long prop) { + ByteBuffer buffer = ByteBuffer.allocate(9); + + NodeRecord nr = new NodeRecord(id, relnum, prop); + nr.setInUse(true); + nr.setCreated(); + + LOG.debug(nr.toString()); + + long nextRel = nr.getNextRel(); + long nextProp = nr.getNextProp(); + + short relModifier = nextRel == Record.NO_NEXT_RELATIONSHIP.intValue() ? 0 + : (short) ((nextRel & 0x700000000L) >> 31); + short propModifier = nextProp == Record.NO_NEXT_PROPERTY.intValue() ? 0 + : (short) ((nextProp & 0xF00000000L) >> 28); + + // [ , x] in use bit + // [ ,xxx ] higher bits for rel id + // [xxxx, ] higher bits for prop id + short inUseUnsignedByte = (nr.inUse() ? Record.IN_USE : Record.NOT_IN_USE).byteValue(); + inUseUnsignedByte = (short) (inUseUnsignedByte | relModifier | propModifier); + + buffer.put((byte) inUseUnsignedByte).putInt((int) nextRel).putInt((int) nextProp); + return buffer; + + } + } + + private static class RelationshipRownumContext { + long id = -1L, from = -1L, to = -1L, relnum = -1L, prev = -1L, next = -1L; + + public RelationshipRownumContext() { + } + + public Tuple asTuple() { + return new Tuple(id, from, to, relnum, prev, next); + } + + @Override + public String toString() { + return "" + id + ";" + from + ":" + to + ":" + relnum + ":" + prev + ":" + next; + } + } + + @SuppressWarnings({ "serial" }) + private static class RelationshipRownumBuffer extends BaseOperation implements + cascading.operation.Buffer { + public RelationshipRownumBuffer() { + super(new Fields("id", "from", "to", "relnum", "prev", "next")); + } + + @Override + public void operate(FlowProcess flow, BufferCall call) { + RelationshipRownumContext context = new RelationshipRownumContext(); + + // get all the current argument values for this grouping + Iterator arguments = call.getArgumentsIterator(); + + while (arguments.hasNext()) { + + TupleEntry entry = arguments.next(); + long id = entry.getLong("id"); + long from = entry.getLong("from"); + long to = entry.getLong("to"); + long relnum = entry.getLong("relnum"); + if (context.id == -1L) { + // first call, so set current fields + context.id = id; + context.from = from; + context.to = to; + context.relnum = relnum; + context.prev = -1L; // don't know yet + context.next = -1L; // first call, relationships ordered descending, so last rel, so no next available + } else if (context.prev == -1L) { + // not the first so current relationship will become prev in + // context and context can be emitted and refilled with + // current + context.prev = relnum; + call.getOutputCollector().add(context.asTuple()); + long next = context.relnum; + context.id = id; + context.from = from; + context.to = to; + context.relnum = relnum; + context.prev = -1L; // don't know yet + context.next = next; + } + + } + // write out last context + call.getOutputCollector().add(context.asTuple()); + + } + } +} From 70bb180b1fb27fddcc8c06fe8437b2c65016f19d Mon Sep 17 00:00:00 2001 From: Kris Geusebroek Date: Tue, 3 Jul 2012 11:32:42 +0200 Subject: [PATCH 02/11] Upgrade to nea 1.8M05 and added graph creation job --- job/pom.xml | 2 +- job/src/main/java/nl/waredingen/graphs/Main.java | 6 ++++++ .../nl/waredingen/graphs/importer/Neo4jImportJob.java | 8 ++++---- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/job/pom.xml b/job/pom.xml index 079e7ad..98d6eed 100644 --- 
a/job/pom.xml +++ b/job/pom.xml @@ -57,7 +57,7 @@ org.neo4j neo4j - 1.6 + 1.8.M05 junit @@ -43,6 +48,12 @@ 4.8 test + + org.apache.hadoop + hadoop-mrunit + 0.20.2-cdh3u2 + test + org.easymock easymock diff --git a/job/src/main/java/nl/waredingen/graphs/Main.java b/job/src/main/java/nl/waredingen/graphs/Main.java index cb82988..a6c67ae 100644 --- a/job/src/main/java/nl/waredingen/graphs/Main.java +++ b/job/src/main/java/nl/waredingen/graphs/Main.java @@ -5,6 +5,7 @@ import nl.waredingen.graphs.misc.RowNumberJob; import nl.waredingen.graphs.neo.NeoGraphEdgesJob; import nl.waredingen.graphs.neo.NeoGraphNodesJob; +import nl.waredingen.graphs.neo.mapreduce.PureMRNodesAndEdgesJob; import nl.waredingen.graphs.partition.IterateJob; import nl.waredingen.graphs.partition.IterateWithFlagsJob; import nl.waredingen.graphs.partition.PrepareJob; @@ -40,11 +41,13 @@ public int run(String[] args) throws Exception { } else if (args[0].equalsIgnoreCase("prepare-bgp")) { return PrepareBgpGraphJob.runJob(args[1], args[2], args[3]); } else if (args[0].equalsIgnoreCase("rownumbers")) { - return RowNumberJob.run(args[1], args[2], getConf()); + return (int) RowNumberJob.run(args[1], args[2], getConf()); } else if (args[0].equalsIgnoreCase("neographnodes")) { return NeoGraphNodesJob.runJob(args[1], args[2], args[3]); } else if (args[0].equalsIgnoreCase("neographedges")) { return NeoGraphEdgesJob.runJob(args[1], args[2], args[3]); + } else if (args[0].equalsIgnoreCase("neograph")) { + return PureMRNodesAndEdgesJob.run(args[1], args[2], args[3], getConf()); } else { System.err.println("Wrong arguments!"); System.exit(1); diff --git a/job/src/main/java/nl/waredingen/graphs/misc/RowNumberJob.java b/job/src/main/java/nl/waredingen/graphs/misc/RowNumberJob.java index ab2694a..9706e76 100644 --- a/job/src/main/java/nl/waredingen/graphs/misc/RowNumberJob.java +++ b/job/src/main/java/nl/waredingen/graphs/misc/RowNumberJob.java @@ -9,6 +9,7 @@ import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.RawComparator; import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapred.Task.Counter; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; @@ -21,7 +22,7 @@ public class RowNumberJob { public final static byte COUNTER_MARKER = (byte) 'T'; public final static byte VALUE_MARKER = (byte) 'W'; - public static int run(String input, String output, Configuration conf) { + public static long run(String input, String output, Configuration conf) { try { Job job = new Job(conf, "Row number generator job."); job.setGroupingComparatorClass(IndifferentComparator.class); @@ -44,13 +45,13 @@ public static int run(String input, String output, Configuration conf) { job.setJarByClass(RowNumberJob.class); job.waitForCompletion(true); + + return job.getCounters().findCounter(Counter.REDUCE_OUTPUT_RECORDS).getValue(); } catch (Exception e) { System.err.println(e.getMessage()); e.printStackTrace(System.err); return 1; } - - return 0; } static class RowNumberMapper extends Mapper { diff --git a/job/src/main/java/nl/waredingen/graphs/neo/NewByteBufferOutputFormat.java b/job/src/main/java/nl/waredingen/graphs/neo/NewByteBufferOutputFormat.java deleted file mode 100644 index b0624c8..0000000 --- a/job/src/main/java/nl/waredingen/graphs/neo/NewByteBufferOutputFormat.java +++ /dev/null @@ -1,55 +0,0 @@ -package nl.waredingen.graphs.neo; - -import java.io.DataOutputStream; -import java.io.IOException; - -import org.apache.hadoop.conf.Configuration; -import 
org.apache.hadoop.fs.FSDataOutputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; - -public class NewByteBufferOutputFormat extends FileOutputFormat { - - protected static class ByteRecordWriter extends RecordWriter { - private DataOutputStream out; - - public ByteRecordWriter(DataOutputStream out) { - this.out = out; - } - - @SuppressWarnings("deprecation") - @Override - public void write(K key, V value) throws IOException, InterruptedException { - boolean nullValue = value == null || value instanceof NullWritable; - if (!nullValue) { - BytesWritable bw = (BytesWritable) value; - out.write(bw.get(), 0, bw.getSize()); - } - } - - @Override - public void close(TaskAttemptContext context) throws IOException, InterruptedException { - out.close(); - } - - } - - @Override - public RecordWriter getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException { - // TODO Auto-generated method stub - Configuration conf = job.getConfiguration(); - - Path file = getDefaultWorkFile(job, ""); - FileSystem fs = file.getFileSystem(conf); - - FSDataOutputStream fileOut = fs.create(file, false); - - return new ByteRecordWriter(new DataOutputStream(fileOut)); - } - -} diff --git a/job/src/main/java/nl/waredingen/graphs/neo/PureMRNodesAndEdgesJob.java b/job/src/main/java/nl/waredingen/graphs/neo/PureMRNodesAndEdgesJob.java deleted file mode 100644 index a165040..0000000 --- a/job/src/main/java/nl/waredingen/graphs/neo/PureMRNodesAndEdgesJob.java +++ /dev/null @@ -1,239 +0,0 @@ -package nl.waredingen.graphs.neo; - -import nl.waredingen.graphs.misc.RowNumberJob; -import nl.waredingen.graphs.neo.mapreduce.edges.EdgeOutputMapper; -import nl.waredingen.graphs.neo.mapreduce.edges.EdgeOutputReducer; -import nl.waredingen.graphs.neo.mapreduce.edges.surround.AscLongDescLongWritable; -import nl.waredingen.graphs.neo.mapreduce.edges.surround.AscLongDescLongWritablePartitioner; -import nl.waredingen.graphs.neo.mapreduce.edges.surround.EdgeSurroundKeyComparator; -import nl.waredingen.graphs.neo.mapreduce.edges.surround.EdgeSurroundKeyGroupingComparator; -import nl.waredingen.graphs.neo.mapreduce.edges.surround.EdgeSurroundMapper; -import nl.waredingen.graphs.neo.mapreduce.edges.surround.EdgeSurroundReducer; -import nl.waredingen.graphs.neo.mapreduce.edges.surround.join.JoinSurroundingEdgesMapper; -import nl.waredingen.graphs.neo.mapreduce.edges.surround.join.JoinSurroundingEdgesReducer; -import nl.waredingen.graphs.neo.mapreduce.group.GroupNodesAndEdgesMapper; -import nl.waredingen.graphs.neo.mapreduce.group.GroupNodesAndEdgesReducer; -import nl.waredingen.graphs.neo.mapreduce.group.NodeAndEdgeIdKeyComparator; -import nl.waredingen.graphs.neo.mapreduce.group.NodeAndEdgeIdKeyGroupingComparator; -import nl.waredingen.graphs.neo.mapreduce.group.NodeAndEdgeIdKeyPartitioner; -import nl.waredingen.graphs.neo.mapreduce.join.JoinFromEdgesMapper; -import nl.waredingen.graphs.neo.mapreduce.join.JoinNodesAndEdgesReducer; -import nl.waredingen.graphs.neo.mapreduce.join.JoinNodesMapper; -import nl.waredingen.graphs.neo.mapreduce.join.JoinToEdgesMapper; -import nl.waredingen.graphs.neo.mapreduce.join.NodeAndEdgeKeyComparator; -import nl.waredingen.graphs.neo.mapreduce.join.NodeAndEdgeKeyPartitioner; -import 
nl.waredingen.graphs.neo.mapreduce.join.NodeKeyGroupingComparator; -import nl.waredingen.graphs.neo.mapreduce.nodes.NodeOutputMapper; -import nl.waredingen.graphs.neo.mapreduce.nodes.NodeOutputReducer; -import nl.waredingen.graphs.neo.neo4j.Neo4JUtils; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.BytesWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat; -import org.apache.hadoop.mapreduce.lib.input.MultipleInputs; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; - -public class PureMRNodesAndEdgesJob { - - public static final String NUMBEROFROWS_CONFIG = "neo.nodes.edges.job.nr_of_rows"; - - public static int run(String nodes, String edges, String output, Configuration conf) { - String numberedNodes = output + "/numberednodes"; - String numberedEdges = output + "/numberededges"; - - String temp = output + "/temp"; - String joined = output + "/joined"; - String grouped = output + "/grouped"; - String nodesOutput = output + "/neostore.nodestore.db"; - String surrounding = output + "/surrounding"; - String joinededges = output + "/joinededges"; - String edgesOutput = output + "/neostore.relationshipstore.db"; - String typesOutput = output + "/neostore.relationshiptypestore.db"; - String typeIdsOutput = output + "/neostore.relationshiptypestore.db.id"; - String typeNamesOutput = output + "/neostore.relationshiptypestore.db.names"; - String typeNameIdsOutput = output + "/neostore.relationshiptypestore.db.names.id"; - - try { - long nrOfNodes = RowNumberJob.run(nodes, numberedNodes, conf); - long nrOfEdges = RowNumberJob.run(edges, numberedEdges, conf); - - System.out.println("Processing " + nrOfNodes + " nodes and " + nrOfEdges + " edges."); - - Job joinFrom = new Job(conf, "Join from nodes and edges job."); - joinFrom.setGroupingComparatorClass(NodeKeyGroupingComparator.class); - joinFrom.setSortComparatorClass(NodeAndEdgeKeyComparator.class); - joinFrom.setPartitionerClass(NodeAndEdgeKeyPartitioner.class); - - joinFrom.setMapOutputKeyClass(Text.class); - joinFrom.setMapOutputValueClass(Text.class); - - MultipleInputs - .addInputPath(joinFrom, new Path(numberedNodes), TextInputFormat.class, JoinNodesMapper.class); - MultipleInputs.addInputPath(joinFrom, new Path(numberedEdges), TextInputFormat.class, - JoinFromEdgesMapper.class); - - joinFrom.setReducerClass(JoinNodesAndEdgesReducer.class); - joinFrom.setOutputKeyClass(Text.class); - joinFrom.setOutputValueClass(Text.class); - - joinFrom.setOutputFormatClass(TextOutputFormat.class); - FileOutputFormat.setOutputPath(joinFrom, new Path(temp)); - - joinFrom.setJarByClass(PureMRNodesAndEdgesJob.class); - - joinFrom.waitForCompletion(true); - - Job joinTo = new Job(conf, "Join to nodes and edges job."); - joinTo.setGroupingComparatorClass(NodeKeyGroupingComparator.class); - joinTo.setSortComparatorClass(NodeAndEdgeKeyComparator.class); - joinTo.setPartitionerClass(NodeAndEdgeKeyPartitioner.class); - - joinTo.setMapOutputKeyClass(Text.class); - joinTo.setMapOutputValueClass(Text.class); - - MultipleInputs.addInputPath(joinTo, new Path(numberedNodes), TextInputFormat.class, 
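/*
 * The join jobs in this (since moved) driver implement a reduce-side join
 * with secondary sort: NodeAndEdgeKeyPartitioner keeps all rows for one
 * node key on the same reducer, NodeAndEdgeKeyComparator sorts the node
 * row ahead of its edge rows, and NodeKeyGroupingComparator makes them
 * share one reduce() call. Those classes are not part of this diff; a
 * grouping comparator of that shape would plausibly look like
 * (assumption, for illustration only):
 *
 *   public class NodeKeyGroupingComparator extends WritableComparator {
 *       protected NodeKeyGroupingComparator() { super(Text.class, true); }
 *       @Override
 *       public int compare(WritableComparable a, WritableComparable b) {
 *           // group on the node-id prefix only, ignoring the row type tag
 *           return nodeId((Text) a).compareTo(nodeId((Text) b));
 *       }
 *   }
 *
 * where nodeId() is a hypothetical helper extracting the join key.
 */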
JoinNodesMapper.class); - MultipleInputs.addInputPath(joinTo, new Path(temp), KeyValueTextInputFormat.class, JoinToEdgesMapper.class); - - joinTo.setReducerClass(JoinNodesAndEdgesReducer.class); - joinTo.setOutputKeyClass(Text.class); - joinTo.setOutputValueClass(Text.class); - - joinTo.setOutputFormatClass(TextOutputFormat.class); - FileOutputFormat.setOutputPath(joinTo, new Path(joined)); - - joinTo.setJarByClass(PureMRNodesAndEdgesJob.class); - - joinTo.waitForCompletion(true); - - Job groupJob = new Job(conf, "Join to nodes and edges job."); - groupJob.setGroupingComparatorClass(NodeAndEdgeIdKeyGroupingComparator.class); - groupJob.setSortComparatorClass(NodeAndEdgeIdKeyComparator.class); - groupJob.setPartitionerClass(NodeAndEdgeIdKeyPartitioner.class); - - groupJob.setMapOutputKeyClass(Text.class); - groupJob.setMapOutputValueClass(Text.class); - - groupJob.setMapperClass(GroupNodesAndEdgesMapper.class); - groupJob.setInputFormatClass(KeyValueTextInputFormat.class); - FileInputFormat.addInputPath(groupJob, new Path(joined)); - - groupJob.setReducerClass(GroupNodesAndEdgesReducer.class); - groupJob.setOutputKeyClass(NullWritable.class); - groupJob.setOutputValueClass(Text.class); - - groupJob.setOutputFormatClass(TextOutputFormat.class); - FileOutputFormat.setOutputPath(groupJob, new Path(grouped)); - - groupJob.setJarByClass(PureMRNodesAndEdgesJob.class); - - groupJob.waitForCompletion(true); - - conf.set(NUMBEROFROWS_CONFIG, "" + nrOfNodes); - Job nodeOutputJob = new Job(conf, "Output nodes job."); - nodeOutputJob.setPartitionerClass(RownumPartitioner.class); - - nodeOutputJob.setMapOutputKeyClass(LongWritable.class); - nodeOutputJob.setMapOutputValueClass(Text.class); - - nodeOutputJob.setMapperClass(NodeOutputMapper.class); - nodeOutputJob.setInputFormatClass(TextInputFormat.class); - FileInputFormat.addInputPath(nodeOutputJob, new Path(grouped)); - - nodeOutputJob.setReducerClass(NodeOutputReducer.class); - nodeOutputJob.setOutputKeyClass(NullWritable.class); - nodeOutputJob.setOutputValueClass(BytesWritable.class); - - nodeOutputJob.setOutputFormatClass(NewByteBufferOutputFormat.class); - FileOutputFormat.setOutputPath(nodeOutputJob, new Path(nodesOutput)); - - nodeOutputJob.setJarByClass(PureMRNodesAndEdgesJob.class); - - nodeOutputJob.waitForCompletion(true); - - Job edgeSurroundJob = new Job(conf, "Determine surrounding edges job."); - edgeSurroundJob.setGroupingComparatorClass(EdgeSurroundKeyGroupingComparator.class); - edgeSurroundJob.setSortComparatorClass(EdgeSurroundKeyComparator.class); - edgeSurroundJob.setPartitionerClass(AscLongDescLongWritablePartitioner.class); - - edgeSurroundJob.setMapOutputKeyClass(AscLongDescLongWritable.class); - edgeSurroundJob.setMapOutputValueClass(Text.class); - - edgeSurroundJob.setMapperClass(EdgeSurroundMapper.class); - edgeSurroundJob.setInputFormatClass(TextInputFormat.class); - FileInputFormat.addInputPath(edgeSurroundJob, new Path(grouped)); - - edgeSurroundJob.setReducerClass(EdgeSurroundReducer.class); - edgeSurroundJob.setOutputKeyClass(Text.class); - edgeSurroundJob.setOutputValueClass(Text.class); - - edgeSurroundJob.setOutputFormatClass(TextOutputFormat.class); - FileOutputFormat.setOutputPath(edgeSurroundJob, new Path(surrounding)); - - edgeSurroundJob.setJarByClass(PureMRNodesAndEdgesJob.class); - - edgeSurroundJob.waitForCompletion(true); - - Job joinSurroundJob = new Job(conf, "Join surrounding edges job."); - - joinSurroundJob.setMapOutputKeyClass(Text.class); - joinSurroundJob.setMapOutputValueClass(Text.class); - - 
joinSurroundJob.setMapperClass(JoinSurroundingEdgesMapper.class); - joinSurroundJob.setInputFormatClass(TextInputFormat.class); - FileInputFormat.addInputPath(joinSurroundJob, new Path(surrounding)); - - joinSurroundJob.setReducerClass(JoinSurroundingEdgesReducer.class); - joinSurroundJob.setOutputKeyClass(NullWritable.class); - joinSurroundJob.setOutputValueClass(Text.class); - - joinSurroundJob.setOutputFormatClass(TextOutputFormat.class); - FileOutputFormat.setOutputPath(joinSurroundJob, new Path(joinededges)); - - joinSurroundJob.setJarByClass(PureMRNodesAndEdgesJob.class); - - joinSurroundJob.waitForCompletion(true); - - conf.set(NUMBEROFROWS_CONFIG, "" + nrOfEdges); - Job edgeOutputJob = new Job(conf, "Output nodes job."); - edgeOutputJob.setPartitionerClass(RownumPartitioner.class); - - edgeOutputJob.setMapOutputKeyClass(LongWritable.class); - edgeOutputJob.setMapOutputValueClass(Text.class); - - edgeOutputJob.setMapperClass(EdgeOutputMapper.class); - edgeOutputJob.setInputFormatClass(TextInputFormat.class); - FileInputFormat.addInputPath(edgeOutputJob, new Path(joinededges)); - - edgeOutputJob.setReducerClass(EdgeOutputReducer.class); - edgeOutputJob.setOutputKeyClass(NullWritable.class); - edgeOutputJob.setOutputValueClass(BytesWritable.class); - - edgeOutputJob.setOutputFormatClass(NewByteBufferOutputFormat.class); - FileOutputFormat.setOutputPath(edgeOutputJob, new Path(edgesOutput)); - - edgeOutputJob.setJarByClass(PureMRNodesAndEdgesJob.class); - - edgeOutputJob.waitForCompletion(true); - - Neo4JUtils.writeNeostore(output, conf); - Neo4JUtils.writeNodeIds(nrOfNodes, output, conf); - Neo4JUtils.writeEdgeIds(nrOfEdges, output, conf); - Neo4JUtils.writeSingleTypeStore("TRANSFER_TO", output, conf); - - } catch (Exception e) { - System.err.println(e.getMessage()); - e.printStackTrace(System.err); - return 1; - } - - return 0; - } -} \ No newline at end of file diff --git a/job/src/main/java/nl/waredingen/graphs/neo/RownumPartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/RownumPartitioner.java deleted file mode 100644 index b9365d4..0000000 --- a/job/src/main/java/nl/waredingen/graphs/neo/RownumPartitioner.java +++ /dev/null @@ -1,36 +0,0 @@ -package nl.waredingen.graphs.neo; - -import org.apache.hadoop.conf.Configurable; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapreduce.Partitioner; - -public class RownumPartitioner extends Partitioner implements Configurable { - - private long max = 0L; - private Configuration conf; - - @Override - public int getPartition(K key, V value, int numPartitions) { - double divider = (double) max / numPartitions; - - return (int) (((LongWritable) key).get() / divider); - } - - @Override - public void setConf(Configuration conf) { - this.conf = conf; - configure(); - - } - - private void configure() { - this.max = Long.parseLong(getConf().get(PureMRNodesAndEdgesJob.NUMBEROFROWS_CONFIG)); - } - - @Override - public Configuration getConf() { - return conf; - } - -} diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/AscLongDescLongWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/AscLongDescLongWritable.java deleted file mode 100644 index fb05f2b..0000000 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/AscLongDescLongWritable.java +++ /dev/null @@ -1,97 +0,0 @@ -package nl.waredingen.graphs.neo.mapreduce.edges.surround; - -import java.io.DataInput; -import java.io.DataOutput; -import 
java.io.IOException; - -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.WritableComparable; - -@SuppressWarnings("rawtypes") -public class AscLongDescLongWritable implements WritableComparable { - - private LongWritable left = new LongWritable(); - private LongWritable right = new LongWritable(); - - public AscLongDescLongWritable() { - - } - - public AscLongDescLongWritable(LongWritable left, LongWritable right) { - this.left = left; - this.right = right; - } - - public void setLeft(LongWritable left) { - this.left = left; - } - - public void setRight(LongWritable right) { - this.right = right; - } - - public LongWritable getLeft() { - return left; - } - - public LongWritable getRight() { - return right; - } - - @Override - public void write(DataOutput out) throws IOException { - left.write(out); - right.write(out); - } - - @Override - public void readFields(DataInput in) throws IOException { - left.readFields(in); - right.readFields(in); - } - - @Override - public int compareTo(Object obj) { - AscLongDescLongWritable other = (AscLongDescLongWritable) obj; - int leftDiff = left.compareTo(other.left); - // sort on left and descending on right - return (leftDiff == 0) ? -1 * right.compareTo(other.right) : leftDiff; - } - - @Override - public int hashCode() { - final int prime = 31; - int result = 1; - result = prime * result + ((left == null) ? 0 : left.hashCode()); - result = prime * result + ((right == null) ? 0 : right.hashCode()); - return result; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if (obj == null) - return false; - if (getClass() != obj.getClass()) - return false; - AscLongDescLongWritable other = (AscLongDescLongWritable) obj; - if (left == null) { - if (other.left != null) - return false; - } else if (!left.equals(other.left)) - return false; - if (right == null) { - if (other.right != null) - return false; - } else if (!right.equals(other.right)) - return false; - return true; - } - - @Override - public String toString() { - return left.toString()+"\t"+right.toString(); - } -} - diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/AscLongDescLongWritablePartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/AscLongDescLongWritablePartitioner.java deleted file mode 100644 index ae65c7c..0000000 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/AscLongDescLongWritablePartitioner.java +++ /dev/null @@ -1,13 +0,0 @@ -package nl.waredingen.graphs.neo.mapreduce.edges.surround; - -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Partitioner; - -public class AscLongDescLongWritablePartitioner extends Partitioner { - - @Override - public int getPartition(AscLongDescLongWritable key, Text value, int numPartitions) { - return key.getLeft().hashCode() % numPartitions; - } - -} diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundContext.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundContext.java deleted file mode 100644 index 8109094..0000000 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundContext.java +++ /dev/null @@ -1,16 +0,0 @@ -package nl.waredingen.graphs.neo.mapreduce.edges.surround; - - -public class EdgeSurroundContext { - - long id = -1L, from = -1L, to = -1L, relnum = -1L, prev = -1L, next = -1L; - - public EdgeSurroundContext() { - } - - @Override - public String toString() { - return 
id + "\t" + relnum + "\t" + from + "\t" + to + "\t" + next + "\t" + prev; - } - -} diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundKeyComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundKeyComparator.java deleted file mode 100644 index 6a06f32..0000000 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundKeyComparator.java +++ /dev/null @@ -1,20 +0,0 @@ -package nl.waredingen.graphs.neo.mapreduce.edges.surround; - -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.io.WritableComparator; - -public class EdgeSurroundKeyComparator extends WritableComparator { - protected EdgeSurroundKeyComparator() { - super(AscLongDescLongWritable.class, true); - } - - @SuppressWarnings("rawtypes") - @Override - public int compare(WritableComparable w1, WritableComparable w2) { - AscLongDescLongWritable k1 = (AscLongDescLongWritable) w1; - AscLongDescLongWritable k2 = (AscLongDescLongWritable) w2; - - return k1.getLeft().compareTo(k2.getLeft()); - } - -} diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundKeyGroupingComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundKeyGroupingComparator.java deleted file mode 100644 index 276885c..0000000 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundKeyGroupingComparator.java +++ /dev/null @@ -1,20 +0,0 @@ -package nl.waredingen.graphs.neo.mapreduce.edges.surround; - -import org.apache.hadoop.io.WritableComparable; -import org.apache.hadoop.io.WritableComparator; - -public class EdgeSurroundKeyGroupingComparator extends WritableComparator { - protected EdgeSurroundKeyGroupingComparator() { - super(AscLongDescLongWritable.class, true); - } - - @SuppressWarnings("rawtypes") - @Override - public int compare(WritableComparable w1, WritableComparable w2) { - AscLongDescLongWritable k1 = (AscLongDescLongWritable) w1; - AscLongDescLongWritable k2 = (AscLongDescLongWritable) w2; - - return k1.getLeft().compareTo(k2.getLeft()); - } - -} diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapper.java index ba39609..9927750 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapper.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapper.java @@ -2,6 +2,8 @@ import java.io.IOException; +import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongWritable; + import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducer.java index 19d1cd8..ed85220 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducer.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducer.java @@ -3,6 +3,9 @@ import java.io.IOException; import java.util.Iterator; +import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongWritable; +import nl.waredingen.graphs.neo.mapreduce.SurroundingContext; + import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import 
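/*
 * Ordering recap for AscLongDescLongWritable (deleted above, moved to the
 * mapreduce package): compareTo() sorts ascending on the left long and,
 * by negating the right-hand comparison, descending on the right one.
 * For (id, relnum) keys {(5,3), (5,9), (7,1)} the sorted order is
 *
 *   (5,9), (5,3), (7,1)
 *
 * so each node's relationships reach the reducer largest relnum first,
 * matching the descending GroupBy of the Cascading version, while the
 * grouping comparator compares only the left long and thus feeds all of
 * one node's relationships to a single reduce() call.
 */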
org.apache.hadoop.mapreduce.Reducer; @@ -14,7 +17,7 @@ public class EdgeSurroundReducer extends Reducer values, Context context) throws IOException ,InterruptedException { Iterator iter = values.iterator(); - EdgeSurroundContext edge = new EdgeSurroundContext(); + SurroundingContext edge = new SurroundingContext(); while (iter.hasNext()) { String[] vals = iter.next().toString().split("\t"); @@ -28,7 +31,7 @@ protected void reduce(AscLongDescLongWritable key, Iterable values, Contex edge.id = id; edge.from = from; edge.to = to; - edge.relnum = relnum; + edge.other = relnum; edge.prev = -1L; // don't know yet edge.next = -1L; // first call, relationships ordered descending, so last rel, so no next available @@ -41,11 +44,11 @@ protected void reduce(AscLongDescLongWritable key, Iterable values, Contex outputValue.set(edge.toString()); context.write(NullWritable.get(), outputValue); - long next = edge.relnum; + long next = edge.other; edge.id = id; edge.from = from; edge.to = to; - edge.relnum = relnum; + edge.other = relnum; edge.prev = -1L; // don't know yet edge.next = next; diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapper.java index e4dd79f..f0f2285 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapper.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapper.java @@ -14,9 +14,10 @@ public class NodeOutputMapper extends Mapper recordList = new LinkedList(); long nextBlock = id; int srcOffset = 0; - int dataSize = 30 /* blocksize */- 8 /* headersize */; + int dataSize = blockSize - DynamicStringStore.BLOCK_HEADER_SIZE; do { DynamicRecord record = new DynamicRecord(nextBlock); record.setCreated(); @@ -252,7 +267,7 @@ public static byte[] getNameRecordAsByteArray(long id, byte[] name) { byte data[] = new byte[dataSize]; System.arraycopy(name, srcOffset, data, 0, dataSize); record.setData(data); - record.setNextBlock(nextBlock++); + record.setNextBlock(++nextBlock); srcOffset += dataSize; } else { byte data[] = new byte[name.length - srcOffset]; @@ -264,8 +279,12 @@ public static byte[] getNameRecordAsByteArray(long id, byte[] name) { recordList.add(record); } while (nextBlock != Record.NO_NEXT_BLOCK.intValue()); - ByteBuffer buffer =ByteBuffer.allocate(30*recordList.size()); - + return getDynamicRecordsAsByteArray(recordList, blockSize); + } + + public static byte[] getDynamicRecordsAsByteArray(List recordList, int blockSize) { + ByteBuffer buffer = ByteBuffer.allocate(blockSize * recordList.size()); + for (DynamicRecord dynamicRecord : recordList) { long nextProp = dynamicRecord.getNextBlock(); @@ -287,8 +306,363 @@ public static byte[] getNameRecordAsByteArray(long id, byte[] name) { buffer.put(dynamicRecord.getData()); } } - + + buffer.flip(); + return buffer.array(); + } + + public static void encodeValue(PropertyBlock block, int keyId, Object value, long stringBlockId) { + if (value instanceof String) { // Try short string first, i.e. 
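/*
 * Worked example of the dynamic-record chaining fixed above (the old
 * record.setNextBlock(nextBlock++) made every record point at itself;
 * ++nextBlock links it to the following block): with the previously
 * hard-coded sizes, blockSize 30 and header 8, dataSize is 22, so a
 * 50-byte name starting at block id becomes three chained records:
 *
 *   id:   data[22], nextBlock = id + 1
 *   id+1: data[22], nextBlock = id + 2
 *   id+2: data[6],  nextBlock = NO_NEXT_BLOCK
 */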
inlined + // in the property block + String string = (String) value; + if (LongerShortString.encode(keyId, string, block, PropertyType.getPayloadSize())) + return; + + // Fall back to dynamic string store + //long stringBlockId = 0L;// nextStringBlockId(); + setSingleBlockValue(block, keyId, PropertyType.STRING, stringBlockId); + byte[] encodedString = encodeString(string); + Collection valueRecords = allocateRecords(stringBlockId, encodedString); + for (DynamicRecord valueRecord : valueRecords) { + valueRecord.setType(PropertyType.STRING.intValue()); + block.addValueRecord(valueRecord); + } + } else if (value instanceof Integer) + setSingleBlockValue(block, keyId, PropertyType.INT, ((Integer) value).longValue()); + else if (value instanceof Boolean) + setSingleBlockValue(block, keyId, PropertyType.BOOL, (((Boolean) value).booleanValue() ? 1L : 0L)); + else if (value instanceof Float) + setSingleBlockValue(block, keyId, PropertyType.FLOAT, Float.floatToRawIntBits(((Float) value).floatValue())); + else if (value instanceof Long) { + long keyAndType = keyId | (((long) PropertyType.LONG.intValue()) << 24); + if (ShortArray.LONG.getRequiredBits((Long) value) <= 35) { // We + // only + // need + // one + // block + // for + // this + // value, + // special + // layout + // compared + // to, + // say, + // an + // integer + block.setSingleBlock(keyAndType | (1L << 28) | (((Long) value).longValue() << 29)); + } else { // We need two blocks for this value + block.setValueBlocks(new long[] { keyAndType, ((Long) value).longValue() }); + } + } else if (value instanceof Double) + block.setValueBlocks(new long[] { keyId | (((long) PropertyType.DOUBLE.intValue()) << 24), + Double.doubleToRawLongBits(((Double) value).doubleValue()) }); + else if (value instanceof Byte) + setSingleBlockValue(block, keyId, PropertyType.BYTE, ((Byte) value).longValue()); + else if (value instanceof Character) + setSingleBlockValue(block, keyId, PropertyType.CHAR, ((Character) value).charValue()); + else if (value instanceof Short) + setSingleBlockValue(block, keyId, PropertyType.SHORT, ((Short) value).longValue()); + else if (value.getClass().isArray()) { // Try short array first, i.e. 
+ // inlined in the property block + if (ShortArray.encode(keyId, value, block, PropertyType.getPayloadSize())) + return; + + // Fall back to dynamic array store + long arrayBlockId = 0L;// nextArrayBlockId(); + setSingleBlockValue(block, keyId, PropertyType.ARRAY, arrayBlockId); + Collection arrayRecords = allocateArrayRecords(arrayBlockId, value); + for (DynamicRecord valueRecord : arrayRecords) { + valueRecord.setType(PropertyType.ARRAY.intValue()); + block.addValueRecord(valueRecord); + } + } else { + throw new IllegalArgumentException("Unknown property type on: " + value + ", " + value.getClass()); + } + } + + private static void setSingleBlockValue(PropertyBlock block, int keyId, PropertyType type, long longValue) { + block.setSingleBlock(keyId | (((long) type.intValue()) << 24) | (longValue << 28)); + } + + private static byte[] encodeString(String string) { + return UTF8.encode(string); + } + + private static Collection allocateRecords(long startBlock, byte src[]) { + + List recordList = new LinkedList(); + long nextBlock = startBlock; + int srcOffset = 0; + int dataSize = 128 - AbstractDynamicStore.BLOCK_HEADER_SIZE; + do { + DynamicRecord record = new DynamicRecord(nextBlock); + record.setCreated(); + record.setInUse(true); + if (src.length - srcOffset > dataSize) { + byte data[] = new byte[dataSize]; + System.arraycopy(src, srcOffset, data, 0, dataSize); + record.setData(data); + nextBlock++;// = nextBlockId(); + record.setNextBlock(nextBlock); + srcOffset += dataSize; + } else { + byte data[] = new byte[src.length - srcOffset]; + System.arraycopy(src, srcOffset, data, 0, data.length); + record.setData(data); + nextBlock = Record.NO_NEXT_BLOCK.intValue(); + record.setNextBlock(nextBlock); + } + recordList.add(record); + } while (nextBlock != Record.NO_NEXT_BLOCK.intValue()); + return recordList; + } + + private static Collection allocateFromNumbers(long startBlock, Object array) { + Class componentType = array.getClass().getComponentType(); + boolean isPrimitiveByteArray = componentType.equals(Byte.TYPE); + boolean isByteArray = componentType.equals(Byte.class) || isPrimitiveByteArray; + byte[] bytes = null; + ShortArray type = ShortArray.typeOf(array); + if (type == null) + throw new IllegalArgumentException(array + " not a valid array type."); + + int arrayLength = Array.getLength(array); + int requiredBits = isByteArray ? Byte.SIZE : type.calculateRequiredBitsForArray(array, arrayLength); + int totalBits = requiredBits * arrayLength; + int numberOfBytes = (totalBits - 1) / 8 + 1; + int bitsUsedInLastByte = totalBits % 8; + bitsUsedInLastByte = bitsUsedInLastByte == 0 ? 8 : bitsUsedInLastByte; + numberOfBytes += 3;// DynamicArrayStore.NUMBER_HEADER_SIZE; // type + + // rest + requiredBits header. TODO no need to use + // full bytes + int length = arrayLength; + if (isByteArray) { + bytes = new byte[3/* DynamicArrayStore.NUMBER_HEADER_SIZE */+ length]; + bytes[0] = (byte) type.intValue(); + bytes[1] = (byte) bitsUsedInLastByte; + bytes[2] = (byte) requiredBits; + if (isPrimitiveByteArray) + arraycopy((byte[]) array, 0, bytes, 3/* + * DynamicArrayStore. 
+ * NUMBER_HEADER_SIZE + */, length); + else { + Byte[] source = (Byte[]) array; + for (int i = 0; i < source.length; i++) + bytes[3/* DynamicArrayStore.NUMBER_HEADER_SIZE */+ i] = source[i].byteValue(); + } + } else { + Bits bits = Bits.bits(numberOfBytes); + bits.put((byte) type.intValue()); + bits.put((byte) bitsUsedInLastByte); + bits.put((byte) requiredBits); + type.writeAll(array, length, requiredBits, bits); + bytes = bits.asBytes(); + } + return allocateRecords(startBlock, bytes); + } + + private static Collection allocateFromString(long startBlock, String[] array) { + List stringsAsBytes = new ArrayList(); + int totalBytesRequired = 5;// DynamicArrayStore.STRING_HEADER_SIZE; // + // 1b type + 4b array length + for (String string : array) { + byte[] bytes = PropertyStore.encodeString(string); + stringsAsBytes.add(bytes); + totalBytesRequired += 4/* byte[].length */+ bytes.length; + } + + ByteBuffer buf = ByteBuffer.allocate(totalBytesRequired); + buf.put(PropertyType.STRING.byteValue()); + buf.putInt(array.length); + for (byte[] stringAsBytes : stringsAsBytes) { + buf.putInt(stringAsBytes.length); + buf.put(stringAsBytes); + } + return allocateRecords(startBlock, buf.array()); + } + + private static Collection allocateArrayRecords(long startBlock, Object array) { + if (!array.getClass().isArray()) { + throw new IllegalArgumentException(array + " not an array"); + } + + Class type = array.getClass().getComponentType(); + if (type.equals(String.class)) { + return allocateFromString(startBlock, (String[]) array); + } else { + return allocateFromNumbers(startBlock, array); + } + } + + public static byte[] getPropertyReferenceAsByteArray(PropertyRecord record) { + ByteBuffer buffer = ByteBuffer.allocate(41); + + // Set up the record header + short prevModifier = record.getPrevProp() == Record.NO_NEXT_RELATIONSHIP.intValue() ? 0 : (short) ((record + .getPrevProp() & 0xF00000000L) >> 28); + short nextModifier = record.getNextProp() == Record.NO_NEXT_RELATIONSHIP.intValue() ? 
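/*
 * Layout produced by getPropertyReferenceAsByteArray(): one modifier byte
 * [pppp,nnnn] (bits 32-35 of prevProp in the high nibble via >> 28, bits
 * 32-35 of nextProp in the low nibble via >> 32), the low 32 bits of
 * prevProp and nextProp, then up to four longs of property blocks:
 * 1 + 4 + 4 + 32 = 41 bytes, the Neo4j 1.8 property record size. When
 * fewer than four longs are used, a single zero long marks the end of
 * the blocks, as the tail of this method shows.
 */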
0 : (short) ((record + .getNextProp() & 0xF00000000L) >> 32); + byte modifiers = (byte) (prevModifier | nextModifier); + /* + * [pppp,nnnn] previous, next high bits + */ + buffer.put(modifiers); + buffer.putInt((int) record.getPrevProp()).putInt((int) record.getNextProp()); + + // Then go through the blocks + int longsAppended = 0; // For marking the end of blocks + for (PropertyBlock block : record.getPropertyBlocks()) { + long[] propBlockValues = block.getValueBlocks(); + for (int k = 0; k < propBlockValues.length; k++) { + buffer.putLong(propBlockValues[k]); + } + + longsAppended += propBlockValues.length; + } + if (longsAppended < PropertyType.getPayloadSizeLongs()) { + buffer.putLong(0); + } buffer.flip(); return buffer.array(); } + + public static void writePropertyKeyStore(Map>> namesMap, String output, + Configuration conf) throws IOException { + + String indexOutput = output + "/neostore.propertystore.db.index"; + String keysOutput = output + "/neostore.propertystore.db.index.keys"; + + int lastUsedIndexId = 0; + int nextKeyBlockId = 1; + int blockSize = AbstractNameStore.NAME_STORE_BLOCK_SIZE + AbstractDynamicStore.BLOCK_HEADER_SIZE; + FileSystem fs = FileSystem.get(conf); + FSDataOutputStream idos = fs.create(new Path(indexOutput)); + FSDataOutputStream kdos = fs.create(new Path(keysOutput)); + + int endHeaderSize = blockSize; + ByteBuffer buffer = ByteBuffer.allocate( endHeaderSize ); + buffer.putInt( blockSize ); + kdos.write(buffer.array()); + + for (Integer key : namesMap.keySet()) { + ByteBuffer indexBuffer = ByteBuffer.allocate(9); + indexBuffer.put(Record.IN_USE.byteValue()); + indexBuffer.putInt(0); + indexBuffer.putInt(nextKeyBlockId); + + idos.write(indexBuffer.array()); + + byte[] name = getStringRecordAsByteArray(nextKeyBlockId, namesMap.get(key).getKey().getBytes(), blockSize ); + nextKeyBlockId += (name.length / blockSize); + + kdos.write(name, 0, name.length); + + lastUsedIndexId = key.intValue() +1; + } + + String type = PropertyIndexStore.TYPE_DESCRIPTOR + " " + CommonAbstractStore.ALL_STORES_VERSION; + byte[] encodedType = UTF8.encode(type); + + ByteBuffer tailBuffer = ByteBuffer.allocate(encodedType.length); + tailBuffer.put(encodedType); + tailBuffer.flip(); + idos.write(tailBuffer.array()); + + type = DynamicStringStore.TYPE_DESCRIPTOR + " " + CommonAbstractStore.ALL_STORES_VERSION; + encodedType = UTF8.encode(type); + tailBuffer = ByteBuffer.allocate(encodedType.length); + tailBuffer.put(encodedType); + tailBuffer.flip(); + kdos.write(tailBuffer.array()); + + idos.close(); + kdos.close(); + + writePropertyIds(lastUsedIndexId + 1, indexOutput, conf); + writePropertyIds(nextKeyBlockId -1, keysOutput, conf); + + + } + + public static void writePropertyIds(long lastTypeId, String output, Configuration conf) throws IOException { + String idsOutput = output + ".id"; + FileSystem fs = FileSystem.get(conf); + FSDataOutputStream idos = fs.create(new Path(idsOutput)); + + ByteBuffer typeBuffer = ByteBuffer.allocate(9); + typeBuffer.put((byte) 0).putLong(lastTypeId); + typeBuffer.flip(); + idos.write(typeBuffer.array()); + + idos.close(); + + } + + public static void writePropertyStoreFooter(String propertiesOutput, Configuration conf) throws IOException { + // TODO Auto-generated method stub + FileSystem fs = FileSystem.get(conf); + FSDataOutputStream fdos = fs.create(new Path(propertiesOutput + "/neostore.propertystore.db.footer")); + + String type = PropertyStore.TYPE_DESCRIPTOR + " " + CommonAbstractStore.ALL_STORES_VERSION; + ByteBuffer tailBuffer = 
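/*
 * Every store file in this Neo4j line ends with a UTF-8 trailer of
 * TYPE_DESCRIPTOR + " " + ALL_STORES_VERSION, e.g.
 * "PropertyIndexStore v0.A.0"; the kernel verifies that trailer when it
 * opens the store, which is why each writer here appends it last. The
 * companion .id files written by writePropertyIds() are 9 bytes: one
 * sticky byte (0 = cleanly closed) followed by the next free id as a
 * big-endian long.
 */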
ByteBuffer.allocate(type.length()); + tailBuffer.put(type.getBytes(Charset.forName("UTF-8"))); + tailBuffer.flip(); + fdos.write(tailBuffer.array()); + fdos.close(); + } + + public static void writePropertyStringStoreHeader(String propertiesOutput, Configuration conf) throws IOException { + FileSystem fs = FileSystem.get(conf); + FSDataOutputStream hdos = fs.create(new Path(propertiesOutput + "/neostore.propertystore.db.strings.header")); + + int blockSize = 120 + AbstractDynamicStore.BLOCK_HEADER_SIZE; + + ByteBuffer headBuffer = ByteBuffer.allocate(blockSize); + headBuffer.putInt(blockSize); + // headBuffer.flip(); + hdos.write(headBuffer.array()); + hdos.close(); + + } + + public static void writePropertyStringStoreFooter(String propertiesOutput, Configuration conf) throws IOException { + FileSystem fs = FileSystem.get(conf); + FSDataOutputStream fdos = fs.create(new Path(propertiesOutput + "/neostore.propertystore.db.strings.footer")); + + String type = DynamicStringStore.TYPE_DESCRIPTOR + " " + CommonAbstractStore.ALL_STORES_VERSION; + ByteBuffer tailBuffer = ByteBuffer.allocate(type.length()); + tailBuffer.put(type.getBytes(Charset.forName("UTF-8"))); + tailBuffer.flip(); + fdos.write(tailBuffer.array()); + + fdos.close(); + + } + + public static void writeEmptArrayStore(String propertiesOutput, Configuration conf) throws IOException { + FileSystem fs = FileSystem.get(conf); + FSDataOutputStream ados = fs.create(new Path(propertiesOutput + "/neostore.propertystore.db.arrays")); + + int blockSize = 120 + AbstractDynamicStore.BLOCK_HEADER_SIZE; + + ByteBuffer headBuffer = ByteBuffer.allocate(blockSize); + headBuffer.putInt(blockSize); + // headBuffer.flip(); + ados.write(headBuffer.array()); + + String type = DynamicArrayStore.TYPE_DESCRIPTOR + " " + CommonAbstractStore.ALL_STORES_VERSION; + ByteBuffer tailBuffer = ByteBuffer.allocate(type.length()); + tailBuffer.put(type.getBytes(Charset.forName("UTF-8"))); + tailBuffer.flip(); + ados.write(tailBuffer.array()); + + ados.close(); + + writePropertyIds(42L, propertiesOutput + "/neostore.propertystore.db.arrays", conf); + + + } + } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/RownumPartitionerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/RownumPartitionerTest.java index 0ad05cf..3c7cf59 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/RownumPartitionerTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/RownumPartitionerTest.java @@ -3,6 +3,9 @@ import static org.hamcrest.CoreMatchers.is; import static org.junit.Assert.assertThat; +import nl.waredingen.graphs.neo.mapreduce.PureMRNodesAndEdgesJob; +import nl.waredingen.graphs.neo.mapreduce.RownumPartitioner; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapperTest.java index 207e5aa..08823e6 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapperTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapperTest.java @@ -6,6 +6,8 @@ import java.util.List; +import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongWritable; + import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mrunit.mapreduce.MapDriver; diff --git 
a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducerTest.java
index ccfd1d4..10f5139 100644
--- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducerTest.java
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducerTest.java
@@ -8,6 +8,7 @@
 import java.util.ArrayList;
 import java.util.List;
+import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongWritable;
 import nl.waredingen.graphs.neo.mapreduce.join.JoinFromEdgesMapper;
 import org.apache.hadoop.io.BytesWritable;
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapperTest.java
index 729f194..fca658b 100644
--- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapperTest.java
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapperTest.java
@@ -41,7 +41,7 @@ public void shouldOutputAsNodeWhereNodeIdIsTheKey() throws Exception {
 assertThat(output.size(), is(1));
 assertThat(output.get(0).getFirst(), equalTo(new LongWritable(11)));
- assertThat(output.get(0).getSecond(), equalTo(new Text("3 0")));
+ assertThat(output.get(0).getSecond(), equalTo(new Text("3 22")));
 }
}
diff --git a/samples/simple-nodes.txt b/samples/simple-nodes.txt
index f1ca5c9..a633d6d 100644
--- a/samples/simple-nodes.txt
+++ b/samples/simple-nodes.txt
@@ -1,3 +1,3 @@
 A Aname
-B Bname
+B BnameWhichIsVeryLongBecauseWeNeedSomeProofThatOurPropertyStringStoreIsRealyWorkingOkAndAlsoSpanningTwoBlocksOfThe120ByteLargeBlockSize
 C Cname

From dc0bf933e6195796b9967d5e231712217f759924 Mon Sep 17 00:00:00 2001
From: Kris Geusebroek
Date: Fri, 7 Sep 2012 17:19:49 +0200
Subject: [PATCH 07/11] Added script to get separate files to form a neo db.

---
 job/gatherNeoFiles.sh | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100755 job/gatherNeoFiles.sh

diff --git a/job/gatherNeoFiles.sh b/job/gatherNeoFiles.sh
new file mode 100755
index 0000000..318f96e
--- /dev/null
+++ b/job/gatherNeoFiles.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+rm -rf ./graph.db
+mkdir graph.db/
+
+TO=./graph.db/
+FROM=${1}
+hadoop fs -get ${FROM}/neostore* ${TO}
+hadoop fs -get ${FROM}/properties/neostore.propertystore.db.* ${TO}
+
+hadoop fs -cat ${FROM}/neostore.nodestore.db/part-r-* > ${TO}/neostore.nodestore.db
+hadoop fs -cat ${FROM}/neostore.relationshipstore.db/part-r-* > ${TO}/neostore.relationshipstore.db
+
+hadoop fs -cat ${FROM}/properties/propertystore.db/props-r-* ${FROM}/properties/neostore.propertystore.db.footer > ${TO}/neostore.propertystore.db
+hadoop fs -cat ${FROM}/properties/neostore.propertystore.db.strings.header ${FROM}/properties/propertystore.db/strings-r-* ${FROM}/properties/neostore.propertystore.db.strings.footer > ${TO}/neostore.propertystore.db.strings
+
+rm ${TO}/*.footer
+rm ${TO}/*.header
+exit

From b253a06b3dae45dccbba604d12a2a4b34a4d17ca Mon Sep 17 00:00:00 2001
From: Kris Geusebroek
Date: Thu, 13 Sep 2012 10:03:20 +0200
Subject: [PATCH 08/11] Fixed negative partitions.
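Both changes in this patch guard the computed partition index. The hash-based partitioners (NodeAndEdgeIdKeyPartitioner, NodeAndEdgeKeyPartitioner) used "hash % numPartitions" directly; String.hashCode() can be negative and Java's % operator keeps the sign of the dividend, so a negative hash produced a negative partition index, which Hadoop rejects with an "Illegal partition for ..." error. Masking the sign bit first, (hash & Integer.MAX_VALUE) % numPartitions, keeps the index in [0, numPartitions). The range-based partitioners (RownumPartitioner, PropertyOutputIdBlockcountPartitioner) additionally clamp their divider to at least 1, so inputs with fewer rows than reduce tasks can no longer yield an index at or above numPartitions. A minimal sketch of the hash pitfall (editorial illustration, not part of the patch; the class name is made up, the sample key is taken from the new tests):

// Editorial sketch: runs on any JDK and shows why the partitioners
// now mask the sign bit before taking the modulo.
public class NegativePartitionDemo {
    public static void main(String[] args) {
        int numPartitions = 50;
        int hash = "3663243826;1".hashCode(); // String hash codes may be negative
        System.out.println(hash % numPartitions);                       // may be < 0: rejected by Hadoop
        System.out.println((hash & Integer.MAX_VALUE) % numPartitions); // always in [0, numPartitions)
    }
}

Masking is safer than Math.abs() here because Math.abs(Integer.MIN_VALUE) is still negative, while (Integer.MIN_VALUE & Integer.MAX_VALUE) is 0.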
---
 .../graphs/neo/mapreduce/PureMRNodesAndEdgesJob.java | 12 ++++++++++++
 .../graphs/neo/mapreduce/RownumPartitioner.java | 2 +-
 .../mapreduce/group/NodeAndEdgeIdKeyPartitioner.java | 2 +-
 .../mapreduce/join/NodeAndEdgeKeyPartitioner.java | 2 +-
 .../PropertyOutputIdBlockcountPartitioner.java | 2 +-
 .../PropertyOutputIdBlockcountValueWritable.java | 7 +++++++
 .../group/NodeAndEdgeIdKeyPartitionerTest.java | 11 +++++++++++
 .../join/NodeAndEdgeKeyPartitionerTest.java | 9 +++++++++
 .../properties/PropertiesOutputMapperTest.java | 10 +++++-----
 9 files changed, 48 insertions(+), 9 deletions(-)

diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/PureMRNodesAndEdgesJob.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/PureMRNodesAndEdgesJob.java
index 954ecb3..c8a2238 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/PureMRNodesAndEdgesJob.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/PureMRNodesAndEdgesJob.java
@@ -59,6 +59,18 @@ public class PureMRNodesAndEdgesJob {
 public static final String NUMBEROFROWS_CONFIG = "neo.nodes.edges.job.nr_of_rows";
 public static int run(String nodes, String edges, String output, Configuration conf) {
+ //TODO Refactoring: Split up all the jobs into separate jobs
+ //TODO Refactoring: Get all CONSTANTS in here
+ //TODO Refactoring: Configure propertynames (and totalnr of props) in config xml or if not exists in here
+ //TODO Refactoring: Configure file formats in here (tab separated, but which field is what in which step)
+ //TODO Refactoring: Add more tests to better describe the meaning of the mappers and reducers
+ //TODO Refactoring: Think of more descriptive names
+ //TODO Refactoring: Combine jobs if possible
+ //TODO Add: Array properties
+ //TODO Add: Primitive properties
+ //TODO Fix: 42 is not the correct arrayprop index!
+ //TODO Fix: Make import work with 1.8 version + //TODO Check: Check copied code is still the same in neo 1.8 String numberedNodes = output + "/numberednodes"; String numberedEdges = output + "/numberededges"; diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/RownumPartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/RownumPartitioner.java index d3e9729..61d7c18 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/RownumPartitioner.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/RownumPartitioner.java @@ -12,7 +12,7 @@ public class RownumPartitioner extends Partitioner implements Config @Override public int getPartition(K key, V value, int numPartitions) { - double divider = (double) max / numPartitions; + double divider = Math.max(1, (double) max / numPartitions); return (int) (((LongWritable) key).get() / divider); } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitioner.java index f3ac515..d932e12 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitioner.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitioner.java @@ -8,7 +8,7 @@ public class NodeAndEdgeIdKeyPartitioner extends Partitioner { @Override public int getPartition(Text key, Text val, int numPartitions) { int hash = key.toString().split(";")[0].hashCode(); - return hash % numPartitions; + return (hash & Integer.MAX_VALUE) % numPartitions; } } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/NodeAndEdgeKeyPartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/NodeAndEdgeKeyPartitioner.java index ea249b4..7002f9a 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/NodeAndEdgeKeyPartitioner.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/NodeAndEdgeKeyPartitioner.java @@ -8,7 +8,7 @@ public class NodeAndEdgeKeyPartitioner extends Partitioner { @Override public int getPartition(Text key, Text val, int numPartitions) { int hash = key.toString().substring(1).hashCode(); - return hash % numPartitions; + return (hash & Integer.MAX_VALUE) % numPartitions; } } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountPartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountPartitioner.java index 35b9802..8a1bb77 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountPartitioner.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountPartitioner.java @@ -24,7 +24,7 @@ public int getPartition(ByteWritable key, PropertyOutputIdBlockcountValueWritabl } public static int partitionForValue(PropertyOutputIdBlockcountValueWritable value, int numPartitions, long maximumIds) { - double divider = Math.max(numPartitions, (double) maximumIds / numPartitions); + double divider = Math.max(1, (double) maximumIds / numPartitions); return (int) (value.getId().get() / divider); } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountValueWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountValueWritable.java index ebddd4f..d06c134 100644 --- 
a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountValueWritable.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountValueWritable.java @@ -115,5 +115,12 @@ public boolean equals(Object obj) { return false; return true; } + + @Override + public String toString() { + return "PropertyOutputIdBlockcountValueWritable [id=" + id + ", value=" + value + ", count=" + count + + ", partition=" + partition + "]"; + } + } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitionerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitionerTest.java index 6aa0894..b925818 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitionerTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitionerTest.java @@ -3,6 +3,8 @@ import static org.junit.Assert.*; import static org.hamcrest.CoreMatchers.*; +import nl.waredingen.graphs.neo.mapreduce.join.NodeAndEdgeKeyPartitioner; + import org.apache.hadoop.io.Text; import org.junit.Test; @@ -19,5 +21,14 @@ public void testSamePartitionForNodeAndEdgeIdKey() { assertThat(partitioner.getPartition(firstKey, new Text(), 50), is(partitioner.getPartition(secondKey, new Text(), 50))); } + + @Test + public void testNonNegativePartitionForNodeAndEdgeKey() { + Text nodeKey = new Text("3663243826;1"); + + NodeAndEdgeKeyPartitioner partitioner = new NodeAndEdgeKeyPartitioner(); + + assertTrue(partitioner.getPartition(nodeKey, new Text(), 50) >= 0); + } } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/NodeAndEdgeKeyPartitionerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/NodeAndEdgeKeyPartitionerTest.java index ab20804..0ee074a 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/NodeAndEdgeKeyPartitionerTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/NodeAndEdgeKeyPartitionerTest.java @@ -20,4 +20,13 @@ public void testSamePartitionForNodeAndEdgeKey() { } + @Test + public void testPartitionNotNegative() { + Text key = new Text("N365545643"); + + NodeAndEdgeKeyPartitioner partitioner = new NodeAndEdgeKeyPartitioner(); + + assertTrue(partitioner.getPartition(key, new Text(), 50) >= 0); + + } } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertiesOutputMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertiesOutputMapperTest.java index 7df5ae2..b450ff7 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertiesOutputMapperTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertiesOutputMapperTest.java @@ -55,22 +55,22 @@ public void shouldOutputAsProperties() throws Exception { @Test public void shouldOutputAsPropertieswithPropertyIdAsTheKey() throws Exception { - output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("0 2 1 Aname 40 1 2")).run(); + output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("0 1 1 Aname 40 1 2")).run(); assertThat(output.size(), is(3)); PropertyOutputIdBlockcountValueWritable val = new PropertyOutputIdBlockcountValueWritable(); - val.setValues(new LongWritable(2), new Text("1 Aname 40 1 2")); + val.setValues(new LongWritable(1), new Text("1 Aname 40 1 2")); assertThat(output.get(0).getFirst(), equalTo(new ByteWritable(RowNumberJob.VALUE_MARKER))); 
assertThat(output.get(0).getSecond(), equalTo(val)); val = new PropertyOutputIdBlockcountValueWritable(); val.setValues(PropertyOutputIdBlockcountValueWritable.EMPTY_ID, PropertyOutputIdBlockcountValueWritable.EMPTY_STRING); - val.setCounter(1, 40); + val.setCounter(2, 40); - assertThat(output.get(1).getFirst(), equalTo(new ByteWritable(RowNumberJob.COUNTER_MARKER))); - assertThat(output.get(1).getSecond(), equalTo(val)); + assertThat(output.get(2).getFirst(), equalTo(new ByteWritable(RowNumberJob.COUNTER_MARKER))); + assertThat(output.get(2).getSecond(), equalTo(val)); } } From 2dc2025f8dab8ab77984b8bf75f89158817496d0 Mon Sep 17 00:00:00 2001 From: Kris Geusebroek Date: Mon, 1 Oct 2012 11:31:26 +0200 Subject: [PATCH 09/11] First working version of a graph.db with string properties for nodes. --- job/gatherNeoFiles.sh | 10 +- .../AscLongDescLongKeyComparator.java | 6 +- .../neo/mapreduce/PureMRNodesAndEdgesJob.java | 31 +- .../neo/mapreduce/SurroundingContext.java | 2 +- .../edges/EdgeOutputAsTextReducer.java | 48 +++ .../neo/mapreduce/edges/EdgeOutputMapper.java | 2 +- .../mapreduce/edges/EdgeOutputReducer.java | 3 +- .../edges/surround/EdgeSurroundMapper.java | 2 +- .../edges/surround/EdgeSurroundReducer.java | 2 +- .../join/JoinSurroundingEdgesMapper.java | 2 +- .../group/GroupNodesAndEdgesMapper.java | 2 +- .../group/GroupNodesAndEdgesReducer.java | 3 +- .../group/NodeAndEdgeIdKeyComparator.java | 14 +- .../NodeAndEdgeIdKeyGroupingComparator.java | 7 +- .../group/NodeAndEdgeIdKeyPartitioner.java | 3 +- .../mapreduce/join/JoinFromEdgesMapper.java | 2 +- .../join/JoinNodesAndEdgesReducer.java | 2 +- .../neo/mapreduce/join/JoinNodesMapper.java | 2 +- .../neo/mapreduce/join/JoinToEdgesMapper.java | 2 +- .../nodes/NodeOutputAsTextReducer.java | 51 +++ .../neo/mapreduce/nodes/NodeOutputMapper.java | 2 +- .../mapreduce/nodes/NodeOutputReducer.java | 20 +- ...teMarkerAndPropertyOutputIdComparator.java | 24 ++ .../ByteMarkerPropertyIdWritable.java | 99 +++++ ...teMarkerAndPropertyOutputIdComparator.java | 15 + .../NodePreparePropertiesMapper.java | 3 +- .../PropertyAsTextOutputReducer.java | 98 +++++ ...PropertyOutputIdBlockcountPartitioner.java | 6 +- ...opertyOutputIdBlockcountValueWritable.java | 6 +- .../properties/PropertyOutputMapper.java | 24 +- .../properties/PropertyOutputReducer.java | 26 +- .../graphs/neo/neo4j/Neo4JUtils.java | 6 +- .../waredingen/graphs/neo/PropertyTest.java | 394 ++++++++++++++++++ .../surround/EdgeSurroundReducerTest.java | 4 +- .../group/NodeAndEdgeIdKeyComparatorTest.java | 24 +- .../NodeAndEdgeIdKeyPartitionerTest.java | 15 +- .../join/NodeAndEdgeKeyPartitionerTest.java | 12 +- .../NodePreparePropertiesReducerTest.java | 4 +- .../PropertiesOutputMapperTest.java | 29 +- 39 files changed, 896 insertions(+), 111 deletions(-) create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputAsTextReducer.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputAsTextReducer.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerAndPropertyOutputIdComparator.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerPropertyIdWritable.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/IndifferentByteMarkerAndPropertyOutputIdComparator.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyAsTextOutputReducer.java create mode 100644 
job/src/test/java/nl/waredingen/graphs/neo/PropertyTest.java diff --git a/job/gatherNeoFiles.sh b/job/gatherNeoFiles.sh index 318f96e..8a4e25a 100755 --- a/job/gatherNeoFiles.sh +++ b/job/gatherNeoFiles.sh @@ -5,7 +5,15 @@ mkdir graph.db/ TO=./graph.db/ FROM=${1} -hadoop fs -get ${FROM}/neostore* ${TO} +hadoop fs -get ${FROM}/neostore ${TO} +hadoop fs -get ${FROM}/neostore.id ${TO} +hadoop fs -get ${FROM}/neostore.nodestore.db.id ${TO} +hadoop fs -get ${FROM}/neostore.relationshipstore.db.id ${TO} +hadoop fs -get ${FROM}/neostore.relationshiptypestore.db ${TO} +hadoop fs -get ${FROM}/neostore.relationshiptypestore.db.id ${TO} +hadoop fs -get ${FROM}/neostore.relationshiptypestore.db.names ${TO} +hadoop fs -get ${FROM}/neostore.relationshiptypestore.db.names.id ${TO} + hadoop fs -get ${FROM}/properties/neostore.propertystore.db.* ${TO} hadoop fs -cat ${FROM}/neostore.nodestore.db/part-r-* > ${TO}/neostore.nodestore.db diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongKeyComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongKeyComparator.java index ea43947..cd438a4 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongKeyComparator.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongKeyComparator.java @@ -14,7 +14,11 @@ public int compare(WritableComparable w1, WritableComparable w2) { AscLongDescLongWritable k1 = (AscLongDescLongWritable) w1; AscLongDescLongWritable k2 = (AscLongDescLongWritable) w2; - return k1.getLeft().compareTo(k2.getLeft()); + int result = k1.getLeft().compareTo(k2.getLeft()); + if (0 == result) { + result = -1 * k1.getRight().compareTo(k2.getRight()); + } + return result; } } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/PureMRNodesAndEdgesJob.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/PureMRNodesAndEdgesJob.java index c8a2238..bb1b2da 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/PureMRNodesAndEdgesJob.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/PureMRNodesAndEdgesJob.java @@ -1,13 +1,11 @@ package nl.waredingen.graphs.neo.mapreduce; import java.util.AbstractMap.SimpleEntry; -import java.util.Collection; import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; import nl.waredingen.graphs.misc.RowNumberJob; -import nl.waredingen.graphs.misc.RowNumberJob.IndifferentComparator; import nl.waredingen.graphs.neo.mapreduce.edges.EdgeOutputMapper; import nl.waredingen.graphs.neo.mapreduce.edges.EdgeOutputReducer; import nl.waredingen.graphs.neo.mapreduce.edges.surround.EdgeSurroundMapper; @@ -28,6 +26,9 @@ import nl.waredingen.graphs.neo.mapreduce.join.NodeKeyGroupingComparator; import nl.waredingen.graphs.neo.mapreduce.nodes.NodeOutputMapper; import nl.waredingen.graphs.neo.mapreduce.nodes.NodeOutputReducer; +import nl.waredingen.graphs.neo.mapreduce.properties.ByteMarkerAndPropertyOutputIdComparator; +import nl.waredingen.graphs.neo.mapreduce.properties.ByteMarkerPropertyIdWritable; +import nl.waredingen.graphs.neo.mapreduce.properties.IndifferentByteMarkerAndPropertyOutputIdComparator; import nl.waredingen.graphs.neo.mapreduce.properties.NodePreparePropertiesMapper; import nl.waredingen.graphs.neo.mapreduce.properties.NodePreparePropertiesReducer; import nl.waredingen.graphs.neo.mapreduce.properties.PropertyOutputIdBlockcountPartitioner; @@ -38,13 +39,10 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import 
org.apache.hadoop.io.ByteWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.Task.Counter; -import org.apache.hadoop.mapreduce.Counters; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat; @@ -68,6 +66,7 @@ public static int run(String nodes, String edges, String output, Configuration c //TODO Refactoring: Combine jobs if possible //TODO Add: Array properties //TODO Add: Primitive properties + //TODO Fix: Make sure all works if splitcharacter is also in key or value! //TODO Fix: 42 is not the correct arrayprop index! //TODO Fix: Make import work with 1.8 version //TODO Check: Check copied code is still the same in neo 1.8 @@ -136,7 +135,7 @@ public static int run(String nodes, String edges, String output, Configuration c joinTo.waitForCompletion(true); - Job groupJob = new Job(conf, "Join to nodes and edges job."); + Job groupJob = new Job(conf, "Group nodes and edges job."); groupJob.setGroupingComparatorClass(NodeAndEdgeIdKeyGroupingComparator.class); groupJob.setSortComparatorClass(NodeAndEdgeIdKeyComparator.class); groupJob.setPartitionerClass(NodeAndEdgeIdKeyPartitioner.class); @@ -171,10 +170,13 @@ public static int run(String nodes, String edges, String output, Configuration c FileInputFormat.addInputPath(nodeOutputJob, new Path(grouped)); nodeOutputJob.setReducerClass(NodeOutputReducer.class); +// nodeOutputJob.setReducerClass(NodeOutputAsTextReducer.class); nodeOutputJob.setOutputKeyClass(NullWritable.class); nodeOutputJob.setOutputValueClass(BytesWritable.class); +// nodeOutputJob.setOutputValueClass(Text.class); nodeOutputJob.setOutputFormatClass(NewByteBufferOutputFormat.class); +// nodeOutputJob.setOutputFormatClass(TextOutputFormat.class); FileOutputFormat.setOutputPath(nodeOutputJob, new Path(nodesOutput)); nodeOutputJob.setJarByClass(PureMRNodesAndEdgesJob.class); @@ -225,7 +227,7 @@ public static int run(String nodes, String edges, String output, Configuration c joinSurroundJob.waitForCompletion(true); conf.set(NUMBEROFROWS_CONFIG, "" + nrOfEdges); - Job edgeOutputJob = new Job(conf, "Output nodes job."); + Job edgeOutputJob = new Job(conf, "Output edges job."); edgeOutputJob.setPartitionerClass(RownumPartitioner.class); edgeOutputJob.setMapOutputKeyClass(LongWritable.class); @@ -236,10 +238,13 @@ public static int run(String nodes, String edges, String output, Configuration c FileInputFormat.addInputPath(edgeOutputJob, new Path(joinededges)); edgeOutputJob.setReducerClass(EdgeOutputReducer.class); +// edgeOutputJob.setReducerClass(EdgeOutputAsTextReducer.class); edgeOutputJob.setOutputKeyClass(NullWritable.class); edgeOutputJob.setOutputValueClass(BytesWritable.class); +// edgeOutputJob.setOutputValueClass(Text.class); edgeOutputJob.setOutputFormatClass(NewByteBufferOutputFormat.class); +// edgeOutputJob.setOutputFormatClass(TextOutputFormat.class); FileOutputFormat.setOutputPath(edgeOutputJob, new Path(edgesOutput)); edgeOutputJob.setJarByClass(PureMRNodesAndEdgesJob.class); @@ -285,11 +290,12 @@ public static int run(String nodes, String edges, String output, Configuration c Neo4JUtils.writePropertyStringStoreFooter(propertiesOutput, conf); conf.set(NUMBEROFROWS_CONFIG, "" + nrOfNodes * namesMap.size()); - Job nodePropertiesOutputJob = new Job(conf, "Output nodes job."); + Job nodePropertiesOutputJob = 
new Job(conf, "Output properties job."); nodePropertiesOutputJob.setPartitionerClass(PropertyOutputIdBlockcountPartitioner.class); - nodePropertiesOutputJob.setGroupingComparatorClass(IndifferentComparator.class); + nodePropertiesOutputJob.setSortComparatorClass(ByteMarkerAndPropertyOutputIdComparator.class); + nodePropertiesOutputJob.setGroupingComparatorClass(IndifferentByteMarkerAndPropertyOutputIdComparator.class); - nodePropertiesOutputJob.setMapOutputKeyClass(ByteWritable.class); + nodePropertiesOutputJob.setMapOutputKeyClass(ByteMarkerPropertyIdWritable.class); nodePropertiesOutputJob.setMapOutputValueClass(PropertyOutputIdBlockcountValueWritable.class); nodePropertiesOutputJob.setMapperClass(PropertyOutputMapper.class); @@ -297,8 +303,11 @@ public static int run(String nodes, String edges, String output, Configuration c FileInputFormat.addInputPath(nodePropertiesOutputJob, new Path(nodePropertiesPrepareOutput)); nodePropertiesOutputJob.setReducerClass(PropertyOutputReducer.class); +// nodePropertiesOutputJob.setReducerClass(PropertyAsTextOutputReducer.class); FileOutputFormat.setOutputPath(nodePropertiesOutputJob, new Path(propertiesOutput + "/propertystore.db")); +// MultipleOutputs.addNamedOutput(nodePropertiesOutputJob, "props", TextOutputFormat.class, NullWritable.class, Text.class); +// MultipleOutputs.addNamedOutput(nodePropertiesOutputJob, "strings", TextOutputFormat.class, NullWritable.class, Text.class); MultipleOutputs.addNamedOutput(nodePropertiesOutputJob, "props", NewByteBufferOutputFormat.class, NullWritable.class, BytesWritable.class); MultipleOutputs.addNamedOutput(nodePropertiesOutputJob, "strings", NewByteBufferOutputFormat.class, NullWritable.class, BytesWritable.class); @@ -310,7 +319,7 @@ public static int run(String nodes, String edges, String output, Configuration c long nrOfWrittenStringBlocks = nodePropertiesOutputJob.getCounters().findCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "strings.blocks").getValue(); - System.out.println(nrOfWrittenStringBlocks); + System.out.println(nrOfWrittenStringBlocks); Neo4JUtils.writePropertyIds(nrOfNodes * namesMap.size(), propertiesOutput + "/neostore.propertystore.db", conf); Neo4JUtils.writePropertyIds(nrOfWrittenStringBlocks, propertiesOutput + "/neostore.propertystore.db.strings", conf); diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/SurroundingContext.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/SurroundingContext.java index d67156c..c346d41 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/SurroundingContext.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/SurroundingContext.java @@ -12,7 +12,7 @@ public SurroundingContext() { @Override public String toString() { - return id + "\t" + other + "\t" + ((from != -1L) ? from + "\t" : "") + ((to != -1L) ? to + "\t" : "") + ((val != null) ? val + "\t" : "") + next + "\t" + prev; + return id + "\t" + other + "\t" + ((from != -1L) ? from + "\t" : "") + ((to != -1L) ? to + "\t" : "") + ((val != null) ? 
val + "\t" : "") + prev + "\t" + next; } } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputAsTextReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputAsTextReducer.java new file mode 100644 index 0000000..bad8fc7 --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputAsTextReducer.java @@ -0,0 +1,48 @@ +package nl.waredingen.graphs.neo.mapreduce.edges; + +import java.io.IOException; +import java.util.Iterator; + +import nl.waredingen.graphs.neo.neo4j.Neo4JUtils; + +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer; + +public class EdgeOutputAsTextReducer extends Reducer { + + private Text outputValue = new Text(); + + protected void reduce(LongWritable key, Iterable values, Context context) throws IOException, + InterruptedException { + Iterator itr = values.iterator(); + if (!itr.hasNext()) { + return; + } + + // only use first record per key. Rest is duplicates from the selfjoin in the previous step + Text value = itr.next(); + + String[] vals = value.toString().split("\t", 6); + long relnum = key.get(); + long from = Long.parseLong(vals[0]); + long to = Long.parseLong(vals[1]); + long fromprev = Long.parseLong(vals[2]); + long fromnext = Long.parseLong(vals[3]); + long toprev = Long.parseLong(vals[4]); + long tonext = Long.parseLong(vals[5]); + long prop = -1L; + + writeEdge(relnum, from , to, 0, fromprev, fromnext, toprev, tonext, prop, context); + } + + private void writeEdge(long relnum, long from, long to, int type, long fromprev, long fromnext, long toprev, + long tonext, long prop, Context context) throws IOException, InterruptedException { + //byte[] ba = Neo4JUtils.getEdgeAsByteArray(relnum, from, to, type, fromprev, fromnext, toprev, tonext, prop); + outputValue.set(relnum + "\t"+from + "\t"+to + "\t"+type + "\t"+fromprev + "\t"+fromnext + "\t"+toprev + "\t"+tonext + "\t"+prop); + context.write(NullWritable.get(), outputValue); + } + +} diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputMapper.java index b2e6cac..c8da1ce 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputMapper.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputMapper.java @@ -12,7 +12,7 @@ public class EdgeOutputMapper extends Mapper values, Context context) // only use first record per key. 
Rest is duplicates from the selfjoin in the previous step Text value = itr.next(); - String[] vals = value.toString().split("\t"); + String[] vals = value.toString().split("\t", 6); long relnum = key.get(); + long from = Long.parseLong(vals[0]); long to = Long.parseLong(vals[1]); long fromprev = Long.parseLong(vals[2]); diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapper.java index 9927750..5123d57 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapper.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapper.java @@ -15,7 +15,7 @@ public class EdgeSurroundMapper extends Mapper values, Contex SurroundingContext edge = new SurroundingContext(); while (iter.hasNext()) { - String[] vals = iter.next().toString().split("\t"); + String[] vals = iter.next().toString().split("\t", 2); long id = key.getLeft().get(); long from = Long.parseLong(vals[0]); diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesMapper.java index c18745e..abfefeb 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesMapper.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesMapper.java @@ -11,7 +11,7 @@ public class JoinSurroundingEdgesMapper extends Mapper { protected void map(Text key, Text value, Context context) throws IOException ,InterruptedException { //edgeid fromnode tonode fromnodeid fromnode fromname tonodeid tonode toname - String[] values = value.toString().split("\t"); + String[] values = value.toString().split("\t",9); //edgeid fromnodeid tonodeid outputValue.set(values[0] + "\t" + values[3]+ "\t" + values[6]); diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesReducer.java index c284893..a9fe5fa 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesReducer.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesReducer.java @@ -10,7 +10,8 @@ public class GroupNodesAndEdgesReducer extends Reducer values, Context context) throws IOException ,InterruptedException { - String outputKey = key.toString().split(";")[0]; + String keyString = key.toString(); + String outputKey = keyString.substring(0, keyString.lastIndexOf(";")); for (Text value : values) { outputValue.set(outputKey+"\t"+ value); context.write(NullWritable.get(), outputValue); diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyComparator.java index 4d439a5..34aa173 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyComparator.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyComparator.java @@ -15,14 +15,16 @@ public int compare(WritableComparable w1, WritableComparable w2) { Text k1 = (Text) w1; Text k2 = (Text) w2; - String[] keys1 = k1.toString().split(";"); - Long edgeId = Long.valueOf(keys1[1]); - String[] keys2 = k2.toString().split(";"); - Long edgeId2 = 
Long.valueOf(keys2[1]); + String k1s = k1.toString(); + String key1 = k1s.substring(0, k1s.lastIndexOf(";")); + Long edgeId = Long.valueOf(k1s.substring(k1s.lastIndexOf(";")+1)); + String k2s = k2.toString(); + String key2 = k2s.substring(0, k2s.lastIndexOf(";")); + Long edgeId2 = Long.valueOf(k2s.substring(k2s.lastIndexOf(";")+1)); - int result = keys1[0].compareTo(keys2[0]); + int result = key1.compareTo(key2); if (0 == result) { - result = -1 * edgeId.compareTo(edgeId2); + result = edgeId.compareTo(edgeId2); } return result; } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyGroupingComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyGroupingComparator.java index ec3c728..3e8c1f2 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyGroupingComparator.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyGroupingComparator.java @@ -15,8 +15,11 @@ public int compare(WritableComparable w1, WritableComparable w2) { Text k1 = (Text) w1; Text k2 = (Text) w2; - String key1 = k1.toString().split(";")[0]; - String key2 = k2.toString().split(";")[0]; + String k1s = k1.toString(); + String key1 = k1s.substring(0, k1s.lastIndexOf(";")); + String k2s = k2.toString(); + String key2 = k2s.substring(0, k2s.lastIndexOf(";")); + return key1.compareTo(key2); } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitioner.java index d932e12..179caa6 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitioner.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitioner.java @@ -7,7 +7,8 @@ public class NodeAndEdgeIdKeyPartitioner extends Partitioner { @Override public int getPartition(Text key, Text val, int numPartitions) { - int hash = key.toString().split(";")[0].hashCode(); + String keyString = key.toString(); + int hash = keyString.substring(0,keyString.lastIndexOf(";")).hashCode(); return (hash & Integer.MAX_VALUE) % numPartitions; } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinFromEdgesMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinFromEdgesMapper.java index e733c70..e1babda 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinFromEdgesMapper.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinFromEdgesMapper.java @@ -11,7 +11,7 @@ public class JoinFromEdgesMapper extends Mapper private Text outputKey = new Text(); protected void map(LongWritable key, Text value, Context context) throws IOException ,InterruptedException { - String[] values = value.toString().split("\t"); + String[] values = value.toString().split("\t", 3); outputKey.set("E"+values[1]); context.write(outputKey, value); } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesAndEdgesReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesAndEdgesReducer.java index 13e9a10..dc88216 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesAndEdgesReducer.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesAndEdgesReducer.java @@ -24,7 +24,7 @@ protected void reduce(Text key, Iterable values, Context context) throws I while (iter.hasNext()) { Text value = iter.next(); - String toNode = 
value.toString().split("\t")[2]; + String toNode = value.toString().split("\t", 4)[2]; outputKey.set("R"+toNode); outputValue.set(value.toString() + "\t" + node); diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesMapper.java index 165d949..9ca42d4 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesMapper.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesMapper.java @@ -11,7 +11,7 @@ public class JoinNodesMapper extends Mapper { private Text outputKey = new Text(); protected void map(LongWritable key, Text value, Context context) throws IOException ,InterruptedException { - String[] values = value.toString().split("\t"); + String[] values = value.toString().split("\t", 3); outputKey.set("N"+values[1]); context.write(outputKey, value); } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinToEdgesMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinToEdgesMapper.java index 03a58bc..777f1da 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinToEdgesMapper.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinToEdgesMapper.java @@ -10,7 +10,7 @@ public class JoinToEdgesMapper extends Mapper { private Text outputKey = new Text(); protected void map(Text key, Text value, Context context) throws IOException ,InterruptedException { - String[] values = value.toString().split("\t"); + String[] values = value.toString().split("\t", 4); outputKey.set("E"+values[2]); context.write(outputKey, value); } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputAsTextReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputAsTextReducer.java new file mode 100644 index 0000000..529068e --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputAsTextReducer.java @@ -0,0 +1,51 @@ +package nl.waredingen.graphs.neo.mapreduce.nodes; + +import java.io.IOException; +import java.util.Iterator; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Reducer; +import org.neo4j.kernel.impl.nioneo.store.Record; + +public class NodeOutputAsTextReducer extends Reducer { + private Text outputValue = new Text(); + + protected void reduce(LongWritable key, Iterable values, Context context) throws IOException, + InterruptedException { + Iterator itr = values.iterator(); + if (!itr.hasNext()) { + return; + } + + long id = key.get(); + + // walk through values and pick the smallest relnum and propid + long relnum = Long.MAX_VALUE; + long propnum = Long.MAX_VALUE; + while (itr.hasNext()) { + Text value = itr.next(); + String[] vals = value.toString().split("\t",2); + relnum = Math.min(relnum, Long.parseLong(vals[0])); + propnum = Math.min(propnum, Long.parseLong(vals[1])); + } + + if (relnum == Long.MAX_VALUE) relnum = -1L; + if (propnum == Long.MAX_VALUE) propnum = -1L; + + if (id == 0L) { + // write a rootnode once + writeNode(id, Record.NO_NEXT_RELATIONSHIP.intValue(), Record.NO_NEXT_PROPERTY.intValue(), context); + } + + writeNode(id + 1L, relnum, propnum, context); + } + + private void writeNode(long id, long relnum, long prop, Context context) throws IOException, InterruptedException { + //byte[] ba = Neo4JUtils.getNodeAsByteArray(id, relnum, prop); + outputValue.set(id 
+"\t"+relnum+"\t"+prop); + context.write(NullWritable.get(), outputValue); + } + +} \ No newline at end of file diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapper.java index f0f2285..85731f6 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapper.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapper.java @@ -13,7 +13,7 @@ public class NodeOutputMapper extends Mapper values, Context context) return; } - // only use first record per key. Rest is sequential edges which are not needed here - Text value = itr.next(); - - String[] vals = value.toString().split("\t"); long id = key.get(); - long relnum = Long.parseLong(vals[0]); - long propnum = Long.parseLong(vals[1]); + + // walk through values and pick the smallest relnum and propid + long relnum = Long.MAX_VALUE; + long propnum = Long.MAX_VALUE; + while (itr.hasNext()) { + Text value = itr.next(); + String[] vals = value.toString().split("\t",2); + relnum = Math.min(relnum, Long.parseLong(vals[0])); + propnum = Math.min(propnum, Long.parseLong(vals[1])); + } + + if (relnum == Long.MAX_VALUE) relnum = -1L; + if (propnum == Long.MAX_VALUE) propnum = -1L; + if (id == 0L) { // write a rootnode once writeNode(id, Record.NO_NEXT_RELATIONSHIP.intValue(), Record.NO_NEXT_PROPERTY.intValue(), context); diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerAndPropertyOutputIdComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerAndPropertyOutputIdComparator.java new file mode 100644 index 0000000..b0dab4d --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerAndPropertyOutputIdComparator.java @@ -0,0 +1,24 @@ +package nl.waredingen.graphs.neo.mapreduce.properties; + +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.io.WritableComparator; + +public class ByteMarkerAndPropertyOutputIdComparator extends WritableComparator { + protected ByteMarkerAndPropertyOutputIdComparator() { + super(ByteMarkerPropertyIdWritable.class, true); + } + + @SuppressWarnings("rawtypes") + @Override + public int compare(WritableComparable w1, WritableComparable w2) { + ByteMarkerPropertyIdWritable k1 = (ByteMarkerPropertyIdWritable) w1; + ByteMarkerPropertyIdWritable k2 = (ByteMarkerPropertyIdWritable) w2; + + int result = k1.getMarker().compareTo(k2.getMarker()); + if (0 == result) { + result = k1.getId().compareTo(k2.getId()); + } + return result; + } + +} diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerPropertyIdWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerPropertyIdWritable.java new file mode 100644 index 0000000..82046dd --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerPropertyIdWritable.java @@ -0,0 +1,99 @@ +package nl.waredingen.graphs.neo.mapreduce.properties; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongWritable; + +import org.apache.hadoop.io.ByteWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.WritableComparable; + +@SuppressWarnings("rawtypes") +public class ByteMarkerPropertyIdWritable implements WritableComparable { + + private ByteWritable marker = new ByteWritable(); + private 
LongWritable id = new LongWritable(); + + public ByteMarkerPropertyIdWritable() { + + } + + public ByteMarkerPropertyIdWritable(ByteWritable marker, LongWritable id) { + this.marker = marker; + this.id = id; + } + + public void setMarker(ByteWritable marker) { + this.marker = marker; + } + + public void setId(LongWritable id) { + this.id = id; + } + + public ByteWritable getMarker() { + return marker; + } + + public LongWritable getId() { + return id; + } + + @Override + public void write(DataOutput out) throws IOException { + marker.write(out); + id.write(out); + } + + @Override + public void readFields(DataInput in) throws IOException { + marker.readFields(in); + id.readFields(in); + } + + @Override + public int compareTo(Object obj) { + ByteMarkerPropertyIdWritable other = (ByteMarkerPropertyIdWritable) obj; + int markerDiff = marker.compareTo(other.marker); + // sort on marker and then on id + return (markerDiff == 0) ? id.compareTo(other.id) : markerDiff; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((id == null) ? 0 : id.hashCode()); + result = prime * result + ((marker == null) ? 0 : marker.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + ByteMarkerPropertyIdWritable other = (ByteMarkerPropertyIdWritable) obj; + if (id == null) { + if (other.id != null) + return false; + } else if (!id.equals(other.id)) + return false; + if (marker == null) { + if (other.marker != null) + return false; + } else if (!marker.equals(other.marker)) + return false; + return true; + } + + @Override + public String toString() { + return marker + "\t" + id; + } +} \ No newline at end of file diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/IndifferentByteMarkerAndPropertyOutputIdComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/IndifferentByteMarkerAndPropertyOutputIdComparator.java new file mode 100644 index 0000000..3a2dfea --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/IndifferentByteMarkerAndPropertyOutputIdComparator.java @@ -0,0 +1,15 @@ +package nl.waredingen.graphs.neo.mapreduce.properties; + +import org.apache.hadoop.io.RawComparator; + +public class IndifferentByteMarkerAndPropertyOutputIdComparator implements RawComparator { + @Override + public int compare(ByteMarkerPropertyIdWritable left, ByteMarkerPropertyIdWritable right) { + return 0; + } + + @Override + public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { + return 0; + } +} \ No newline at end of file diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesMapper.java index 7fd5e27..9a2f2d0 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesMapper.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesMapper.java @@ -15,7 +15,8 @@ public class NodePreparePropertiesMapper extends Mapper { + + private MultipleOutputs mos; + private Text outputValue = new Text(); + + protected void reduce(ByteMarkerPropertyIdWritable key, Iterable values, Context context) throws IOException, + InterruptedException { + System.out.println("In reduce method with key: "+key.toString()); + Iterator 
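+ /*
+ * Editorial note, not part of the original patch: grouping uses
+ * IndifferentByteMarkerAndPropertyOutputIdComparator (everything compares
+ * equal), so a single reduce call receives all records of this partition,
+ * sorted by ByteMarkerAndPropertyOutputIdComparator (marker byte first,
+ * then property id). Counter records (count > 0) are expected to arrive
+ * before value records; the loop below sums the string-block counts that
+ * the mapper forwarded from lower partitions to establish this partition's
+ * starting string-block id ("offset") before any value is encoded.
+ */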
itr = values.iterator(); + if (!itr.hasNext()) { + return; + } + + long offset = 1; + PropertyOutputIdBlockcountValueWritable value = itr.next(); + while (itr.hasNext() && value.getCount() > 0) { + System.out.println("Reducer adding offset:"+value.getCount()); + offset += value.getCount(); + value = itr.next(); + } + System.out.println("Reducer starting offset:"+offset); + + long blocksProcessed = 0L; + if (!value.getValue().equals(PropertyOutputIdBlockcountValueWritable.EMPTY_STRING)) { + blocksProcessed = processValue(value, offset); + offset += blocksProcessed; + context.getCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "strings.blocks").increment(blocksProcessed); + } + while(itr.hasNext()) { + value = itr.next(); + if (!value.getValue().equals(PropertyOutputIdBlockcountValueWritable.EMPTY_STRING)) { + blocksProcessed = processValue(value, offset); + if (blocksProcessed > 0) System.out.println("Incrementing offset by "+blocksProcessed); + offset += blocksProcessed; + + context.getCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "strings.blocks").increment(blocksProcessed); + } + } + } + + private long processValue(PropertyOutputIdBlockcountValueWritable value, long offset) throws IOException, InterruptedException { + String[] vals = value.getValue().toString().split("\t", 6); + PropertyBlock block = new PropertyBlock(); + int propId = Integer.parseInt(vals[0]); + + Neo4JUtils.encodeValue(block, propId, vals[1], offset); + //PropertyRecord record = new PropertyRecord(propId); + //record.setInUse(true); + //record.setPrevProp(Long.parseLong(vals[3])); + //record.setNextProp(Long.parseLong(vals[4])); + //record.addPropertyBlock(block); + //byte[] ba = Neo4JUtils.getPropertyReferenceAsByteArray(record); + outputValue.set(propId + "\t" + vals[1] + "\t" + vals[3] + "\t" + vals[4] + "\t" + block.getSingleValueLong()); + mos.write("props", NullWritable.get(), outputValue); + + if (block.getValueRecords().size() > 0) { + + StringBuilder sb = new StringBuilder().append(propId).append("\t"); + int i=0; + for (DynamicRecord dynamicRecord : block.getValueRecords()) { + + long nextProp = dynamicRecord.getNextBlock(); + int mostlyNrOfBytesInt = dynamicRecord.getLength(); + + sb.append(i).append("\t").append(mostlyNrOfBytesInt).append("\t").append(nextProp).append("\t").append(new String(dynamicRecord.getData())).append("\t"); + i++; + } + //ba = Neo4JUtils.getDynamicRecordsAsByteArray(block.getValueRecords(), 128); + outputValue.set(sb.toString()); + mos.write("strings", NullWritable.get(), outputValue); + } + + return Long.parseLong(vals[2]); + + } + protected void setup(Context context) throws IOException, InterruptedException { + mos = new MultipleOutputs(context); + } + + protected void cleanup(Context context) throws IOException, InterruptedException { + mos.close(); + } +} diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountPartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountPartitioner.java index 8a1bb77..be6b528 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountPartitioner.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountPartitioner.java @@ -8,15 +8,15 @@ import org.apache.hadoop.io.ByteWritable; import org.apache.hadoop.mapreduce.Partitioner; -public class PropertyOutputIdBlockcountPartitioner extends Partitioner implements Configurable { +public 
class PropertyOutputIdBlockcountPartitioner extends Partitioner implements Configurable { private long max = 0L; private Configuration conf; @Override - public int getPartition(ByteWritable key, PropertyOutputIdBlockcountValueWritable value, int numPartitions) { + public int getPartition(ByteMarkerPropertyIdWritable key, PropertyOutputIdBlockcountValueWritable value, int numPartitions) { - if (key.get() == (byte) RowNumberJob.COUNTER_MARKER) { + if (key.getMarker().get() == (byte) RowNumberJob.COUNTER_MARKER) { return value.getPartition(); } else { return PropertyOutputIdBlockcountPartitioner.partitionForValue(value, numPartitions, max); diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountValueWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountValueWritable.java index d06c134..de397b8 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountValueWritable.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountValueWritable.java @@ -20,10 +20,8 @@ public class PropertyOutputIdBlockcountValueWritable implements Writable { public void setValues(LongWritable id, Text value) { this.id = id; this.value = value; - if (id.equals(EMPTY_ID) && value.getLength() == 0) { - this.count = 0; - this.partition = 0; - } + this.count = 0; + this.partition = 0; } public void setCounter(int partition, long count) { diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputMapper.java index 403d496..657f001 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputMapper.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputMapper.java @@ -11,9 +11,9 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; -public class PropertyOutputMapper extends Mapper { +public class PropertyOutputMapper extends Mapper { - private ByteWritable outputKey = new ByteWritable(); + private ByteMarkerPropertyIdWritable outputKey = new ByteMarkerPropertyIdWritable(); private PropertyOutputIdBlockcountValueWritable outputValue = new PropertyOutputIdBlockcountValueWritable(); private long[] counters; private int numReduceTasks; @@ -24,26 +24,28 @@ protected void setup(Context context) throws IOException, InterruptedException { maxIds = Long.parseLong(context.getConfiguration().get(PureMRNodesAndEdgesJob.NUMBEROFROWS_CONFIG)); counters = new long[numReduceTasks]; - outputKey.set(RowNumberJob.VALUE_MARKER); + outputKey.setMarker(new ByteWritable(RowNumberJob.VALUE_MARKER)); } protected void map(LongWritable key, Text value, Context context) throws IOException ,InterruptedException { - String[] vals = value.toString().split("\t"); + String[] vals = value.toString().split("\t", 7); - outputValue.setValues(new LongWritable(Long.parseLong(vals[1])), new Text(StringUtils.join(vals, "\t", 2, vals.length))); + LongWritable id = new LongWritable(Long.parseLong(vals[1])); + outputKey.setId(id); + outputValue.setValues(id, new Text(StringUtils.join(vals, "\t", 2, vals.length))); + counters[PropertyOutputIdBlockcountPartitioner.partitionForValue(outputValue, numReduceTasks, maxIds)] += Long.parseLong(vals[4]); context.write(outputKey, outputValue); - long blockCount = Long.parseLong(vals[4]); - 
counters[PropertyOutputIdBlockcountPartitioner.partitionForValue(outputValue, numReduceTasks, maxIds)] += blockCount ; } protected void cleanup(Context context) throws IOException, InterruptedException { - outputKey.set(RowNumberJob.COUNTER_MARKER); + outputKey.setMarker(new ByteWritable(RowNumberJob.COUNTER_MARKER)); + outputKey.setId(new LongWritable(Long.MIN_VALUE)); for(int c = 0; c < counters.length - 1; c++) { - if (counters[c] >= 0) { - outputValue.setCounter(c + 1, counters[c]); + if (counters[c] > 0) { + outputValue.setCounter(c+1, counters[c]); context.write(outputKey, outputValue); } - counters[c + 1] += counters[c]; + counters[c+1] += counters[c]; } } } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputReducer.java index 916d8bf..28eb1af 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputReducer.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputReducer.java @@ -5,8 +5,6 @@ import nl.waredingen.graphs.neo.neo4j.Neo4JUtils; -import org.apache.commons.lang.StringUtils; -import org.apache.hadoop.io.ByteWritable; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.Reducer; @@ -14,12 +12,12 @@ import org.neo4j.kernel.impl.nioneo.store.PropertyBlock; import org.neo4j.kernel.impl.nioneo.store.PropertyRecord; -public class PropertyOutputReducer extends Reducer { +public class PropertyOutputReducer extends Reducer { private MultipleOutputs mos; private BytesWritable outputValue = new BytesWritable(); - protected void reduce(ByteWritable key, Iterable values, Context context) throws IOException, + protected void reduce(ByteMarkerPropertyIdWritable key, Iterable values, Context context) throws IOException, InterruptedException { Iterator itr = values.iterator(); if (!itr.hasNext()) { @@ -33,21 +31,25 @@ protected void reduce(ByteWritable key, Iterable>> n idos.close(); kdos.close(); - writePropertyIds(lastUsedIndexId + 1, indexOutput, conf); - writePropertyIds(nextKeyBlockId -1, keysOutput, conf); + writePropertyIds(lastUsedIndexId, indexOutput, conf); + writePropertyIds(nextKeyBlockId, keysOutput, conf); } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/PropertyTest.java b/job/src/test/java/nl/waredingen/graphs/neo/PropertyTest.java new file mode 100644 index 0000000..56c703b --- /dev/null +++ b/job/src/test/java/nl/waredingen/graphs/neo/PropertyTest.java @@ -0,0 +1,394 @@ +package nl.waredingen.graphs.neo; + +import static org.junit.Assert.assertNotNull; + +import java.nio.ByteBuffer; +import java.util.Arrays; + +import org.junit.Test; +import org.neo4j.kernel.impl.nioneo.store.IdGeneratorImpl; +import org.neo4j.kernel.impl.nioneo.store.PropertyBlock; +import org.neo4j.kernel.impl.nioneo.store.PropertyType; + +public class PropertyTest { + + // @Test + // public void test() { + // long tst = hexToLong("0000000016000000".getBytes()); + // System.out.println(tst); + // long propBlock = Long.parseLong("0000000016000000", 16); + // System.out.println(propBlock); + // int type = (int) ((propBlock & 0x000000000F000000L) >> 24); + // System.out.println(type); + // } + // + // @Test + // public void testBlock1() { + // String[] stringBuffer = { "0000000016000000", "bae2608c7b000001", + // "00000000000003de", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // 
public void testBlock2() { + // String[] stringBuffer = { "0000000036000000", "bae2608e7b000001", + // "000000000001e77f", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock3() { + // String[] stringBuffer = { "0000000056000000", "7cf2608c7b000001", + // "000000000000078e", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock4() { + // String[] stringBuffer = { "0000000076000000", "79e2608c7b000001", + // "00000000000007be", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock5() { + // String[] stringBuffer = { "0000000096000000", "3dea608c7b000001", + // "000000000000076e", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock6() { + // String[] stringBuffer = { "0000065396000000", "0000000000000000", + // "0000000000000000", "0000000000000000" }; + // PropertyBlock block = toBlock(stringBuffer); + // System.out.println(block); + // System.out.println(block.getKeyIndexId()); + // System.out.println(block.getSingleValueLong() >>> 1); + // + // } + // + // @Test + // public void testBlock7() { + // String[] stringBuffer = { "000001dd16000000", "0000000000000000", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock8() { + // String[] stringBuffer = { "0000039276000000", "0000000000000000", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock9() { + // String[] stringBuffer = { "0000039276000000", "0000000000000000", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock10() { + // String[] stringBuffer = { "0000009f36000000", "0000000000000000", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock11() { + // String[] stringBuffer = { "0000039276000000", "0000000000000000", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock12() { + // String[] stringBuffer = { "000000823b000000", "0000000000000000", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock13() { + // String[] stringBuffer = { "db0dc08a7b000001", "0000000000000012", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock14() { + // String[] stringBuffer = { "000001023b000002", "0000000000000000", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock15() { + // String[] stringBuffer = { "0000000009000003", "0000000000000000", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock16() { + // String[] stringBuffer = { "000001823b000004", "0000000000000000", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void 
testBlock17() { + // String[] stringBuffer = { "db0dc18a7b000005", "0000000000000012", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock18() { + // String[] stringBuffer = { "000000823b000000", "0000000000000000", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock19() { + // String[] stringBuffer = { "db0dc08a7b000001", "0000000000000012", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock20() { + // String[] stringBuffer = { "000001023b000002", "0000000000000000", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock21() { + // String[] stringBuffer = { "0000000019000003", "0000000000000000", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock22() { + // String[] stringBuffer = { "000001823b000004", "0000000000000000", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock23() { + // String[] stringBuffer = { "db0dc18a7b000005", "0000000000000012", + // "0000000000000000", "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock24() { + // String[] stringBuffer = { + // "000000823b000000","db0dc08a7b000001","0000000000000012", + // "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock25() { + // String[] stringBuffer = { + // "000001023b000000","0000000019000001","0000000000000000", + // "0000000000000000" }; + // System.out.println(toBlock(stringBuffer)); + // + // } + // + // @Test + // public void testBlock26() { + // String[] stringBuffer = { + // "000001823b000000","db0dc18a7b000001","0000000000000012", + // "0000000000000000"}; + // System.out.println(toBlock(stringBuffer)); + // + // } + + @Test + public void testInuseOfDynamicStore2011() { + String stringBuffer = "00ffffffffffffffff0008a39c7b00000004974365d2e94eb19a1a1110ab0000010000002221299938"; + ByteBuffer buf = getBufferFromIntLikeString(stringBuffer); + System.out.println(buf); + byte modifiers = buf.get(); + long prevMod = ((modifiers & 0xF0L) << 28); + long nextMod = ((modifiers & 0x0FL) << 32); + long prevProp = buf.getInt() & 0xFFFFFFFFL; + long nextProp = buf.getInt() & 0xFFFFFFFFL; + long recordPrevProp = longFromIntAndMod(prevProp, prevMod); + long recordNextProp = longFromIntAndMod(nextProp, nextMod); + + System.out.println(recordPrevProp); + System.out.println(recordNextProp); + System.out.println(toBlock(buf)); + } + + @Test + public void testInuseOfDynamicStoreNeo() { + String stringBuffer = "00000000c6ffffffff000005021b000001000000000000000000000000000000000000000000000000"; + ByteBuffer buf = getBufferFromIntLikeString(stringBuffer); + System.out.println(buf); + byte modifiers = buf.get(); + long prevMod = ((modifiers & 0xF0L) << 28); + long nextMod = ((modifiers & 0x0FL) << 32); + long prevProp = buf.getInt() & 0xFFFFFFFFL; + long nextProp = buf.getInt() & 0xFFFFFFFFL; + long recordPrevProp = longFromIntAndMod(prevProp, prevMod); + long recordNextProp = longFromIntAndMod(nextProp, nextMod); + + 
System.out.println(recordPrevProp);
+        System.out.println(recordNextProp);
+        System.out.println(toBlock(buf));
+    }
+
+    protected long longFromIntAndMod(long base, long modifier) {
+        return modifier == 0 && base == IdGeneratorImpl.INTEGER_MINUS_ONE ? -1 : base | modifier;
+    }
+
+    private ByteBuffer getBufferFromIntLikeString(String s) {
+        int len = s.length();
+        ByteBuffer buf = ByteBuffer.allocate(len / 2);
+
+        byte[] data = new byte[len / 2];
+        for (int i = 0; i < len; i += 2) {
+            data[i / 2] = (byte) ((Character.digit(s.charAt(i), 16) << 4) + Character.digit(s.charAt(i + 1), 16));
+        }
+        buf.put(data);
+        buf.flip();
+        return buf;
+    }
+
+    private PropertyBlock toBlock(String[] stringBuffer) {
+        long header = hexToLong(stringBuffer[0].getBytes());
+        PropertyType type = PropertyType.getPropertyType(header, true);
+
+        assertNotNull(type);
+
+        PropertyBlock toReturn = new PropertyBlock();
+        // toReturn.setInUse( true );
+        int numBlocks = type.calculateNumberOfBlocksUsed(header);
+        long[] blockData = new long[numBlocks];
+        blockData[0] = header; // we already have that
+        for (int i = 1; i < numBlocks; i++) {
+            blockData[i] = hexToLong(stringBuffer[i].getBytes());
+        }
+        toReturn.setValueBlocks(blockData);
+        return toReturn;
+    }
+
+    private PropertyBlock toBlock(ByteBuffer buffer) {
+        long header = buffer.getLong();
+        PropertyType type = PropertyType.getPropertyType(header, true);
+
+        assertNotNull(type);
+
+        PropertyBlock toReturn = new PropertyBlock();
+        // toReturn.setInUse( true );
+        int numBlocks = type.calculateNumberOfBlocksUsed(header);
+        long[] blockData = new long[numBlocks];
+        blockData[0] = header; // we already have that
+        for (int i = 1; i < numBlocks; i++) {
+            blockData[i] = buffer.getLong();
+        }
+        toReturn.setValueBlocks(blockData);
+        return toReturn;
+    }
+
+    private long hexToLong(byte[] bytes) {
+
+        if (bytes.length > 16) {
+            throw new IllegalArgumentException("Byte array too long (max 16 elements)");
+        }
+        long v = 0;
+        for (int i = 0; i < bytes.length; i += 2) {
+            byte b1 = (byte) (bytes[i] & 0xFF);
+
+            b1 -= 48;
+            if (b1 > 9)
+                b1 -= 39;
+
+            if (b1 < 0 || b1 > 15) {
+                throw new IllegalArgumentException("Illegal hex value: " + bytes[i]);
+            }
+
+            b1 <<= 4;
+
+            byte b2 = (byte) (bytes[i + 1] & 0xFF);
+            b2 -= 48;
+            if (b2 > 9)
+                b2 -= 39;
+
+            if (b2 < 0 || b2 > 15) {
+                throw new IllegalArgumentException("Illegal hex value: " + bytes[i + 1]);
+            }
+
+            v |= (((b1 & 0xF0) | (b2 & 0x0F))) & 0x00000000000000FFL;
+
+            if (i + 2 < bytes.length)
+                v <<= 8;
+        }
+
+        return v;
+    }
+
+    private byte[] longToHex(final long l) {
+        long v = l & 0xFFFFFFFFFFFFFFFFL;
+
+        byte[] result = new byte[16];
+        Arrays.fill(result, 0, result.length, (byte) 0);
+
+        for (int i = 0; i < result.length; i += 2) {
+            byte b = (byte) ((v & 0xFF00000000000000L) >> 56);
+
+            byte b2 = (byte) (b & 0x0F);
+            byte b1 = (byte) ((b >> 4) & 0x0F);
+
+            if (b1 > 9)
+                b1 += 39;
+            b1 += 48;
+
+            if (b2 > 9)
+                b2 += 39;
+            b2 += 48;
+
+            result[i] = (byte) (b1 & 0xFF);
+            result[i + 1] = (byte) (b2 & 0xFF);
+
+            v <<= 8;
+        }
+
+        return result;
+    }
+
+}
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducerTest.java
index 10f5139..b8db9fc 100644
--- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducerTest.java
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducerTest.java
@@ -45,11 +45,11 @@ public void
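
The two testInuseOfDynamicStore cases above decode Neo4j's packed property-record pointers: prev and next are stored as 32-bit ints, extended past 32 bits by the two nibbles of a shared modifier byte (high nibble for prev, low nibble for next). A self-contained round trip of that packing, ignoring the INTEGER_MINUS_ONE sentinel that longFromIntAndMod handles, makes the bit arithmetic easier to check:

    import java.nio.ByteBuffer;

    // Hypothetical round-trip for the 36-bit prev/next property pointers
    // decoded in the tests above: each pointer is a 32-bit int plus one
    // nibble of a shared modifier byte.
    public class PropertyPointerCodec {

        static ByteBuffer encode(long prevProp, long nextProp) {
            ByteBuffer buf = ByteBuffer.allocate(9);
            byte modifiers = (byte) ((((prevProp >> 32) & 0x0FL) << 4) | ((nextProp >> 32) & 0x0FL));
            buf.put(modifiers);
            buf.putInt((int) prevProp); // low 32 bits
            buf.putInt((int) nextProp);
            buf.flip();
            return buf;
        }

        static long[] decode(ByteBuffer buf) {
            byte modifiers = buf.get();
            long prevMod = (modifiers & 0xF0L) << 28; // high nibble -> bits 32..35
            long nextMod = (modifiers & 0x0FL) << 32; // low nibble  -> bits 32..35
            long prev = (buf.getInt() & 0xFFFFFFFFL) | prevMod;
            long next = (buf.getInt() & 0xFFFFFFFFL) | nextMod;
            return new long[] { prev, next };
        }

        public static void main(String[] args) {
            long prev = 0x589ABCDEFL; // values that need the extra nibble
            long next = 0xA01234567L;
            long[] decoded = decode(encode(prev, next));
            System.out.println(decoded[0] == prev && decoded[1] == next); // true
        }
    }
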
shouldOutputSurroundingEdges() throws Exception { assertThat(output.size(), is(3)); assertThat(output.get(0).getFirst(), equalTo(NullWritable.get())); - assertThat(output.get(0).getSecond(), equalTo(new Text("1 3 2 0 -1 3"))); + assertThat(output.get(0).getSecond(), equalTo(new Text("1 3 2 0 3 -1"))); assertThat(output.get(1).getFirst(), equalTo(NullWritable.get())); assertThat(output.get(1).getSecond(), equalTo(new Text("1 3 0 2 3 3"))); assertThat(output.get(2).getFirst(), equalTo(NullWritable.get())); - assertThat(output.get(2).getSecond(), equalTo(new Text("1 3 0 1 3 -1"))); + assertThat(output.get(2).getSecond(), equalTo(new Text("1 3 0 1 -1 3"))); } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyComparatorTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyComparatorTest.java index 51fcf5f..ea52c30 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyComparatorTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyComparatorTest.java @@ -9,13 +9,33 @@ public class NodeAndEdgeIdKeyComparatorTest { @Test - public void shouldSortNodeKeysOnEdgeIdDescKey() { + public void shouldSortNodeKeysOnEdgeIdAscKey() { Text firstKey = new Text("0 A Aname;0"); Text secondKey = new Text("0 A Aname;1"); NodeAndEdgeIdKeyComparator comp = new NodeAndEdgeIdKeyComparator(); - assertThat(comp.compare(firstKey, secondKey), is(1)); + assertThat(comp.compare(firstKey, secondKey), is(-1)); } + @Test + public void shouldSortNodeKeysOnEdgeIdDescKeyAlsoIfKeyContainsSplitCharacter() { + Text firstKey = new Text("0 A Ana;;me;0"); + Text secondKey = new Text("0 A Ana;;me;1"); + + NodeAndEdgeIdKeyComparator comp = new NodeAndEdgeIdKeyComparator(); + + assertThat(comp.compare(firstKey, secondKey), is(-1)); + } + + + @Test + public void shouldSortNodeKeysOnKeyIfDifferent() { + Text firstKey = new Text("0 A Aname;0"); + Text secondKey = new Text("1 B Bname;1"); + + NodeAndEdgeIdKeyComparator comp = new NodeAndEdgeIdKeyComparator(); + + assertThat(comp.compare(firstKey, secondKey), is(-1)); + } } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitionerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitionerTest.java index b925818..678eac3 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitionerTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitionerTest.java @@ -1,9 +1,8 @@ package nl.waredingen.graphs.neo.mapreduce.group; -import static org.junit.Assert.*; -import static org.hamcrest.CoreMatchers.*; - -import nl.waredingen.graphs.neo.mapreduce.join.NodeAndEdgeKeyPartitioner; +import static org.hamcrest.CoreMatchers.is; +import static org.junit.Assert.assertThat; +import static org.junit.Assert.assertTrue; import org.apache.hadoop.io.Text; import org.junit.Test; @@ -21,14 +20,16 @@ public void testSamePartitionForNodeAndEdgeIdKey() { assertThat(partitioner.getPartition(firstKey, new Text(), 50), is(partitioner.getPartition(secondKey, new Text(), 50))); } - + @Test public void testNonNegativePartitionForNodeAndEdgeKey() { Text nodeKey = new Text("3663243826;1"); - - NodeAndEdgeKeyPartitioner partitioner = new NodeAndEdgeKeyPartitioner(); + + NodeAndEdgeIdKeyPartitioner partitioner = new NodeAndEdgeIdKeyPartitioner(); assertTrue(partitioner.getPartition(nodeKey, new Text(), 50) >= 0); + } + } diff --git 
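
The comparator tests above pin down a contract rather than an implementation: keys of the form "<node key>;<edge id>" sort ascending on the node part first and then numerically on the edge id, and only the last ';' counts as the separator because node names may themselves contain ';'. One way such a comparator could look; this is inferred from the tests, since the NodeAndEdgeIdKeyComparator source is not part of this hunk:

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;

    // Sketch of the behaviour the tests pin down, assuming keys look like
    // "<node part>;<edge id>" and only the text after the last ';' is the id.
    public class NodeThenEdgeIdComparator extends WritableComparator {

        public NodeThenEdgeIdComparator() {
            super(Text.class, true);
        }

        @SuppressWarnings("rawtypes")
        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            String left = a.toString();
            String right = b.toString();
            int li = left.lastIndexOf(';');
            int ri = right.lastIndexOf(';');

            // Primary: the node part of the key.
            int byNode = left.substring(0, li).compareTo(right.substring(0, ri));
            if (byNode != 0) {
                return byNode < 0 ? -1 : 1;
            }
            // Secondary: edge id, compared numerically and ascending.
            long lid = Long.parseLong(left.substring(li + 1));
            long rid = Long.parseLong(right.substring(ri + 1));
            return Long.compare(lid, rid);
        }
    }
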
a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/NodeAndEdgeKeyPartitionerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/NodeAndEdgeKeyPartitionerTest.java index 0ee074a..edb362f 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/NodeAndEdgeKeyPartitionerTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/NodeAndEdgeKeyPartitionerTest.java @@ -21,12 +21,14 @@ public void testSamePartitionForNodeAndEdgeKey() { } @Test - public void testPartitionNotNegative() { - Text key = new Text("N365545643"); - - NodeAndEdgeKeyPartitioner partitioner = new NodeAndEdgeKeyPartitioner(); + public void testNonNegativePartitionForNodeAndEdgeKey() { + Text nodeKey = new Text("N3663243826"); - assertTrue(partitioner.getPartition(key, new Text(), 50) >= 0); + NodeAndEdgeKeyPartitioner partitioner = new NodeAndEdgeKeyPartitioner(); + assertTrue(partitioner.getPartition(nodeKey, new Text(), 50) >= 0); + } + + } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesReducerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesReducerTest.java index 846e345..4f415f3 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesReducerTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesReducerTest.java @@ -45,11 +45,11 @@ public void shouldOutputSurroundingProperties() throws Exception { assertThat(output.size(), is(3)); assertThat(output.get(0).getFirst(), equalTo(NullWritable.get())); - assertThat(output.get(0).getSecond(), equalTo(new Text("1 3 blah 0 -1 3"))); + assertThat(output.get(0).getSecond(), equalTo(new Text("1 3 blah 0 3 -1"))); assertThat(output.get(1).getFirst(), equalTo(NullWritable.get())); assertThat(output.get(1).getSecond(), equalTo(new Text("1 3 otherblah 0 3 3"))); assertThat(output.get(2).getFirst(), equalTo(NullWritable.get())); - assertThat(output.get(2).getSecond(), equalTo(new Text("1 3 longblahorsomething 2 3 -1"))); + assertThat(output.get(2).getSecond(), equalTo(new Text("1 3 longblahorsomething 2 -1 3"))); } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertiesOutputMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertiesOutputMapperTest.java index b450ff7..4e09ccd 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertiesOutputMapperTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertiesOutputMapperTest.java @@ -20,12 +20,12 @@ public class PropertiesOutputMapperTest { - private MapDriver driver; - private List> output; + private MapDriver driver; + private List> output; @Before public void setUp() throws Exception { - driver = new MapDriver(new PropertyOutputMapper()); + driver = new MapDriver(new PropertyOutputMapper()); Configuration configuration = new Configuration(); configuration.setLong(PureMRNodesAndEdgesJob.NUMBEROFROWS_CONFIG, 1); configuration.setInt("mapred.reduce.tasks", 3); @@ -36,41 +36,34 @@ public void setUp() throws Exception { public void shouldOutputAsProperties() throws Exception { output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("0 0 0 A 0 -1 1")).run(); - assertThat(output.size(), is(3)); + assertThat(output.size(), is(1)); PropertyOutputIdBlockcountValueWritable val = new PropertyOutputIdBlockcountValueWritable(); val.setValues(new LongWritable(0), new Text("0 A 
0 -1 1")); - assertThat(output.get(0).getFirst(), equalTo(new ByteWritable(RowNumberJob.VALUE_MARKER))); + assertThat(output.get(0).getFirst(), equalTo(new ByteMarkerPropertyIdWritable(new ByteWritable(RowNumberJob.VALUE_MARKER), new LongWritable(0)))); assertThat(output.get(0).getSecond(), equalTo(val)); - val = new PropertyOutputIdBlockcountValueWritable(); - val.setValues(PropertyOutputIdBlockcountValueWritable.EMPTY_ID, PropertyOutputIdBlockcountValueWritable.EMPTY_STRING); - val.setCounter(1, 0); - - assertThat(output.get(1).getFirst(), equalTo(new ByteWritable(RowNumberJob.COUNTER_MARKER))); - assertThat(output.get(1).getSecond(), equalTo(val)); - } @Test public void shouldOutputAsPropertieswithPropertyIdAsTheKey() throws Exception { output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("0 1 1 Aname 40 1 2")).run(); - assertThat(output.size(), is(3)); + assertThat(output.size(), is(2)); PropertyOutputIdBlockcountValueWritable val = new PropertyOutputIdBlockcountValueWritable(); val.setValues(new LongWritable(1), new Text("1 Aname 40 1 2")); - assertThat(output.get(0).getFirst(), equalTo(new ByteWritable(RowNumberJob.VALUE_MARKER))); + assertThat(output.get(0).getFirst(), equalTo(new ByteMarkerPropertyIdWritable(new ByteWritable(RowNumberJob.VALUE_MARKER), new LongWritable(1)))); assertThat(output.get(0).getSecond(), equalTo(val)); val = new PropertyOutputIdBlockcountValueWritable(); val.setValues(PropertyOutputIdBlockcountValueWritable.EMPTY_ID, PropertyOutputIdBlockcountValueWritable.EMPTY_STRING); val.setCounter(2, 40); - - assertThat(output.get(2).getFirst(), equalTo(new ByteWritable(RowNumberJob.COUNTER_MARKER))); - assertThat(output.get(2).getSecond(), equalTo(val)); - + + assertThat(output.get(1).getFirst(), equalTo(new ByteMarkerPropertyIdWritable(new ByteWritable(RowNumberJob.COUNTER_MARKER), new LongWritable(Long.MIN_VALUE)))); + assertThat(output.get(1).getSecond(), equalTo(val)); + } } From 358f05cd43416000f61e033ae4132171de816c7d Mon Sep 17 00:00:00 2001 From: Kris Geusebroek Date: Fri, 19 Oct 2012 14:27:57 +0200 Subject: [PATCH 10/11] Finalized optimized properties --- job/gatherNeoFiles.sh | 4 +- .../waredingen/graphs/misc/RowNumberJob.java | 4 +- .../mapreduce/AbstractRownumPartitioner.java | 37 ++ .../AscLongDescLongKeyComparator.java | 4 +- .../AscLongDescLongKeyGroupingComparator.java | 4 +- .../AscLongDescLongWritablePartitioner.java | 9 +- .../IndifferentKeyGroupingComparator.java | 23 ++ .../mapreduce/NewByteBufferOutputFormat.java | 1 - .../neo/mapreduce/PureMRNodesAndEdgesJob.java | 342 +++++++++++------- .../neo/mapreduce/RownumPartitioner.java | 4 +- .../neo/mapreduce/SurroundingContext.java | 10 +- .../edges/EdgeOutputAsTextReducer.java | 3 - .../neo/mapreduce/edges/EdgeOutputMapper.java | 26 +- .../mapreduce/edges/EdgeOutputReducer.java | 26 +- .../edges/EdgeOutputRownumPartitioner.java | 13 + .../edges/surround/EdgeSurroundMapper.java | 22 +- .../edges/surround/EdgeSurroundReducer.java | 43 ++- .../join/EdgeWritableKeyComparator.java | 22 ++ .../EdgeWritableKeyGroupingComparator.java | 22 ++ .../join/JoinSurroundingEdgesMapper.java | 23 +- .../join/JoinSurroundingEdgesReducer.java | 24 +- .../group/GroupNodesAndEdgesMapper.java | 42 ++- .../group/GroupNodesAndEdgesReducer.java | 19 +- .../group/NodeAndEdgeIdKeyComparator.java | 22 +- .../NodeAndEdgeIdKeyGroupingComparator.java | 16 +- .../group/NodeAndEdgeIdKeyPartitioner.java | 12 +- .../neo/mapreduce/input/AbstractMetaData.java | 116 ++++++ .../input/HardCodedMetaDataImpl.java | 51 
+++ .../graphs/neo/mapreduce/input/MetaData.java | 49 +++ .../input/MetaDataFromConfigImpl.java | 80 ++++ .../writables}/AscLongDescLongWritable.java | 8 +- .../ByteMarkerIdPropIdWritable.java} | 40 +- .../DoubleSurroundingEdgeWritable.java | 92 +++++ .../input/writables/EdgeIdPropIdWritable.java | 94 +++++ ...ropertyOutputCountersAndValueWritable.java | 136 +++++++ .../input/writables/EdgeWritable.java | 127 +++++++ .../writables/FullEdgePropertiesWritable.java | 207 +++++++++++ .../input/writables/FullEdgeWritable.java | 180 +++++++++ .../writables/FullNodePropertiesWritable.java | 191 ++++++++++ .../input/writables/NodeEdgeIdWritable.java | 110 ++++++ .../input/writables/NodeEdgeWritable.java | 92 +++++ ...opertyOutputCountersAndValueWritable.java} | 66 ++-- .../input/writables/NodeWritable.java | 98 +++++ .../input/writables/PropertyListWritable.java | 193 ++++++++++ .../writables/SurroundingEdgeWritable.java | 186 ++++++++++ .../mapreduce/join/JoinFromEdgesMapper.java | 20 +- .../join/JoinNodesAndEdgesReducer.java | 10 +- .../neo/mapreduce/join/JoinNodesMapper.java | 20 +- .../neo/mapreduce/join/JoinToEdgesMapper.java | 8 +- .../join/NodeAndEdgeKeyPartitioner.java | 2 +- .../neo/mapreduce/nodes/NodeOutputMapper.java | 21 +- .../mapreduce/nodes/NodeOutputReducer.java | 16 +- .../nodes/NodeOutputRownumPartitioner.java | 13 + .../properties/ByteMarkerAndIdComparator.java | 29 ++ ...teMarkerAndPropertyOutputIdComparator.java | 24 -- .../EdgePreparePropertiesMapper.java | 103 ++++++ .../EdgePreparePropertiesReducer.java | 84 +++++ .../properties/EdgePropertyOutputMapper.java | 59 +++ .../EdgePropertyOutputPartitioner.java | 50 +++ .../properties/EdgePropertyOutputReducer.java | 155 ++++++++ .../IndifferentByteMarkerAndIdComparator.java | 17 + ...teMarkerAndPropertyOutputIdComparator.java | 15 - .../NodePreparePropertiesMapper.java | 96 ++++- .../NodePreparePropertiesReducer.java | 74 ++-- .../properties/NodePropertyOutputMapper.java | 59 +++ .../NodePropertyOutputPartitioner.java | 50 +++ .../properties/NodePropertyOutputReducer.java | 155 ++++++++ .../PropertyAsTextOutputReducer.java | 98 ----- ...PropertyOutputIdBlockcountPartitioner.java | 47 --- .../properties/PropertyOutputMapper.java | 52 --- .../properties/PropertyOutputReducer.java | 82 ----- .../SurroundingPropertyContext.java | 15 + .../graphs/neo/neo4j/Neo4JUtils.java | 75 +++- .../graphs/neo/neo4j/ShortArray.java | 3 +- .../graphs/neo/RownumPartitionerTest.java | 9 +- .../mapreduce/DualInputMapReduceDriver.java | 1 + .../mapreduce/edges/EdgeOutputMapperTest.java | 22 +- .../edges/EdgeOutputReducerTest.java | 10 +- .../surround/EdgeSurroundMapReduceTest.java | 59 +++ .../surround/EdgeSurroundMapperTest.java | 46 ++- .../surround/EdgeSurroundReducerTest.java | 77 +++- ...eAndJoinSurroundingEdgesMapReduceTest.java | 88 +++++ .../join/EdgeWritableComparatorTest.java | 93 +++++ .../JoinSurroundingEdgesMapReduceTest.java | 56 +++ .../join/JoinSurroundingEdgesMapperTest.java | 34 +- .../join/JoinSurroundingEdgesReducerTest.java | 26 +- .../GroupNodesAndEdgesMapReduceTest.java | 47 ++- .../group/GroupNodesAndEdgesMapperTest.java | 92 +++-- .../group/GroupNodesAndEdgesReducerTest.java | 15 +- .../group/NodeAndEdgeIdKeyComparatorTest.java | 25 +- ...odeAndEdgeIdKeyGroupingComparatorTest.java | 11 +- .../NodeAndEdgeIdKeyPartitionerTest.java | 16 +- .../input/InputTypeConversionTest.java | 52 +++ .../neo/mapreduce/input/MetaDataTest.java | 134 +++++++ .../join/JoinFromEdgesMapperTest.java | 21 +- .../JoinFromNodesAndEdgesMapReduceTest.java 
| 23 +- .../join/JoinNodesAndEdgesReducerTest.java | 13 +- .../mapreduce/join/JoinNodesMapperTest.java | 22 +- .../mapreduce/join/JoinToEdgesMapperTest.java | 17 +- .../JoinToNodesAndEdgesMapReduceTest.java | 34 +- .../mapreduce/nodes/NodeOutputMapperTest.java | 24 +- .../nodes/NodeOutputReducerTest.java | 10 +- .../EdgePreparePropertiesMapperTest.java | 148 ++++++++ .../EdgePreparePropertiesReducerTest.java | 48 +++ .../EdgePropertiesOutputMapperTest.java | 95 +++++ .../NodePreparePropertiesMapReduceTest.java | 94 +++++ .../NodePreparePropertiesMapperTest.java | 123 ++++++- .../NodePreparePropertiesReducerTest.java | 38 +- .../NodePropertiesOutputMapperTest.java | 95 +++++ .../PropertiesOutputMapperTest.java | 69 ---- .../PropertyListWritableComparatorTest.java | 61 ++++ 111 files changed, 5118 insertions(+), 1075 deletions(-) create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AbstractRownumPartitioner.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/IndifferentKeyGroupingComparator.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputRownumPartitioner.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/EdgeWritableKeyComparator.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/EdgeWritableKeyGroupingComparator.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/AbstractMetaData.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/HardCodedMetaDataImpl.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/MetaData.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/MetaDataFromConfigImpl.java rename job/src/main/java/nl/waredingen/graphs/neo/mapreduce/{ => input/writables}/AscLongDescLongWritable.java (93%) rename job/src/main/java/nl/waredingen/graphs/neo/mapreduce/{properties/ByteMarkerPropertyIdWritable.java => input/writables/ByteMarkerIdPropIdWritable.java} (57%) create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/DoubleSurroundingEdgeWritable.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/EdgeIdPropIdWritable.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/EdgePropertyOutputCountersAndValueWritable.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/EdgeWritable.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/FullEdgePropertiesWritable.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/FullEdgeWritable.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/FullNodePropertiesWritable.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodeEdgeIdWritable.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodeEdgeWritable.java rename job/src/main/java/nl/waredingen/graphs/neo/mapreduce/{properties/PropertyOutputIdBlockcountValueWritable.java => input/writables/NodePropertyOutputCountersAndValueWritable.java} (50%) create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodeWritable.java create mode 100644 
job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/PropertyListWritable.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/SurroundingEdgeWritable.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputRownumPartitioner.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerAndIdComparator.java delete mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerAndPropertyOutputIdComparator.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePreparePropertiesMapper.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePreparePropertiesReducer.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertyOutputMapper.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertyOutputPartitioner.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertyOutputReducer.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/IndifferentByteMarkerAndIdComparator.java delete mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/IndifferentByteMarkerAndPropertyOutputIdComparator.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertyOutputMapper.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertyOutputPartitioner.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertyOutputReducer.java delete mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyAsTextOutputReducer.java delete mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountPartitioner.java delete mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputMapper.java delete mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputReducer.java create mode 100644 job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/SurroundingPropertyContext.java create mode 100644 job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapReduceTest.java create mode 100644 job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/EdgeSurroundMapReduceAndJoinSurroundingEdgesMapReduceTest.java create mode 100644 job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/EdgeWritableComparatorTest.java create mode 100644 job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesMapReduceTest.java create mode 100644 job/src/test/java/nl/waredingen/graphs/neo/mapreduce/input/InputTypeConversionTest.java create mode 100644 job/src/test/java/nl/waredingen/graphs/neo/mapreduce/input/MetaDataTest.java create mode 100644 job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePreparePropertiesMapperTest.java create mode 100644 job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePreparePropertiesReducerTest.java create mode 100644 job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertiesOutputMapperTest.java create mode 100644 job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesMapReduceTest.java create mode 
100644 job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertiesOutputMapperTest.java delete mode 100644 job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertiesOutputMapperTest.java create mode 100644 job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyListWritableComparatorTest.java diff --git a/job/gatherNeoFiles.sh b/job/gatherNeoFiles.sh index 8a4e25a..e12cdcd 100755 --- a/job/gatherNeoFiles.sh +++ b/job/gatherNeoFiles.sh @@ -19,8 +19,8 @@ hadoop fs -get ${FROM}/properties/neostore.propertystore.db.* ${TO} hadoop fs -cat ${FROM}/neostore.nodestore.db/part-r-* > ${TO}/neostore.nodestore.db hadoop fs -cat ${FROM}/neostore.relationshipstore.db/part-r-* > ${TO}/neostore.relationshipstore.db -hadoop fs -cat ${FROM}/properties/propertystore.db/props-r-* ${FROM}/properties/neostore.propertystore.db.footer > ${TO}/neostore.propertystore.db -hadoop fs -cat ${FROM}/properties/neostore.propertystore.db.strings.header ${FROM}/properties/propertystore.db/strings-r-* ${FROM}/properties/neostore.propertystore.db.strings.footer > ${TO}/neostore.propertystore.db.strings +hadoop fs -cat ${FROM}/nodeproperties/propertystore.db/props-r-* ${FROM}/edgeproperties/propertystore.db/props-r-* ${FROM}/properties/neostore.propertystore.db.footer > ${TO}/neostore.propertystore.db +hadoop fs -cat ${FROM}/properties/neostore.propertystore.db.strings.header ${FROM}/nodeproperties/propertystore.db/strings-r-* ${FROM}/edgeproperties/propertystore.db/strings-r-* ${FROM}/properties/neostore.propertystore.db.strings.footer > ${TO}/neostore.propertystore.db.strings rm ${TO}/*.footer rm ${TO}/*.header diff --git a/job/src/main/java/nl/waredingen/graphs/misc/RowNumberJob.java b/job/src/main/java/nl/waredingen/graphs/misc/RowNumberJob.java index 9706e76..cac29a1 100644 --- a/job/src/main/java/nl/waredingen/graphs/misc/RowNumberJob.java +++ b/job/src/main/java/nl/waredingen/graphs/misc/RowNumberJob.java @@ -16,7 +16,7 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; public class RowNumberJob { public final static byte COUNTER_MARKER = (byte) 'T'; @@ -36,7 +36,7 @@ public static long run(String input, String output, Configuration conf) { job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); - job.setOutputFormatClass(TextOutputFormat.class); + job.setOutputFormatClass(SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(job, new Path(output)); job.setInputFormatClass(TextInputFormat.class); diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AbstractRownumPartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AbstractRownumPartitioner.java new file mode 100644 index 0000000..b72cba7 --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AbstractRownumPartitioner.java @@ -0,0 +1,37 @@ +package nl.waredingen.graphs.neo.mapreduce; + +import org.apache.hadoop.conf.Configurable; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapreduce.Partitioner; + +public abstract class AbstractRownumPartitioner extends Partitioner implements Configurable { + + private long max = 0L; + protected Configuration conf; + + @Override + public int getPartition(K key, V value, 
int numPartitions) { + double divider = Math.max(1, (double) max / numPartitions); + + return (int) (((LongWritable) key).get() / divider); + } + + @Override + public void setConf(Configuration conf) { + this.conf = conf; + configure(); + + } + + private void configure() { + this.max = getMaxCounter(); + } + + @Override + public Configuration getConf() { + return conf; + } + + public abstract long getMaxCounter(); +} diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongKeyComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongKeyComparator.java index cd438a4..800caf5 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongKeyComparator.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongKeyComparator.java @@ -1,10 +1,12 @@ package nl.waredingen.graphs.neo.mapreduce; +import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable; + import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableComparator; public class AscLongDescLongKeyComparator extends WritableComparator { - protected AscLongDescLongKeyComparator() { + public AscLongDescLongKeyComparator() { super(AscLongDescLongWritable.class, true); } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongKeyGroupingComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongKeyGroupingComparator.java index 9ee8ded..fb57496 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongKeyGroupingComparator.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongKeyGroupingComparator.java @@ -1,10 +1,12 @@ package nl.waredingen.graphs.neo.mapreduce; +import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable; + import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.io.WritableComparator; public class AscLongDescLongKeyGroupingComparator extends WritableComparator { - protected AscLongDescLongKeyGroupingComparator() { + public AscLongDescLongKeyGroupingComparator() { super(AscLongDescLongWritable.class, true); } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongWritablePartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongWritablePartitioner.java index 9ffa9f4..a616047 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongWritablePartitioner.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongWritablePartitioner.java @@ -1,12 +1,15 @@ package nl.waredingen.graphs.neo.mapreduce; -import org.apache.hadoop.io.Text; +import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable; + +import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapreduce.Partitioner; -public class AscLongDescLongWritablePartitioner extends Partitioner { +@SuppressWarnings("rawtypes") +public class AscLongDescLongWritablePartitioner extends Partitioner { @Override - public int getPartition(AscLongDescLongWritable key, Text value, int numPartitions) { + public int getPartition(AscLongDescLongWritable key, WritableComparable value, int numPartitions) { return key.getLeft().hashCode() % numPartitions; } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/IndifferentKeyGroupingComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/IndifferentKeyGroupingComparator.java new file mode 100644 index 
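
AbstractRownumPartitioner implements range partitioning rather than hash partitioning: reducer p receives a contiguous block of row numbers, which is what lets gatherNeoFiles.sh simply `hadoop fs -cat` the part-r-* files together into one store file in global id order. A standalone check of the arithmetic, using assumed values but the same formula:

    // Hypothetical check of the range arithmetic in AbstractRownumPartitioner:
    // with max = 10 rows and 4 reducers the divider is 2.5, so row ids map to
    // partitions 0,0,0,1,1,2,2,2,3,3 -- contiguous ranges whose concatenation
    // preserves global id order.
    public class RownumPartitioningDemo {
        static int partition(long rownum, long max, int numPartitions) {
            double divider = Math.max(1, (double) max / numPartitions);
            return (int) (rownum / divider);
        }

        public static void main(String[] args) {
            for (long id = 0; id < 10; id++) {
                System.out.println(id + " -> " + partition(id, 10L, 4));
            }
        }
    }
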
0000000..a3b98a1 --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/IndifferentKeyGroupingComparator.java @@ -0,0 +1,23 @@ +package nl.waredingen.graphs.neo.mapreduce; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.WritableComparable; +import org.apache.hadoop.io.WritableComparator; + +public class IndifferentKeyGroupingComparator extends WritableComparator { + + protected IndifferentKeyGroupingComparator() { + super(LongWritable.class, true); + } + + @SuppressWarnings("rawtypes") + @Override + public int compare(WritableComparable a, WritableComparable b) { + return 0; + } + + @Override + public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) { + return 0; + } +} diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/NewByteBufferOutputFormat.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/NewByteBufferOutputFormat.java index 8ab001c..02b8a60 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/NewByteBufferOutputFormat.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/NewByteBufferOutputFormat.java @@ -41,7 +41,6 @@ public void close(TaskAttemptContext context) throws IOException, InterruptedExc @Override public RecordWriter getRecordWriter(TaskAttemptContext job) throws IOException, InterruptedException { - // TODO Auto-generated method stub Configuration conf = job.getConfiguration(); Path file = getDefaultWorkFile(job, ""); diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/PureMRNodesAndEdgesJob.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/PureMRNodesAndEdgesJob.java index bb1b2da..4c27e75 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/PureMRNodesAndEdgesJob.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/PureMRNodesAndEdgesJob.java @@ -1,15 +1,13 @@ package nl.waredingen.graphs.neo.mapreduce; -import java.util.AbstractMap.SimpleEntry; -import java.util.HashMap; -import java.util.Map; -import java.util.Map.Entry; - import nl.waredingen.graphs.misc.RowNumberJob; import nl.waredingen.graphs.neo.mapreduce.edges.EdgeOutputMapper; import nl.waredingen.graphs.neo.mapreduce.edges.EdgeOutputReducer; +import nl.waredingen.graphs.neo.mapreduce.edges.EdgeOutputRownumPartitioner; import nl.waredingen.graphs.neo.mapreduce.edges.surround.EdgeSurroundMapper; import nl.waredingen.graphs.neo.mapreduce.edges.surround.EdgeSurroundReducer; +import nl.waredingen.graphs.neo.mapreduce.edges.surround.join.EdgeWritableKeyComparator; +import nl.waredingen.graphs.neo.mapreduce.edges.surround.join.EdgeWritableKeyGroupingComparator; import nl.waredingen.graphs.neo.mapreduce.edges.surround.join.JoinSurroundingEdgesMapper; import nl.waredingen.graphs.neo.mapreduce.edges.surround.join.JoinSurroundingEdgesReducer; import nl.waredingen.graphs.neo.mapreduce.group.GroupNodesAndEdgesMapper; @@ -17,6 +15,22 @@ import nl.waredingen.graphs.neo.mapreduce.group.NodeAndEdgeIdKeyComparator; import nl.waredingen.graphs.neo.mapreduce.group.NodeAndEdgeIdKeyGroupingComparator; import nl.waredingen.graphs.neo.mapreduce.group.NodeAndEdgeIdKeyPartitioner; +import nl.waredingen.graphs.neo.mapreduce.input.AbstractMetaData; +import nl.waredingen.graphs.neo.mapreduce.input.MetaData; +import nl.waredingen.graphs.neo.mapreduce.input.MetaDataFromConfigImpl; +import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.ByteMarkerIdPropIdWritable; +import 
nl.waredingen.graphs.neo.mapreduce.input.writables.DoubleSurroundingEdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeIdPropIdWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgePropertyOutputCountersAndValueWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.FullEdgePropertiesWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.FullEdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.FullNodePropertiesWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeIdWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.NodePropertyOutputCountersAndValueWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.SurroundingEdgeWritable; import nl.waredingen.graphs.neo.mapreduce.join.JoinFromEdgesMapper; import nl.waredingen.graphs.neo.mapreduce.join.JoinNodesAndEdgesReducer; import nl.waredingen.graphs.neo.mapreduce.join.JoinNodesMapper; @@ -26,15 +40,19 @@ import nl.waredingen.graphs.neo.mapreduce.join.NodeKeyGroupingComparator; import nl.waredingen.graphs.neo.mapreduce.nodes.NodeOutputMapper; import nl.waredingen.graphs.neo.mapreduce.nodes.NodeOutputReducer; -import nl.waredingen.graphs.neo.mapreduce.properties.ByteMarkerAndPropertyOutputIdComparator; -import nl.waredingen.graphs.neo.mapreduce.properties.ByteMarkerPropertyIdWritable; -import nl.waredingen.graphs.neo.mapreduce.properties.IndifferentByteMarkerAndPropertyOutputIdComparator; +import nl.waredingen.graphs.neo.mapreduce.nodes.NodeOutputRownumPartitioner; +import nl.waredingen.graphs.neo.mapreduce.properties.ByteMarkerAndIdComparator; +import nl.waredingen.graphs.neo.mapreduce.properties.EdgePreparePropertiesMapper; +import nl.waredingen.graphs.neo.mapreduce.properties.EdgePreparePropertiesReducer; +import nl.waredingen.graphs.neo.mapreduce.properties.EdgePropertyOutputMapper; +import nl.waredingen.graphs.neo.mapreduce.properties.EdgePropertyOutputPartitioner; +import nl.waredingen.graphs.neo.mapreduce.properties.EdgePropertyOutputReducer; +import nl.waredingen.graphs.neo.mapreduce.properties.IndifferentByteMarkerAndIdComparator; import nl.waredingen.graphs.neo.mapreduce.properties.NodePreparePropertiesMapper; import nl.waredingen.graphs.neo.mapreduce.properties.NodePreparePropertiesReducer; -import nl.waredingen.graphs.neo.mapreduce.properties.PropertyOutputIdBlockcountPartitioner; -import nl.waredingen.graphs.neo.mapreduce.properties.PropertyOutputIdBlockcountValueWritable; -import nl.waredingen.graphs.neo.mapreduce.properties.PropertyOutputMapper; -import nl.waredingen.graphs.neo.mapreduce.properties.PropertyOutputReducer; +import nl.waredingen.graphs.neo.mapreduce.properties.NodePropertyOutputMapper; +import nl.waredingen.graphs.neo.mapreduce.properties.NodePropertyOutputPartitioner; +import nl.waredingen.graphs.neo.mapreduce.properties.NodePropertyOutputReducer; import nl.waredingen.graphs.neo.neo4j.Neo4JUtils; import org.apache.hadoop.conf.Configuration; @@ -45,31 +63,15 @@ import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat; import org.apache.hadoop.mapreduce.lib.input.MultipleInputs; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; +import 
org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; public class PureMRNodesAndEdgesJob { - public static final String NUMBEROFROWS_CONFIG = "neo.nodes.edges.job.nr_of_rows"; - public static int run(String nodes, String edges, String output, Configuration conf) { - //TODO Refactoring: Split up all the jobs in separate jobs - //TODO Refactoring: Get all CONSTANTS in here - //TODO Refactoring: Configure propertynames (and totalnr of props) in config xml or if not exists in here - //TODO Refactoring: Configure file formats in here (tab separated but which field is what in which step - //TODO Refactoring: Add more test to better describe the meaning of the mappers and reducers - //TODO Refactoring: Think of more descriptive names - //TODO Refactoring: Combine jobs if possible - //TODO Add: Array properties - //TODO Add: Primitive properties - //TODO Fix: Make sure all works if splitcharacter is also in key or value! - //TODO Fix: 42 is not the correct arrayprop index! - //TODO Fix: Make import work with 1.8 version - //TODO Check: Check copied code is still the same in neo 1.8 String numberedNodes = output + "/numberednodes"; String numberedEdges = output + "/numberededges"; @@ -80,14 +82,158 @@ public static int run(String nodes, String edges, String output, Configuration c String surrounding = output + "/surrounding"; String joinededges = output + "/joinededges"; String edgesOutput = output + "/neostore.relationshipstore.db"; - String nodePropertiesPrepareOutput = output +"/nodeproperties"; + String nodePropertiesPrepareOutput = output +"/nodepropprepared"; + String edgePropertiesPrepareOutput = output +"/edgepropprepared"; String propertiesOutput = output +"/properties"; + String nodePropertiesOutput = output +"/nodeproperties"; + String edgePropertiesOutput = output +"/edgeproperties"; try { long nrOfNodes = RowNumberJob.run(nodes, numberedNodes, conf); long nrOfEdges = RowNumberJob.run(edges, numberedEdges, conf); System.out.println("Processing " + nrOfNodes + " nodes and " + nrOfEdges + " edges."); + conf.set(AbstractMetaData.METADATA_NUMBER_OF_NODES, "" + nrOfNodes); + conf.set(AbstractMetaData.METADATA_NUMBER_OF_EDGES, "" + nrOfEdges); + + conf.setClass(AbstractMetaData.METADATA_CLASS, MetaDataFromConfigImpl.class, MetaData.class); + + conf.setStrings(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_NAMES, "rekening", "integraalklantnummer", "klantnummer", "cddklasse", "individu_organisatie_code", "naam", "postcode", "woonplaats", "label"); + conf.set(MetaDataFromConfigImpl.METADATA_NODE_ID_NAME, "rekening"); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "rekening", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "integraalklantnummer", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "klantnummer", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "cddklasse", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "individu_organisatie_code", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "naam", String.class, 
Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "postcode", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "woonplaats", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "label", String.class, Object.class); + + conf.setStrings(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_NAMES, "from", "to", "netto", "eerste", "laatste", "aantal"); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "from", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "to", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "netto", Long.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "eerste", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "laatste", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "aantal", Long.class, Object.class); + + Job nodePropertiesPrepareJob = new Job(conf, "Prepare node properties job."); + nodePropertiesPrepareJob.setGroupingComparatorClass(AscLongDescLongKeyGroupingComparator.class); + nodePropertiesPrepareJob.setSortComparatorClass(AscLongDescLongKeyComparator.class); + nodePropertiesPrepareJob.setPartitionerClass(AscLongDescLongWritablePartitioner.class); + + nodePropertiesPrepareJob.setMapOutputKeyClass(AscLongDescLongWritable.class); + nodePropertiesPrepareJob.setMapOutputValueClass(FullNodePropertiesWritable.class); + + nodePropertiesPrepareJob.setMapperClass(NodePreparePropertiesMapper.class); + nodePropertiesPrepareJob.setInputFormatClass(SequenceFileInputFormat.class); + FileInputFormat.addInputPath(nodePropertiesPrepareJob, new Path(numberedNodes)); + + nodePropertiesPrepareJob.setReducerClass(NodePreparePropertiesReducer.class); + nodePropertiesPrepareJob.setOutputKeyClass(LongWritable.class); + nodePropertiesPrepareJob.setOutputValueClass(FullNodePropertiesWritable.class); + + nodePropertiesPrepareJob.setOutputFormatClass(SequenceFileOutputFormat.class); + FileOutputFormat.setOutputPath(nodePropertiesPrepareJob, new Path(nodePropertiesPrepareOutput)); + + nodePropertiesPrepareJob.setJarByClass(PureMRNodesAndEdgesJob.class); + + nodePropertiesPrepareJob.waitForCompletion(true); + + Job edgePropertiesPrepareJob = new Job(conf, "Prepare edge properties job."); + edgePropertiesPrepareJob.setGroupingComparatorClass(AscLongDescLongKeyGroupingComparator.class); + edgePropertiesPrepareJob.setSortComparatorClass(AscLongDescLongKeyComparator.class); + edgePropertiesPrepareJob.setPartitionerClass(AscLongDescLongWritablePartitioner.class); + + edgePropertiesPrepareJob.setMapOutputKeyClass(AscLongDescLongWritable.class); + edgePropertiesPrepareJob.setMapOutputValueClass(FullEdgePropertiesWritable.class); + + edgePropertiesPrepareJob.setMapperClass(EdgePreparePropertiesMapper.class); + edgePropertiesPrepareJob.setInputFormatClass(SequenceFileInputFormat.class); + FileInputFormat.addInputPath(edgePropertiesPrepareJob, new Path(numberedEdges)); + + edgePropertiesPrepareJob.setReducerClass(EdgePreparePropertiesReducer.class); + edgePropertiesPrepareJob.setOutputKeyClass(LongWritable.class); + edgePropertiesPrepareJob.setOutputValueClass(FullEdgePropertiesWritable.class); + + 
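
The block above threads the graph's schema through the job Configuration: setStrings stores the property-name list, and setClass records each property's value type under a per-property key. A sketch of the read-back side, with placeholder key strings standing in for the METADATA_* constants, whose actual values are not visible in this patch:

    import org.apache.hadoop.conf.Configuration;

    // Sketch of how a mapper might read the metadata written into the job
    // Configuration above; key names here are illustrative placeholders.
    public class MetaDataReadBack {
        public static void main(String[] args) {
            Configuration conf = new Configuration();
            conf.setStrings("metadata.node.property.names", "rekening", "naam", "postcode");
            conf.setClass("metadata.node.property.type.naam", String.class, Object.class);

            // getStrings splits the comma-joined list written by setStrings.
            String[] names = conf.getStrings("metadata.node.property.names");
            for (String name : names) {
                // Default to String when no type was registered for a property.
                Class<?> type = conf.getClass("metadata.node.property.type." + name,
                        String.class, Object.class);
                System.out.println(name + " -> " + type.getName());
            }
        }
    }
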
edgePropertiesPrepareJob.setOutputFormatClass(SequenceFileOutputFormat.class); + FileOutputFormat.setOutputPath(edgePropertiesPrepareJob, new Path(edgePropertiesPrepareOutput)); + + edgePropertiesPrepareJob.setJarByClass(PureMRNodesAndEdgesJob.class); + + edgePropertiesPrepareJob.waitForCompletion(true); + + Job nodePropertiesOutputJob = new Job(conf, "Output node properties job."); + nodePropertiesOutputJob.setPartitionerClass(NodePropertyOutputPartitioner.class); + nodePropertiesOutputJob.setSortComparatorClass(ByteMarkerAndIdComparator.class); + nodePropertiesOutputJob.setGroupingComparatorClass(IndifferentByteMarkerAndIdComparator.class); + + nodePropertiesOutputJob.setMapOutputKeyClass(ByteMarkerIdPropIdWritable.class); + nodePropertiesOutputJob.setMapOutputValueClass(NodePropertyOutputCountersAndValueWritable.class); + + nodePropertiesOutputJob.setMapperClass(NodePropertyOutputMapper.class); + nodePropertiesOutputJob.setInputFormatClass(SequenceFileInputFormat.class); + FileInputFormat.addInputPath(nodePropertiesOutputJob, new Path(nodePropertiesPrepareOutput)); + + + nodePropertiesOutputJob.setReducerClass(NodePropertyOutputReducer.class); + + FileOutputFormat.setOutputPath(nodePropertiesOutputJob, new Path(nodePropertiesOutput + "/propertystore.db")); + MultipleOutputs.addNamedOutput(nodePropertiesOutputJob, "nodes", SequenceFileOutputFormat.class, NullWritable.class, BytesWritable.class); + MultipleOutputs.addNamedOutput(nodePropertiesOutputJob, "props", NewByteBufferOutputFormat.class, NullWritable.class, BytesWritable.class); + MultipleOutputs.addNamedOutput(nodePropertiesOutputJob, "strings", NewByteBufferOutputFormat.class, NullWritable.class, BytesWritable.class); + + MultipleOutputs.setCountersEnabled(nodePropertiesOutputJob, true); + + nodePropertiesOutputJob.setJarByClass(PureMRNodesAndEdgesJob.class); + + nodePropertiesOutputJob.waitForCompletion(true); + + long nrOfWrittenStringBlocks = nodePropertiesOutputJob.getCounters().findCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "strings.blocks").getValue(); + long nrOfWrittenNodeProperties = nodePropertiesOutputJob.getCounters().findCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "node.properties").getValue(); + + conf.set(AbstractMetaData.METADATA_NUMBER_OF_NODE_PROPERTIES, "" + nrOfWrittenNodeProperties); + + Job edgePropertiesOutputJob = new Job(conf, "Output edge properties job."); + edgePropertiesOutputJob.setPartitionerClass(EdgePropertyOutputPartitioner.class); + edgePropertiesOutputJob.setSortComparatorClass(ByteMarkerAndIdComparator.class); + edgePropertiesOutputJob.setGroupingComparatorClass(IndifferentByteMarkerAndIdComparator.class); + + edgePropertiesOutputJob.setMapOutputKeyClass(ByteMarkerIdPropIdWritable.class); + edgePropertiesOutputJob.setMapOutputValueClass(EdgePropertyOutputCountersAndValueWritable.class); + + edgePropertiesOutputJob.setMapperClass(EdgePropertyOutputMapper.class); + edgePropertiesOutputJob.setInputFormatClass(SequenceFileInputFormat.class); + FileInputFormat.addInputPath(edgePropertiesOutputJob, new Path(edgePropertiesPrepareOutput)); + + + edgePropertiesOutputJob.setReducerClass(EdgePropertyOutputReducer.class); + + FileOutputFormat.setOutputPath(edgePropertiesOutputJob, new Path(edgePropertiesOutput + "/propertystore.db")); + MultipleOutputs.addNamedOutput(edgePropertiesOutputJob, "edges", SequenceFileOutputFormat.class, NullWritable.class, BytesWritable.class); + MultipleOutputs.addNamedOutput(edgePropertiesOutputJob, "props", 
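
Each property-output job registers several named outputs so that a single reduce pass can write the node/edge records, the property records, and the dynamic string blocks side by side. The reducer half of that wiring is not shown in this hunk; a minimal sketch of how MultipleOutputs is typically driven from a reducer, with an invented record layout:

    import java.io.IOException;

    import org.apache.hadoop.io.BytesWritable;
    import org.apache.hadoop.io.LongWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.mapreduce.Reducer;
    import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

    // Sketch of a reducer fanning records out to a named output registered
    // with MultipleOutputs.addNamedOutput, as the jobs above do.
    public class FanOutReducer extends Reducer<LongWritable, BytesWritable, NullWritable, BytesWritable> {

        private MultipleOutputs<NullWritable, BytesWritable> mos;

        @Override
        protected void setup(Context context) {
            mos = new MultipleOutputs<NullWritable, BytesWritable>(context);
        }

        @Override
        protected void reduce(LongWritable key, Iterable<BytesWritable> values, Context context)
                throws IOException, InterruptedException {
            for (BytesWritable value : values) {
                // Named output plus a base path: files come out as props-r-NNNNN.
                mos.write("props", NullWritable.get(), value, "props");
            }
        }

        @Override
        protected void cleanup(Context context) throws IOException, InterruptedException {
            // Without this, the named outputs are never flushed to disk.
            mos.close();
        }
    }
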
NewByteBufferOutputFormat.class, NullWritable.class, BytesWritable.class); + MultipleOutputs.addNamedOutput(edgePropertiesOutputJob, "strings", NewByteBufferOutputFormat.class, NullWritable.class, BytesWritable.class); + + MultipleOutputs.setCountersEnabled(edgePropertiesOutputJob, true); + + edgePropertiesOutputJob.setJarByClass(PureMRNodesAndEdgesJob.class); + + edgePropertiesOutputJob.waitForCompletion(true); + + nrOfWrittenStringBlocks += edgePropertiesOutputJob.getCounters().findCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "strings.blocks").getValue(); + long nrOfWrittenEdgeProperties = edgePropertiesOutputJob.getCounters().findCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "edge.properties").getValue(); + + Neo4JUtils.writePropertyKeyStore(propertiesOutput, conf); + + Neo4JUtils.writePropertyStoreFooter(propertiesOutput, conf); + Neo4JUtils.writePropertyStringStoreHeader(propertiesOutput, conf); + Neo4JUtils.writePropertyStringStoreFooter(propertiesOutput, conf); + + Neo4JUtils.writePropertyIds((nrOfWrittenNodeProperties + nrOfWrittenEdgeProperties), propertiesOutput + "/neostore.propertystore.db", conf); + Neo4JUtils.writePropertyIds(nrOfWrittenStringBlocks + 1, propertiesOutput + "/neostore.propertystore.db.strings", conf); + + Neo4JUtils.writeEmptArrayStore(propertiesOutput, conf); Job joinFrom = new Job(conf, "Join from nodes and edges job."); joinFrom.setGroupingComparatorClass(NodeKeyGroupingComparator.class); @@ -96,17 +242,17 @@ public static int run(String nodes, String edges, String output, Configuration c joinFrom.setMapOutputKeyClass(Text.class); joinFrom.setMapOutputValueClass(Text.class); - + MultipleInputs - .addInputPath(joinFrom, new Path(numberedNodes), TextInputFormat.class, JoinNodesMapper.class); - MultipleInputs.addInputPath(joinFrom, new Path(numberedEdges), TextInputFormat.class, + .addInputPath(joinFrom, new Path(nodePropertiesOutput + "/propertystore.db/nodes*"), SequenceFileInputFormat.class, JoinNodesMapper.class); + MultipleInputs.addInputPath(joinFrom, new Path(edgePropertiesOutput + "/propertystore.db/edges*"), SequenceFileInputFormat.class, JoinFromEdgesMapper.class); joinFrom.setReducerClass(JoinNodesAndEdgesReducer.class); - joinFrom.setOutputKeyClass(Text.class); + joinFrom.setOutputKeyClass(NullWritable.class); joinFrom.setOutputValueClass(Text.class); - joinFrom.setOutputFormatClass(TextOutputFormat.class); + joinFrom.setOutputFormatClass(SequenceFileOutputFormat.class); FileOutputFormat.setOutputPath(joinFrom, new Path(temp)); joinFrom.setJarByClass(PureMRNodesAndEdgesJob.class); @@ -121,14 +267,14 @@ public static int run(String nodes, String edges, String output, Configuration c joinTo.setMapOutputKeyClass(Text.class); joinTo.setMapOutputValueClass(Text.class); - MultipleInputs.addInputPath(joinTo, new Path(numberedNodes), TextInputFormat.class, JoinNodesMapper.class); - MultipleInputs.addInputPath(joinTo, new Path(temp), KeyValueTextInputFormat.class, JoinToEdgesMapper.class); + MultipleInputs.addInputPath(joinTo, new Path(nodePropertiesOutput + "/propertystore.db/nodes*"), SequenceFileInputFormat.class, JoinNodesMapper.class); + MultipleInputs.addInputPath(joinTo, new Path(temp), SequenceFileInputFormat.class, JoinToEdgesMapper.class); joinTo.setReducerClass(JoinNodesAndEdgesReducer.class); - joinTo.setOutputKeyClass(Text.class); + joinTo.setOutputKeyClass(NullWritable.class); joinTo.setOutputValueClass(Text.class); - joinTo.setOutputFormatClass(TextOutputFormat.class); + 
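
Because setCountersEnabled turns on MultipleOutputs counters, the driver can recover how many property records and string blocks were actually written and feed those totals to Neo4JUtils.writePropertyIds. The hand-off pattern, reduced to its core; counterName mirrors the names queried above, and the caller is assumed to have run the job to completion first:

    import org.apache.hadoop.mapreduce.Job;

    // Sketch of the counter hand-off used above: a completed job's counters
    // size the .id files written for the next stage.
    public class CounterHandOff {
        static long written(Job completed, String counterName) throws Exception {
            return completed.getCounters()
                    .findCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", counterName)
                    .getValue();
        }
    }

With that helper, the next free property id is written(job, "node.properties") + written(job, "edge.properties"), matching the writePropertyIds call in the driver above.
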
         FileOutputFormat.setOutputPath(joinTo, new Path(joined));
 
         joinTo.setJarByClass(PureMRNodesAndEdgesJob.class);
@@ -140,33 +286,32 @@ public static int run(String nodes, String edges, String output, Configuration c
         groupJob.setSortComparatorClass(NodeAndEdgeIdKeyComparator.class);
         groupJob.setPartitionerClass(NodeAndEdgeIdKeyPartitioner.class);
 
-        groupJob.setMapOutputKeyClass(Text.class);
-        groupJob.setMapOutputValueClass(Text.class);
+        groupJob.setMapOutputKeyClass(NodeEdgeIdWritable.class);
+        groupJob.setMapOutputValueClass(EdgeWritable.class);
 
         groupJob.setMapperClass(GroupNodesAndEdgesMapper.class);
-        groupJob.setInputFormatClass(KeyValueTextInputFormat.class);
+        groupJob.setInputFormatClass(SequenceFileInputFormat.class);
         FileInputFormat.addInputPath(groupJob, new Path(joined));
 
         groupJob.setReducerClass(GroupNodesAndEdgesReducer.class);
         groupJob.setOutputKeyClass(NullWritable.class);
-        groupJob.setOutputValueClass(Text.class);
+        groupJob.setOutputValueClass(NodeEdgeWritable.class);
 
-        groupJob.setOutputFormatClass(TextOutputFormat.class);
+        groupJob.setOutputFormatClass(SequenceFileOutputFormat.class);
         FileOutputFormat.setOutputPath(groupJob, new Path(grouped));
 
         groupJob.setJarByClass(PureMRNodesAndEdgesJob.class);
 
         groupJob.waitForCompletion(true);
 
-        conf.set(NUMBEROFROWS_CONFIG, "" + nrOfNodes);
         Job nodeOutputJob = new Job(conf, "Output nodes job.");
-        nodeOutputJob.setPartitionerClass(RownumPartitioner.class);
+        nodeOutputJob.setPartitionerClass(NodeOutputRownumPartitioner.class);
 
         nodeOutputJob.setMapOutputKeyClass(LongWritable.class);
-        nodeOutputJob.setMapOutputValueClass(Text.class);
+        nodeOutputJob.setMapOutputValueClass(EdgeIdPropIdWritable.class);
 
         nodeOutputJob.setMapperClass(NodeOutputMapper.class);
-        nodeOutputJob.setInputFormatClass(TextInputFormat.class);
+        nodeOutputJob.setInputFormatClass(SequenceFileInputFormat.class);
         FileInputFormat.addInputPath(nodeOutputJob, new Path(grouped));
 
         nodeOutputJob.setReducerClass(NodeOutputReducer.class);
@@ -189,17 +334,17 @@ public static int run(String nodes, String edges, String output, Configuration c
         edgeSurroundJob.setPartitionerClass(AscLongDescLongWritablePartitioner.class);
 
         edgeSurroundJob.setMapOutputKeyClass(AscLongDescLongWritable.class);
-        edgeSurroundJob.setMapOutputValueClass(Text.class);
+        edgeSurroundJob.setMapOutputValueClass(EdgeWritable.class);
 
         edgeSurroundJob.setMapperClass(EdgeSurroundMapper.class);
-        edgeSurroundJob.setInputFormatClass(TextInputFormat.class);
+        edgeSurroundJob.setInputFormatClass(SequenceFileInputFormat.class);
         FileInputFormat.addInputPath(edgeSurroundJob, new Path(grouped));
 
         edgeSurroundJob.setReducerClass(EdgeSurroundReducer.class);
-        edgeSurroundJob.setOutputKeyClass(Text.class);
-        edgeSurroundJob.setOutputValueClass(Text.class);
+        edgeSurroundJob.setOutputKeyClass(NullWritable.class);
+        edgeSurroundJob.setOutputValueClass(SurroundingEdgeWritable.class);
 
-        edgeSurroundJob.setOutputFormatClass(TextOutputFormat.class);
+        edgeSurroundJob.setOutputFormatClass(SequenceFileOutputFormat.class);
         FileOutputFormat.setOutputPath(edgeSurroundJob, new Path(surrounding));
 
         edgeSurroundJob.setJarByClass(PureMRNodesAndEdgesJob.class);
@@ -207,34 +352,35 @@ public static int run(String nodes, String edges, String output, Configuration c
         edgeSurroundJob.waitForCompletion(true);
 
         Job joinSurroundJob = new Job(conf, "Join surrounding edges job.");
+        joinSurroundJob.setGroupingComparatorClass(EdgeWritableKeyGroupingComparator.class);
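+        // Sort and group on the full EdgeWritable key so the two surround
+        // records produced for each edge (one per endpoint) meet in a single
+        // reduce call of the self-join below.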
+        joinSurroundJob.setSortComparatorClass(EdgeWritableKeyComparator.class);
 
-        joinSurroundJob.setMapOutputKeyClass(Text.class);
-        joinSurroundJob.setMapOutputValueClass(Text.class);
+        joinSurroundJob.setMapOutputKeyClass(EdgeWritable.class);
+        joinSurroundJob.setMapOutputValueClass(SurroundingEdgeWritable.class);
 
         joinSurroundJob.setMapperClass(JoinSurroundingEdgesMapper.class);
-        joinSurroundJob.setInputFormatClass(TextInputFormat.class);
+        joinSurroundJob.setInputFormatClass(SequenceFileInputFormat.class);
         FileInputFormat.addInputPath(joinSurroundJob, new Path(surrounding));
 
         joinSurroundJob.setReducerClass(JoinSurroundingEdgesReducer.class);
         joinSurroundJob.setOutputKeyClass(NullWritable.class);
-        joinSurroundJob.setOutputValueClass(Text.class);
+        joinSurroundJob.setOutputValueClass(DoubleSurroundingEdgeWritable.class);
 
-        joinSurroundJob.setOutputFormatClass(TextOutputFormat.class);
+        joinSurroundJob.setOutputFormatClass(SequenceFileOutputFormat.class);
         FileOutputFormat.setOutputPath(joinSurroundJob, new Path(joinededges));
 
         joinSurroundJob.setJarByClass(PureMRNodesAndEdgesJob.class);
 
         joinSurroundJob.waitForCompletion(true);
 
-        conf.set(NUMBEROFROWS_CONFIG, "" + nrOfEdges);
         Job edgeOutputJob = new Job(conf, "Output edges job.");
-        edgeOutputJob.setPartitionerClass(RownumPartitioner.class);
+        edgeOutputJob.setPartitionerClass(EdgeOutputRownumPartitioner.class);
 
         edgeOutputJob.setMapOutputKeyClass(LongWritable.class);
-        edgeOutputJob.setMapOutputValueClass(Text.class);
+        edgeOutputJob.setMapOutputValueClass(FullEdgeWritable.class);
 
         edgeOutputJob.setMapperClass(EdgeOutputMapper.class);
-        edgeOutputJob.setInputFormatClass(TextInputFormat.class);
+        edgeOutputJob.setInputFormatClass(SequenceFileInputFormat.class);
         FileInputFormat.addInputPath(edgeOutputJob, new Path(joinededges));
 
         edgeOutputJob.setReducerClass(EdgeOutputReducer.class);
@@ -256,76 +402,6 @@ public static int run(String nodes, String edges, String output, Configuration c
         Neo4JUtils.writeEdgeIds(nrOfEdges, output, conf);
         Neo4JUtils.writeSingleTypeStore("TRANSFER_TO", output, conf);
 
-        Job nodePropertiesPrepareJob = new Job(conf, "Prepare node properties job.");
-        nodePropertiesPrepareJob.setGroupingComparatorClass(AscLongDescLongKeyGroupingComparator.class);
-        nodePropertiesPrepareJob.setSortComparatorClass(AscLongDescLongKeyComparator.class);
-        nodePropertiesPrepareJob.setPartitionerClass(AscLongDescLongWritablePartitioner.class);
-
-        nodePropertiesPrepareJob.setMapOutputKeyClass(AscLongDescLongWritable.class);
-        nodePropertiesPrepareJob.setMapOutputValueClass(Text.class);
-
-        nodePropertiesPrepareJob.setMapperClass(NodePreparePropertiesMapper.class);
-        nodePropertiesPrepareJob.setInputFormatClass(TextInputFormat.class);
-        FileInputFormat.addInputPath(nodePropertiesPrepareJob, new Path(numberedNodes));
-
-        nodePropertiesPrepareJob.setReducerClass(NodePreparePropertiesReducer.class);
-        nodePropertiesPrepareJob.setOutputKeyClass(AscLongDescLongWritable.class);
-        nodePropertiesPrepareJob.setOutputValueClass(Text.class);
-
-        nodePropertiesPrepareJob.setOutputFormatClass(TextOutputFormat.class);
-        FileOutputFormat.setOutputPath(nodePropertiesPrepareJob, new Path(nodePropertiesPrepareOutput));
-
-        nodePropertiesPrepareJob.setJarByClass(PureMRNodesAndEdgesJob.class);
-
-        nodePropertiesPrepareJob.waitForCompletion(true);
-
-        Map<Integer, Entry<String, Class>> namesMap = new HashMap<Integer, Entry<String, Class>>();
-        namesMap.put(0, new SimpleEntry<String, Class>("identifier", String.class));
-        namesMap.put(1, new SimpleEntry<String, Class>("name", String.class));
-
-        Neo4JUtils.writePropertyKeyStore(namesMap, propertiesOutput, conf);
-
-        Neo4JUtils.writePropertyStoreFooter(propertiesOutput, conf);
-        Neo4JUtils.writePropertyStringStoreHeader(propertiesOutput, conf);
-        Neo4JUtils.writePropertyStringStoreFooter(propertiesOutput, conf);
-
-        conf.set(NUMBEROFROWS_CONFIG, "" + nrOfNodes * namesMap.size());
-        Job nodePropertiesOutputJob = new Job(conf, "Output properties job.");
-        nodePropertiesOutputJob.setPartitionerClass(PropertyOutputIdBlockcountPartitioner.class);
-        nodePropertiesOutputJob.setSortComparatorClass(ByteMarkerAndPropertyOutputIdComparator.class);
-        nodePropertiesOutputJob.setGroupingComparatorClass(IndifferentByteMarkerAndPropertyOutputIdComparator.class);
-
-        nodePropertiesOutputJob.setMapOutputKeyClass(ByteMarkerPropertyIdWritable.class);
-        nodePropertiesOutputJob.setMapOutputValueClass(PropertyOutputIdBlockcountValueWritable.class);
-
-        nodePropertiesOutputJob.setMapperClass(PropertyOutputMapper.class);
-        nodePropertiesOutputJob.setInputFormatClass(TextInputFormat.class);
-        FileInputFormat.addInputPath(nodePropertiesOutputJob, new Path(nodePropertiesPrepareOutput));
-
-        nodePropertiesOutputJob.setReducerClass(PropertyOutputReducer.class);
-//        nodePropertiesOutputJob.setReducerClass(PropertyAsTextOutputReducer.class);
-
-        FileOutputFormat.setOutputPath(nodePropertiesOutputJob, new Path(propertiesOutput + "/propertystore.db"));
-//        MultipleOutputs.addNamedOutput(nodePropertiesOutputJob, "props", TextOutputFormat.class, NullWritable.class, Text.class);
-//        MultipleOutputs.addNamedOutput(nodePropertiesOutputJob, "strings", TextOutputFormat.class, NullWritable.class, Text.class);
-        MultipleOutputs.addNamedOutput(nodePropertiesOutputJob, "props", NewByteBufferOutputFormat.class, NullWritable.class, BytesWritable.class);
-        MultipleOutputs.addNamedOutput(nodePropertiesOutputJob, "strings", NewByteBufferOutputFormat.class, NullWritable.class, BytesWritable.class);
-
-        MultipleOutputs.setCountersEnabled(nodePropertiesOutputJob, true);
-
-        nodePropertiesOutputJob.setJarByClass(PureMRNodesAndEdgesJob.class);
-
-        nodePropertiesOutputJob.waitForCompletion(true);
-
-        long nrOfWrittenStringBlocks = nodePropertiesOutputJob.getCounters().findCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "strings.blocks").getValue();
-
-        System.out.println(nrOfWrittenStringBlocks);
-
-        Neo4JUtils.writePropertyIds(nrOfNodes * namesMap.size(), propertiesOutput + "/neostore.propertystore.db", conf);
-        Neo4JUtils.writePropertyIds(nrOfWrittenStringBlocks, propertiesOutput + "/neostore.propertystore.db.strings", conf);
-
-        Neo4JUtils.writeEmptArrayStore(propertiesOutput, conf);
-
     } catch (Exception e) {
         System.err.println(e.getMessage());
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/RownumPartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/RownumPartitioner.java
index 61d7c18..e42997f 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/RownumPartitioner.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/RownumPartitioner.java
@@ -1,5 +1,7 @@
 package nl.waredingen.graphs.neo.mapreduce;
 
+import nl.waredingen.graphs.neo.mapreduce.input.AbstractMetaData;
+
 import org.apache.hadoop.conf.Configurable;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.LongWritable;
@@ -25,7 +27,7 @@ public void setConf(Configuration conf) {
     }
 
     private void configure() {
-        this.max = Long.parseLong(getConf().get(PureMRNodesAndEdgesJob.NUMBEROFROWS_CONFIG));
+        this.max = Long.parseLong(getConf().get(AbstractMetaData.METADATA_NUMBER_OF_NODES));
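+        // The node count now travels with the rest of the metadata in the
+        // Configuration instead of the old job-level row-count setting.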
     }
 
     @Override
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/SurroundingContext.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/SurroundingContext.java
index c346d41..80f5585 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/SurroundingContext.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/SurroundingContext.java
@@ -1,18 +1,10 @@
 package nl.waredingen.graphs.neo.mapreduce;
 
-
-
 public class SurroundingContext {
-    public long id = -1L, from = -1L, to = -1L, other = -1L, prev = -1L, next = -1L;
-    public String val = null;
+    public long nodeid = -1L, from = -1L, to = -1L, edgeid = -1L, edgeprop = -1L, prev = -1L, next = -1L;
 
     public SurroundingContext() {
     }
 
-    @Override
-    public String toString() {
-        return id + "\t" + other + "\t" + ((from != -1L) ? from + "\t" : "") + ((to != -1L) ? to + "\t" : "") + ((val != null) ? val + "\t" : "") + prev + "\t" + next;
-    }
-
 }
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputAsTextReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputAsTextReducer.java
index bad8fc7..10ba16e 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputAsTextReducer.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputAsTextReducer.java
@@ -3,9 +3,6 @@
 import java.io.IOException;
 import java.util.Iterator;
 
-import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
-
-import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputMapper.java
index c8da1ce..7a55352 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputMapper.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputMapper.java
@@ -2,24 +2,26 @@
 
 import java.io.IOException;
 
+import nl.waredingen.graphs.neo.mapreduce.input.writables.DoubleSurroundingEdgeWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.FullEdgeWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.SurroundingEdgeWritable;
+
 import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.Mapper;
 
-public class EdgeOutputMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
+public class EdgeOutputMapper extends Mapper<NullWritable, DoubleSurroundingEdgeWritable, LongWritable, FullEdgeWritable> {
 
     private LongWritable outputKey = new LongWritable();
-    private Text outputValue = new Text();
-
-    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
-        String[] vals = value.toString().split("\t", 12);
-        long node = Long.parseLong(vals[0]);
-        long from = Long.parseLong(vals[2]);
+    private FullEdgeWritable outputValue = new FullEdgeWritable();
 
-        if (from == node) {
-            outputKey.set(Long.parseLong(vals[1]));
-            outputValue
-                    .set(from + "\t" + vals[3] + "\t" + vals[4] + "\t" + vals[5] + "\t" + vals[10] + "\t" + vals[11]);
+    protected void map(NullWritable key, DoubleSurroundingEdgeWritable value, Context context) throws IOException, InterruptedException {
+        SurroundingEdgeWritable left = value.getLeft();
+        SurroundingEdgeWritable right = value.getRight();
+
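+        // The self-join delivers every edge twice (once per endpoint); only the
+        // record seen from its from-node is emitted, so each relationship is
+        // written exactly once, keyed by edge id.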
+        if (left.getNodeId().equals(left.getFromNodeId())) {
+            outputKey.set(value.getLeft().getEdgeId().get());
+            outputValue.set(left.getFromNodeId(), left.getToNodeId(), left.getEdgePropId(), left.getEdgePrev(), left.getEdgeNext(), right.getEdgePrev(), right.getEdgeNext());
             context.write(outputKey, outputValue);
         }
     }
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputReducer.java
index 5370bee..289d47e 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputReducer.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputReducer.java
@@ -3,38 +3,38 @@
 import java.io.IOException;
 import java.util.Iterator;
 
+import nl.waredingen.graphs.neo.mapreduce.input.writables.FullEdgeWritable;
 import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
 
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Reducer;
 
-public class EdgeOutputReducer extends Reducer<LongWritable, Text, NullWritable, BytesWritable> {
+public class EdgeOutputReducer extends Reducer<LongWritable, FullEdgeWritable, NullWritable, BytesWritable> {
 
     private BytesWritable outputValue = new BytesWritable();
 
-    protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException,
+    @Override
+    protected void reduce(LongWritable key, Iterable<FullEdgeWritable> values, Context context) throws IOException,
             InterruptedException {
-        Iterator<Text> itr = values.iterator();
+        Iterator<FullEdgeWritable> itr = values.iterator();
         if (!itr.hasNext()) {
             return;
         }
 
         // only use first record per key. Rest is duplicates from the selfjoin in the previous step
-        Text value = itr.next();
+        FullEdgeWritable value = itr.next();
 
-        String[] vals = value.toString().split("\t", 6);
         long relnum = key.get();
-        long from = Long.parseLong(vals[0]);
-        long to = Long.parseLong(vals[1]);
-        long fromprev = Long.parseLong(vals[2]);
-        long fromnext = Long.parseLong(vals[3]);
-        long toprev = Long.parseLong(vals[4]);
-        long tonext = Long.parseLong(vals[5]);
-        long prop = -1L;
+        long from = value.getFromNodeId().get();
+        long to = value.getToNodeId().get();
+        long fromprev = value.getFromPrev().get();
+        long fromnext = value.getFromNext().get();
+        long toprev = value.getToPrev().get();
+        long tonext = value.getToNext().get();
+        long prop = value.getEdgeProp().get();
 
         writeEdge(relnum, from , to, 0, fromprev, fromnext, toprev, tonext, prop, context);
     }
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputRownumPartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputRownumPartitioner.java
new file mode 100644
index 0000000..eccde69
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputRownumPartitioner.java
@@ -0,0 +1,13 @@
+package nl.waredingen.graphs.neo.mapreduce.edges;
+
+import nl.waredingen.graphs.neo.mapreduce.AbstractRownumPartitioner;
+import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
+
+public class EdgeOutputRownumPartitioner extends AbstractRownumPartitioner {
+
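+    // As with RownumPartitioner, rows are spread over reducers by row number;
+    // the maximum here comes from the edge count in the job metadata.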
+    @Override
+    public long getMaxCounter() {
+        return Neo4JUtils.getMetaData(conf).getNumberOfEdges();
+    }
+
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapper.java
index 5123d57..6f776de 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapper.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapper.java
@@ -2,24 +2,22 @@
 
 import java.io.IOException;
 
-import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeWritable;
 
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.Mapper;
 
-public class EdgeSurroundMapper extends Mapper<LongWritable, Text, AscLongDescLongWritable, Text> {
+public class EdgeSurroundMapper extends Mapper<NullWritable, NodeEdgeWritable, AscLongDescLongWritable, EdgeWritable> {
 
     private AscLongDescLongWritable outputKey = new AscLongDescLongWritable();
-    private Text outputValue = new Text();
 
-    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
-        //nodeid node nodename edgeid fromnodeid tonodeid
-        String[] vals = value.toString().split("\t", 6);
-        outputKey.setLeft(new LongWritable(Long.parseLong(vals[0])));
-        outputKey.setRight(new LongWritable(Long.parseLong(vals[3])));
+    @Override
+    protected void map(NullWritable key, NodeEdgeWritable value, Context context) throws IOException, InterruptedException {
+        outputKey.setLeft(value.getNode().getNodeId());
+        outputKey.setRight(value.getEdge().getEdgeId());
 
-        outputValue.set(vals[4]+"\t"+vals[5]);
-        context.write(outputKey, outputValue);
+        context.write(outputKey, value.getEdge());
     }
 }
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducer.java
index fc918f2..6d62834 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducer.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducer.java
@@ -3,35 +3,39 @@
 import java.io.IOException;
 import java.util.Iterator;
 
-import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongWritable;
 import nl.waredingen.graphs.neo.mapreduce.SurroundingContext;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.SurroundingEdgeWritable;
 
 import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Reducer;
 
-public class EdgeSurroundReducer extends Reducer<AscLongDescLongWritable, Text, NullWritable, Text> {
+public class EdgeSurroundReducer extends Reducer<AscLongDescLongWritable, EdgeWritable, NullWritable, SurroundingEdgeWritable> {
 
-    private Text outputValue = new Text();
-
-    protected void reduce(AscLongDescLongWritable key, Iterable<Text> values, Context context) throws IOException ,InterruptedException {
-        Iterator<Text> iter = values.iterator();
+    private SurroundingEdgeWritable outputValue = new SurroundingEdgeWritable();
+
+    @Override
+    protected void reduce(AscLongDescLongWritable key, Iterable<EdgeWritable> values, Context context) throws IOException ,InterruptedException {
+        Iterator<EdgeWritable> iter = values.iterator();
         SurroundingContext edge = new SurroundingContext();
         while (iter.hasNext()) {
-            String[] vals = iter.next().toString().split("\t", 2);
+            EdgeWritable value = iter.next();
             long id = key.getLeft().get();
-            long from = Long.parseLong(vals[0]);
-            long to = Long.parseLong(vals[1]);
-            long relnum = key.getRight().get();
-            if (edge.id == -1L) {
+            long from = value.getFromNodeId().get();
+            long to = value.getToNodeId().get();
+            long relnum = value.getEdgeId().get();
+            long prop = value.getEdgePropId().get();
+            if (edge.nodeid == -1L) {
                 // first call, so set current fields
-                edge.id = id;
+                edge.nodeid = id;
                 edge.from = from;
                 edge.to = to;
-                edge.other = relnum;
+                edge.edgeid = relnum;
+                edge.edgeprop = prop;
                 edge.prev = -1L; // don't know yet
                 edge.next = -1L; // first call, relationships ordered descending, so last rel, so no next available
@@ -41,14 +45,15 @@ protected void reduce(AscLongDescLongWritable key, Iterable<Text> values, Contex
 
                 // current
                 edge.prev = relnum;
-                outputValue.set(edge.toString());
+                outputValue.set(edge.nodeid, edge.edgeid, edge.from, edge.to, edge.edgeprop, edge.prev, edge.next);
                 context.write(NullWritable.get(), outputValue);
 
-                long next = edge.other;
-                edge.id = id;
+                long next = edge.edgeid;
+                edge.nodeid = id;
                 edge.from = from;
                 edge.to = to;
-                edge.other = relnum;
+                edge.edgeid = relnum;
+                edge.edgeprop = prop;
                 edge.prev = -1L; // don't know yet
                 edge.next = next;
@@ -57,7 +62,7 @@ protected void reduce(AscLongDescLongWritable key, Iterable<Text> values, Contex
         }
 
         // write out last context
-        outputValue.set(edge.toString());
+        outputValue.set(edge.nodeid, edge.edgeid, edge.from, edge.to, edge.edgeprop, edge.prev, edge.next);
         context.write(NullWritable.get(), outputValue);
     }
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/EdgeWritableKeyComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/EdgeWritableKeyComparator.java
new file mode 100644
index 0000000..0db45c1
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/EdgeWritableKeyComparator.java
@@ -0,0 +1,22 @@
+package nl.waredingen.graphs.neo.mapreduce.edges.surround.join;
+
+import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable;
+
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+
+public class EdgeWritableKeyComparator extends WritableComparator {
+    protected EdgeWritableKeyComparator() {
+        super(EdgeWritable.class, true);
+    }
+
+    @SuppressWarnings("rawtypes")
+    @Override
+    public int compare(WritableComparable w1, WritableComparable w2) {
+        EdgeWritable k1 = (EdgeWritable) w1;
+        EdgeWritable k2 = (EdgeWritable) w2;
+
+        return k1.compareTo(k2);
+    }
+
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/EdgeWritableKeyGroupingComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/EdgeWritableKeyGroupingComparator.java
new file mode 100644
index 0000000..2d14236
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/EdgeWritableKeyGroupingComparator.java
@@ -0,0 +1,22 @@
+package nl.waredingen.graphs.neo.mapreduce.edges.surround.join;
+
+import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable;
+
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+
+public class EdgeWritableKeyGroupingComparator extends WritableComparator {
+    protected EdgeWritableKeyGroupingComparator() {
+        super(EdgeWritable.class, true);
+    }
+
+    @SuppressWarnings("rawtypes")
+    @Override
+    public int compare(WritableComparable w1, WritableComparable w2) {
+        EdgeWritable k1 = (EdgeWritable) w1;
+        EdgeWritable k2 = (EdgeWritable) w2;
+
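+        // Grouping intentionally matches the sort comparator: the complete
+        // EdgeWritable (id, from, to, prop) is the join key, so only truly
+        // identical edge keys share a reduce call.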
+        return k1.compareTo(k2);
+    }
+
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesMapper.java
index abfefeb..96aaf88 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesMapper.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesMapper.java
@@ -2,18 +2,21 @@
 
 import java.io.IOException;
 
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.SurroundingEdgeWritable;
+
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.Mapper;
 
-public class JoinSurroundingEdgesMapper extends Mapper<LongWritable, Text, Text, Text> {
+public class JoinSurroundingEdgesMapper extends Mapper<NullWritable, SurroundingEdgeWritable, EdgeWritable, SurroundingEdgeWritable> {
+
+    private EdgeWritable outputKey = new EdgeWritable();
+    private SurroundingEdgeWritable outputValue = new SurroundingEdgeWritable();
 
-    private Text outputKey = new Text();
-
-    protected void map(LongWritable key, Text value, Context context) throws IOException ,InterruptedException {
-        String[] values = value.toString().split("\t", 5);
-
-        outputKey.set(values[1]+";"+values[2]+";"+values[3]);
-        context.write(outputKey, value);
+    @Override
+    protected void map(NullWritable key, SurroundingEdgeWritable value, Context context) throws IOException, InterruptedException {
+        outputKey.set(value.getEdgeId(), value.getFromNodeId(), value.getToNodeId(), value.getEdgePropId());
+        outputValue.set(value.getNodeId(), value.getEdgeId(), value.getFromNodeId(), value.getToNodeId(), value.getEdgePropId(), value.getEdgePrev(), value.getEdgeNext());
+        context.write(outputKey, outputValue);
    }
 }
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesReducer.java
index 18380e7..7e379a2 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesReducer.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesReducer.java
@@ -3,36 +3,40 @@
 import java.io.IOException;
 import java.util.Iterator;
 
+import nl.waredingen.graphs.neo.mapreduce.input.writables.DoubleSurroundingEdgeWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.SurroundingEdgeWritable;
+
 import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Reducer;
 
-public class JoinSurroundingEdgesReducer extends Reducer<Text, Text, NullWritable, Text> {
+public class JoinSurroundingEdgesReducer extends Reducer<EdgeWritable, SurroundingEdgeWritable, NullWritable, DoubleSurroundingEdgeWritable> {
 
-    private Text outputValue = new Text();
+    private DoubleSurroundingEdgeWritable outputValue = new DoubleSurroundingEdgeWritable();
 
     @Override
-    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
-        Iterator<Text> iter = values.iterator();
+    protected void reduce(EdgeWritable key, Iterable<SurroundingEdgeWritable> values, Context context) throws IOException, InterruptedException {
+
+        Iterator<SurroundingEdgeWritable> iter = values.iterator();
         if (!iter.hasNext()) {
             return;
        }
 
-        String left = iter.next().toString();
+        // Deep copy here so we don't run into the pitfall of Hadoop's iterable
+        // optimization, which reuses the object returned by next()
+        SurroundingEdgeWritable left = new SurroundingEdgeWritable(iter.next());
 
         if (iter.hasNext()) {
-            String right = iter.next().toString();
+            SurroundingEdgeWritable right = new SurroundingEdgeWritable(iter.next());
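+            // Emit the pair in both orders so the edge-output step can read the
+            // prev/next pointers of either endpoint from a single record.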
 
             if (!left.equals(right)) {
-                outputValue.set(left + "\t" + right);
+                outputValue.set(left, right);
                 context.write(NullWritable.get(), outputValue);
-                outputValue.set(right + "\t" + left);
+                outputValue.set(right, left);
                 context.write(NullWritable.get(), outputValue);
             }
         }
-
     }
 }
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesMapper.java
index 52a4cbe..f842d7e 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesMapper.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesMapper.java
@@ -2,26 +2,42 @@
 
 import java.io.IOException;
 
+import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeIdWritable;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
 
-public class GroupNodesAndEdgesMapper extends Mapper<Text, Text, Text, Text> {
+public class GroupNodesAndEdgesMapper extends Mapper<NullWritable, Text, NodeEdgeIdWritable, EdgeWritable> {
+
+    private NodeEdgeIdWritable outputKey = new NodeEdgeIdWritable();
+    private EdgeWritable outputValue = new EdgeWritable();
+    private LongWritable edgeId = new LongWritable();
+    private LongWritable fromNodeId = new LongWritable();
+    private LongWritable toNodeId = new LongWritable();
+    private LongWritable edgePropId = new LongWritable();
+    private LongWritable fromPropId = new LongWritable();
+    private LongWritable toPropId = new LongWritable();
+
+    @Override
+    protected void map(NullWritable key, Text value, Context context) throws IOException ,InterruptedException {
+        //edgeid fromnode tonode edgePropId fromnodeid fromPropId tonodeid toPropId
+        String[] values = value.toString().split("\t", 8);
 
-    private Text outputKey = new Text();
-    private Text outputValue = new Text();
-
-    protected void map(Text key, Text value, Context context) throws IOException ,InterruptedException {
-        //edgeid fromnode tonode fromnodeid fromnode fromname tonodeid tonode toname
-        String[] values = value.toString().split("\t",9);
-        //edgeid fromnodeid tonodeid
-        outputValue.set(values[0] + "\t" + values[3]+ "\t" + values[6]);
+        edgeId.set(Long.parseLong(values[0]));
+        edgePropId.set(Long.parseLong(values[3]));
+        fromNodeId.set(Long.parseLong(values[4]));
+        fromPropId.set(Long.parseLong(values[5]));
+        toNodeId.set(Long.parseLong(values[6]));
+        toPropId.set(Long.parseLong(values[7]));
+        outputValue.set(edgeId, fromNodeId, toNodeId, edgePropId);
 
-        //fromnodeid fromnode fromname;edgeid
-        outputKey.set(values[3]+"\t"+values[4]+"\t"+values[5]+";"+values[0]);
+        outputKey.set(fromNodeId, fromPropId, edgeId);
         context.write(outputKey, outputValue);
 
-        //tonodeid tonode toname;edgeid
-        outputKey.set(values[6]+"\t"+values[7]+"\t"+values[8]+";"+values[0]);
+        outputKey.set(toNodeId, toPropId, edgeId);
         context.write(outputKey, outputValue);
     }
 }
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesReducer.java
index a9fe5fa..1b03ffb 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesReducer.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesReducer.java
@@ -2,18 +2,21 @@
 
 import java.io.IOException;
 
+import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeIdWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeWritable;
+
 import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Reducer;
 
-public class GroupNodesAndEdgesReducer extends Reducer<Text, Text, NullWritable, Text> {
+public class GroupNodesAndEdgesReducer extends Reducer<NodeEdgeIdWritable, EdgeWritable, NullWritable, NodeEdgeWritable> {
 
-    private Text outputValue = new Text();
-    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException ,InterruptedException {
-        String keyString = key.toString();
-        String outputKey = keyString.substring(0, keyString.lastIndexOf(";"));
-        for (Text value : values) {
-            outputValue.set(outputKey+"\t"+ value);
+    private NodeEdgeWritable outputValue = new NodeEdgeWritable();
+
+    @Override
+    protected void reduce(NodeEdgeIdWritable key, Iterable<EdgeWritable> values, Context context) throws IOException ,InterruptedException {
+        for (EdgeWritable value : values) {
+            outputValue.set(key.getNodeId().get(), key.getPropId().get(), value.getEdgeId().get(), value.getFromNodeId().get(), value.getToNodeId().get(), value.getEdgePropId().get());
             context.write(NullWritable.get(), outputValue);
         }
     }
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyComparator.java
index 34aa173..afc1772 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyComparator.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyComparator.java
@@ -1,32 +1,22 @@
 package nl.waredingen.graphs.neo.mapreduce.group;
 
-import org.apache.hadoop.io.Text;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeIdWritable;
+
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.WritableComparator;
 
 public class NodeAndEdgeIdKeyComparator extends WritableComparator {
     protected NodeAndEdgeIdKeyComparator() {
-        super(Text.class, true);
+        super(NodeEdgeIdWritable.class, true);
     }
 
     @SuppressWarnings("rawtypes")
     @Override
     public int compare(WritableComparable w1, WritableComparable w2) {
-        Text k1 = (Text) w1;
-        Text k2 = (Text) w2;
+        NodeEdgeIdWritable k1 = (NodeEdgeIdWritable) w1;
+        NodeEdgeIdWritable k2 = (NodeEdgeIdWritable) w2;
 
-        String k1s = k1.toString();
-        String key1 = k1s.substring(0, k1s.lastIndexOf(";"));
-        Long edgeId = Long.valueOf(k1s.substring(k1s.lastIndexOf(";")+1));
-        String k2s = k2.toString();
-        String key2 = k2s.substring(0, k2s.lastIndexOf(";"));
-        Long edgeId2 = Long.valueOf(k2s.substring(k2s.lastIndexOf(";")+1));
-
-        int result = key1.compareTo(key2);
-        if (0 == result) {
-            result = edgeId.compareTo(edgeId2);
-        }
-        return result;
+        return k1.compareTo(k2);
     }
 }
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyGroupingComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyGroupingComparator.java
index 3e8c1f2..7cddacd 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyGroupingComparator.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyGroupingComparator.java
@@ -1,26 +1,22 @@
 package nl.waredingen.graphs.neo.mapreduce.group;
 
-import org.apache.hadoop.io.Text;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeIdWritable;
+
 import org.apache.hadoop.io.WritableComparable;
 import org.apache.hadoop.io.WritableComparator;
 
 public class NodeAndEdgeIdKeyGroupingComparator extends WritableComparator {
     protected NodeAndEdgeIdKeyGroupingComparator() {
-        super(Text.class, true);
+        super(NodeEdgeIdWritable.class, true);
     }
 
     @SuppressWarnings("rawtypes")
     @Override
     public int compare(WritableComparable w1, WritableComparable w2) {
-        Text k1 = (Text) w1;
-        Text k2 = (Text) w2;
-
-        String k1s = k1.toString();
-        String key1 = k1s.substring(0, k1s.lastIndexOf(";"));
-        String k2s = k2.toString();
-        String key2 = k2s.substring(0, k2s.lastIndexOf(";"));
+        NodeEdgeIdWritable k1 = (NodeEdgeIdWritable) w1;
+        NodeEdgeIdWritable k2 = (NodeEdgeIdWritable) w2;
 
-        return key1.compareTo(key2);
+        return k1.getNodeId().compareTo(k2.getNodeId());
     }
 }
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitioner.java
index 179caa6..a323539 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitioner.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitioner.java
@@ -1,15 +1,15 @@
 package nl.waredingen.graphs.neo.mapreduce.group;
 
-import org.apache.hadoop.io.Text;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeIdWritable;
+
 import org.apache.hadoop.mapreduce.Partitioner;
 
-public class NodeAndEdgeIdKeyPartitioner extends Partitioner<Text, Text> {
+public class NodeAndEdgeIdKeyPartitioner extends Partitioner<NodeEdgeIdWritable, EdgeWritable> {
 
     @Override
-    public int getPartition(Text key, Text val, int numPartitions) {
-        String keyString = key.toString();
-        int hash = keyString.substring(0,keyString.lastIndexOf(";")).hashCode();
-        return (hash & Integer.MAX_VALUE) % numPartitions;
+    public int getPartition(NodeEdgeIdWritable key, EdgeWritable val, int numPartitions) {
+        return (key.getNodeId().hashCode() & Integer.MAX_VALUE) % numPartitions;
     }
 }
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/AbstractMetaData.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/AbstractMetaData.java
new file mode 100644
index 0000000..e7a2415
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/AbstractMetaData.java
@@ -0,0 +1,116 @@
+package nl.waredingen.graphs.neo.mapreduce.input;
+
+import org.apache.hadoop.conf.Configuration;
+
+public abstract class AbstractMetaData implements MetaData {
+
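+    // Counts collected by earlier jobs travel between jobs in the Hadoop
+    // Configuration under these keys, so partitioners and output writers can
+    // size their work without re-reading the data.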
+    public static final String METADATA_NUMBER_OF_NODES = "neo.db.creation.metadata.node.count";
+    public static final String METADATA_NUMBER_OF_NODE_PROPERTIES = "neo.db.creation.metadata.node.property.count";
+    public static final String METADATA_NUMBER_OF_EDGES = "neo.db.creation.metadata.edge.count";
+    public static final String METADATA_CLASS = "neo.db.creation.metadata.class";
+
+    private long numberOfNodes;
+    private long numberOfNodeProperties;
+    private long numberOfEdges;
+
+    public AbstractMetaData(Configuration conf) {
+        numberOfNodes = conf.getLong(METADATA_NUMBER_OF_NODES, 0L);
+        numberOfNodeProperties = conf.getLong(METADATA_NUMBER_OF_NODE_PROPERTIES, 0L);
+        numberOfEdges = conf.getLong(METADATA_NUMBER_OF_EDGES, 0L);
+    }
+
+    public AbstractMetaData() {
+    }
+
+    public long getNumberOfNodes() {
+        return numberOfNodes;
+    }
+
+    public long getNumberOfNodeProperties() {
+        return numberOfNodeProperties;
+    }
+
+    public long getNumberOfEdges() {
+        return numberOfEdges;
+    }
+
+    @Override
+    public int getNodePropertySize() {
+        return getNodeTypeNames().length;
+    }
+
+    @Override
+    public int getNodePropertyIndexForName(String name) {
+        return getPropertyIndexForName(name, getNodeTypeNames());
+    }
+
+    @SuppressWarnings("rawtypes")
+    @Override
+    public Class getNodePropertyTypeForName(String name) {
+        int nodePropertyIndexForName = getNodePropertyIndexForName(name);
+        if (nodePropertyIndexForName >= 0) {
+            return getNodeTypes()[nodePropertyIndexForName];
+        } else {
+            return String.class;
+        }
+    }
+
+    @Override
+    public int getNodeIdIndex() {
+        return getNodePropertyIndexForName(getNodeIdIdentifier());
+    }
+
+    @Override
+    public String[] getNodePropertyNames() {
+        return getNodeTypeNames();
+    }
+
+    @Override
+    public int getEdgePropertySize() {
+        return getEdgeTypeNames().length;
+    }
+
+    @Override
+    public int getEdgePropertyIndexForName(String name) {
+        return getPropertyIndexForName(name, getEdgeTypeNames());
+    }
+
+    @SuppressWarnings("rawtypes")
+    @Override
+    public Class getEdgePropertyTypeForName(String name) {
+        int edgePropertyIndexForName = getEdgePropertyIndexForName(name);
+        if (edgePropertyIndexForName >= 0) {
+            return getEdgeTypes()[edgePropertyIndexForName];
+        } else {
+            return String.class;
+        }
+    }
+
+    @Override
+    public int getEdgeFromNodeIdIndex() {
+        return getEdgePropertyIndexForName("from");
+    }
+
+    @Override
+    public int getEdgeToNodeIdIndex() {
+        return getEdgePropertyIndexForName("to");
+    }
+
+    @Override
+    public String[] getEdgePropertyNames() {
+        return getEdgeTypeNames();
+    }
+
+    private int getPropertyIndexForName(String name, String[] array) {
+        int result = -1;
+        for (int i = 0; i < array.length; i++) {
+            if (array[i] != null && array[i].equals(name)) {
+                result = i;
+            }
+        }
+        return result;
+    }
+
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/HardCodedMetaDataImpl.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/HardCodedMetaDataImpl.java
new file mode 100644
index 0000000..08330cb
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/HardCodedMetaDataImpl.java
@@ -0,0 +1,51 @@
+package nl.waredingen.graphs.neo.mapreduce.input;
+
+import org.apache.hadoop.conf.Configuration;
+
+public class HardCodedMetaDataImpl extends AbstractMetaData {
+
+    private final String[] nodeTypeNames = { "identifier", "name" };
+    @SuppressWarnings("rawtypes")
+    private final Class[] nodeTypes = { Long.class, String.class };
+    private final String[] edgeTypeNames = { "from", "to" };
+    @SuppressWarnings("rawtypes")
+    private final Class[] edgeTypes = { Long.class, Long.class };
+
+    public HardCodedMetaDataImpl(Configuration conf) {
+        super(conf);
+    }
+
+    @Override
+    public boolean isDynamicTyping() {
+        return false;
+    }
+
+    @Override
+    public String[] getNodeTypeNames() {
+        return nodeTypeNames;
+    }
+
+    @SuppressWarnings("rawtypes")
+    @Override
+    public Class[] getNodeTypes() {
+        return nodeTypes;
+    }
+
+    @Override
+    public String[] getEdgeTypeNames() {
+        return edgeTypeNames;
+    }
+
+    @SuppressWarnings("rawtypes")
+    @Override
+    public Class[] getEdgeTypes() {
+        return edgeTypes;
+    }
+
+    @Override
+    public String getNodeIdIdentifier() {
+        return "identifier";
+    }
+
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/MetaData.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/MetaData.java
new file mode 100644
index 0000000..bebb2ab
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/MetaData.java
@@ -0,0 +1,49 @@
+package nl.waredingen.graphs.neo.mapreduce.input;
+
+public interface MetaData {
+
+    public abstract boolean isDynamicTyping();
+
+    public abstract String[] getNodeTypeNames();
+
+    @SuppressWarnings("rawtypes")
+    public abstract Class[] getNodeTypes();
+
+    public abstract String[] getEdgeTypeNames();
+
+    @SuppressWarnings("rawtypes")
+    public abstract Class[] getEdgeTypes();
+
+    public abstract String getNodeIdIdentifier();
+
+    public int getNodePropertySize();
+
+    public int getNodePropertyIndexForName(String name);
+
+    @SuppressWarnings("rawtypes")
+    public Class getNodePropertyTypeForName(String name);
+
+    public int getNodeIdIndex();
+
+    public String[] getNodePropertyNames();
+
+    public int getEdgePropertySize();
+
+    public int getEdgePropertyIndexForName(String name);
+
+    @SuppressWarnings("rawtypes")
+    public Class getEdgePropertyTypeForName(String name);
+
+    public int getEdgeFromNodeIdIndex();
+
+    public int getEdgeToNodeIdIndex();
+
+    public String[] getEdgePropertyNames();
+
+    public long getNumberOfNodes();
+
+    public long getNumberOfEdges();
+
+    public abstract long getNumberOfNodeProperties();
+
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/MetaDataFromConfigImpl.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/MetaDataFromConfigImpl.java
new file mode 100644
index 0000000..bd4008a
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/MetaDataFromConfigImpl.java
@@ -0,0 +1,80 @@
+package nl.waredingen.graphs.neo.mapreduce.input;
+
+import org.apache.hadoop.conf.Configuration;
+
+public class MetaDataFromConfigImpl extends AbstractMetaData {
+
+    public static final String METADATA_NODE_ID_NAME = "neo.db.creation.metadata.node.id.name";
+    public static final String METADATA_NODE_PROPERTY_NAMES = "neo.db.creation.metadata.node.property.names";
+    public static final String METADATA_NODE_PROPERTY_TYPE_PREFIX = "neo.db.creation.metadata.node.property.type.";
+    public static final String METADATA_EDGE_PROPERTY_TYPE_PREFIX = "neo.db.creation.metadata.edge.property.type.";
+    public static final String METADATA_EDGE_PROPERTY_NAMES = "neo.db.creation.metadata.edge.property.names";
+
+    private final String[] nodeTypeNames;
+    @SuppressWarnings("rawtypes")
+    private final Class[] nodeTypes;
+    private final String[] edgeTypeNames;
+    @SuppressWarnings("rawtypes")
+    private final Class[] edgeTypes;
+    private String nodeIdName;
+
+    public MetaDataFromConfigImpl(Configuration conf) {
+        super(conf);
+        nodeTypeNames = conf.getStrings(METADATA_NODE_PROPERTY_NAMES);
+        nodeIdName = conf.get(METADATA_NODE_ID_NAME);
+        nodeTypes = getNodeTypesFromConfig(conf);
+        edgeTypeNames = conf.getStrings(METADATA_EDGE_PROPERTY_NAMES);
+        edgeTypes = getEdgeTypesFromConfig(conf);
+    }
+
+    @SuppressWarnings("rawtypes")
+    private Class[] getEdgeTypesFromConfig(Configuration conf) {
+        Class[] result = new Class[getEdgeTypeNames().length];
+        for (int i = 0; i < getEdgeTypeNames().length; i++) {
+            result[i] = conf.getClass(METADATA_EDGE_PROPERTY_TYPE_PREFIX + getEdgeTypeNames()[i], String.class);
+        }
+        return result;
+    }
+
+    @SuppressWarnings("rawtypes")
+    private Class[] getNodeTypesFromConfig(Configuration conf) {
+        Class[] result = new Class[getNodeTypeNames().length];
+        for (int i = 0; i < getNodeTypeNames().length; i++) {
+            result[i] = conf.getClass(METADATA_NODE_PROPERTY_TYPE_PREFIX + getNodeTypeNames()[i], String.class);
+        }
+        return result;
+    }
+
+    @Override
+    public boolean isDynamicTyping() {
+        return false;
+    }
+
+    @Override
+    public String[] getNodeTypeNames() {
+        return nodeTypeNames;
+    }
+
+    @Override
+    public String getNodeIdIdentifier() {
+        return nodeIdName;
+    }
+
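+    // Property names and per-property types come straight from the job
+    // Configuration; a type that cannot be resolved falls back to String.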
+    @SuppressWarnings("rawtypes")
+    @Override
+    public Class[] getNodeTypes() {
+        return nodeTypes;
+    }
+
+    @Override
+    public String[] getEdgeTypeNames() {
+        return edgeTypeNames;
+    }
+
+    @SuppressWarnings("rawtypes")
+    @Override
+    public Class[] getEdgeTypes() {
+        return edgeTypes;
+    }
+
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/AscLongDescLongWritable.java
similarity index 93%
rename from job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongWritable.java
rename to job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/AscLongDescLongWritable.java
index 082c06e..51aa2f2 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/AscLongDescLongWritable.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/AscLongDescLongWritable.java
@@ -1,4 +1,4 @@
-package nl.waredingen.graphs.neo.mapreduce;
+package nl.waredingen.graphs.neo.mapreduce.input.writables;
 
 import java.io.DataInput;
 import java.io.DataOutput;
@@ -88,10 +88,12 @@ public boolean equals(Object obj) {
             return false;
         return true;
     }
-
+
     @Override
     public String toString() {
-        return left.toString()+"\t"+right.toString();
+        return "AscLongDescLongWritable [left=" + left + ", right=" + right + "]";
     }
+
+
 }
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerPropertyIdWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/ByteMarkerIdPropIdWritable.java
similarity index 57%
rename from job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerPropertyIdWritable.java
rename to job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/ByteMarkerIdPropIdWritable.java
index 82046dd..dadfdc8 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerPropertyIdWritable.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/ByteMarkerIdPropIdWritable.java
@@ -1,36 +1,38 @@
-package nl.waredingen.graphs.neo.mapreduce.properties;
+package nl.waredingen.graphs.neo.mapreduce.input.writables;
 
 import java.io.DataInput;
 import java.io.DataOutput;
 import java.io.IOException;
 
-import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongWritable;
-
 import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.WritableComparable;
 
 @SuppressWarnings("rawtypes")
-public class ByteMarkerPropertyIdWritable implements WritableComparable {
+public class ByteMarkerIdPropIdWritable implements WritableComparable {
 
     private ByteWritable marker = new ByteWritable();
     private LongWritable id = new LongWritable();
+    private IntWritable propId = new IntWritable();
 
-    public ByteMarkerPropertyIdWritable() {
+    public ByteMarkerIdPropIdWritable() {
     }
 
-    public ByteMarkerPropertyIdWritable(ByteWritable marker, LongWritable id) {
+    public ByteMarkerIdPropIdWritable(ByteWritable marker, LongWritable id, IntWritable propId) {
         this.marker = marker;
         this.id = id;
+        this.propId = propId;
     }
 
     public void setMarker(ByteWritable marker) {
         this.marker = marker;
     }
 
-    public void setId(LongWritable id) {
+    public void setIds(LongWritable id, IntWritable propId) {
         this.id = id;
+        this.propId = propId;
     }
 
     public ByteWritable getMarker() {
@@ -41,24 +43,31 @@ public LongWritable getId() {
         return id;
     }
 
+    public IntWritable getPropId() {
+        return propId;
+    }
+
     @Override
     public void write(DataOutput out) throws IOException {
         marker.write(out);
         id.write(out);
+        propId.write(out);
     }
 
     @Override
     public void readFields(DataInput in) throws IOException {
         marker.readFields(in);
         id.readFields(in);
+        propId.readFields(in);
     }
 
     @Override
     public int compareTo(Object obj) {
-        ByteMarkerPropertyIdWritable other = (ByteMarkerPropertyIdWritable) obj;
+        ByteMarkerIdPropIdWritable other = (ByteMarkerIdPropIdWritable) obj;
         int markerDiff = marker.compareTo(other.marker);
-        // sort on marker and then on id
-        return (markerDiff == 0) ? id.compareTo(other.id) : markerDiff;
+        int nodeDiff = id.compareTo(other.id);
+        // sort on marker and then on nodeId and propertyId
+        return (markerDiff == 0) ? ((nodeDiff == 0) ? propId.compareTo(other.propId) : nodeDiff) : markerDiff;
     }
 
     @Override
@@ -66,6 +75,7 @@ public int hashCode() {
         final int prime = 31;
         int result = 1;
         result = prime * result + ((id == null) ? 0 : id.hashCode());
+        result = prime * result + ((propId == null) ? 0 : propId.hashCode());
         result = prime * result + ((marker == null) ? 0 : marker.hashCode());
         return result;
     }
@@ -78,12 +88,17 @@ public boolean equals(Object obj) {
             return false;
         if (getClass() != obj.getClass())
             return false;
-        ByteMarkerPropertyIdWritable other = (ByteMarkerPropertyIdWritable) obj;
+        ByteMarkerIdPropIdWritable other = (ByteMarkerIdPropIdWritable) obj;
         if (id == null) {
             if (other.id != null)
                 return false;
         } else if (!id.equals(other.id))
             return false;
+        if (propId == null) {
+            if (other.propId != null)
+                return false;
+        } else if (!propId.equals(other.propId))
+            return false;
         if (marker == null) {
             if (other.marker != null)
                 return false;
@@ -94,6 +109,7 @@ public boolean equals(Object obj) {
 
     @Override
     public String toString() {
-        return marker + "\t" + id;
+        return "ByteMarkerIdPropIdWritable [marker=" + marker + ", id=" + id + ", propId=" + propId + "]";
     }
+
 }
\ No newline at end of file
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/DoubleSurroundingEdgeWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/DoubleSurroundingEdgeWritable.java
new file mode 100644
index 0000000..d14b7aa
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/DoubleSurroundingEdgeWritable.java
@@ -0,0 +1,92 @@
+package nl.waredingen.graphs.neo.mapreduce.input.writables;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.WritableComparable;
+
+@SuppressWarnings("rawtypes")
+public class DoubleSurroundingEdgeWritable implements WritableComparable {
+
+    private SurroundingEdgeWritable left = new SurroundingEdgeWritable();
+    private SurroundingEdgeWritable right = new SurroundingEdgeWritable();
+
+    public DoubleSurroundingEdgeWritable() {
+
+    }
+
+    public DoubleSurroundingEdgeWritable(SurroundingEdgeWritable left, SurroundingEdgeWritable right) {
+        this.left = left;
+        this.right = right;
+    }
+
+    public SurroundingEdgeWritable getLeft() {
+        return left;
+    }
+
+    public SurroundingEdgeWritable getRight() {
+        return right;
+    }
+
+    public void set(SurroundingEdgeWritable left, SurroundingEdgeWritable right) {
+        this.left = left;
+        this.right = right;
+    }
+
+    @Override
+    public void write(DataOutput out) throws IOException {
+        left.write(out);
+        right.write(out);
+    }
+
+    @Override
+    public void readFields(DataInput in) throws IOException {
+        left.readFields(in);
+        right.readFields(in);
+    }
+
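+    // Ordering is lexicographic: the left surround decides, the right surround
+    // only breaks ties.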
+    @Override
+    public int compareTo(Object obj) {
+        DoubleSurroundingEdgeWritable other = (DoubleSurroundingEdgeWritable) obj;
+        int leftDiff = left.compareTo(other.left);
+        return (leftDiff == 0) ? right.compareTo(other.right) : leftDiff;
+    }
+
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + ((left == null) ? 0 : left.hashCode());
+        result = prime * result + ((right == null) ? 0 : right.hashCode());
+        return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj)
+            return true;
+        if (obj == null)
+            return false;
+        if (getClass() != obj.getClass())
+            return false;
+        DoubleSurroundingEdgeWritable other = (DoubleSurroundingEdgeWritable) obj;
+        if (left == null) {
+            if (other.left != null)
+                return false;
+        } else if (!left.equals(other.left))
+            return false;
+        if (right == null) {
+            if (other.right != null)
+                return false;
+        } else if (!right.equals(other.right))
+            return false;
+        return true;
+    }
+
+    @Override
+    public String toString() {
+        return "DoubleSurroundingEdgeWritable [left=" + left + ", right=" + right + "]";
+    }
+
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/EdgeIdPropIdWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/EdgeIdPropIdWritable.java
new file mode 100644
index 0000000..b7a9838
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/EdgeIdPropIdWritable.java
@@ -0,0 +1,94 @@
+package nl.waredingen.graphs.neo.mapreduce.input.writables;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.WritableComparable;
+
+@SuppressWarnings("rawtypes")
+public class EdgeIdPropIdWritable implements WritableComparable {
+
+    private LongWritable edgeId = new LongWritable();
+    private LongWritable propId = new LongWritable();
+
+    public EdgeIdPropIdWritable() {
+
+    }
+
+    public EdgeIdPropIdWritable(long edgeId, long propId) {
+        this.edgeId = new LongWritable(edgeId);
+        this.propId = new LongWritable(propId);
+    }
+
+    public void set(LongWritable edgeId, LongWritable propId) {
+        this.edgeId = edgeId;
+        this.propId = propId;
+    }
+
+    public LongWritable getEdgeId() {
+        return edgeId;
+    }
+
+    public LongWritable getPropId() {
+        return propId;
+    }
+
+    @Override
+    public void write(DataOutput out) throws IOException {
+        edgeId.write(out);
+        propId.write(out);
+    }
+
+    @Override
+    public void readFields(DataInput in) throws IOException {
+        edgeId.readFields(in);
+        propId.readFields(in);
+    }
+
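+    // Order by edge id first; the property id only breaks ties.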
+    @Override
+    public int compareTo(Object obj) {
+        EdgeIdPropIdWritable other = (EdgeIdPropIdWritable) obj;
+        int edgeDiff = edgeId.compareTo(other.edgeId);
+        return (edgeDiff == 0) ? propId.compareTo(other.propId) : edgeDiff;
+    }
+
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + ((propId == null) ? 0 : propId.hashCode());
+        result = prime * result + ((edgeId == null) ? 0 : edgeId.hashCode());
+        return result;
+    }
+
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj)
+            return true;
+        if (obj == null)
+            return false;
+        if (getClass() != obj.getClass())
+            return false;
+        EdgeIdPropIdWritable other = (EdgeIdPropIdWritable) obj;
+        if (propId == null) {
+            if (other.propId != null)
+                return false;
+        } else if (!propId.equals(other.propId))
+            return false;
+        if (edgeId == null) {
+            if (other.edgeId != null)
+                return false;
+        } else if (!edgeId.equals(other.edgeId))
+            return false;
+        return true;
+    }
+
+    @Override
+    public String toString() {
+        return "EdgeIdPropIdWritable [edgeId=" + edgeId + ", propId=" + propId + "]";
+    }
+
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/EdgePropertyOutputCountersAndValueWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/EdgePropertyOutputCountersAndValueWritable.java
new file mode 100644
index 0000000..d054115
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/EdgePropertyOutputCountersAndValueWritable.java
@@ -0,0 +1,136 @@
+package nl.waredingen.graphs.neo.mapreduce.input.writables;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Writable;
+
+public class EdgePropertyOutputCountersAndValueWritable implements Writable {
+
+    private LongWritable id;
+    private FullEdgePropertiesWritable value;
+    private long countBlockOffset;
+    private long countIdOffset;
+    private int partition;
+
+    public static final FullEdgePropertiesWritable EMPTY_VAL = new FullEdgePropertiesWritable(-1, null, null, -1, -1, -1, -1, -1, "");
+    public static final LongWritable EMPTY_ID = new LongWritable(Long.MIN_VALUE);
+
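+    // A record either carries a real (id, value) pair, or acts as a
+    // per-partition marker (id == EMPTY_ID) whose block and id offsets are
+    // appended to the serialized form by write()/readFields() below.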
0 : id.hashCode()); + result = prime * result + partition; + result = prime * result + ((value == null) ? 0 : value.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + EdgePropertyOutputCountersAndValueWritable other = (EdgePropertyOutputCountersAndValueWritable) obj; + if (countBlockOffset != other.countBlockOffset) + return false; + if (countIdOffset != other.countIdOffset) + return false; + if (id == null) { + if (other.id != null) + return false; + } else if (!id.equals(other.id)) + return false; + if (partition != other.partition) + return false; + if (value == null) { + if (other.value != null) + return false; + } else if (!value.equals(other.value)) + return false; + return true; + } + + @Override + public String toString() { + return "EdgePropertyOutputCountersAndValueWritable [id=" + id + ", value=" + value + ", countBlockOffset=" + countBlockOffset + ", countIdOffset=" + + countIdOffset + ", partition=" + partition + "]"; + } + +} diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/EdgeWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/EdgeWritable.java new file mode 100644 index 0000000..67221c3 --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/EdgeWritable.java @@ -0,0 +1,127 @@ +package nl.waredingen.graphs.neo.mapreduce.input.writables; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.WritableComparable; + +@SuppressWarnings("rawtypes") +public class EdgeWritable implements WritableComparable { + + private LongWritable edgeId = new LongWritable(); + private LongWritable fromNodeId = new LongWritable(); + private LongWritable toNodeId = new LongWritable(); + private LongWritable edgePropId = new LongWritable(); + + public EdgeWritable() { + + } + + public EdgeWritable(long edgeId, long from, long to, long prop) { + this.edgeId = new LongWritable(edgeId); + this.fromNodeId = new LongWritable(from); + this.toNodeId = new LongWritable(to); + this.edgePropId = new LongWritable(prop); + } + + public void set(LongWritable edgeId, LongWritable fromNodeId, LongWritable toNodeId, LongWritable prop) { + this.edgeId = edgeId; + this.fromNodeId = fromNodeId; + this.toNodeId = toNodeId; + this.edgePropId = prop; + } + + public LongWritable getEdgeId() { + return edgeId; + } + + public LongWritable getFromNodeId() { + return fromNodeId; + } + + public LongWritable getToNodeId() { + return toNodeId; + } + + public LongWritable getEdgePropId() { + return edgePropId; + } + + @Override + public void write(DataOutput out) throws IOException { + edgeId.write(out); + fromNodeId.write(out); + toNodeId.write(out); + edgePropId.write(out); + } + + @Override + public void readFields(DataInput in) throws IOException { + edgeId.readFields(in); + fromNodeId.readFields(in); + toNodeId.readFields(in); + edgePropId.readFields(in); + } + + @Override + public int compareTo(Object obj) { + EdgeWritable other = (EdgeWritable) obj; + int edgeDiff = edgeId.compareTo(other.edgeId); + int fromDiff = fromNodeId.compareTo(other.fromNodeId); + int toDiff = toNodeId.compareTo(other.toNodeId); + int propDiff = edgePropId.compareTo(other.edgePropId); + return (edgeDiff == 0) ? (fromDiff == 0) ? (toDiff ==0) ? 
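+ /* The nested ternary around this note implements a lexicographic ordering
+  * on (edgeId, fromNodeId, toNodeId, edgePropId), but computes all four
+  * diffs eagerly. An equivalent, short-circuiting sketch:
+  *
+  *   int d = edgeId.compareTo(other.edgeId);
+  *   if (d == 0) d = fromNodeId.compareTo(other.fromNodeId);
+  *   if (d == 0) d = toNodeId.compareTo(other.toNodeId);
+  *   return (d == 0) ? edgePropId.compareTo(other.edgePropId) : d;
+  */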
propDiff : toDiff: fromDiff : edgeDiff; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((edgeId == null) ? 0 : edgeId.hashCode()); + result = prime * result + ((fromNodeId == null) ? 0 : fromNodeId.hashCode()); + result = prime * result + ((toNodeId == null) ? 0 : toNodeId.hashCode()); + result = prime * result + ((edgePropId == null) ? 0 : edgePropId.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + EdgeWritable other = (EdgeWritable) obj; + if (edgeId == null) { + if (other.edgeId != null) + return false; + } else if (!edgeId.equals(other.edgeId)) + return false; + if (fromNodeId == null) { + if (other.fromNodeId != null) + return false; + } else if (!fromNodeId.equals(other.fromNodeId)) + return false; + if (toNodeId == null) { + if (other.toNodeId != null) + return false; + } else if (!toNodeId.equals(other.toNodeId)) + return false; + if (edgePropId == null) { + if (other.edgePropId != null) + return false; + } else if (!edgePropId.equals(other.edgePropId)) + return false; + return true; + } + + @Override + public String toString() { + return "EdgeWritable [edgeId=" + edgeId + ", fromNodeId=" + fromNodeId + ", toNodeId=" + toNodeId + ", edgePropId=" + edgePropId+ "]"; + } + + +} \ No newline at end of file diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/FullEdgePropertiesWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/FullEdgePropertiesWritable.java new file mode 100644 index 0000000..228f5d3 --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/FullEdgePropertiesWritable.java @@ -0,0 +1,207 @@ +package nl.waredingen.graphs.neo.mapreduce.input.writables; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparable; + +@SuppressWarnings("rawtypes") +public class FullEdgePropertiesWritable implements WritableComparable { + + private LongWritable edgeId = new LongWritable(); + private Text fromNodeIdentifier = new Text(); + private Text toNodeIdentifier = new Text(); + private IntWritable propertyIndex = new IntWritable(); + private LongWritable blockCount = new LongWritable(); + private PropertyListWritable properties = new PropertyListWritable(); + private LongWritable prevProp = new LongWritable(); + private LongWritable nextProp = new LongWritable(); + + public FullEdgePropertiesWritable() { + + } + + public FullEdgePropertiesWritable(long edgeId, String fromNode, String toNode, int propIndex, long blockCount, long prevProp, long nextProp, int propKeyIndex, String val) { + this.set(edgeId, fromNode, toNode, propIndex, blockCount, prevProp, nextProp, propKeyIndex, val); + } + + public void set(FullEdgePropertiesWritable other) { + String[] props = other.getProperties().valuesToArray(); + int[] keys = other.getProperties().keysToArray(); + for (int i=0; i < props.length; i++) { + if (i ==0 ) { + this.set(other.getEdgeId().get(), other.getFromNodeIdentifier().toString(), other.getToNodeIdentifier().toString(), other.getPropertyIndex().get(), other.getBlockCount().get(), other.getPrevProp().get(), other.getNextProp().get(), keys[i], props[i] ); + } else { + 
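+ /* In this copy-style set(), the first key/value pair re-initializes all
+  * scalar fields via set(...), and later pairs are appended via add(...)
+  * with a zero block-count delta, so the copied blockCount is preserved
+  * rather than inflated. Note that an empty source property list leaves
+  * this instance completely unchanged. */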
this.add(keys[i], props[i], 0); + } + } + } + + public void set(long edgeId, String fromNode, String toNode, int propIndex, long blockCount, long prevProp, long nextProp, int propKeyIndex, String val) { + this.edgeId = new LongWritable(edgeId); + if (fromNode != null) this.fromNodeIdentifier = new Text(fromNode); + if (toNode != null) this.toNodeIdentifier = new Text(toNode); + this.propertyIndex = new IntWritable(propIndex); + this.blockCount = new LongWritable(blockCount); + this.prevProp = new LongWritable(prevProp); + this.nextProp = new LongWritable(nextProp); + this.properties = new PropertyListWritable(); + this.properties.add(propKeyIndex, val); + + } + + public void add(int propKeyIndex, String string, int blockCount) { + this.properties.add(propKeyIndex, string); + this.blockCount = new LongWritable(blockCount + this.blockCount.get()); + } + + public LongWritable getEdgeId() { + return edgeId; + } + + public Text getFromNodeIdentifier() { + return fromNodeIdentifier; + } + + public Text getToNodeIdentifier() { + return toNodeIdentifier; + } + + public IntWritable getPropertyIndex() { + return propertyIndex; + } + + public LongWritable getBlockCount() { + return blockCount; + } + + public PropertyListWritable getProperties() { + return properties; + } + + public LongWritable getPrevProp() { + return prevProp; + } + + public LongWritable getNextProp() { + return nextProp; + } + + @Override + public void write(DataOutput out) throws IOException { + edgeId.write(out); + fromNodeIdentifier.write(out); + toNodeIdentifier.write(out); + propertyIndex.write(out); + blockCount.write(out); + properties.write(out); + prevProp.write(out); + nextProp.write(out); + } + + @Override + public void readFields(DataInput in) throws IOException { + edgeId.readFields(in); + fromNodeIdentifier.readFields(in); + toNodeIdentifier.readFields(in); + propertyIndex.readFields(in); + blockCount.readFields(in); + properties.readFields(in); + prevProp.readFields(in); + nextProp.readFields(in); + } + + @Override + public int compareTo(Object o) { + FullEdgePropertiesWritable obj = (FullEdgePropertiesWritable) o; + int edgeDiff = edgeId.compareTo(obj.edgeId); + int fromDiff = fromNodeIdentifier.compareTo(obj.fromNodeIdentifier); + int toDiff = toNodeIdentifier.compareTo(obj.toNodeIdentifier); + int indexDiff = propertyIndex.compareTo(obj.propertyIndex); + int cntDiff = blockCount.compareTo(obj.blockCount); + int propDiff = properties.compareTo(obj.properties); + int prevDiff = prevProp.compareTo(obj.prevProp); + int nextDiff = nextProp.compareTo(obj.nextProp); + return (edgeDiff == 0) ? (fromDiff == 0) ? (toDiff == 0) ? (indexDiff == 0) ? (cntDiff == 0) ? (propDiff == 0) ? (prevDiff == 0) ? nextDiff : prevDiff : propDiff : cntDiff : indexDiff : toDiff : fromDiff: edgeDiff; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((blockCount == null) ? 0 : blockCount.hashCode()); + result = prime * result + ((edgeId == null) ? 0 : edgeId.hashCode()); + result = prime * result + ((fromNodeIdentifier == null) ? 0 : fromNodeIdentifier.hashCode()); + result = prime * result + ((nextProp == null) ? 0 : nextProp.hashCode()); + result = prime * result + ((prevProp == null) ? 0 : prevProp.hashCode()); + result = prime * result + ((properties == null) ? 0 : properties.hashCode()); + result = prime * result + ((propertyIndex == null) ? 0 : propertyIndex.hashCode()); + result = prime * result + ((toNodeIdentifier == null) ? 
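+ /* compareTo above folds eight eagerly computed field comparisons into one
+  * nested ternary. Functionally it is a plain lexicographic compare; a
+  * short-circuiting sketch of the same ordering:
+  *
+  *   int d;
+  *   if ((d = edgeId.compareTo(obj.edgeId)) != 0) return d;
+  *   if ((d = fromNodeIdentifier.compareTo(obj.fromNodeIdentifier)) != 0) return d;
+  *   // ... remaining fields in declaration order ...
+  *   return nextProp.compareTo(obj.nextProp);
+  */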
0 : toNodeIdentifier.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + FullEdgePropertiesWritable other = (FullEdgePropertiesWritable) obj; + if (blockCount == null) { + if (other.blockCount != null) + return false; + } else if (!blockCount.equals(other.blockCount)) + return false; + if (edgeId == null) { + if (other.edgeId != null) + return false; + } else if (!edgeId.equals(other.edgeId)) + return false; + if (fromNodeIdentifier == null) { + if (other.fromNodeIdentifier != null) + return false; + } else if (!fromNodeIdentifier.equals(other.fromNodeIdentifier)) + return false; + if (nextProp == null) { + if (other.nextProp != null) + return false; + } else if (!nextProp.equals(other.nextProp)) + return false; + if (prevProp == null) { + if (other.prevProp != null) + return false; + } else if (!prevProp.equals(other.prevProp)) + return false; + if (properties == null) { + if (other.properties != null) + return false; + } else if (!properties.equals(other.properties)) + return false; + if (propertyIndex == null) { + if (other.propertyIndex != null) + return false; + } else if (!propertyIndex.equals(other.propertyIndex)) + return false; + if (toNodeIdentifier == null) { + if (other.toNodeIdentifier != null) + return false; + } else if (!toNodeIdentifier.equals(other.toNodeIdentifier)) + return false; + return true; + } + + @Override + public String toString() { + return "FullEdgePropertiesWritable [edgeId=" + edgeId + ", fromNodeIdentifier=" + fromNodeIdentifier + ", toNodeIdentifier=" + toNodeIdentifier + + ", propertyIndex=" + propertyIndex + ", blockCount=" + blockCount + ", properties=" + properties + ", prevProp=" + prevProp + ", nextProp=" + + nextProp + "]"; + } + + +} \ No newline at end of file diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/FullEdgeWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/FullEdgeWritable.java new file mode 100644 index 0000000..d0d6287 --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/FullEdgeWritable.java @@ -0,0 +1,180 @@ +package nl.waredingen.graphs.neo.mapreduce.input.writables; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.WritableComparable; + +@SuppressWarnings("rawtypes") +public class FullEdgeWritable implements WritableComparable { + + private LongWritable fromNodeId = new LongWritable(); + private LongWritable toNodeId = new LongWritable(); + private LongWritable fromPrev = new LongWritable(); + private LongWritable fromNext = new LongWritable(); + private LongWritable toPrev = new LongWritable(); + private LongWritable toNext = new LongWritable(); + private LongWritable edgeProp = new LongWritable(); + + public FullEdgeWritable() { + + } + + public FullEdgeWritable(long fromNode, long toNode, long edgeProp, long fromPrev, long fromNext, long toPrev, long toNext) { + this.set(fromNode, toNode, edgeProp, fromPrev, fromNext, toPrev, toNext); + } + + public LongWritable getFromNodeId() { + return fromNodeId; + } + + public LongWritable getToNodeId() { + return toNodeId; + } + + public LongWritable getEdgeProp() { + return edgeProp; + } + + public LongWritable getFromPrev() { + return fromPrev; + } + + public LongWritable getFromNext() { + return fromNext; + } + + public 
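+ /* The fromPrev/fromNext and toPrev/toNext fields appear to mirror the
+  * prev/next relationship-chain pointers a Neo4j relationship record keeps
+  * for its start node and end node respectively, with -1 conventionally
+  * marking the end of a chain. */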
LongWritable getToPrev() { + return toPrev; + } + + public LongWritable getToNext() { + return toNext; + } + + public void set(long fromNode, long toNode, long edgeProp, long fromPrev, long fromNext, long toPrev, long toNext) { + this.fromNodeId = new LongWritable(fromNode); + this.toNodeId = new LongWritable(toNode); + this.edgeProp = new LongWritable(edgeProp); + this.fromPrev = new LongWritable(fromPrev); + this.fromNext = new LongWritable(fromNext); + this.toPrev = new LongWritable(toPrev); + this.toNext = new LongWritable(toNext); + } + + public void set(LongWritable fromNode, LongWritable toNode, LongWritable edgeProp, LongWritable fromPrev, LongWritable fromNext, LongWritable toPrev, LongWritable toNext) { + this.fromNodeId = fromNode; + this.toNodeId = toNode; + this.edgeProp = edgeProp; + this.fromPrev = fromPrev; + this.fromNext = fromNext; + this.toPrev = toPrev; + this.toNext = toNext; + } + + @Override + public void write(DataOutput out) throws IOException { + fromNodeId.write(out); + toNodeId.write(out); + edgeProp.write(out); + fromPrev.write(out); + fromNext.write(out); + toPrev.write(out); + toNext.write(out); + } + + @Override + public void readFields(DataInput in) throws IOException { + fromNodeId.readFields(in); + toNodeId.readFields(in); + edgeProp.readFields(in); + fromPrev.readFields(in); + fromNext.readFields(in); + toPrev.readFields(in); + toNext.readFields(in); + } + + @Override + public int compareTo(Object obj) { + FullEdgeWritable other = (FullEdgeWritable) obj; + int nodeDiff = fromPrev.compareTo(other.fromPrev); + int edgeDiff = fromNext.compareTo(other.fromNext); + int propDiff = edgeProp.compareTo(other.edgeProp); + int fromDiff = fromNodeId.compareTo(other.fromNodeId); + int toDiff = toNodeId.compareTo(other.toNodeId); + int prevDiff = toPrev.compareTo(other.toPrev); + int nextDiff = toNext.compareTo(other.toNext); + return (nodeDiff == 0) ? (edgeDiff == 0) ? (propDiff == 0) ? (fromDiff == 0) ? (toDiff == 0) ? (prevDiff == 0) ? nextDiff: prevDiff : toDiff : fromDiff : propDiff : edgeDiff : nodeDiff; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((fromNext == null) ? 0 : fromNext.hashCode()); + result = prime * result + ((toNext == null) ? 0 : toNext.hashCode()); + result = prime * result + ((toPrev == null) ? 0 : toPrev.hashCode()); + result = prime * result + ((fromNodeId == null) ? 0 : fromNodeId.hashCode()); + result = prime * result + ((fromPrev == null) ? 0 : fromPrev.hashCode()); + result = prime * result + ((toNodeId == null) ? 0 : toNodeId.hashCode()); + result = prime * result + ((edgeProp == null) ? 
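+ /* NOTE: compareTo above orders primarily on fromPrev and fromNext (the
+  * local variables are misleadingly named nodeDiff and edgeDiff) before
+  * the node ids themselves; if ordering by (fromNodeId, toNodeId) was
+  * intended, the comparison chain should start from those fields. */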
0 : edgeProp.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + FullEdgeWritable other = (FullEdgeWritable) obj; + if (fromNext == null) { + if (other.fromNext != null) + return false; + } else if (!fromNext.equals(other.fromNext)) + return false; + if (toNext == null) { + if (other.toNext != null) + return false; + } else if (!toNext.equals(other.toNext)) + return false; + if (toPrev == null) { + if (other.toPrev != null) + return false; + } else if (!toPrev.equals(other.toPrev)) + return false; + if (fromNodeId == null) { + if (other.fromNodeId != null) + return false; + } else if (!fromNodeId.equals(other.fromNodeId)) + return false; + if (fromPrev == null) { + if (other.fromPrev != null) + return false; + } else if (!fromPrev.equals(other.fromPrev)) + return false; + if (toNodeId == null) { + if (other.toNodeId != null) + return false; + } else if (!toNodeId.equals(other.toNodeId)) + return false; + if (edgeProp == null) { + if (other.edgeProp != null) + return false; + } else if (!edgeProp.equals(other.edgeProp)) + return false; + return true; + } + + @Override + public String toString() { + return "FullEdgeWritable [fromNodeId=" + fromNodeId + ", toNodeId=" + toNodeId + ", edgeProp=" + edgeProp + ", fromPrev=" + fromPrev + ", fromNext=" + fromNext + ", toPrev=" + + toPrev + ", toNext=" + toNext + "]"; + } + + +} \ No newline at end of file diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/FullNodePropertiesWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/FullNodePropertiesWritable.java new file mode 100644 index 0000000..f106a85 --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/FullNodePropertiesWritable.java @@ -0,0 +1,191 @@ +package nl.waredingen.graphs.neo.mapreduce.input.writables; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparable; + +@SuppressWarnings("rawtypes") +public class FullNodePropertiesWritable implements WritableComparable { + + private LongWritable nodeId = new LongWritable(); + private Text nodeIdentifier = new Text(); + private IntWritable propertyIndex = new IntWritable(); + private LongWritable blockCount = new LongWritable(); + private PropertyListWritable properties = new PropertyListWritable(); + private LongWritable prevProp = new LongWritable(); + private LongWritable nextProp = new LongWritable(); + + public FullNodePropertiesWritable() { + + } + + public FullNodePropertiesWritable(long nodeId, String node, int propIndex, long blockCount, long prevProp, long nextProp, int propKeyIndex, String val) { + this.set(nodeId, node, propIndex, blockCount, prevProp, nextProp, propKeyIndex, val); + } + + public void set(FullNodePropertiesWritable other) { + String[] props = other.getProperties().valuesToArray(); + int[] keys = other.getProperties().keysToArray(); + for (int i=0; i < props.length; i++) { + if (i==0) { + this.set(other.getNodeId().get(), other.getNodeIdentifier().toString(), other.getPropertyIndex().get(), other.getBlockCount().get(), other.getPrevProp().get(), other.getNextProp().get(), keys[i], props[i]); + } else { + this.add(keys[i], props[i], 0); + } + } + } + + public void set(long 
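+ /* set() here only replaces nodeIdentifier when the incoming node string
+  * is non-null; that is what lets EMPTY_VAL-style placeholder records be
+  * built with a null identifier without an NPE, leaving an empty Text
+  * behind instead. */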
nodeId, String node, int propIndex, long blockCount, long prevProp, long nextProp, int propKeyIndex, String val) { + this.nodeId = new LongWritable(nodeId); + if (node != null) this.nodeIdentifier = new Text(node); + this.propertyIndex = new IntWritable(propIndex); + this.blockCount = new LongWritable(blockCount); + this.prevProp = new LongWritable(prevProp); + this.nextProp = new LongWritable(nextProp); + this.properties = new PropertyListWritable(); + this.properties.add(propKeyIndex, val); + } + + public void add(int propKeyIndex, String string, int blockCount) { + this.properties.add(propKeyIndex, string); + this.blockCount = new LongWritable(blockCount + this.blockCount.get()); + } + + public LongWritable getNodeId() { + return nodeId; + } + + public Text getNodeIdentifier() { + return nodeIdentifier; + } + + public IntWritable getPropertyIndex() { + return propertyIndex; + } + + public LongWritable getBlockCount() { + return blockCount; + } + + public PropertyListWritable getProperties() { + return properties; + } + + public LongWritable getPrevProp() { + return prevProp; + } + + public LongWritable getNextProp() { + return nextProp; + } + + @Override + public void write(DataOutput out) throws IOException { + nodeId.write(out); + nodeIdentifier.write(out); + propertyIndex.write(out); + blockCount.write(out); + properties.write(out); + prevProp.write(out); + nextProp.write(out); + } + + @Override + public void readFields(DataInput in) throws IOException { + nodeId.readFields(in); + nodeIdentifier.readFields(in); + propertyIndex.readFields(in); + blockCount.readFields(in); + properties.readFields(in); + prevProp.readFields(in); + nextProp.readFields(in); + } + + @Override + public int compareTo(Object o) { + FullNodePropertiesWritable obj = (FullNodePropertiesWritable) o; + int nodeDiff = nodeId.compareTo(obj.nodeId); + int fromDiff = nodeIdentifier.compareTo(obj.nodeIdentifier); + int indexDiff = propertyIndex.compareTo(obj.propertyIndex); + int cntDiff = blockCount.compareTo(obj.blockCount); + int propDiff = properties.compareTo(obj.properties); + int prevDiff = prevProp.compareTo(obj.prevProp); + int nextDiff = nextProp.compareTo(obj.nextProp); + return (nodeDiff == 0) ? (fromDiff == 0) ? (indexDiff == 0) ? (cntDiff == 0) ? (propDiff == 0) ? (prevDiff == 0) ? nextDiff : prevDiff : propDiff : cntDiff : indexDiff : fromDiff: nodeDiff; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((blockCount == null) ? 0 : blockCount.hashCode()); + result = prime * result + ((nodeId == null) ? 0 : nodeId.hashCode()); + result = prime * result + ((nodeIdentifier == null) ? 0 : nodeIdentifier.hashCode()); + result = prime * result + ((nextProp == null) ? 0 : nextProp.hashCode()); + result = prime * result + ((prevProp == null) ? 0 : prevProp.hashCode()); + result = prime * result + ((properties == null) ? 0 : properties.hashCode()); + result = prime * result + ((propertyIndex == null) ? 
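+ /* A minimal usage sketch (all values illustrative): seed the writable
+  * with its first property, then append further key/value pairs:
+  *
+  *   FullNodePropertiesWritable w =
+  *       new FullNodePropertiesWritable(0L, "n0", 0, 1L, -1L, -1L, 0, "Alice");
+  *   w.add(1, "42", 1);   // second property, one more block
+  */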
0 : propertyIndex.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + FullNodePropertiesWritable other = (FullNodePropertiesWritable) obj; + if (blockCount == null) { + if (other.blockCount != null) + return false; + } else if (!blockCount.equals(other.blockCount)) + return false; + if (nodeId == null) { + if (other.nodeId != null) + return false; + } else if (!nodeId.equals(other.nodeId)) + return false; + if (nodeIdentifier == null) { + if (other.nodeIdentifier != null) + return false; + } else if (!nodeIdentifier.equals(other.nodeIdentifier)) + return false; + if (nextProp == null) { + if (other.nextProp != null) + return false; + } else if (!nextProp.equals(other.nextProp)) + return false; + if (prevProp == null) { + if (other.prevProp != null) + return false; + } else if (!prevProp.equals(other.prevProp)) + return false; + if (properties == null) { + if (other.properties != null) + return false; + } else if (!properties.equals(other.properties)) + return false; + if (propertyIndex == null) { + if (other.propertyIndex != null) + return false; + } else if (!propertyIndex.equals(other.propertyIndex)) + return false; + return true; + } + + @Override + public String toString() { + return "FullNodePropertiesWritable [nodeId=" + nodeId + ", nodeIdentifier=" + nodeIdentifier + + ", propertyIndex=" + propertyIndex + ", blockCount=" + blockCount + ", properties=" + properties + ", prevProp=" + prevProp + ", nextProp=" + + nextProp + "]"; + } + + +} \ No newline at end of file diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodeEdgeIdWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodeEdgeIdWritable.java new file mode 100644 index 0000000..d664618 --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodeEdgeIdWritable.java @@ -0,0 +1,110 @@ +package nl.waredingen.graphs.neo.mapreduce.input.writables; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.WritableComparable; + +@SuppressWarnings("rawtypes") +public class NodeEdgeIdWritable implements WritableComparable { + + private LongWritable nodeId = new LongWritable(); + private LongWritable propId = new LongWritable(); + private LongWritable edgeId = new LongWritable(); + + public NodeEdgeIdWritable() { + + } + + public NodeEdgeIdWritable(long nodeId, long propId, long edgeId) { + this.nodeId = new LongWritable(nodeId); + this.propId = new LongWritable(propId); + this.edgeId = new LongWritable(edgeId); + } + + public void set(LongWritable nodeId, LongWritable propId, LongWritable edgeId) { + this.nodeId = nodeId; + this.propId = propId; + this.edgeId = edgeId; + } + + public LongWritable getNodeId() { + return nodeId; + } + + public LongWritable getPropId() { + return propId; + } + + public LongWritable getEdgeId() { + return edgeId; + } + + @Override + public void write(DataOutput out) throws IOException { + nodeId.write(out); + propId.write(out); + edgeId.write(out); + } + + @Override + public void readFields(DataInput in) throws IOException { + nodeId.readFields(in); + propId.readFields(in); + edgeId.readFields(in); + } + + @Override + public int compareTo(Object obj) { + NodeEdgeIdWritable other = (NodeEdgeIdWritable) obj; + int nodeDiff = nodeId.compareTo(other.nodeId); + 
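+ // NOTE: the tiebreak below uses edgeId only and ignores propId, while
+ // equals()/hashCode() further down include all three fields. Two records
+ // differing only in propId therefore compare as 0 but are not equal,
+ // which matters if this class is ever used as a sort key for grouping.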
return (nodeDiff == 0) ? edgeId.compareTo(other.edgeId) : nodeDiff; + } + + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((edgeId == null) ? 0 : edgeId.hashCode()); + result = prime * result + ((nodeId == null) ? 0 : nodeId.hashCode()); + result = prime * result + ((propId == null) ? 0 : propId.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + NodeEdgeIdWritable other = (NodeEdgeIdWritable) obj; + if (edgeId == null) { + if (other.edgeId != null) + return false; + } else if (!edgeId.equals(other.edgeId)) + return false; + if (nodeId == null) { + if (other.nodeId != null) + return false; + } else if (!nodeId.equals(other.nodeId)) + return false; + if (propId == null) { + if (other.propId != null) + return false; + } else if (!propId.equals(other.propId)) + return false; + return true; + } + + @Override + public String toString() { + return "NodeEdgeIdWritable [nodeId=" + nodeId + ", propId=" + propId + ", edgeId=" + edgeId + "]"; + } + +} + diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodeEdgeWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodeEdgeWritable.java new file mode 100644 index 0000000..1e7e540 --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodeEdgeWritable.java @@ -0,0 +1,92 @@ +package nl.waredingen.graphs.neo.mapreduce.input.writables; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.WritableComparable; + +@SuppressWarnings("rawtypes") +public class NodeEdgeWritable implements WritableComparable { + + private NodeWritable node = new NodeWritable(); + private EdgeWritable edge = new EdgeWritable(); + + public NodeEdgeWritable() { + + } + + public NodeEdgeWritable(long nodeId, long nodePropId, long edgeId, long from, long to, long edgePropId) { + this.set(nodeId, nodePropId, edgeId, from, to, edgePropId); + } + + public NodeWritable getNode() { + return node; + } + + public EdgeWritable getEdge() { + return edge; + } + + public void set(long nodeId, long nodePropId, long edgeId, long from, long to, long edgePropId) { + this.node = new NodeWritable(nodeId, nodePropId); + this.edge = new EdgeWritable(edgeId, from , to, edgePropId); + } + + @Override + public void write(DataOutput out) throws IOException { + node.write(out); + edge.write(out); + } + + @Override + public void readFields(DataInput in) throws IOException { + node.readFields(in); + edge.readFields(in); + } + + @Override + public int compareTo(Object obj) { + NodeEdgeWritable other = (NodeEdgeWritable) obj; + int nodeDiff = node.compareTo(other.node); + int edgeDiff = edge.compareTo(other.edge); + return (nodeDiff == 0) ? edgeDiff : nodeDiff; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((edge == null) ? 0 : edge.hashCode()); + result = prime * result + ((node == null) ? 
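+ /* NodeEdgeWritable is a plain composite: write/readFields, compareTo,
+  * equals and hashCode all delegate to the embedded NodeWritable and
+  * EdgeWritable, so records sort by (node, edge). */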
0 : node.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + NodeEdgeWritable other = (NodeEdgeWritable) obj; + if (edge == null) { + if (other.edge != null) + return false; + } else if (!edge.equals(other.edge)) + return false; + if (node == null) { + if (other.node != null) + return false; + } else if (!node.equals(other.node)) + return false; + return true; + } + + @Override + public String toString() { + return "NodeEdgeWritable [node=" + node + ", edge=" + edge + "]"; + } + +} \ No newline at end of file diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountValueWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodePropertyOutputCountersAndValueWritable.java similarity index 50% rename from job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountValueWritable.java rename to job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodePropertyOutputCountersAndValueWritable.java index de397b8..6332d16 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountValueWritable.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodePropertyOutputCountersAndValueWritable.java @@ -1,45 +1,52 @@ -package nl.waredingen.graphs.neo.mapreduce.properties; +package nl.waredingen.graphs.neo.mapreduce.input.writables; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; + import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; -public class PropertyOutputIdBlockcountValueWritable implements Writable { +public class NodePropertyOutputCountersAndValueWritable implements Writable { private LongWritable id; - private Text value; - private long count; + private FullNodePropertiesWritable value; + private long countBlockOffset; + private long countIdOffset; private int partition; - public static final Text EMPTY_STRING = new Text(""); + public static final FullNodePropertiesWritable EMPTY_VAL = new FullNodePropertiesWritable(-1, null, -1, -1, -1, -1, -1, ""); public static final LongWritable EMPTY_ID = new LongWritable(Long.MIN_VALUE); - public void setValues(LongWritable id, Text value) { + public void setValues(LongWritable id, FullNodePropertiesWritable value) { this.id = id; this.value = value; - this.count = 0; + this.countBlockOffset = 0; + this.countIdOffset = 0; this.partition = 0; } - public void setCounter(int partition, long count) { + public void setCounter(int partition, long blockCount, long idOffsetCount) { this.id = EMPTY_ID; - this.value = EMPTY_STRING; + this.value = EMPTY_VAL; this.partition = partition; - this.count = count; + this.countBlockOffset = blockCount; + this.countIdOffset = idOffsetCount; } - public long getCount() { - return count; + public long getBlockOffset() { + return countBlockOffset; } + public long getIdOffset() { + return countIdOffset; + } + public int getPartition() { return partition; } - public Text getValue() { + public FullNodePropertiesWritable getValue() { return value; } @@ -51,9 +58,10 @@ public LongWritable getId() { public void write(DataOutput out) throws IOException { id.write(out); value.write(out); - if (id.equals(EMPTY_ID) && value.getLength() == 0) { + if (id.equals(EMPTY_ID)) { out.writeInt(partition); - 
out.writeLong(count); + out.writeLong(countBlockOffset); + out.writeLong(countIdOffset); } } @@ -64,16 +72,18 @@ public void readFields(DataInput in) throws IOException { } id.readFields(in); if (value == null) { - value = new Text(); + value = new FullNodePropertiesWritable(); } value.readFields(in); - if (id.equals(EMPTY_ID) && value.getLength() == 0) { + if (id.equals(EMPTY_ID)) { partition = in.readInt(); - count = in.readLong(); + countBlockOffset = in.readLong(); + countIdOffset = in.readLong(); } else { partition = 0; - count = 0; + countBlockOffset = 0; + countIdOffset = 0; } } @@ -81,13 +91,14 @@ public void readFields(DataInput in) throws IOException { public int hashCode() { final int prime = 31; int result = 1; - result = prime * result + (int) (count ^ (count >>> 32)); + result = prime * result + (int) (countBlockOffset ^ (countBlockOffset >>> 32)); + result = prime * result + (int) (countIdOffset ^ (countIdOffset >>> 32)); result = prime * result + ((id == null) ? 0 : id.hashCode()); result = prime * result + partition; result = prime * result + ((value == null) ? 0 : value.hashCode()); return result; } - + @Override public boolean equals(Object obj) { if (this == obj) @@ -96,8 +107,10 @@ public boolean equals(Object obj) { return false; if (getClass() != obj.getClass()) return false; - PropertyOutputIdBlockcountValueWritable other = (PropertyOutputIdBlockcountValueWritable) obj; - if (count != other.count) + NodePropertyOutputCountersAndValueWritable other = (NodePropertyOutputCountersAndValueWritable) obj; + if (countBlockOffset != other.countBlockOffset) + return false; + if (countIdOffset != other.countIdOffset) return false; if (id == null) { if (other.id != null) @@ -116,9 +129,8 @@ public boolean equals(Object obj) { @Override public String toString() { - return "PropertyOutputIdBlockcountValueWritable [id=" + id + ", value=" + value + ", count=" + count - + ", partition=" + partition + "]"; + return "NodePropertyOutputCountersAndValueWritable [id=" + id + ", value=" + value + ", countBlockOffset=" + countBlockOffset + ", countIdOffset=" + + countIdOffset + ", partition=" + partition + "]"; } - } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodeWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodeWritable.java new file mode 100644 index 0000000..b0d2223 --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodeWritable.java @@ -0,0 +1,98 @@ +package nl.waredingen.graphs.neo.mapreduce.input.writables; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.WritableComparable; + +@SuppressWarnings("rawtypes") +public class NodeWritable implements WritableComparable { + + private LongWritable nodeId = new LongWritable(); + private LongWritable propId = new LongWritable(); + + public NodeWritable() { + + } + + public NodeWritable(long nodeId, long propId) { + this.set(nodeId, propId); + } + + public void set(long nodeId, long propId) { + this.nodeId = new LongWritable(nodeId); + this.propId = new LongWritable(propId); + } + + public void set(LongWritable nodeId, LongWritable propId) { + this.nodeId = nodeId; + this.propId = propId; + } + + public LongWritable getNodeId() { + return nodeId; + } + + public LongWritable getPropId() { + return propId; + } + + @Override + public void write(DataOutput out) throws IOException { + nodeId.write(out); + propId.write(out); + 
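+ /* The wire format above is two consecutive longs (16 bytes). If this key
+  * were ever sorted in bulk, a raw comparator along these lines (a sketch,
+  * not part of this patch) would avoid deserialization during the shuffle:
+  *
+  *   public static class Comparator extends WritableComparator {
+  *     public Comparator() { super(NodeWritable.class); }
+  *     @Override
+  *     public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+  *       int c = Long.compare(readLong(b1, s1), readLong(b2, s2));
+  *       return (c != 0) ? c : Long.compare(readLong(b1, s1 + 8), readLong(b2, s2 + 8));
+  *     }
+  *   }
+  */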
} + + @Override + public void readFields(DataInput in) throws IOException { + nodeId.readFields(in); + propId.readFields(in); + } + + @Override + public int compareTo(Object obj) { + NodeWritable other = (NodeWritable) obj; + int nodeDiff = nodeId.compareTo(other.nodeId); + return (nodeDiff == 0) ? propId.compareTo(other.propId) : nodeDiff; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((propId == null) ? 0 : propId.hashCode()); + result = prime * result + ((nodeId == null) ? 0 : nodeId.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + NodeWritable other = (NodeWritable) obj; + if (propId == null) { + if (other.propId != null) + return false; + } else if (!propId.equals(other.propId)) + return false; + if (nodeId == null) { + if (other.nodeId != null) + return false; + } else if (!nodeId.equals(other.nodeId)) + return false; + return true; + } + + @Override + public String toString() { + return "NodeIdPropIdWritable [nodeId=" + nodeId + ", propId=" + propId + "]"; + } + +} + diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/PropertyListWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/PropertyListWritable.java new file mode 100644 index 0000000..39e5a1d --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/PropertyListWritable.java @@ -0,0 +1,193 @@ +package nl.waredingen.graphs.neo.mapreduce.input.writables; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; + +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.WritableComparable; + +@SuppressWarnings("rawtypes") +public class PropertyListWritable implements WritableComparable { + + private ArrayList propertyIndexKeys = new ArrayList(); + private ArrayList properties = new ArrayList(); + + public PropertyListWritable() { + + } + + public PropertyListWritable(PropertyListWritable other) { + for (Text text : other.getProperties()) { + properties.add(new Text(text.toString())); + } + for (IntWritable key : other.getPropertyIndexKeys()) { + propertyIndexKeys.add(new IntWritable(key.get())); + } + } + + public void setValues(String... vals) { + for (String string : vals) { + Text val = new Text(); + val.set(string); + this.properties.add(val); + } + } + + public void setKeys(int... 
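+ /* NOTE: write() below serializes a single size taken from the properties
+  * list and readFields() reads that many keys and that many values, so the
+  * two lists must always have equal length. add(int, String) maintains the
+  * invariant; calling setValues() and setKeys() with different arities
+  * would corrupt the stream on deserialization. */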
vals) { + for (int key : vals) { + IntWritable val = new IntWritable(); + val.set(key); + this.propertyIndexKeys.add(val); + } + } + + public ArrayList getProperties() { + return this.properties; + } + + public ArrayList getPropertyIndexKeys() { + return this.propertyIndexKeys; + } + + public String[] valuesToArray() { + String[] result = new String[this.properties.size()]; + for (int i = 0; i < properties.size(); i++) { + result[i] = properties.get(i).toString(); + } + return result; + } + + public int[] keysToArray() { + int[] result = new int[this.propertyIndexKeys.size()]; + for (int i = 0; i < propertyIndexKeys.size(); i++) { + result[i] = propertyIndexKeys.get(i).get(); + } + return result; + } + + @Override + public void write(DataOutput out) throws IOException { + out.writeInt(properties.size()); + for (IntWritable value : propertyIndexKeys) { + value.write(out); + } + for (Text value : properties) { + value.write(out); + } + } + + @Override + public void readFields(DataInput in) throws IOException { + int size = in.readInt(); + propertyIndexKeys = new ArrayList(size); + for (int i = 0; i < size; i++) { + IntWritable value = new IntWritable(); + value.readFields(in); + propertyIndexKeys.add(value); + } + properties = new ArrayList(size); + for (int i = 0; i < size; i++) { + Text value = new Text(); + value.readFields(in); + properties.add(value); + } + + } + + @Override + public int compareTo(Object o) { + PropertyListWritable obj = (PropertyListWritable) o; + if (this.properties.equals(obj.properties) && this.propertyIndexKeys.equals(obj.propertyIndexKeys)) { + return 0; + } else { + int propDiff = comparePropertiesTo(obj.properties); + if (propDiff == 0) { + return comparePropertyIndexesTo(obj.propertyIndexKeys); + } else { + return propDiff; + } + } + } + + private int comparePropertiesTo(ArrayList other) { + int result = 0; + int sizeDiff = this.properties.size() - other.size(); + if (sizeDiff == 0) { + for (int i = 0; i < this.properties.size(); i++) { + result = this.properties.get(i).compareTo(other.get(i)); + if (result != 0) + break; + } + } else { + result = sizeDiff; + } + return result; + } + + private int comparePropertyIndexesTo(ArrayList other) { + int result = 0; + int sizeDiff = this.propertyIndexKeys.size() - other.size(); + if (sizeDiff == 0) { + for (int i = 0; i < this.propertyIndexKeys.size(); i++) { + result = this.propertyIndexKeys.get(i).compareTo(other.get(i)); + if (result != 0) + break; + } + } else { + result = sizeDiff; + } + return result; + } + + public void add(int index, String prop) { + Text val = new Text(); + val.set(prop); + this.properties.add(val); + IntWritable key = new IntWritable(index); + this.propertyIndexKeys.add(key); + } + + public int getLength() { + return this.properties.size(); + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((properties == null) ? 0 : properties.hashCode()); + result = prime * result + ((propertyIndexKeys == null) ? 
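+ /* compareTo above orders first by list length and only then element by
+  * element, so a shorter list sorts before any longer one regardless of
+  * content. That is adequate for grouping identical lists but is not a
+  * lexicographic ordering. */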
0 : propertyIndexKeys.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + PropertyListWritable other = (PropertyListWritable) obj; + if (properties == null) { + if (other.properties != null) + return false; + } else if (!properties.equals(other.properties)) + return false; + if (propertyIndexKeys == null) { + if (other.propertyIndexKeys != null) + return false; + } else if (!propertyIndexKeys.equals(other.propertyIndexKeys)) + return false; + return true; + } + + @Override + public String toString() { + return "PropertyListWritable [propertyIndexKeys=" + propertyIndexKeys + ", properties=" + properties + "]"; + } + +} \ No newline at end of file diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/SurroundingEdgeWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/SurroundingEdgeWritable.java new file mode 100644 index 0000000..3eb3a51 --- /dev/null +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/SurroundingEdgeWritable.java @@ -0,0 +1,186 @@ +package nl.waredingen.graphs.neo.mapreduce.input.writables; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; + +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.WritableComparable; + +@SuppressWarnings("rawtypes") +public class SurroundingEdgeWritable implements WritableComparable { + + private LongWritable nodeId = new LongWritable(); + private LongWritable edgeId = new LongWritable(); + private LongWritable fromNodeId = new LongWritable(); + private LongWritable toNodeId = new LongWritable(); + private LongWritable edgePropId = new LongWritable(); + private LongWritable edgePrev = new LongWritable(); + private LongWritable edgeNext = new LongWritable(); + + public SurroundingEdgeWritable() { + + } + + public SurroundingEdgeWritable(long nodeId, long edgeId, long fromNode, long toNode, long edgeProp, long prev, long next) { + this.set(nodeId, edgeId, fromNode, toNode, edgeProp, prev, next); + } + + public SurroundingEdgeWritable(SurroundingEdgeWritable other) { + this.set(other.getNodeId().get(), other.getEdgeId().get(), other.getFromNodeId().get(), other.getToNodeId().get(), other.getEdgePropId().get(), other + .getEdgePrev().get(), other.getEdgeNext().get()); + } + + public LongWritable getNodeId() { + return nodeId; + } + + public LongWritable getEdgeId() { + return edgeId; + } + + public LongWritable getFromNodeId() { + return fromNodeId; + } + + public LongWritable getToNodeId() { + return toNodeId; + } + + public LongWritable getEdgePropId() { + return edgePropId; + } + + public LongWritable getEdgePrev() { + return edgePrev; + } + + public LongWritable getEdgeNext() { + return edgeNext; + } + + public void set(long nodeId, long edgeId, long fromNode, long toNode, long edgeProp, long prev, long next) { + this.nodeId = new LongWritable(nodeId); + this.edgeId = new LongWritable(edgeId); + this.fromNodeId = new LongWritable(fromNode); + this.toNodeId = new LongWritable(toNode); + this.edgePropId = new LongWritable(edgeProp); + this.edgePrev = new LongWritable(prev); + this.edgeNext = new LongWritable(next); + } + + public void set(LongWritable nodeId, LongWritable edgeId, LongWritable fromNodeId, LongWritable toNodeId, LongWritable edgeProp, LongWritable edgePrev, + LongWritable edgeNext) { + this.nodeId = nodeId; + this.edgeId = edgeId; + 
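+ // NOTE: this overload of set() aliases the caller's LongWritable
+ // instances instead of copying them. Under Hadoop's object reuse the
+ // caller may mutate them later; pass instances you will not reuse, or
+ // prefer the primitive-long overload above, which allocates fresh
+ // wrappers.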
this.fromNodeId = fromNodeId; + this.toNodeId = toNodeId; + this.edgePropId = edgeProp; + this.edgePrev = edgePrev; + this.edgeNext = edgeNext; + } + + @Override + public void write(DataOutput out) throws IOException { + nodeId.write(out); + edgeId.write(out); + fromNodeId.write(out); + toNodeId.write(out); + edgePropId.write(out); + edgePrev.write(out); + edgeNext.write(out); + } + + @Override + public void readFields(DataInput in) throws IOException { + nodeId.readFields(in); + edgeId.readFields(in); + fromNodeId.readFields(in); + toNodeId.readFields(in); + edgePropId.readFields(in); + edgePrev.readFields(in); + edgeNext.readFields(in); + } + + @Override + public int compareTo(Object obj) { + SurroundingEdgeWritable other = (SurroundingEdgeWritable) obj; + int nodeDiff = nodeId.compareTo(other.nodeId); + int edgeDiff = edgeId.compareTo(other.edgeId); + int fromDiff = fromNodeId.compareTo(other.fromNodeId); + int toDiff = toNodeId.compareTo(other.toNodeId); + int propDiff = edgePropId.compareTo(other.edgePropId); + int prevDiff = edgePrev.compareTo(other.edgePrev); + int nextDiff = edgeNext.compareTo(other.edgeNext); + return (nodeDiff == 0) ? (edgeDiff == 0) ? (fromDiff == 0) ? (toDiff == 0) ? (propDiff == 0) ? (prevDiff == 0) ? nextDiff : prevDiff : propDiff + : toDiff : fromDiff : edgeDiff : nodeDiff; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((edgeId == null) ? 0 : edgeId.hashCode()); + result = prime * result + ((edgeNext == null) ? 0 : edgeNext.hashCode()); + result = prime * result + ((edgePrev == null) ? 0 : edgePrev.hashCode()); + result = prime * result + ((edgePropId == null) ? 0 : edgePropId.hashCode()); + result = prime * result + ((fromNodeId == null) ? 0 : fromNodeId.hashCode()); + result = prime * result + ((nodeId == null) ? 0 : nodeId.hashCode()); + result = prime * result + ((toNodeId == null) ? 
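+ /* The primitive set() overload allocates seven new LongWritable wrappers
+  * per call; in a tight map or reduce loop it is cheaper to keep the
+  * wrappers and update them in place with LongWritable.set(long). */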
0 : toNodeId.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + SurroundingEdgeWritable other = (SurroundingEdgeWritable) obj; + if (edgeId == null) { + if (other.edgeId != null) + return false; + } else if (!edgeId.equals(other.edgeId)) + return false; + if (edgeNext == null) { + if (other.edgeNext != null) + return false; + } else if (!edgeNext.equals(other.edgeNext)) + return false; + if (edgePrev == null) { + if (other.edgePrev != null) + return false; + } else if (!edgePrev.equals(other.edgePrev)) + return false; + if (edgePropId == null) { + if (other.edgePropId != null) + return false; + } else if (!edgePropId.equals(other.edgePropId)) + return false; + if (fromNodeId == null) { + if (other.fromNodeId != null) + return false; + } else if (!fromNodeId.equals(other.fromNodeId)) + return false; + if (nodeId == null) { + if (other.nodeId != null) + return false; + } else if (!nodeId.equals(other.nodeId)) + return false; + if (toNodeId == null) { + if (other.toNodeId != null) + return false; + } else if (!toNodeId.equals(other.toNodeId)) + return false; + return true; + } + + @Override + public String toString() { + return "SurroundingEdgeWritable [nodeId=" + nodeId + ", edgeId=" + edgeId + ", fromNodeId=" + fromNodeId + ", toNodeId=" + toNodeId + ", edgePropId=" + + edgePropId + ", edgePrev=" + edgePrev + ", edgeNext=" + edgeNext + "]"; + } + +} \ No newline at end of file diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinFromEdgesMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinFromEdgesMapper.java index e1babda..f168bc9 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinFromEdgesMapper.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinFromEdgesMapper.java @@ -2,17 +2,25 @@ import java.io.IOException; -import org.apache.hadoop.io.LongWritable; +import org.apache.commons.lang.StringUtils; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; -public class JoinFromEdgesMapper extends Mapper { +public class JoinFromEdgesMapper extends Mapper { private Text outputKey = new Text(); - - protected void map(LongWritable key, Text value, Context context) throws IOException ,InterruptedException { - String[] values = value.toString().split("\t", 3); + private Text outputValue = new Text(); + private Text valAsText = new Text(); + + @Override + protected void map(NullWritable key, BytesWritable value, Context context) throws IOException ,InterruptedException { + valAsText.set(value.getBytes(), 0, value.getLength()); + String[] values = valAsText.toString().split("\t", 4); + outputKey.set("E"+values[1]); - context.write(outputKey, value); + outputValue.set(StringUtils.join(values, "\t", 0, 4)); + context.write(outputKey, outputValue); } } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesAndEdgesReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesAndEdgesReducer.java index dc88216..a3178c6 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesAndEdgesReducer.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesAndEdgesReducer.java @@ -3,16 +3,14 @@ import java.io.IOException; import java.util.Iterator; -import 
org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; -public class JoinNodesAndEdgesReducer extends Reducer { +public class JoinNodesAndEdgesReducer extends Reducer { - private Text outputKey = new Text(); private Text outputValue = new Text(); - + @Override protected void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException { Iterator iter = values.iterator(); @@ -24,11 +22,9 @@ protected void reduce(Text key, Iterable values, Context context) throws I while (iter.hasNext()) { Text value = iter.next(); - String toNode = value.toString().split("\t", 4)[2]; - outputKey.set("R"+toNode); outputValue.set(value.toString() + "\t" + node); - context.write(outputKey, outputValue); + context.write(NullWritable.get(), outputValue); } } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesMapper.java index 9ca42d4..06283c8 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesMapper.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesMapper.java @@ -2,17 +2,23 @@ import java.io.IOException; -import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; -public class JoinNodesMapper extends Mapper { +public class JoinNodesMapper extends Mapper { private Text outputKey = new Text(); - - protected void map(LongWritable key, Text value, Context context) throws IOException ,InterruptedException { - String[] values = value.toString().split("\t", 3); - outputKey.set("N"+values[1]); - context.write(outputKey, value); + private Text outputValue = new Text(); + private Text valAsText = new Text(); + + @Override + protected void map(NullWritable key, BytesWritable value, Context context) throws IOException, InterruptedException { + valAsText.set(value.getBytes(), 0, value.getLength()); + String[] values = valAsText.toString().split("\t", 3); + outputKey.set("N" + values[1]); + outputValue.set(values[0] + "\t" + values[2]); + context.write(outputKey, outputValue); } } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinToEdgesMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinToEdgesMapper.java index 777f1da..ca6dc39 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinToEdgesMapper.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/JoinToEdgesMapper.java @@ -2,15 +2,17 @@ import java.io.IOException; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; -public class JoinToEdgesMapper extends Mapper { +public class JoinToEdgesMapper extends Mapper { private Text outputKey = new Text(); - protected void map(Text key, Text value, Context context) throws IOException ,InterruptedException { - String[] values = value.toString().split("\t", 4); + @Override + protected void map(NullWritable key, Text value, Context context) throws IOException ,InterruptedException { + String[] values = value.toString().split("\t", 6); outputKey.set("E"+values[2]); context.write(outputKey, value); } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/NodeAndEdgeKeyPartitioner.java 
b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/NodeAndEdgeKeyPartitioner.java index 7002f9a..aa1c51a 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/NodeAndEdgeKeyPartitioner.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/join/NodeAndEdgeKeyPartitioner.java @@ -8,7 +8,7 @@ public class NodeAndEdgeKeyPartitioner extends Partitioner { @Override public int getPartition(Text key, Text val, int numPartitions) { int hash = key.toString().substring(1).hashCode(); - return (hash & Integer.MAX_VALUE) % numPartitions; + return (hash & Integer.MAX_VALUE) % numPartitions; } } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapper.java index 85731f6..717c1ea 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapper.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapper.java @@ -2,22 +2,23 @@ import java.io.IOException; +import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeIdPropIdWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeWritable; + import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.Mapper; -public class NodeOutputMapper extends Mapper { +public class NodeOutputMapper extends Mapper { private LongWritable outputKey = new LongWritable(); - private Text outputValue = new Text(); + private EdgeIdPropIdWritable outputValue = new EdgeIdPropIdWritable(); + + @Override + protected void map(NullWritable key, NodeEdgeWritable value, Context context) throws IOException, InterruptedException { + outputKey.set(value.getNode().getNodeId().get()); - protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { - //nodeid node nodename edgeid fromnodeid tonodeid - String[] vals = value.toString().split("\t", 6); - long id = Long.parseLong(vals[0]); - outputKey.set(id); - //TODO also output a real version of first properties id here - outputValue.set(vals[3]+"\t"+String.valueOf(id * 2)); + outputValue.set(value.getEdge().getEdgeId(), value.getNode().getPropId()); context.write(outputKey, outputValue); } } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputReducer.java index 14048bf..4f2d15a 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputReducer.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputReducer.java @@ -3,21 +3,22 @@ import java.io.IOException; import java.util.Iterator; +import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeIdPropIdWritable; import nl.waredingen.graphs.neo.neo4j.Neo4JUtils; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import org.neo4j.kernel.impl.nioneo.store.Record; -public class NodeOutputReducer extends Reducer { +public class NodeOutputReducer extends Reducer { private BytesWritable outputValue = new BytesWritable(); - protected void reduce(LongWritable key, Iterable values, Context context) throws IOException, + @Override + protected void reduce(LongWritable key, Iterable values, Context context) throws IOException, 
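+ /* The reduce body below collapses all (edgeId, propId) pairs for a node
+  * to their minimums, which become the node record's first-relationship
+  * and first-property pointers; Long.MAX_VALUE sentinels fall back to -1
+  * (no relationship / no property) when a node has none. */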
+            EdgeIdPropIdWritable value = itr.next();
+            relnum = Math.min(relnum, value.getEdgeId().get());
+            propnum = Math.min(propnum, value.getPropId().get());
         }
         if (relnum == Long.MAX_VALUE)
             relnum = -1L;
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputRownumPartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputRownumPartitioner.java
new file mode 100644
index 0000000..4c2b3ae
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputRownumPartitioner.java
@@ -0,0 +1,13 @@
+package nl.waredingen.graphs.neo.mapreduce.nodes;
+
+import nl.waredingen.graphs.neo.mapreduce.AbstractRownumPartitioner;
+import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
+
+public class NodeOutputRownumPartitioner extends AbstractRownumPartitioner {
+
+    @Override
+    public long getMaxCounter() {
+        return Neo4JUtils.getMetaData(conf).getNumberOfNodes();
+    }
+
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerAndIdComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerAndIdComparator.java
new file mode 100644
index 0000000..6aa02d3
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerAndIdComparator.java
@@ -0,0 +1,29 @@
+package nl.waredingen.graphs.neo.mapreduce.properties;
+
+import nl.waredingen.graphs.neo.mapreduce.input.writables.ByteMarkerIdPropIdWritable;
+
+import org.apache.hadoop.io.WritableComparable;
+import org.apache.hadoop.io.WritableComparator;
+
+public class ByteMarkerAndIdComparator extends WritableComparator {
+    protected ByteMarkerAndIdComparator() {
+        super(ByteMarkerIdPropIdWritable.class, true);
+    }
+
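+    // sorts counter records (marker byte) ahead of value records within a
+    // partition; ties break on record id and then on property id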
+    @SuppressWarnings("rawtypes")
+    @Override
+    public int compare(WritableComparable w1, WritableComparable w2) {
+        ByteMarkerIdPropIdWritable k1 = (ByteMarkerIdPropIdWritable) w1;
+        ByteMarkerIdPropIdWritable k2 = (ByteMarkerIdPropIdWritable) w2;
+
+        int result = k1.getMarker().compareTo(k2.getMarker());
+        if (0 == result) {
+            result = k1.getId().compareTo(k2.getId());
+            if (0 == result) {
+                result = k1.getPropId().compareTo(k2.getPropId());
+            }
+        }
+        return result;
+    }
+
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerAndPropertyOutputIdComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerAndPropertyOutputIdComparator.java
deleted file mode 100644
index b0dab4d..0000000
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/ByteMarkerAndPropertyOutputIdComparator.java
+++ /dev/null
@@ -1,24 +0,0 @@
-package nl.waredingen.graphs.neo.mapreduce.properties;
-
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableComparator;
-
-public class ByteMarkerAndPropertyOutputIdComparator extends WritableComparator {
-    protected ByteMarkerAndPropertyOutputIdComparator() {
-        super(ByteMarkerPropertyIdWritable.class, true);
-    }
-
-    @SuppressWarnings("rawtypes")
-    @Override
-    public int compare(WritableComparable w1, WritableComparable w2) {
-        ByteMarkerPropertyIdWritable k1 = (ByteMarkerPropertyIdWritable) w1;
-        ByteMarkerPropertyIdWritable k2 = (ByteMarkerPropertyIdWritable) w2;
-
-        int result = k1.getMarker().compareTo(k2.getMarker());
-        if (0 == result) {
-            result = k1.getId().compareTo(k2.getId());
-        }
-        return result;
-    }
-
-}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePreparePropertiesMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePreparePropertiesMapper.java
new file mode 100644
index 0000000..0834ce3
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePreparePropertiesMapper.java
@@ -0,0 +1,103 @@
+package nl.waredingen.graphs.neo.mapreduce.properties;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import nl.waredingen.graphs.neo.mapreduce.input.MetaData;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.FullEdgePropertiesWritable;
+import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Mapper;
+import org.neo4j.kernel.impl.nioneo.store.PropertyBlock;
+import org.neo4j.kernel.impl.nioneo.store.PropertyRecord;
+import org.neo4j.kernel.impl.nioneo.store.PropertyType;
+
+public class EdgePreparePropertiesMapper extends Mapper<Text, Text, AscLongDescLongWritable, FullEdgePropertiesWritable> {
+    private AscLongDescLongWritable outputKey = new AscLongDescLongWritable();
+    private FullEdgePropertiesWritable outputValue = new FullEdgePropertiesWritable();
+    private MetaData metaData;
+
+    @Override
+    protected void setup(Context context) throws IOException ,InterruptedException {
+        metaData = Neo4JUtils.getMetaData(context.getConfiguration());
+    }
+
+    @Override
+    protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
+        String[] values = value.toString().split("\t", metaData.getEdgePropertySize());
+        long edgeId = Long.parseLong(key.toString());
+        int propId = 0;
+        List<PropertyRecord> propRecords = new ArrayList<PropertyRecord>();
+        Map<Integer, String> props = new HashMap<Integer, String>();
+        PropertyRecord record = new PropertyRecord(propId);
+        record.setInUse(true);
+        record.setCreated();
+        propRecords.add(record);
+        for (int i = 0; i < values.length-2; i++) {
+            String property = values[i+2].trim();
+            int propertyKey = i + metaData.getNodePropertySize();
+            props.put(propertyKey, property);
+            Object propertyObj = getValueAsPropertyTypedClass(propertyKey, property);
+            PropertyBlock propertyBlock = getPropertyBlock(propertyKey, propertyObj);
+            if (record.size() + propertyBlock.getSize() > PropertyType.getPayloadSize()) {
+                PropertyRecord prevRecord = record;
+                record = new PropertyRecord(++propId);
+                record.setInUse(true);
+                record.setCreated();
+                prevRecord.setNextProp(propId);
+                record.setPrevProp(prevRecord.getId());
+                propRecords.add(record);
+            }
+            record.addPropertyBlock(propertyBlock);
+        }
+
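+        // emit one record per PropertyRecord; the reducer links the records of
+        // an edge into the prev/next property chain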
+        for (PropertyRecord rec : propRecords) {
+            outputKey.setLeft(new LongWritable(edgeId));
+            outputKey.setRight(new LongWritable(rec.getId()));
+            boolean first = true;
+            for (PropertyBlock block : rec.getPropertyBlocks()) {
+                if (first) {
+                    outputValue.set(edgeId, values[metaData.getEdgeFromNodeIdIndex()], values[metaData.getEdgeToNodeIdIndex()], (int) rec.getId(), getBlockCount(block), -1L, -1L, block.getKeyIndexId(),
+                            props.get(block.getKeyIndexId()));
+                    first = false;
+                } else {
+                    outputValue.add(block.getKeyIndexId(), props.get(block.getKeyIndexId()), getBlockCount(block));
+                }
+            }
+            context.write(outputKey, outputValue);
+        }
+    }
+
+    private int getBlockCount(PropertyBlock block) {
+        return block.getValueRecords().size();
+    }
+
+    private PropertyBlock getPropertyBlock(int propertyKey, Object property) {
+        PropertyBlock block = new PropertyBlock();
+        Neo4JUtils.encodeValue(block, propertyKey, property, 0L);
+        return block;
+    }
+
+    @SuppressWarnings({ "unchecked", "rawtypes" })
+    private Object getValueAsPropertyTypedClass(int propIndex, String property) {
+        Class type = null;
+        try {
+            if (propIndex - metaData.getNodePropertySize() + 2 < metaData.getEdgePropertySize()) {
+                type = metaData.getEdgeTypes()[propIndex - metaData.getNodePropertySize() + 2];
+            }
+            if (type != null) {
+                return type.getConstructor(String.class).newInstance(property);
+            } else {
+                return property;
+            }
+        } catch (Exception e) {
+            return property;
+        }
+    }
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePreparePropertiesReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePreparePropertiesReducer.java
new file mode 100644
index 0000000..89a9cb3
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePreparePropertiesReducer.java
@@ -0,0 +1,84 @@
+package nl.waredingen.graphs.neo.mapreduce.properties;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.FullEdgePropertiesWritable;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.Reducer;
+
+public class EdgePreparePropertiesReducer extends Reducer<AscLongDescLongWritable, FullEdgePropertiesWritable, LongWritable, FullEdgePropertiesWritable> {
+
+    private LongWritable outputKey = new LongWritable();
+    private FullEdgePropertiesWritable outputValue = new FullEdgePropertiesWritable();
+
+    protected void reduce(AscLongDescLongWritable key, Iterable<FullEdgePropertiesWritable> values, Context context) throws IOException, InterruptedException {
+        Iterator<FullEdgePropertiesWritable> iter = values.iterator();
+
+        SurroundingPropertyContext ctx = new SurroundingPropertyContext();
+
+        while (iter.hasNext()) {
+            FullEdgePropertiesWritable value = iter.next();
+
+            long nodeId = key.getLeft().get();
+            if (ctx.nodeId == -1L) {
+                // first call, so set current fields
+                ctx.nodeId = nodeId;
+                ctx.index = value.getPropertyIndex().get();
+                ctx.nodeIdentifier = value.getFromNodeIdentifier().toString();
+                ctx.toNodeIdentifier = value.getToNodeIdentifier().toString();
+                ctx.propertyIndexes = value.getProperties().keysToArray();
+                ctx.properties = value.getProperties().valuesToArray();
+                ctx.count = value.getBlockCount().get();
+                ctx.prev = -1L; // don't know yet
+                ctx.next = -1L; // first call, relationships ordered descending,
+                                // so last rel, so no next available
+
+            } else if (ctx.prev == -1L) {
+                // not the first so current relationship will become prev in
+                // context and context can be emitted and refilled with
+                // current
+                ctx.prev = value.getPropertyIndex().get();
+
+                outputKey.set(ctx.nodeId);
+                for (int i = 0; i < ctx.properties.length; i++) {
+                    if (i == 0) {
+                        outputValue.set(ctx.nodeId, ctx.nodeIdentifier, ctx.toNodeIdentifier, ctx.index, ctx.count, ctx.prev, ctx.next, ctx.propertyIndexes[i],
+                                ctx.properties[i]);
+                    } else {
+                        outputValue.add(ctx.propertyIndexes[i], ctx.properties[i], 0);
+                    }
+                }
+                context.write(outputKey, outputValue);
+
+                long next = ctx.index;
+                ctx.nodeId = nodeId;
+                ctx.index = value.getPropertyIndex().get();
+                ctx.nodeIdentifier = value.getFromNodeIdentifier().toString();
+                ctx.toNodeIdentifier = value.getToNodeIdentifier().toString();
+                ctx.propertyIndexes = value.getProperties().keysToArray();
+                ctx.properties = value.getProperties().valuesToArray();
+                ctx.count = value.getBlockCount().get();
+                ctx.prev = -1L; // don't know yet
+                ctx.next = next;
+
+            }
+
+        }
+
+        // write out last context
+        outputKey.set(ctx.nodeId);
+        for (int i = 0; i < ctx.properties.length; i++) {
+            if (i == 0) {
+                outputValue.set(ctx.nodeId, ctx.nodeIdentifier, ctx.toNodeIdentifier, ctx.index, ctx.count, ctx.prev, ctx.next, ctx.propertyIndexes[i],
+                        ctx.properties[i]);
+            } else {
+                outputValue.add(ctx.propertyIndexes[i], ctx.properties[i], 0);
+            }
+        }
+        context.write(outputKey, outputValue);
+
+    }
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertyOutputMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertyOutputMapper.java
new file mode 100644
index 0000000..ac518c1
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertyOutputMapper.java
@@ -0,0 +1,59 @@
+package nl.waredingen.graphs.neo.mapreduce.properties;
+
+import java.io.IOException;
+
+import nl.waredingen.graphs.misc.RowNumberJob;
+import nl.waredingen.graphs.neo.mapreduce.input.MetaData;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.ByteMarkerIdPropIdWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgePropertyOutputCountersAndValueWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.FullEdgePropertiesWritable;
+import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
+
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+
+public class EdgePropertyOutputMapper extends
+        Mapper<LongWritable, FullEdgePropertiesWritable, ByteMarkerIdPropIdWritable, EdgePropertyOutputCountersAndValueWritable> {
+
+    private ByteMarkerIdPropIdWritable outputKey = new ByteMarkerIdPropIdWritable();
+    private EdgePropertyOutputCountersAndValueWritable outputValue = new EdgePropertyOutputCountersAndValueWritable();
+    private long[] blockCountCounters;
+    private long[] propertyIdCounters;
+    private int numReduceTasks;
+    private long maxIds;
+    private MetaData metaData;
+
+    @Override
+    protected void setup(Context context) throws IOException, InterruptedException {
+        metaData = Neo4JUtils.getMetaData(context.getConfiguration());
+        numReduceTasks = context.getNumReduceTasks();
+        maxIds = metaData.getNumberOfEdges();
+
+        blockCountCounters = new long[numReduceTasks];
+        propertyIdCounters = new long[numReduceTasks];
+        outputKey.setMarker(new ByteWritable(RowNumberJob.VALUE_MARKER));
+    }
+
+    @Override
+    protected void map(LongWritable key, FullEdgePropertiesWritable value, Context context) throws IOException, InterruptedException {
+        outputKey.setIds(value.getEdgeId(), value.getPropertyIndex());
+        outputValue.setValues(value.getEdgeId(), value);
+        blockCountCounters[EdgePropertyOutputPartitioner.partitionForValue(outputValue, numReduceTasks, maxIds)] += value.getBlockCount().get();
+        propertyIdCounters[EdgePropertyOutputPartitioner.partitionForValue(outputValue, numReduceTasks, maxIds)]++;
+        context.write(outputKey, outputValue);
+    }
+
+    @Override
+    protected void cleanup(Context context) throws IOException, InterruptedException {
+        outputKey.setMarker(new ByteWritable(RowNumberJob.COUNTER_MARKER));
+        outputKey.setIds(new LongWritable(Long.MIN_VALUE), new IntWritable(Integer.MIN_VALUE));
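+        // forward the cumulative totals of all earlier partitions to each later
+        // partition as counter records; the reducers read them as start offsets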
+        for (int c = 0; c < blockCountCounters.length - 1; c++) {
+            outputValue.setCounter(c + 1, blockCountCounters[c], propertyIdCounters[c]);
+            context.write(outputKey, outputValue);
+            blockCountCounters[c + 1] += blockCountCounters[c];
+            propertyIdCounters[c + 1] += propertyIdCounters[c];
+        }
+    }
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertyOutputPartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertyOutputPartitioner.java
new file mode 100644
index 0000000..e0f3c63
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertyOutputPartitioner.java
@@ -0,0 +1,50 @@
+package nl.waredingen.graphs.neo.mapreduce.properties;
+
+import nl.waredingen.graphs.misc.RowNumberJob;
+import nl.waredingen.graphs.neo.mapreduce.input.MetaData;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.ByteMarkerIdPropIdWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgePropertyOutputCountersAndValueWritable;
+import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Partitioner;
+
+public class EdgePropertyOutputPartitioner extends Partitioner<ByteMarkerIdPropIdWritable, EdgePropertyOutputCountersAndValueWritable> implements Configurable {
+
+    private long max = 0L;
+    private Configuration conf;
+
+    @Override
+    public int getPartition(ByteMarkerIdPropIdWritable key, EdgePropertyOutputCountersAndValueWritable value, int numPartitions) {
+
+        if (key.getMarker().get() == (byte) RowNumberJob.COUNTER_MARKER) {
+            return value.getPartition();
+        } else {
+            return EdgePropertyOutputPartitioner.partitionForValue(value, numPartitions, max);
+        }
+    }
+
+    public static int partitionForValue(EdgePropertyOutputCountersAndValueWritable value, int numPartitions, long maximumIds) {
+        double divider = Math.max(1, (double) maximumIds / numPartitions);
+        return (int) (value.getId().get() / divider);
+    }
+
+    @Override
+    public void setConf(Configuration conf) {
+        this.conf = conf;
+        configure();
+
+    }
+
+    private void configure() {
+        MetaData metaData = Neo4JUtils.getMetaData(conf);
+        this.max = metaData.getNumberOfEdges();
+    }
+
+    @Override
+    public Configuration getConf() {
+        return conf;
+    }
+
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertyOutputReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertyOutputReducer.java
new file mode 100644
index 0000000..7cf5344
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertyOutputReducer.java
@@ -0,0 +1,155 @@
+package nl.waredingen.graphs.neo.mapreduce.properties;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import nl.waredingen.graphs.neo.mapreduce.input.MetaData;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.ByteMarkerIdPropIdWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgePropertyOutputCountersAndValueWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.FullEdgePropertiesWritable;
+import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
+import org.neo4j.kernel.impl.nioneo.store.PropertyBlock;
+import org.neo4j.kernel.impl.nioneo.store.PropertyRecord;
+
+public class EdgePropertyOutputReducer
+        extends Reducer<ByteMarkerIdPropIdWritable, EdgePropertyOutputCountersAndValueWritable, NullWritable, BytesWritable> {
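+    // the counter records emitted in the mapper's cleanup arrive first and
+    // establish this reducer's global block and property-id offsets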
+    private MultipleOutputs mos;
+    private BytesWritable outputValue = new BytesWritable();
+    private MetaData metaData;
+
+    protected void setup(Context context) throws IOException, InterruptedException {
+        mos = new MultipleOutputs(context);
+        metaData = Neo4JUtils.getMetaData(context.getConfiguration());
+    }
+
+    protected void reduce(ByteMarkerIdPropIdWritable key, Iterable<EdgePropertyOutputCountersAndValueWritable> values, Context context) throws IOException,
+            InterruptedException {
+        Iterator<EdgePropertyOutputCountersAndValueWritable> itr = values.iterator();
+        if (!itr.hasNext()) {
+            return;
+        }
+
+        long blockCountOffset = 1;
+        long propCountOffset = metaData.getNumberOfNodeProperties();
+        EdgePropertyOutputCountersAndValueWritable value = itr.next();
+        while (itr.hasNext() && (value.getBlockOffset() > 0 || value.getIdOffset() > 0)) {
+            blockCountOffset += value.getBlockOffset();
+            propCountOffset += value.getIdOffset();
+            value = itr.next();
+        }
+
+        long blocksProcessed = 0L;
+        long edgeId = -1L;
+        if (!value.getValue().equals(EdgePropertyOutputCountersAndValueWritable.EMPTY_VAL)) {
+            if (edgeId != value.getValue().getEdgeId().get()) {
+                Text edgeText = new Text();
+                edgeText.set(value.getValue().getEdgeId().get() + "\t" + value.getValue().getFromNodeIdentifier().toString() + "\t" + value.getValue().getToNodeIdentifier().toString() + "\t" + propCountOffset);
+                byte[] ba = edgeText.getBytes();
+                outputValue.set(ba, 0, edgeText.getLength());
+                mos.write("edges", NullWritable.get(), outputValue);
+                edgeId = value.getValue().getEdgeId().get();
+            }
+            blocksProcessed = processValue(value, blockCountOffset, propCountOffset++);
+            blockCountOffset += blocksProcessed;
+            context.getCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "strings.blocks").increment(blocksProcessed);
+            context.getCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "edge.properties").increment(1);
+        }
+        while (itr.hasNext()) {
+            value = itr.next();
+            if (!value.getValue().equals(EdgePropertyOutputCountersAndValueWritable.EMPTY_VAL)) {
+                if (edgeId != value.getValue().getEdgeId().get()) {
+                    Text edgeText = new Text();
+                    edgeText.set(value.getValue().getEdgeId().get() + "\t" + value.getValue().getFromNodeIdentifier().toString() + "\t" + value.getValue().getToNodeIdentifier().toString() + "\t" + propCountOffset);
+                    byte[] ba = edgeText.getBytes();
+                    outputValue.set(ba, 0, edgeText.getLength());
+                    mos.write("edges", NullWritable.get(), outputValue);
+                    edgeId = value.getValue().getEdgeId().get();
+                }
+                blocksProcessed = processValue(value, blockCountOffset, propCountOffset++);
+                blockCountOffset += blocksProcessed;
+                context.getCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "strings.blocks").increment(blocksProcessed);
+                context.getCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "edge.properties").increment(1);
+            }
+        }
+    }
+
+    private long processValue(EdgePropertyOutputCountersAndValueWritable value, long blockCountOffset, long propOffset) throws IOException,
+            InterruptedException {
+
+        FullEdgePropertiesWritable propHolder = value.getValue();
+        int propIndex = propHolder.getPropertyIndex().get();
+
+        PropertyRecord record = new PropertyRecord(propOffset);
+        record.setInUse(true);
+        record.setCreated();
+        record.setInUse(true);
+        long prev = propHolder.getPrevProp().get();
+        long next = propHolder.getNextProp().get();
+        if (prev != -1L) {
+            prev = (prev - propIndex) + propOffset;
+        }
+        if (next != -1L) {
+            next = (next - propIndex) + propOffset;
+        }
+        record.setPrevProp(prev);
+        record.setNextProp(next);
+
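+        // re-encode every block at its final global block offset so spilled
+        // dynamic (string) records chain to the correct ids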
+        String[] properties = propHolder.getProperties().valuesToArray();
+        int[] propertyKeyIndexes = propHolder.getProperties().keysToArray();
+
+        for (int i = 0; i < properties.length; i++) {
+            Object propertyObj = getValueAsPropertyTypedClass(propertyKeyIndexes[i], properties[i]);
+            PropertyBlock propertyBlock = getPropertyBlock(propertyKeyIndexes[i], propertyObj, blockCountOffset);
+            record.addPropertyBlock(propertyBlock);
+        }
+
+        byte[] ba = Neo4JUtils.getPropertyReferenceAsByteArray(record);
+        outputValue.set(ba, 0, ba.length);
+        mos.write("props", NullWritable.get(), outputValue);
+
+        for (PropertyBlock block : record.getPropertyBlocks()) {
+
+            if (block.getValueRecords().size() > 0) {
+                ba = Neo4JUtils.getDynamicRecordsAsByteArray(block.getValueRecords(), 128);
+                outputValue.set(ba, 0, ba.length);
+                mos.write("strings", NullWritable.get(), outputValue);
+            }
+        }
+
+        return propHolder.getBlockCount().get();
+
+    }
+
+    @SuppressWarnings({ "unchecked", "rawtypes" })
+    private Object getValueAsPropertyTypedClass(int propIndex, String property) {
+        Class type = null;
+        try {
+            if (propIndex - metaData.getNodePropertySize() + 2 < metaData.getEdgePropertySize()) {
+                type = metaData.getEdgeTypes()[propIndex - metaData.getNodePropertySize() + 2];
+            }
+            if (type != null) {
+                return type.getConstructor(String.class).newInstance(property);
+            } else {
+                return property;
+            }
+        } catch (Exception e) {
+            return property;
+        }
+    }
+
+    private PropertyBlock getPropertyBlock(int propertyKey, Object property, long blockOffset) {
+        PropertyBlock block = new PropertyBlock();
+        Neo4JUtils.encodeValue(block, propertyKey, property, blockOffset);
+        return block;
+    }
+
+    protected void cleanup(Context context) throws IOException, InterruptedException {
+        mos.close();
+    }
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/IndifferentByteMarkerAndIdComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/IndifferentByteMarkerAndIdComparator.java
new file mode 100644
index 0000000..46cf5a3
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/IndifferentByteMarkerAndIdComparator.java
@@ -0,0 +1,17 @@
+package nl.waredingen.graphs.neo.mapreduce.properties;
+
+import nl.waredingen.graphs.neo.mapreduce.input.writables.ByteMarkerIdPropIdWritable;
+
+import org.apache.hadoop.io.RawComparator;
+
+public class IndifferentByteMarkerAndIdComparator implements RawComparator<ByteMarkerIdPropIdWritable> {
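+    // treats all keys as equal so that, when used for grouping, an entire
+    // partition arrives as a single reduce group while keeping its sort order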
+    @Override
+    public int compare(ByteMarkerIdPropIdWritable left, ByteMarkerIdPropIdWritable right) {
+        return 0;
+    }
+
+    @Override
+    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
+        return 0;
+    }
+}
\ No newline at end of file
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/IndifferentByteMarkerAndPropertyOutputIdComparator.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/IndifferentByteMarkerAndPropertyOutputIdComparator.java
deleted file mode 100644
index 3a2dfea..0000000
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/IndifferentByteMarkerAndPropertyOutputIdComparator.java
+++ /dev/null
@@ -1,15 +0,0 @@
-package nl.waredingen.graphs.neo.mapreduce.properties;
-
-import org.apache.hadoop.io.RawComparator;
-
-public class IndifferentByteMarkerAndPropertyOutputIdComparator implements RawComparator<ByteMarkerPropertyIdWritable> {
-    @Override
-    public int compare(ByteMarkerPropertyIdWritable left, ByteMarkerPropertyIdWritable right) {
-        return 0;
-    }
-
-    @Override
-    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
-        return 0;
-    }
-}
\ No newline at end of file
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesMapper.java
index 9a2f2d0..8c72a70 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesMapper.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesMapper.java
@@ -1,36 +1,102 @@
 package nl.waredingen.graphs.neo.mapreduce.properties;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
 
-import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.MetaData;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.FullNodePropertiesWritable;
 import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
 
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.neo4j.kernel.impl.nioneo.store.PropertyBlock;
+import org.neo4j.kernel.impl.nioneo.store.PropertyRecord;
+import org.neo4j.kernel.impl.nioneo.store.PropertyType;
 
-public class NodePreparePropertiesMapper extends Mapper<LongWritable, Text, AscLongDescLongWritable, Text> {
+public class NodePreparePropertiesMapper extends Mapper<Text, Text, AscLongDescLongWritable, FullNodePropertiesWritable> {
 
     private AscLongDescLongWritable outputKey = new AscLongDescLongWritable();
-    private Text outputValue = new Text();
-
-    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
-        // TODO choose correct number for 42
-        String[] values = value.toString().split("\t", 42);
-        int nodeId = Integer.parseInt(values[0]);
-        for (int i = 0; i < values.length - 1; i++) {
-            String property = values[i+1];
-            int propId = (nodeId * (values.length - 1)) + i;
+    private FullNodePropertiesWritable outputValue = new FullNodePropertiesWritable();
+    private MetaData metaData;
+
+    @Override
+    protected void setup(Context context) throws IOException, InterruptedException {
+        metaData = Neo4JUtils.getMetaData(context.getConfiguration());
+    }
+
+    @Override
+    protected void map(Text key, Text value, Context context) throws IOException, InterruptedException {
+        String[] values = value.toString().split("\t", metaData.getNodePropertySize());
+        long nodeId = Long.parseLong(key.toString());
+        int propId = 0;
+        List<PropertyRecord> propRecords = new ArrayList<PropertyRecord>();
+        Map<Integer, String> props = new HashMap<Integer, String>();
+        PropertyRecord record = new PropertyRecord(propId);
+        record.setInUse(true);
+        record.setCreated();
+        propRecords.add(record);
+        for (int i = 0; i < values.length; i++) {
+            String property = values[i].trim();
+            int propertyKey = i;
+            props.put(propertyKey, property);
+            Object propertyObj = getValueAsPropertyTypedClass(propertyKey, property);
+            PropertyBlock propertyBlock = getPropertyBlock(propertyKey, propertyObj);
+            if (record.size() + propertyBlock.getSize() > PropertyType.getPayloadSize()) {
+                PropertyRecord prevRecord = record;
+                record = new PropertyRecord(++propId);
+                record.setInUse(true);
+                record.setCreated();
+                prevRecord.setNextProp(propId);
+                record.setPrevProp(prevRecord.getId());
+                propRecords.add(record);
+            }
+            record.addPropertyBlock(propertyBlock);
+        }
+
+        for (PropertyRecord rec : propRecords) {
             outputKey.setLeft(new LongWritable(nodeId));
-            outputKey.setRight(new LongWritable(propId));
-            outputValue.set(i + "\t" +property + "\t" + getBlockCount(i, property));
+            outputKey.setRight(new LongWritable(rec.getId()));
+            boolean first = true;
+            for (PropertyBlock block : rec.getPropertyBlocks()) {
+                if (first) {
+                    outputValue.set(nodeId, values[metaData.getNodeIdIndex()], (int)rec.getId(), getBlockCount(block), -1L, -1L, block.getKeyIndexId(), props.get(block.getKeyIndexId()));
+                    first = false;
+                } else {
+                    outputValue.add(block.getKeyIndexId(), props.get(block.getKeyIndexId()), getBlockCount(block));
+                }
+            }
             context.write(outputKey, outputValue);
         }
     }
 
-    private int getBlockCount(int propertyKey, String property) {
+    private int getBlockCount(PropertyBlock block) {
+        return block.getValueRecords().size();
+    }
+
+    private PropertyBlock getPropertyBlock(int propertyKey, Object property) {
         PropertyBlock block = new PropertyBlock();
         Neo4JUtils.encodeValue(block, propertyKey, property, 0L);
-        return block.getValueRecords().size();
+        return block;
+    }
+
+    @SuppressWarnings({ "unchecked", "rawtypes" })
+    private Object getValueAsPropertyTypedClass(int propIndex, String property) {
+        Class type = null;
+        try {
+            if (propIndex < metaData.getNodePropertySize()) {
+                type = metaData.getNodeTypes()[propIndex];
+            }
+            if (type != null) {
+                return type.getConstructor(String.class).newInstance(property);
+            } else {
+                return property;
+            }
+        } catch (Exception e) {
+            return property;
+        }
     }
 }
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesReducer.java
index 4c7dcf5..2c0ecd6 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesReducer.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesReducer.java
@@ -3,32 +3,34 @@
 import java.io.IOException;
 import java.util.Iterator;
 
-import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongWritable;
-import nl.waredingen.graphs.neo.mapreduce.SurroundingContext;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.FullNodePropertiesWritable;
 
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.mapreduce.Reducer;
 
-public class NodePreparePropertiesReducer extends Reducer<AscLongDescLongWritable, Text, NullWritable, Text> {
+public class NodePreparePropertiesReducer extends Reducer<AscLongDescLongWritable, FullNodePropertiesWritable, LongWritable, FullNodePropertiesWritable> {
 
-    private Text outputValue = new Text();
-
-    protected void reduce(AscLongDescLongWritable key, Iterable<Text> values, Context context) throws IOException ,InterruptedException {
-        Iterator<Text> iter = values.iterator();
+    private LongWritable outputKey = new LongWritable();
+    private FullNodePropertiesWritable outputValue = new FullNodePropertiesWritable();
+
+    protected void reduce(AscLongDescLongWritable key, Iterable<FullNodePropertiesWritable> values, Context context) throws IOException ,InterruptedException {
+        Iterator<FullNodePropertiesWritable> iter = values.iterator();
 
-        SurroundingContext ctx = new SurroundingContext();
+        SurroundingPropertyContext ctx = new SurroundingPropertyContext();
 
         while (iter.hasNext()) {
-            String value = iter.next().toString();
+            FullNodePropertiesWritable value = iter.next();
 
             long nodeId = key.getLeft().get();
-            long propId = key.getRight().get();
-            if (ctx.id == -1L) {
+            if (ctx.nodeId == -1L) {
                 // first call, so set current fields
-                ctx.id = nodeId;
-                ctx.other = propId;
-                ctx.val = value;
+                ctx.nodeId = nodeId;
+                ctx.index = value.getPropertyIndex().get();
+                ctx.nodeIdentifier = value.getNodeIdentifier().toString();
+                ctx.propertyIndexes = value.getProperties().keysToArray();
+                ctx.properties = value.getProperties().valuesToArray();
+                ctx.count = value.getBlockCount().get();
                 ctx.prev = -1L; // don't know yet
                 ctx.next = -1L; // first call, relationships ordered descending,
                                 // so last rel, so no next available
@@ -36,15 +38,26 @@ protected void reduce(AscLongDescLongWritable key, Iterable<Text> values, Contex
                 // not the first so current relationship will become prev in
                 // context and context can be emitted and refilled with
                 // current
-                ctx.prev = propId;
+                ctx.prev = value.getPropertyIndex().get();
 
-                outputValue.set(ctx.toString());
-                context.write(NullWritable.get(), outputValue);
+                outputKey.set(ctx.nodeId);
+                for (int i = 0; i < ctx.properties.length; i++) {
+                    if (i == 0) {
+                        outputValue.set(ctx.nodeId, ctx.nodeIdentifier, ctx.index, ctx.count, ctx.prev, ctx.next, ctx.propertyIndexes[i],
+                                ctx.properties[i]);
+                    } else {
+                        outputValue.add(ctx.propertyIndexes[i], ctx.properties[i], 0);
+                    }
+                }
+                context.write(outputKey, outputValue);
 
-                long next = ctx.other;
-                ctx.id = nodeId;
-                ctx.other = propId;
-                ctx.val = value;
+                long next = ctx.index;
+                ctx.nodeId = nodeId;
+                ctx.index = value.getPropertyIndex().get();
+                ctx.nodeIdentifier = value.getNodeIdentifier().toString();
+                ctx.propertyIndexes = value.getProperties().keysToArray();
+                ctx.properties = value.getProperties().valuesToArray();
+                ctx.count = value.getBlockCount().get();
                 ctx.prev = -1L; // don't know yet
                 ctx.next = next;
 
@@ -53,9 +66,16 @@
         }
 
         // write out last context
-        outputValue.set(ctx.toString());
-        context.write(NullWritable.get(), outputValue);
+        outputKey.set(ctx.nodeId);
+        for (int i = 0; i < ctx.properties.length; i++) {
+            if (i == 0) {
+                outputValue.set(ctx.nodeId, ctx.nodeIdentifier, ctx.index, ctx.count, ctx.prev, ctx.next, ctx.propertyIndexes[i],
+                        ctx.properties[i]);
+            } else {
+                outputValue.add(ctx.propertyIndexes[i], ctx.properties[i], 0);
+            }
+        }
+        context.write(outputKey, outputValue);
     }
-
-}
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertyOutputMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertyOutputMapper.java
new file mode 100644
index 0000000..4b58673
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertyOutputMapper.java
@@ -0,0 +1,59 @@
+package nl.waredingen.graphs.neo.mapreduce.properties;
+
+import java.io.IOException;
+
+import nl.waredingen.graphs.misc.RowNumberJob;
+import nl.waredingen.graphs.neo.mapreduce.input.MetaData;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.ByteMarkerIdPropIdWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.FullNodePropertiesWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.NodePropertyOutputCountersAndValueWritable;
+import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
+
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mapreduce.Mapper;
+
+public class NodePropertyOutputMapper extends
+        Mapper<LongWritable, FullNodePropertiesWritable, ByteMarkerIdPropIdWritable, NodePropertyOutputCountersAndValueWritable> {
+
+    private ByteMarkerIdPropIdWritable outputKey = new ByteMarkerIdPropIdWritable();
+    private NodePropertyOutputCountersAndValueWritable outputValue = new NodePropertyOutputCountersAndValueWritable();
+    private long[] blockCountCounters;
+    private long[] propertyIdCounters;
+    private int numReduceTasks;
+    private long maxIds;
+    private MetaData metaData;
+
+    @Override
+    protected void setup(Context context) throws IOException, InterruptedException {
+        metaData = Neo4JUtils.getMetaData(context.getConfiguration());
+        numReduceTasks = context.getNumReduceTasks();
+        maxIds = metaData.getNumberOfNodes();
+
+        blockCountCounters = new long[numReduceTasks];
+        propertyIdCounters = new long[numReduceTasks];
+        outputKey.setMarker(new ByteWritable(RowNumberJob.VALUE_MARKER));
+    }
+
+    @Override
+    protected void map(LongWritable key, FullNodePropertiesWritable value, Context context) throws IOException, InterruptedException {
+        outputKey.setIds(value.getNodeId(), value.getPropertyIndex());
+        outputValue.setValues(value.getNodeId(), value);
+        blockCountCounters[NodePropertyOutputPartitioner.partitionForValue(outputValue, numReduceTasks, maxIds)] += value.getBlockCount().get();
+        propertyIdCounters[NodePropertyOutputPartitioner.partitionForValue(outputValue, numReduceTasks, maxIds)]++;
+        context.write(outputKey, outputValue);
+    }
+
+    @Override
+    protected void cleanup(Context context) throws IOException, InterruptedException {
+        outputKey.setMarker(new ByteWritable(RowNumberJob.COUNTER_MARKER));
+        outputKey.setIds(new LongWritable(Long.MIN_VALUE), new IntWritable(Integer.MIN_VALUE));
+        for (int c = 0; c < blockCountCounters.length - 1; c++) {
+            outputValue.setCounter(c + 1, blockCountCounters[c], propertyIdCounters[c]);
+            context.write(outputKey, outputValue);
+            blockCountCounters[c + 1] += blockCountCounters[c];
+            propertyIdCounters[c + 1] += propertyIdCounters[c];
+        }
+    }
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertyOutputPartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertyOutputPartitioner.java
new file mode 100644
index 0000000..5c0a7bb
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertyOutputPartitioner.java
@@ -0,0 +1,50 @@
+package nl.waredingen.graphs.neo.mapreduce.properties;
+
+import nl.waredingen.graphs.misc.RowNumberJob;
+import nl.waredingen.graphs.neo.mapreduce.input.MetaData;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.ByteMarkerIdPropIdWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.NodePropertyOutputCountersAndValueWritable;
+import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
+
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.mapreduce.Partitioner;
+
+public class NodePropertyOutputPartitioner extends Partitioner<ByteMarkerIdPropIdWritable, NodePropertyOutputCountersAndValueWritable> implements Configurable {
+
+    private long max = 0L;
+    private Configuration conf;
+
+    @Override
+    public int getPartition(ByteMarkerIdPropIdWritable key, NodePropertyOutputCountersAndValueWritable value, int numPartitions) {
+
+        if (key.getMarker().get() == (byte) RowNumberJob.COUNTER_MARKER) {
+            return value.getPartition();
+        } else {
+            return NodePropertyOutputPartitioner.partitionForValue(value, numPartitions, max);
+        }
+    }
+
+    public static int partitionForValue(NodePropertyOutputCountersAndValueWritable value, int numPartitions, long maximumIds) {
+        double divider = Math.max(1, (double) maximumIds / numPartitions);
+        return (int) (value.getId().get() / divider);
+    }
+
+    @Override
+    public void setConf(Configuration conf) {
+        this.conf = conf;
+        configure();
+
+    }
+
+    private void configure() {
+        MetaData metaData = Neo4JUtils.getMetaData(conf);
+        this.max = metaData.getNumberOfNodes();
+    }
+
+    @Override
+    public Configuration getConf() {
+        return conf;
+    }
+
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertyOutputReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertyOutputReducer.java
new file mode 100644
index 0000000..ad24285
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertyOutputReducer.java
@@ -0,0 +1,155 @@
+package nl.waredingen.graphs.neo.mapreduce.properties;
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import nl.waredingen.graphs.neo.mapreduce.input.MetaData;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.ByteMarkerIdPropIdWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.FullNodePropertiesWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.NodePropertyOutputCountersAndValueWritable;
+import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Reducer;
+import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
+import org.neo4j.kernel.impl.nioneo.store.PropertyBlock;
+import org.neo4j.kernel.impl.nioneo.store.PropertyRecord;
+
+public class NodePropertyOutputReducer extends Reducer<ByteMarkerIdPropIdWritable, NodePropertyOutputCountersAndValueWritable, NullWritable, BytesWritable> {
+
+    private MultipleOutputs mos;
+    private BytesWritable outputValue = new BytesWritable();
+    private MetaData metaData;
+
+    protected void setup(Context context) throws IOException, InterruptedException {
+        mos = new MultipleOutputs(context);
+        metaData = Neo4JUtils.getMetaData(context.getConfiguration());
+    }
+
+    protected void reduce(ByteMarkerIdPropIdWritable key, Iterable<NodePropertyOutputCountersAndValueWritable> values, Context context) throws IOException,
+            InterruptedException {
+        Iterator<NodePropertyOutputCountersAndValueWritable> itr = values.iterator();
+        if (!itr.hasNext()) {
+            return;
+        }
+
+        long blockCountOffset = 1;
+        long propCountOffset = 0;
+        NodePropertyOutputCountersAndValueWritable value = itr.next();
+        while (itr.hasNext() && (value.getBlockOffset() > 0 || value.getIdOffset() > 0)) {
+            blockCountOffset += value.getBlockOffset();
+            propCountOffset += value.getIdOffset();
+            value = itr.next();
+        }
+
+        long blocksProcessed = 0L;
+        long nodeId = -1L;
+        if (!value.getValue().equals(NodePropertyOutputCountersAndValueWritable.EMPTY_VAL)) {
+            if (nodeId != value.getValue().getNodeId().get()) {
+                Text nodeText = new Text();
+                nodeText.set(value.getValue().getNodeId().get() + "\t" + value.getValue().getNodeIdentifier().toString() + "\t" + propCountOffset);
+                byte[] ba = nodeText.getBytes();
+                outputValue.set(ba, 0, nodeText.getLength());
+                mos.write("nodes", NullWritable.get(), outputValue);
+                nodeId = value.getValue().getNodeId().get();
+            }
+            blocksProcessed = processValue(value, blockCountOffset, propCountOffset++);
+            blockCountOffset += blocksProcessed;
+            context.getCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "strings.blocks").increment(blocksProcessed);
+            context.getCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "node.properties").increment(1);
+        }
+        while (itr.hasNext()) {
+            value = itr.next();
+            if (!value.getValue().equals(NodePropertyOutputCountersAndValueWritable.EMPTY_VAL)) {
+                if (nodeId != value.getValue().getNodeId().get()) {
+                    Text nodeText = new Text();
+                    nodeText.set(value.getValue().getNodeId().get() + "\t" + value.getValue().getNodeIdentifier().toString() + "\t" + propCountOffset);
+                    byte[] ba = nodeText.getBytes();
+                    outputValue.set(ba, 0, nodeText.getLength());
+                    mos.write("nodes", NullWritable.get(), outputValue);
+                    nodeId = value.getValue().getNodeId().get();
+                }
+                blocksProcessed = processValue(value, blockCountOffset, propCountOffset++);
+                blockCountOffset += blocksProcessed;
+                context.getCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "strings.blocks").increment(blocksProcessed);
+                context.getCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "node.properties").increment(1);
+            }
+        }
+    }
+
+    private long processValue(NodePropertyOutputCountersAndValueWritable value, long blockCountOffset, long propOffset) throws IOException,
+            InterruptedException {
+
+        FullNodePropertiesWritable propHolder = value.getValue();
+        int propIndex = propHolder.getPropertyIndex().get();
+
+        PropertyRecord record = new PropertyRecord(propOffset);
+        record.setInUse(true);
+        record.setCreated();
+        record.setInUse(true);
+        long prev = propHolder.getPrevProp().get();
+        long next = propHolder.getNextProp().get();
+        if (prev != -1L) {
+            prev = (prev - propIndex) + propOffset;
+        }
+        if (next != -1L) {
+            next = (next - propIndex) + propOffset;
+        }
+        record.setPrevProp(prev);
+        record.setNextProp(next);
+
+        String[] properties = propHolder.getProperties().valuesToArray();
+        int[] propertyKeyIndexes = propHolder.getProperties().keysToArray();
+
+        for (int i = 0; i < properties.length; i++) {
+            Object propertyObj = getValueAsPropertyTypedClass(propertyKeyIndexes[i], properties[i]);
+            PropertyBlock propertyBlock = getPropertyBlock(propertyKeyIndexes[i], propertyObj, blockCountOffset);
+            record.addPropertyBlock(propertyBlock);
+        }
+
+        byte[] ba = Neo4JUtils.getPropertyReferenceAsByteArray(record);
+        outputValue.set(ba, 0, ba.length);
+        mos.write("props", NullWritable.get(), outputValue);
+
+        for (PropertyBlock block : record.getPropertyBlocks()) {
+
+            if (block.getValueRecords().size() > 0) {
+                ba = Neo4JUtils.getDynamicRecordsAsByteArray(block.getValueRecords(), 128);
+                outputValue.set(ba, 0, ba.length);
+                mos.write("strings", NullWritable.get(), outputValue);
+            }
+        }
+
+        return propHolder.getBlockCount().get();
+
+    }
+
+    @SuppressWarnings({ "unchecked", "rawtypes" })
+    private Object getValueAsPropertyTypedClass(int propIndex, String property) {
+        Class type = null;
+        try {
+            if (propIndex < metaData.getNodePropertySize()) {
+                type = metaData.getNodeTypes()[propIndex];
+            }
+            if (type != null) {
+                return type.getConstructor(String.class).newInstance(property);
+            } else {
+                return property;
+            }
+        } catch (Exception e) {
+            return property;
+        }
+    }
+
+    private PropertyBlock getPropertyBlock(int propertyKey, Object property, long blockOffset) {
+        PropertyBlock block = new PropertyBlock();
+        Neo4JUtils.encodeValue(block, propertyKey, property, blockOffset);
+        return block;
+    }
+
+    protected void cleanup(Context context) throws IOException, InterruptedException {
+        mos.close();
+    }
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyAsTextOutputReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyAsTextOutputReducer.java
deleted file mode 100644
index 6b584ec..0000000
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyAsTextOutputReducer.java
+++ /dev/null
@@ -1,98 +0,0 @@
-package nl.waredingen.graphs.neo.mapreduce.properties;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
-
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
-import org.neo4j.kernel.impl.nioneo.store.DynamicRecord;
-import org.neo4j.kernel.impl.nioneo.store.PropertyBlock;
-import org.neo4j.kernel.impl.nioneo.store.PropertyType;
-
-public class PropertyAsTextOutputReducer extends Reducer<ByteMarkerPropertyIdWritable, PropertyOutputIdBlockcountValueWritable, NullWritable, Text> {
-
-    private MultipleOutputs mos;
-    private Text outputValue = new Text();
-
-    protected void reduce(ByteMarkerPropertyIdWritable key, Iterable<PropertyOutputIdBlockcountValueWritable> values, Context context) throws IOException,
-            InterruptedException {
-        System.out.println("In reduce method with key: "+key.toString());
-        Iterator<PropertyOutputIdBlockcountValueWritable> itr = values.iterator();
-        if (!itr.hasNext()) {
-            return;
-        }
-
-        long offset = 1;
-        PropertyOutputIdBlockcountValueWritable value = itr.next();
-        while (itr.hasNext() && value.getCount() > 0) {
-            System.out.println("Reducer adding offset:"+value.getCount());
-            offset += value.getCount();
-            value = itr.next();
-        }
-        System.out.println("Reducer starting offset:"+offset);
-
-        long blocksProcessed = 0L;
-        if (!value.getValue().equals(PropertyOutputIdBlockcountValueWritable.EMPTY_STRING)) {
-            blocksProcessed = processValue(value, offset);
-            offset += blocksProcessed;
-            context.getCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "strings.blocks").increment(blocksProcessed);
-        }
-        while(itr.hasNext()) {
-            value = itr.next();
-            if (!value.getValue().equals(PropertyOutputIdBlockcountValueWritable.EMPTY_STRING)) {
-                blocksProcessed = processValue(value, offset);
-                if (blocksProcessed > 0) System.out.println("Incrementing offset by "+blocksProcessed);
-                offset += blocksProcessed;
-
-                context.getCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "strings.blocks").increment(blocksProcessed);
-            }
-        }
-    }
-
-    private long processValue(PropertyOutputIdBlockcountValueWritable value, long offset) throws IOException, InterruptedException {
-        String[] vals = value.getValue().toString().split("\t", 6);
-        PropertyBlock block = new PropertyBlock();
-        int propId = Integer.parseInt(vals[0]);
-
-        Neo4JUtils.encodeValue(block, propId, vals[1], offset);
-        //PropertyRecord record = new PropertyRecord(propId);
-        //record.setInUse(true);
-        //record.setPrevProp(Long.parseLong(vals[3]));
-        //record.setNextProp(Long.parseLong(vals[4]));
-        //record.addPropertyBlock(block);
-        //byte[] ba = Neo4JUtils.getPropertyReferenceAsByteArray(record);
-        outputValue.set(propId + "\t" + vals[1] + "\t" + vals[3] + "\t" + vals[4] + "\t" + block.getSingleValueLong());
-        mos.write("props", NullWritable.get(), outputValue);
-
-        if (block.getValueRecords().size() > 0) {
-
-            StringBuilder sb = new StringBuilder().append(propId).append("\t");
-            int i=0;
-            for (DynamicRecord dynamicRecord : block.getValueRecords()) {
-
-                long nextProp = dynamicRecord.getNextBlock();
-                int mostlyNrOfBytesInt = dynamicRecord.getLength();
-
-                sb.append(i).append("\t").append(mostlyNrOfBytesInt).append("\t").append(nextProp).append("\t").append(new String(dynamicRecord.getData())).append("\t");
-                i++;
-            }
-            //ba = Neo4JUtils.getDynamicRecordsAsByteArray(block.getValueRecords(), 128);
-            outputValue.set(sb.toString());
-            mos.write("strings", NullWritable.get(), outputValue);
-        }
-
-        return Long.parseLong(vals[2]);
-
-    }
-    protected void setup(Context context) throws IOException, InterruptedException {
-        mos = new MultipleOutputs(context);
-    }
-
-    protected void cleanup(Context context) throws IOException, InterruptedException {
-        mos.close();
-    }
-}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountPartitioner.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountPartitioner.java
deleted file mode 100644
index be6b528..0000000
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputIdBlockcountPartitioner.java
+++ /dev/null
@@ -1,47 +0,0 @@
-package nl.waredingen.graphs.neo.mapreduce.properties;
-
-import nl.waredingen.graphs.misc.RowNumberJob;
-import nl.waredingen.graphs.neo.mapreduce.PureMRNodesAndEdgesJob;
-
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.mapreduce.Partitioner;
-
-public class PropertyOutputIdBlockcountPartitioner extends Partitioner<ByteMarkerPropertyIdWritable, PropertyOutputIdBlockcountValueWritable> implements Configurable {
-
-    private long max = 0L;
-    private Configuration conf;
-
-    @Override
-    public int getPartition(ByteMarkerPropertyIdWritable key, PropertyOutputIdBlockcountValueWritable value, int numPartitions) {
-
-        if (key.getMarker().get() == (byte) RowNumberJob.COUNTER_MARKER) {
-            return value.getPartition();
-        } else {
-            return PropertyOutputIdBlockcountPartitioner.partitionForValue(value, numPartitions, max);
-        }
-    }
-
-    public static int partitionForValue(PropertyOutputIdBlockcountValueWritable value, int numPartitions, long maximumIds) {
-        double divider = Math.max(1, (double) maximumIds / numPartitions);
-        return (int) (value.getId().get() / divider);
-    }
-
-    @Override
-    public void setConf(Configuration conf) {
-        this.conf = conf;
-        configure();
-
-    }
-
-    private void configure() {
-        this.max = Long.parseLong(getConf().get(PureMRNodesAndEdgesJob.NUMBEROFROWS_CONFIG));
-    }
-
-    @Override
-    public Configuration getConf() {
-        return conf;
-    }
-
-}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputMapper.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputMapper.java
deleted file mode 100644
index 657f001..0000000
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputMapper.java
+++ /dev/null
@@ -1,52 +0,0 @@
-package nl.waredingen.graphs.neo.mapreduce.properties;
-
-import java.io.IOException;
-
-import nl.waredingen.graphs.misc.RowNumberJob;
-import nl.waredingen.graphs.neo.mapreduce.PureMRNodesAndEdgesJob;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-
-public class PropertyOutputMapper extends Mapper<LongWritable, Text, ByteMarkerPropertyIdWritable, PropertyOutputIdBlockcountValueWritable> {
-
-    private ByteMarkerPropertyIdWritable outputKey = new ByteMarkerPropertyIdWritable();
-    private PropertyOutputIdBlockcountValueWritable outputValue = new PropertyOutputIdBlockcountValueWritable();
-    private long[] counters;
-    private int numReduceTasks;
-    private long maxIds;
-
-    protected void setup(Context context) throws IOException, InterruptedException {
-        numReduceTasks = context.getNumReduceTasks();
-        maxIds = Long.parseLong(context.getConfiguration().get(PureMRNodesAndEdgesJob.NUMBEROFROWS_CONFIG));
-
-        counters = new long[numReduceTasks];
-        outputKey.setMarker(new ByteWritable(RowNumberJob.VALUE_MARKER));
-    }
-
-    protected void map(LongWritable key, Text value, Context context) throws IOException ,InterruptedException {
-        String[] vals = value.toString().split("\t", 7);
-
-        LongWritable id = new LongWritable(Long.parseLong(vals[1]));
-        outputKey.setId(id);
-        outputValue.setValues(id, new Text(StringUtils.join(vals, "\t", 2, vals.length)));
-        counters[PropertyOutputIdBlockcountPartitioner.partitionForValue(outputValue, numReduceTasks, maxIds)] += Long.parseLong(vals[4]);
-        context.write(outputKey, outputValue);
-    }
-
-    protected void cleanup(Context context) throws IOException, InterruptedException {
-        outputKey.setMarker(new ByteWritable(RowNumberJob.COUNTER_MARKER));
-        outputKey.setId(new LongWritable(Long.MIN_VALUE));
-        for(int c = 0; c < counters.length - 1; c++) {
-            if (counters[c] > 0) {
-                outputValue.setCounter(c+1, counters[c]);
-                context.write(outputKey, outputValue);
-            }
-            counters[c+1] += counters[c];
-        }
-    }
-}
-
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputReducer.java
deleted file mode 100644
index 28eb1af..0000000
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyOutputReducer.java
+++ /dev/null
@@ -1,82 +0,0 @@
-package nl.waredingen.graphs.neo.mapreduce.properties;
-
-import java.io.IOException;
-import java.util.Iterator;
-
-import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
-
-import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;
-import org.neo4j.kernel.impl.nioneo.store.PropertyBlock;
-import org.neo4j.kernel.impl.nioneo.store.PropertyRecord;
-
-public class PropertyOutputReducer extends Reducer<ByteMarkerPropertyIdWritable, PropertyOutputIdBlockcountValueWritable, NullWritable, BytesWritable> {
-
-    private MultipleOutputs mos;
-    private BytesWritable outputValue = new BytesWritable();
-
-    protected void reduce(ByteMarkerPropertyIdWritable key, Iterable<PropertyOutputIdBlockcountValueWritable> values, Context context) throws IOException,
-            InterruptedException {
-        Iterator<PropertyOutputIdBlockcountValueWritable> itr = values.iterator();
-        if (!itr.hasNext()) {
-            return;
-        }
-
-        long offset = 1;
-        PropertyOutputIdBlockcountValueWritable value = itr.next();
-        while (itr.hasNext() && value.getCount() > 0) {
-            offset += value.getCount();
-            value = itr.next();
-        }
-
-        long blocksProcessed = 0L;
-        if (!value.getValue().equals(PropertyOutputIdBlockcountValueWritable.EMPTY_STRING)) {
-            blocksProcessed = processValue(value, offset);
-            offset += blocksProcessed;
-            context.getCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "strings.blocks").increment(blocksProcessed);
-        }
-        while(itr.hasNext()) {
-            value = itr.next();
-            if (!value.getValue().equals(PropertyOutputIdBlockcountValueWritable.EMPTY_STRING)) {
-                blocksProcessed = processValue(value, offset);
-                offset += blocksProcessed;
-
-                context.getCounter("org.apache.hadoop.mapreduce.lib.output.MultipleOutputs", "strings.blocks").increment(blocksProcessed);
-            }
-        }
-    }
-
-    private long processValue(PropertyOutputIdBlockcountValueWritable value, long offset) throws IOException, InterruptedException {
-        String[] vals = value.getValue().toString().split("\t", 6);
-        PropertyBlock block = new PropertyBlock();
-        int propId = Integer.parseInt(vals[0]);
-
-        Neo4JUtils.encodeValue(block, propId, vals[1], offset);
-        PropertyRecord record = new PropertyRecord(propId);
-        record.setInUse(true);
-        record.setPrevProp(Long.parseLong(vals[3]));
-        record.setNextProp(Long.parseLong(vals[4]));
-        record.addPropertyBlock(block);
-        byte[] ba = Neo4JUtils.getPropertyReferenceAsByteArray(record);
-        outputValue.set(ba, 0, ba.length);
-        mos.write("props", NullWritable.get(), outputValue);
-
-        if (block.getValueRecords().size() > 0) {
-            ba = Neo4JUtils.getDynamicRecordsAsByteArray(block.getValueRecords(), 128);
-            outputValue.set(ba, 0, ba.length);
-            mos.write("strings", NullWritable.get(), outputValue);
-        }
-
-        return Long.parseLong(vals[2]);
-
-    }
-    protected void setup(Context context) throws IOException, InterruptedException {
-        mos = new MultipleOutputs(context);
-    }
-
-    protected void cleanup(Context context) throws IOException, InterruptedException {
-        mos.close();
-    }
-}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/SurroundingPropertyContext.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/SurroundingPropertyContext.java
new file mode 100644
index 0000000..34bcfa7
--- /dev/null
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/SurroundingPropertyContext.java
@@ -0,0 +1,15 @@
+package nl.waredingen.graphs.neo.mapreduce.properties;
+
+public class SurroundingPropertyContext {
+
+    public long nodeId = -1L, count = -1L, prev = -1L, next = -1L;
+    public int index = -1;
+    public String nodeIdentifier = null;
+    public String toNodeIdentifier = null;
+    public String[] properties = null;
+    public int[] propertyIndexes = null;
+
+    public SurroundingPropertyContext() {
+    }
+
+}
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/neo4j/Neo4JUtils.java b/job/src/main/java/nl/waredingen/graphs/neo/neo4j/Neo4JUtils.java
index 87d735b..b74418f 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/neo4j/Neo4JUtils.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/neo4j/Neo4JUtils.java
@@ -4,14 +4,17 @@
 import java.io.IOException;
 import java.lang.reflect.Array;
+import java.lang.reflect.Constructor;
 import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
+
+import nl.waredingen.graphs.neo.mapreduce.input.AbstractMetaData;
+import nl.waredingen.graphs.neo.mapreduce.input.HardCodedMetaDataImpl;
+import nl.waredingen.graphs.neo.mapreduce.input.MetaData;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FSDataOutputStream;
@@ -528,9 +531,10 @@ public static byte[] getPropertyReferenceAsByteArray(PropertyRecord record) {
         return buffer.array();
     }
 
-    public static void writePropertyKeyStore(Map>> namesMap, String output,
-            Configuration conf) throws IOException {
+    public static void writePropertyKeyStore(String output, Configuration conf) throws IOException {
 
+        MetaData metaData = getMetaData(conf);
+
         String indexOutput = output + "/neostore.propertystore.db.index";
         String keysOutput = output + "/neostore.propertystore.db.index.keys";
 
@@ -546,22 +550,20 @@ public static void writePropertyKeyStore(Map>> n
         buffer.putInt( blockSize );
         kdos.write(buffer.array());
 
-        for (Integer key : namesMap.keySet()) {
-            ByteBuffer indexBuffer = ByteBuffer.allocate(9);
-            indexBuffer.put(Record.IN_USE.byteValue());
-            indexBuffer.putInt(0);
-            indexBuffer.putInt(nextKeyBlockId);
-
-            idos.write(indexBuffer.array());
-
-            byte[] name = getStringRecordAsByteArray(nextKeyBlockId, namesMap.get(key).getKey().getBytes(), blockSize );
-            nextKeyBlockId += (name.length / blockSize);
-
-            kdos.write(name, 0, name.length);
-
-            lastUsedIndexId = key.intValue() +1;
+        for ( String propertyName : metaData.getNodeTypeNames()) {
+
+            nextKeyBlockId += writePropertyKey(nextKeyBlockId, blockSize, idos, kdos, propertyName);
+            lastUsedIndexId++;
         }
+
+        for ( String propertyName : metaData.getEdgeTypeNames()) {
+            // skip from and to
and to + if (!("from".equals(propertyName) || "to".equals(propertyName))) { + nextKeyBlockId += writePropertyKey(nextKeyBlockId, blockSize, idos, kdos, propertyName); + lastUsedIndexId++; + } + } + String type = PropertyIndexStore.TYPE_DESCRIPTOR + " " + CommonAbstractStore.ALL_STORES_VERSION; byte[] encodedType = UTF8.encode(type); @@ -586,6 +588,21 @@ public static void writePropertyKeyStore(Map>> n } + private static int writePropertyKey(int nextKeyBlockId, int blockSize, FSDataOutputStream idos, FSDataOutputStream kdos, String propertyName) + throws IOException { + ByteBuffer indexBuffer = ByteBuffer.allocate(9); + indexBuffer.put(Record.IN_USE.byteValue()); + indexBuffer.putInt(0); + indexBuffer.putInt(nextKeyBlockId); + + idos.write(indexBuffer.array()); + + byte[] name = getStringRecordAsByteArray(nextKeyBlockId, propertyName.getBytes(), blockSize ); + + kdos.write(name, 0, name.length); + return (name.length / blockSize); + } + public static void writePropertyIds(long lastTypeId, String output, Configuration conf) throws IOException { String idsOutput = output + ".id"; FileSystem fs = FileSystem.get(conf); @@ -601,7 +618,6 @@ public static void writePropertyIds(long lastTypeId, String output, Configuratio } public static void writePropertyStoreFooter(String propertiesOutput, Configuration conf) throws IOException { - // TODO Auto-generated method stub FileSystem fs = FileSystem.get(conf); FSDataOutputStream fdos = fs.create(new Path(propertiesOutput + "/neostore.propertystore.db.footer")); @@ -660,9 +676,28 @@ public static void writeEmptArrayStore(String propertiesOutput, Configuration co ados.close(); - writePropertyIds(42L, propertiesOutput + "/neostore.propertystore.db.arrays", conf); + writePropertyIds(0L, propertiesOutput + "/neostore.propertystore.db.arrays", conf); } + + public static MetaData getMetaData(Configuration conf) { + MetaData md = new HardCodedMetaDataImpl(conf); + @SuppressWarnings("unchecked") + Class mdClass = (Class) conf.getClass(AbstractMetaData.METADATA_CLASS, HardCodedMetaDataImpl.class); + try { + try { + Constructor constructor = mdClass.getConstructor(Configuration.class); + md = constructor.newInstance(conf); + } catch (NoSuchMethodException nsme) { + md = mdClass.newInstance(); + } + } catch (Exception e) { + // ignore any exceptions. 
MetaData's default, created above, serves as the fallback. + e.printStackTrace(); + } + return md; + } } diff --git a/job/src/main/java/nl/waredingen/graphs/neo/neo4j/ShortArray.java b/job/src/main/java/nl/waredingen/graphs/neo/neo4j/ShortArray.java index 6d0ff14..37658d1 100644 --- a/job/src/main/java/nl/waredingen/graphs/neo/neo4j/ShortArray.java +++ b/job/src/main/java/nl/waredingen/graphs/neo/neo4j/ShortArray.java @@ -614,7 +614,8 @@ private static boolean isPrimitive( Object array ) { return array.getClass().getComponentType().isPrimitive(); } - private final static Map all = new IdentityHashMap( values().length * 2 ); + @SuppressWarnings("rawtypes") + private final static Map all = new IdentityHashMap( values().length * 2 ); static { diff --git a/job/src/test/java/nl/waredingen/graphs/neo/RownumPartitionerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/RownumPartitionerTest.java index 3c7cf59..2a3654b 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/RownumPartitionerTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/RownumPartitionerTest.java @@ -2,9 +2,8 @@ import static org.hamcrest.CoreMatchers.is; import static org.junit.Assert.assertThat; - -import nl.waredingen.graphs.neo.mapreduce.PureMRNodesAndEdgesJob; -import nl.waredingen.graphs.neo.mapreduce.RownumPartitioner; +import nl.waredingen.graphs.neo.mapreduce.input.AbstractMetaData; +import nl.waredingen.graphs.neo.mapreduce.nodes.NodeOutputRownumPartitioner; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.LongWritable; @@ -14,11 +13,11 @@ public class RownumPartitionerTest { private Configuration conf = new Configuration(); - private RownumPartitioner partitioner = new RownumPartitioner(); + private NodeOutputRownumPartitioner partitioner = new NodeOutputRownumPartitioner(); @Before public void setup() { - conf.set(PureMRNodesAndEdgesJob.NUMBEROFROWS_CONFIG, "100"); + conf.set(AbstractMetaData.METADATA_NUMBER_OF_NODES, "100"); partitioner.setConf(conf); } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/DualInputMapReduceDriver.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/DualInputMapReduceDriver.java index 35af351..511d184 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/DualInputMapReduceDriver.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/DualInputMapReduceDriver.java @@ -45,6 +45,7 @@ * @author Jacob Metcalf */ +@SuppressWarnings("rawtypes") public class DualInputMapReduceDriver extends MapReduceDriverBase { diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputMapperTest.java index ac12433..8e61937 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputMapperTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputMapperTest.java @@ -6,47 +6,51 @@ import java.util.List; +import nl.waredingen.graphs.neo.mapreduce.input.writables.DoubleSurroundingEdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.FullEdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.SurroundingEdgeWritable; + import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mrunit.mapreduce.MapDriver; import org.apache.hadoop.mrunit.types.Pair; import org.junit.Before; import org.junit.Test; public class EdgeOutputMapperTest { - private 
MapDriver driver; - private List> output; + private MapDriver driver; + private List> output; @Before public void setUp() throws Exception { - driver = new MapDriver(new EdgeOutputMapper()); + driver = new MapDriver(new EdgeOutputMapper()); } @Test public void shouldOutputAsEdgeWhenNodeIdMatchesFromNode() throws Exception { - output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("0 0 0 1 2 -1 1 0 0 1 1 -1")).run(); + output = driver.withInputKey(NullWritable.get()).withInputValue(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(0,0,0,1,100,2,-1),new SurroundingEdgeWritable(1,0,0,1,100,1,-1))).run(); assertThat(output.size(), is(1)); assertThat(output.get(0).getFirst(), equalTo(new LongWritable(0))); - assertThat(output.get(0).getSecond(), equalTo(new Text("0 1 2 -1 1 -1"))); + assertThat(output.get(0).getSecond(), equalTo(new FullEdgeWritable(0,1,100,2,-1,1,-1))); } @Test public void shouldOutputAsEdgeWhereEdgeIdIsTheKeyAndNodeIdMatchesFromNode() throws Exception { - output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("2 3 2 0 -1 2 0 3 2 0 -1 2")).run(); + output = driver.withInputKey(NullWritable.get()).withInputValue(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(2,3,2,0,130,-1,2),new SurroundingEdgeWritable(0,3,2,0,130,-1,2))).run(); assertThat(output.size(), is(1)); assertThat(output.get(0).getFirst(), equalTo(new LongWritable(3))); - assertThat(output.get(0).getSecond(), equalTo(new Text("2 0 -1 2 -1 2"))); + assertThat(output.get(0).getSecond(), equalTo(new FullEdgeWritable(2,0,130,-1,2,-1,2))); } @Test public void shouldNotOutputAsEdgeWhenNodeIdDoesNotMatchFromNode() throws Exception { - output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("0 3 2 0 -1 2 2 3 2 0 -1 2")).run(); + output = driver.withInputKey(NullWritable.get()).withInputValue(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(0,3,2,0,130,-1,2),new SurroundingEdgeWritable(2,3,2,0,130,-1,2))).run(); assertThat(output.size(), is(0)); } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputReducerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputReducerTest.java index 7362dc1..135167b 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputReducerTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/EdgeOutputReducerTest.java @@ -6,34 +6,34 @@ import java.util.List; +import nl.waredingen.graphs.neo.mapreduce.input.writables.FullEdgeWritable; import nl.waredingen.graphs.neo.neo4j.Neo4JUtils; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; import org.apache.hadoop.mrunit.mapreduce.ReduceDriver; import org.apache.hadoop.mrunit.types.Pair; import org.junit.Before; import org.junit.Test; public class EdgeOutputReducerTest { - private ReduceDriver driver; + private ReduceDriver driver; private List> output; @Before public void setUp() throws Exception { - driver = new ReduceDriver(new EdgeOutputReducer()); + driver = new ReduceDriver(new EdgeOutputReducer()); } @Test public void shouldOutputAsSomeEdge() throws Exception { - output = driver.withInputKey(new LongWritable(1)).withInputValue(new Text("2 0 -1 2 -1 2")).run(); + output = driver.withInputKey(new LongWritable(1)).withInputValue(new FullEdgeWritable(2,0,110,-1,2,-1,2)).run(); assertThat(output.size(), is(1)); assertThat(output.get(0).getFirst(), 
equalTo(NullWritable.get())); - assertThat(output.get(0).getSecond(), equalTo(new BytesWritable(Neo4JUtils.getEdgeAsByteArray(1L, 2L, 0L, 0, -1L, 2L, -1L, 2L, -1L)))); + assertThat(output.get(0).getSecond(), equalTo(new BytesWritable(Neo4JUtils.getEdgeAsByteArray(1L, 2L, 0L, 0, -1L, 2L, -1L, 2L, 110L)))); } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapReduceTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapReduceTest.java new file mode 100644 index 0000000..d98215f --- /dev/null +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapReduceTest.java @@ -0,0 +1,59 @@ +package nl.waredingen.graphs.neo.mapreduce.edges.surround; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.junit.Assert.assertThat; + +import java.util.List; + +import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongKeyComparator; +import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongKeyGroupingComparator; +import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.SurroundingEdgeWritable; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; +import org.apache.hadoop.mrunit.types.Pair; +import org.junit.Before; +import org.junit.Test; + +public class EdgeSurroundMapReduceTest { + + private MapReduceDriver driver; + private List> output; + + @SuppressWarnings("unchecked") + @Before + public void setUp() throws Exception { + driver = new MapReduceDriver(new EdgeSurroundMapper(),new EdgeSurroundReducer()); + driver.setKeyGroupingComparator(new AscLongDescLongKeyGroupingComparator()); + driver.setKeyOrderComparator(new AscLongDescLongKeyComparator()); + } + + @Test + public void shouldjoinFromNodeAndEdge() throws Exception { + driver.withInput(NullWritable.get(), new NodeEdgeWritable(0,1,0,0,1,100)); + driver.addInput(NullWritable.get(), new NodeEdgeWritable(0,1,2,0,2,120)); + driver.addInput(NullWritable.get(), new NodeEdgeWritable(1,3,0,0,1,100)); + driver.addInput(NullWritable.get(), new NodeEdgeWritable(1,3,1,1,2,110)); + driver.addInput(NullWritable.get(), new NodeEdgeWritable(2,5,1,1,2,110)); + driver.addInput(NullWritable.get(), new NodeEdgeWritable(2,5,2,0,2,120)); + output = driver.run(); + + assertThat(output.size(), is(6)); + assertThat(output.get(0).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(0).getSecond(), equalTo(new SurroundingEdgeWritable(0,2,0,2,120,0,-1))); + assertThat(output.get(1).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(1).getSecond(), equalTo(new SurroundingEdgeWritable(0,0,0,1,100,-1,2))); + assertThat(output.get(2).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(2).getSecond(), equalTo(new SurroundingEdgeWritable(1,1,1,2,110,0,-1))); + assertThat(output.get(3).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(3).getSecond(), equalTo(new SurroundingEdgeWritable(1,0,0,1,100,-1,1))); + assertThat(output.get(4).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(4).getSecond(), equalTo(new SurroundingEdgeWritable(2,2,0,2,120,1,-1))); + assertThat(output.get(5).getFirst(), equalTo(NullWritable.get())); + 
assertThat(output.get(5).getSecond(), equalTo(new SurroundingEdgeWritable(2,1,1,2,110,-1,2))); + } +} diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapperTest.java index 08823e6..91ecf41 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapperTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundMapperTest.java @@ -6,43 +6,71 @@ import java.util.List; -import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongWritable; +import nl.waredingen.graphs.neo.mapreduce.input.MetaData; +import nl.waredingen.graphs.neo.mapreduce.input.MetaDataFromConfigImpl; +import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeWritable; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mrunit.mapreduce.MapDriver; import org.apache.hadoop.mrunit.types.Pair; import org.junit.Before; import org.junit.Test; public class EdgeSurroundMapperTest { - private MapDriver driver; - private List> output; + private MapDriver driver; + private List> output; @Before public void setUp() throws Exception { - driver = new MapDriver(new EdgeSurroundMapper()); + driver = new MapDriver(new EdgeSurroundMapper()); } @Test public void shouldOutputAsNode() throws Exception { - output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("0 A Aname 3 2 0")).run(); + output = driver.withInputKey(NullWritable.get()).withInputValue(new NodeEdgeWritable(0,1,3,2,0,5)).run(); assertThat(output.size(), is(1)); assertThat(output.get(0).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(0), new LongWritable(3)))); - assertThat(output.get(0).getSecond(), equalTo(new Text("2 0"))); + assertThat(output.get(0).getSecond(), equalTo(new EdgeWritable(3,2,0,5))); } @Test public void shouldOutputAsNodeWhereNodeIdIsTheKey() throws Exception { - output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("11 A Aname 3 2 0")).run(); + output = driver.withInputKey(NullWritable.get()).withInputValue(new NodeEdgeWritable(11,21,3,2,0,5)).run(); assertThat(output.size(), is(1)); assertThat(output.get(0).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(11), new LongWritable(3)))); - assertThat(output.get(0).getSecond(), equalTo(new Text("2 0"))); + assertThat(output.get(0).getSecond(), equalTo(new EdgeWritable(3,2,0,5))); + + } + + @Test + public void shouldOutputAsNodeWhereNodeIdIsTheKeyAndSomeEdgePropertiesArePresent() throws Exception { + Configuration config = new Configuration(); + config.setClass("neo.input.metadata.class", MetaDataFromConfigImpl.class, MetaData.class); + config.setStrings("neo.input.metadata.node.property.names", "identifier", "name"); + config.set("neo.input.metadata.node.id.name", "identifier"); + config.setClass("neo.input.metadata.node.property.type.identifier", Long.class, Object.class); + config.setClass("neo.input.metadata.node.property.type.name", String.class, Object.class); + config.setStrings("neo.input.metadata.edge.property.names", "from", "to", "prop1", "prop2"); + config.setClass("neo.input.metadata.edge.property.type.from", Long.class, 
Object.class); + config.setClass("neo.input.metadata.edge.property.type.to", Long.class, Object.class); + config.setClass("neo.input.metadata.edge.property.type.prop1", String.class, Object.class); + config.setClass("neo.input.metadata.edge.property.type.prop2", String.class, Object.class); + driver.setConfiguration(config); + output = driver.withInputKey(NullWritable.get()).withInputValue(new NodeEdgeWritable(11,21,3,2,0,5)).run(); + + assertThat(output.size(), is(1)); + + assertThat(output.get(0).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(11), new LongWritable(3)))); + assertThat(output.get(0).getSecond(), equalTo(new EdgeWritable(3,2,0,5))); } } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducerTest.java index b8db9fc..94f8637 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducerTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/EdgeSurroundReducerTest.java @@ -4,53 +4,92 @@ import static org.hamcrest.CoreMatchers.is; import static org.junit.Assert.assertThat; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; -import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongWritable; -import nl.waredingen.graphs.neo.mapreduce.join.JoinFromEdgesMapper; +import nl.waredingen.graphs.neo.mapreduce.input.MetaData; +import nl.waredingen.graphs.neo.mapreduce.input.MetaDataFromConfigImpl; +import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.SurroundingEdgeWritable; -import org.apache.hadoop.io.BytesWritable; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mrunit.mapreduce.MapDriver; import org.apache.hadoop.mrunit.mapreduce.ReduceDriver; import org.apache.hadoop.mrunit.types.Pair; import org.junit.Before; import org.junit.Test; -import org.neo4j.kernel.impl.nioneo.store.NodeRecord; -import org.neo4j.kernel.impl.nioneo.store.Record; public class EdgeSurroundReducerTest { - private ReduceDriver driver; - private List> output; + private ReduceDriver driver; + private List> output; @Before public void setUp() throws Exception { - driver = new ReduceDriver(new EdgeSurroundReducer()); + driver = new ReduceDriver(new EdgeSurroundReducer()); } @Test public void shouldOutputSurroundingEdges() throws Exception { - List values = new ArrayList(); - values.add(new Text("2 0")); - values.add(new Text("0 2")); - values.add(new Text("0 1")); + List values = new ArrayList(); + values.add(new EdgeWritable(3,2,1,130)); + values.add(new EdgeWritable(1,1,2,110)); + values.add(new EdgeWritable(2,1,3,120)); + values.add(new EdgeWritable(4,3,1,140)); //Unfortunately, multiple keys cannot be fed to this reducer driver, as they would be in a real run. //This results in an incorrect relnum in this test output = driver.withInputKey(new AscLongDescLongWritable(new LongWritable(1), new LongWritable(3))).withInputValues(values).run(); - assertThat(output.size(), is(3)); + assertThat(output.size(), is(4)); assertThat(output.get(0).getFirst(), equalTo(NullWritable.get())); - assertThat(output.get(0).getSecond(), equalTo(new Text("1 3 2 0 3 -1"))); + assertThat(output.get(0).getSecond(), equalTo(new SurroundingEdgeWritable(1,3,2,1,130,1,-1))); assertThat(output.get(1).getFirst(), equalTo(NullWritable.get())); - assertThat(output.get(1).getSecond(), equalTo(new Text("1 3 0 2 3 3"))); + assertThat(output.get(1).getSecond(), equalTo(new SurroundingEdgeWritable(1,1,1,2,110,2,3))); assertThat(output.get(2).getFirst(), equalTo(NullWritable.get())); - assertThat(output.get(2).getSecond(), equalTo(new Text("1 3 0 1 -1 3"))); + assertThat(output.get(2).getSecond(), equalTo(new SurroundingEdgeWritable(1,2,1,3,120,4,1))); + assertThat(output.get(3).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(3).getSecond(), equalTo(new SurroundingEdgeWritable(1,4,3,1,140,-1,2))); } + + @Test + public void shouldOutputSurroundingEdgesWithProperties() throws Exception { + List values = new ArrayList(); + values.add(new EdgeWritable(3,2,1,130)); + values.add(new EdgeWritable(1,1,2,110)); + values.add(new EdgeWritable(2,1,3,120)); + values.add(new EdgeWritable(4,3,1,140)); + + Configuration config = new Configuration(); + config.setClass("neo.input.metadata.class", MetaDataFromConfigImpl.class, MetaData.class); + config.setStrings("neo.input.metadata.node.property.names", "identifier", "name"); + config.set("neo.input.metadata.node.id.name", "identifier"); + config.setClass("neo.input.metadata.node.property.type.identifier", Long.class, Object.class); + config.setClass("neo.input.metadata.node.property.type.name", String.class, Object.class); + config.setStrings("neo.input.metadata.edge.property.names", "from", "to", "prop1", "prop2"); + config.setClass("neo.input.metadata.edge.property.type.from", Long.class, Object.class); + config.setClass("neo.input.metadata.edge.property.type.to", Long.class, Object.class); + config.setClass("neo.input.metadata.edge.property.type.prop1", String.class, Object.class); + config.setClass("neo.input.metadata.edge.property.type.prop2", String.class, Object.class); + driver.setConfiguration(config); + + //Unfortunately, multiple keys cannot be fed to this reducer driver, as they would be in a real run. 
+ //This results in an incorrect relnum in this test + output = driver.withInputKey(new AscLongDescLongWritable(new LongWritable(1), new LongWritable(3))).withInputValues(values).run(); + + assertThat(output.size(), is(4)); + + assertThat(output.get(0).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(0).getSecond(), equalTo(new SurroundingEdgeWritable(1,3,2,1,130,1,-1))); + assertThat(output.get(1).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(1).getSecond(), equalTo(new SurroundingEdgeWritable(1,1,1,2,110,2,3))); + assertThat(output.get(2).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(2).getSecond(), equalTo(new SurroundingEdgeWritable(1,2,1,3,120,4,1))); + assertThat(output.get(3).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(3).getSecond(), equalTo(new SurroundingEdgeWritable(1,4,3,1,140,-1,2))); + + } } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/EdgeSurroundMapReduceAndJoinSurroundingEdgesMapReduceTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/EdgeSurroundMapReduceAndJoinSurroundingEdgesMapReduceTest.java new file mode 100644 index 0000000..1e19fc0 --- /dev/null +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/EdgeSurroundMapReduceAndJoinSurroundingEdgesMapReduceTest.java @@ -0,0 +1,88 @@ +package nl.waredingen.graphs.neo.mapreduce.edges.surround.join; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.junit.Assert.assertThat; + +import java.util.List; + +import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongKeyComparator; +import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongKeyGroupingComparator; +import nl.waredingen.graphs.neo.mapreduce.edges.surround.EdgeSurroundMapper; +import nl.waredingen.graphs.neo.mapreduce.edges.surround.EdgeSurroundReducer; +import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.DoubleSurroundingEdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.SurroundingEdgeWritable; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; +import org.apache.hadoop.mrunit.types.Pair; +import org.junit.Before; +import org.junit.Test; + +public class EdgeSurroundMapReduceAndJoinSurroundingEdgesMapReduceTest { + + private MapReduceDriver driver2; + private List> output2; + private MapReduceDriver driver; + private List> output; + + @SuppressWarnings("unchecked") + @Before + public void setUp() throws Exception { + driver = new MapReduceDriver(new EdgeSurroundMapper(),new EdgeSurroundReducer()); + driver.setKeyGroupingComparator(new AscLongDescLongKeyGroupingComparator()); + driver.setKeyOrderComparator(new AscLongDescLongKeyComparator()); + + driver2 = new MapReduceDriver(new JoinSurroundingEdgesMapper(),new JoinSurroundingEdgesReducer()); + driver2.setKeyGroupingComparator(new EdgeWritableKeyGroupingComparator()); + driver2.setKeyOrderComparator(new EdgeWritableKeyComparator()); + } + + @Test + public void shouldjoinFromNodeAndEdge() throws Exception { + driver.withInput(NullWritable.get(), new NodeEdgeWritable(0,1,0,0,1,100)); + driver.addInput(NullWritable.get(), new NodeEdgeWritable(0,1,2,0,2,120)); + 
driver.addInput(NullWritable.get(), new NodeEdgeWritable(1,3,0,0,1,100)); + driver.addInput(NullWritable.get(), new NodeEdgeWritable(1,3,1,1,2,110)); + driver.addInput(NullWritable.get(), new NodeEdgeWritable(2,5,1,1,2,110)); + driver.addInput(NullWritable.get(), new NodeEdgeWritable(2,5,2,0,2,120)); + output = driver.run(); + + assertThat(output.size(), is(6)); + assertThat(output.get(0).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(0).getSecond(), equalTo(new SurroundingEdgeWritable(0,2,0,2,120,0,-1))); + assertThat(output.get(1).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(1).getSecond(), equalTo(new SurroundingEdgeWritable(0,0,0,1,100,-1,2))); + assertThat(output.get(2).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(2).getSecond(), equalTo(new SurroundingEdgeWritable(1,1,1,2,110,0,-1))); + assertThat(output.get(3).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(3).getSecond(), equalTo(new SurroundingEdgeWritable(1,0,0,1,100,-1,1))); + assertThat(output.get(4).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(4).getSecond(), equalTo(new SurroundingEdgeWritable(2,2,0,2,120,1,-1))); + assertThat(output.get(5).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(5).getSecond(), equalTo(new SurroundingEdgeWritable(2,1,1,2,110,-1,2))); + + driver2.withInput(output.get(0)).withInput(output.get(1)).withInput(output.get(2)).withInput(output.get(3)).withInput(output.get(4)).withInput(output.get(5)); + output2 = driver2.run(); + + assertThat(output2.size(), is(6)); + assertThat(output2.get(0).getFirst(), equalTo(NullWritable.get())); + assertThat(output2.get(0).getSecond(), equalTo(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(0,0,0,1,100,-1,2), new SurroundingEdgeWritable(1,0,0,1,100,-1,1)))); + assertThat(output2.get(1).getFirst(), equalTo(NullWritable.get())); + assertThat(output2.get(1).getSecond(), equalTo(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(1,0,0,1,100,-1,1), new SurroundingEdgeWritable(0,0,0,1,100,-1,2)))); + assertThat(output2.get(2).getFirst(), equalTo(NullWritable.get())); + assertThat(output2.get(2).getSecond(), equalTo(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(1,1,1,2,110,0,-1), new SurroundingEdgeWritable(2,1,1,2,110,-1,2)))); + assertThat(output2.get(3).getFirst(), equalTo(NullWritable.get())); + assertThat(output2.get(3).getSecond(), equalTo(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(2,1,1,2,110,-1,2), new SurroundingEdgeWritable(1,1,1,2,110,0,-1)))); + assertThat(output2.get(4).getFirst(), equalTo(NullWritable.get())); + assertThat(output2.get(4).getSecond(), equalTo(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(0,2,0,2,120,0,-1), new SurroundingEdgeWritable(2,2,0,2,120,1,-1)))); + assertThat(output2.get(5).getFirst(), equalTo(NullWritable.get())); + assertThat(output2.get(5).getSecond(), equalTo(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(2,2,0,2,120,1,-1), new SurroundingEdgeWritable(0,2,0,2,120,0,-1)))); + } +} + + + diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/EdgeWritableComparatorTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/EdgeWritableComparatorTest.java new file mode 100644 index 0000000..0274dce --- /dev/null +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/EdgeWritableComparatorTest.java @@ -0,0 +1,93 @@ +package 
nl.waredingen.graphs.neo.mapreduce.edges.surround.join; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.SurroundingEdgeWritable; + +import org.apache.hadoop.mapreduce.Partitioner; +import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner; +import org.junit.Test; + +public class EdgeWritableComparatorTest { + + @Test + public void testEdgeWritablesAreEqual() { + EdgeWritable e1 = new EdgeWritable(3, 2, 0, 130); + EdgeWritable e2 = new EdgeWritable(3, 2, 0, 130); + + assertTrue(e1.equals(e2)); + assertEquals(0, e1.compareTo(e2)); + assertEquals(0, e2.compareTo(e1)); + assertEquals(0, e1.compareTo(e1)); + assertEquals(0, e2.compareTo(e2)); + } + + @Test + public void testEdgeWritablesAreEqualInTheKeyGroupingComparator() { + EdgeWritable e1 = new EdgeWritable(3, 2, 0, 130); + EdgeWritable e2 = new EdgeWritable(3, 2, 0, 130); + + EdgeWritableKeyGroupingComparator comp = new EdgeWritableKeyGroupingComparator(); + assertEquals(0, comp.compare(e1, e2)); + assertEquals(0, comp.compare(e2, e1)); + } + + @Test + public void testEdgeWritablesAreNotEqualInTheKeyGroupingComparator() { + EdgeWritable e1 = new EdgeWritable(3, 2, 0, 130); + EdgeWritable e2 = new EdgeWritable(4, 2, 1, 140); + + EdgeWritableKeyGroupingComparator comp = new EdgeWritableKeyGroupingComparator(); + assertEquals(-1, comp.compare(e1, e2)); + assertEquals(1, comp.compare(e2, e1)); + } + + @Test + public void testEdgeWritablesAreEqualInTheKeySortingComparator() { + EdgeWritable e1 = new EdgeWritable(3, 2, 0, 130); + EdgeWritable e2 = new EdgeWritable(3, 2, 0, 130); + + EdgeWritableKeyComparator comp = new EdgeWritableKeyComparator(); + assertEquals(0, comp.compare(e1, e2)); + assertEquals(0, comp.compare(e2, e1)); + } + + @Test + public void testEdgeWritablesAreNotEqualInTheKeySortingComparator() { + EdgeWritable e1 = new EdgeWritable(3, 2, 0, 130); + EdgeWritable e2 = new EdgeWritable(4, 2, 1, 140); + + EdgeWritableKeyComparator comp = new EdgeWritableKeyComparator(); + assertEquals(-1, comp.compare(e1, e2)); + assertEquals(1, comp.compare(e2, e1)); + } + + @Test + public void testEdgeWritablesAreNonEqual() { + EdgeWritable e1 = new EdgeWritable(3, 2, 0, 130); + EdgeWritable e2 = new EdgeWritable(4, 0, 2, 140); + + assertFalse(e1.equals(e2)); + assertEquals(-1, e1.compareTo(e2)); + assertEquals(1, e2.compareTo(e1)); + assertEquals(0, e1.compareTo(e1)); + assertEquals(0, e2.compareTo(e2)); + } + + @Test + @SuppressWarnings({ "rawtypes", "unchecked" }) + public void testEdgeWritablesAreEquallyPartitioned() { + EdgeWritable e1 = new EdgeWritable(3, 2, 0, 130); + EdgeWritable e2 = new EdgeWritable(3, 2, 0, 130); + + Partitioner p = new HashPartitioner(); + assertEquals(p.getPartition(e1, new SurroundingEdgeWritable(), 50), p.getPartition(e2, new SurroundingEdgeWritable(), 50)); + assertEquals(p.getPartition(e1, new SurroundingEdgeWritable(), 100), p.getPartition(e2, new SurroundingEdgeWritable(), 100)); + assertEquals(p.getPartition(e1, new SurroundingEdgeWritable(), 5), p.getPartition(e2, new SurroundingEdgeWritable(), 5)); + assertEquals(p.getPartition(e1, new SurroundingEdgeWritable(), 1), p.getPartition(e2, new SurroundingEdgeWritable(), 1)); + assertEquals(p.getPartition(e1, new SurroundingEdgeWritable(0,3,2,0,130,-1,2), 50), p.getPartition(e2, new SurroundingEdgeWritable(2,3,2,0,130,-1,2), 50)); + } + +} 
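Note on the comparator contract exercised above: the sort comparator must order EdgeWritable keys deterministically, the grouping comparator must treat the two per-node copies of the same edge as one group, and hashing must send equal keys to the same partition; only then do both SurroundingEdgeWritable halves of an edge meet in a single reduce call, which is what lets JoinSurroundingEdgesReducer emit the two DoubleSurroundingEdgeWritable orderings. A minimal object-level sketch of such a grouping comparator follows; the accessor names getId(), getFrom() and getTo() are assumptions for illustration, since EdgeWritable's API is not shown in this patch, and whether the fourth (property) column takes part in grouping is likewise not pinned down by the tests:

    import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable;
    import org.apache.hadoop.io.WritableComparable;
    import org.apache.hadoop.io.WritableComparator;

    // Sketch only: groups keys by edge identity so both per-node copies of an
    // edge arrive in the same reduce call. Accessor names are hypothetical.
    public class EdgeIdentityGroupingComparatorSketch extends WritableComparator {

        protected EdgeIdentityGroupingComparatorSketch() {
            super(EdgeWritable.class, true); // true: deserialize keys and compare as objects
        }

        @Override
        public int compare(WritableComparable a, WritableComparable b) {
            EdgeWritable left = (EdgeWritable) a;
            EdgeWritable right = (EdgeWritable) b;
            if (left.getId() != right.getId()) {
                return left.getId() < right.getId() ? -1 : 1;
            }
            if (left.getFrom() != right.getFrom()) {
                return left.getFrom() < right.getFrom() ? -1 : 1;
            }
            if (left.getTo() != right.getTo()) {
                return left.getTo() < right.getTo() ? -1 : 1;
            }
            return 0; // same edge: both surrounding-edge copies group together
        }
    }

The partitioner assertions at the end of EdgeWritableComparatorTest guard the same invariant from the other side: if two equal EdgeWritable keys could hash into different partitions, the two halves of an edge would never reach the same reducer.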
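These tests also lean on the pluggable metadata machinery added to Neo4JUtils in this patch: getMetaData(conf) reflectively instantiates the class registered under AbstractMetaData.METADATA_CLASS, preferring a constructor that takes a Configuration and falling back to the no-arg constructor, with HardCodedMetaDataImpl as the default when nothing is configured or instantiation fails. A wiring sketch using only keys and classes that appear elsewhere in this patch (the main() wrapper is illustrative):

    import nl.waredingen.graphs.neo.mapreduce.input.AbstractMetaData;
    import nl.waredingen.graphs.neo.mapreduce.input.MetaData;
    import nl.waredingen.graphs.neo.mapreduce.input.MetaDataFromConfigImpl;
    import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
    import org.apache.hadoop.conf.Configuration;

    public class MetaDataWiringSketch {
        public static void main(String[] args) {
            Configuration conf = new Configuration();
            // Select the implementation; getMetaData(conf) tries the
            // (Configuration) constructor first, then the no-arg
            // constructor, then falls back to the default implementation.
            conf.setClass(AbstractMetaData.METADATA_CLASS, MetaDataFromConfigImpl.class, MetaData.class);
            // Describe the node and edge columns, as the tests above do.
            conf.setStrings(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_NAMES, "identifier", "name");
            conf.set(MetaDataFromConfigImpl.METADATA_NODE_ID_NAME, "identifier");
            conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "identifier", Long.class, Object.class);
            conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "name", String.class, Object.class);
            conf.setStrings(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_NAMES, "from", "to");
            conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "from", Long.class, Object.class);
            conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "to", Long.class, Object.class);

            MetaData md = Neo4JUtils.getMetaData(conf);
            System.out.println(md.getNodePropertyIndexForName("name"));
        }
    }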
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesMapReduceTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesMapReduceTest.java new file mode 100644 index 0000000..1465d91 --- /dev/null +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesMapReduceTest.java @@ -0,0 +1,56 @@ +package nl.waredingen.graphs.neo.mapreduce.edges.surround.join; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.junit.Assert.assertThat; + +import java.util.List; + +import nl.waredingen.graphs.neo.mapreduce.input.writables.DoubleSurroundingEdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.SurroundingEdgeWritable; + +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; +import org.apache.hadoop.mrunit.types.Pair; +import org.junit.Before; +import org.junit.Test; + +public class JoinSurroundingEdgesMapReduceTest { + + private MapReduceDriver driver; + private List> output; + + @SuppressWarnings("unchecked") + @Before + public void setUp() throws Exception { + driver = new MapReduceDriver(new JoinSurroundingEdgesMapper(),new JoinSurroundingEdgesReducer()); + driver.setKeyGroupingComparator(new EdgeWritableKeyGroupingComparator()); + driver.setKeyOrderComparator(new EdgeWritableKeyComparator()); + } + + @Test + public void shouldjoinFromNodeAndEdge() throws Exception { + driver.withInput(NullWritable.get(), new SurroundingEdgeWritable(0,2,0,2,120,0,-1)); + driver.addInput(NullWritable.get(), new SurroundingEdgeWritable(0,0,0,1,100,-1,2)); + driver.addInput(NullWritable.get(), new SurroundingEdgeWritable(1,1,1,2,110,0,-1)); + driver.addInput(NullWritable.get(), new SurroundingEdgeWritable(1,0,0,1,100,-1,1)); + driver.addInput(NullWritable.get(), new SurroundingEdgeWritable(2,2,0,2,120,1,-1)); + driver.addInput(NullWritable.get(), new SurroundingEdgeWritable(2,1,1,2,110,-1,2)); + output = driver.run(); + + assertThat(output.size(), is(6)); + assertThat(output.get(0).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(0).getSecond(), equalTo(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(0,0,0,1,100,-1,2), new SurroundingEdgeWritable(1,0,0,1,100,-1,1)))); + assertThat(output.get(1).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(1).getSecond(), equalTo(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(1,0,0,1,100,-1,1), new SurroundingEdgeWritable(0,0,0,1,100,-1,2)))); + assertThat(output.get(2).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(2).getSecond(), equalTo(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(1,1,1,2,110,0,-1), new SurroundingEdgeWritable(2,1,1,2,110,-1,2)))); + assertThat(output.get(3).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(3).getSecond(), equalTo(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(2,1,1,2,110,-1,2), new SurroundingEdgeWritable(1,1,1,2,110,0,-1)))); + assertThat(output.get(4).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(4).getSecond(), equalTo(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(0,2,0,2,120,0,-1), new SurroundingEdgeWritable(2,2,0,2,120,1,-1)))); + assertThat(output.get(5).getFirst(), equalTo(NullWritable.get())); + 
assertThat(output.get(5).getSecond(), equalTo(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(2,2,0,2,120,1,-1), new SurroundingEdgeWritable(0,2,0,2,120,0,-1)))); + } +} diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesMapperTest.java index d5eeecf..3721371 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesMapperTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesMapperTest.java @@ -1,35 +1,47 @@ package nl.waredingen.graphs.neo.mapreduce.edges.surround.join; -import static org.hamcrest.CoreMatchers.*; -import static org.junit.Assert.*; +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.CoreMatchers.is; +import static org.junit.Assert.assertThat; import java.util.List; -import nl.waredingen.graphs.neo.mapreduce.join.JoinNodesMapper; +import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.SurroundingEdgeWritable; -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mrunit.mapreduce.MapDriver; import org.apache.hadoop.mrunit.types.Pair; import org.junit.Before; import org.junit.Test; public class JoinSurroundingEdgesMapperTest { - private MapDriver driver; - private List> output; + private MapDriver driver; + private List> output; @Before public void setUp() throws Exception { - driver = new MapDriver(new JoinSurroundingEdgesMapper()); + driver = new MapDriver(new JoinSurroundingEdgesMapper()); } @Test public void shouldOutputAsJoin() throws Exception { - output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("0 3 2 0 -1 2")).run(); + output = driver.withInputKey(NullWritable.get()).withInputValue(new SurroundingEdgeWritable(0,3,2,0,130,-1,2)).run(); assertThat(output.size(), is(1)); - assertThat(output.get(0).getFirst(), equalTo(new Text("3;2;0"))); - assertThat(output.get(0).getSecond(), equalTo(new Text("0 3 2 0 -1 2"))); + assertThat(output.get(0).getFirst(), equalTo(new EdgeWritable(3,2,0,130))); + assertThat(output.get(0).getSecond(), equalTo(new SurroundingEdgeWritable(0,3,2,0,130,-1,2))); } + + @Test + public void shouldOutputAsJoinForOtherNode() throws Exception { + output = driver.withInputKey(NullWritable.get()).withInputValue(new SurroundingEdgeWritable(2,3,2,0,130,1,-1)).run(); + + assertThat(output.size(), is(1)); + + assertThat(output.get(0).getFirst(), equalTo(new EdgeWritable(3,2,0,130))); + assertThat(output.get(0).getSecond(), equalTo(new SurroundingEdgeWritable(2,3,2,0,130,1,-1))); + } + } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesReducerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesReducerTest.java index ec78144..6c00233 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesReducerTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/edges/surround/join/JoinSurroundingEdgesReducerTest.java @@ -7,35 +7,39 @@ import java.util.ArrayList; import java.util.List; +import nl.waredingen.graphs.neo.mapreduce.input.writables.DoubleSurroundingEdgeWritable; +import 
nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.SurroundingEdgeWritable; + import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; import org.apache.hadoop.mrunit.mapreduce.ReduceDriver; import org.apache.hadoop.mrunit.types.Pair; import org.junit.Before; import org.junit.Test; public class JoinSurroundingEdgesReducerTest { - private ReduceDriver driver; - private List> output; + private ReduceDriver driver; + private List> output; @Before public void setUp() throws Exception { - driver = new ReduceDriver(new JoinSurroundingEdgesReducer()); + driver = new ReduceDriver(new JoinSurroundingEdgesReducer()); } @Test - public void shouldOutputAsNode() throws Exception { - ArrayList values = new ArrayList(); - values.add(new Text("0 2 0 2 3 0")); - values.add(new Text("2 2 0 2 3 1")); - output = driver.withInputKey(new Text("2;0;2")).withInputValues(values).run(); + public void shouldOutputAsJoinedSurroundingEdges() throws Exception { + ArrayList values = new ArrayList(); + values.add(new SurroundingEdgeWritable(0,2,0,2,120,3,0)); + values.add(new SurroundingEdgeWritable(2,2,0,2,120,3,1)); + output = driver.withInputKey(new EdgeWritable(2,0,2,120)).withInputValues(values).run(); assertThat(output.size(), is(2)); assertThat(output.get(0).getFirst(), equalTo(NullWritable.get())); - assertThat(output.get(0).getSecond(), equalTo(new Text("0 2 0 2 3 0 2 2 0 2 3 1"))); + assertThat(output.get(0).getSecond(), equalTo(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(0,2,0,2,120,3,0), new SurroundingEdgeWritable(2,2,0,2,120,3,1)))); assertThat(output.get(1).getFirst(), equalTo(NullWritable.get())); - assertThat(output.get(1).getSecond(), equalTo(new Text("2 2 0 2 3 1 0 2 0 2 3 0"))); + assertThat(output.get(1).getSecond(), equalTo(new DoubleSurroundingEdgeWritable(new SurroundingEdgeWritable(2,2,0,2,120,3,1),new SurroundingEdgeWritable(0,2,0,2,120,3,0)))); } + } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesMapReduceTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesMapReduceTest.java index 1e60c9f..89cc93b 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesMapReduceTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesMapReduceTest.java @@ -6,6 +6,14 @@ import java.util.List; +import nl.waredingen.graphs.neo.mapreduce.input.AbstractMetaData; +import nl.waredingen.graphs.neo.mapreduce.input.MetaData; +import nl.waredingen.graphs.neo.mapreduce.input.MetaDataFromConfigImpl; +import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeIdWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeWritable; + +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver; @@ -14,28 +22,55 @@ import org.junit.Test; public class GroupNodesAndEdgesMapReduceTest { - private MapReduceDriver driver; - private List> output; + private MapReduceDriver driver; + private List> output; @SuppressWarnings("unchecked") @Before public void setUp() throws Exception { - driver = new MapReduceDriver(new GroupNodesAndEdgesMapper(), new GroupNodesAndEdgesReducer()); + driver = new MapReduceDriver(new GroupNodesAndEdgesMapper(), new 
GroupNodesAndEdgesReducer()); driver.setKeyGroupingComparator(new NodeAndEdgeIdKeyGroupingComparator()); driver.setKeyOrderComparator(new NodeAndEdgeIdKeyComparator()); } @Test public void shouldOutputWithFromAndToNode() throws Exception { - output = driver.withInput(new Text("RB"), new Text("0 A B 0 A Aname 1 B Bname")).run(); + output = driver.withInput(NullWritable.get(), new Text("0 A B 5 0 1 1 3")).run(); + + assertThat(output.size(), is(2)); + + assertThat(output.get(0).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(0).getSecond(), equalTo(new NodeEdgeWritable(0,1,0,0,1,5))); + + assertThat(output.get(1).getFirst(), equalTo(NullWritable.get())); + assertThat(output.get(1).getSecond(), equalTo(new NodeEdgeWritable(1,3,0,0,1,5))); + + } + + @Test + public void shouldOutputWithSomeExtraEdgeProperties() throws Exception { + Configuration config = new Configuration(); + config.setClass(AbstractMetaData.METADATA_CLASS, MetaDataFromConfigImpl.class, MetaData.class); + config.setStrings(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_NAMES, "identifier", "name"); + config.set(MetaDataFromConfigImpl.METADATA_NODE_ID_NAME, "identifier"); + config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX +"identifier", Long.class, Object.class); + config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX +"name", String.class, Object.class); + config.setStrings(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_NAMES, "from", "to", "prop1", "prop2"); + config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX +"from", Long.class, Object.class); + config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX +"to", Long.class, Object.class); + config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX +"prop1", String.class, Object.class); + config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX +"prop2", String.class, Object.class); + driver.setConfiguration(config); + + output = driver.withInput(NullWritable.get(), new Text("0 A B 5 0 1 1 3")).run(); assertThat(output.size(), is(2)); assertThat(output.get(0).getFirst(), equalTo(NullWritable.get())); - assertThat(output.get(0).getSecond(), equalTo(new Text("0 A Aname 0 0 1"))); + assertThat(output.get(0).getSecond(), equalTo(new NodeEdgeWritable(0,1,0,0,1,5))); assertThat(output.get(1).getFirst(), equalTo(NullWritable.get())); - assertThat(output.get(1).getSecond(), equalTo(new Text("1 B Bname 0 0 1"))); + assertThat(output.get(1).getSecond(), equalTo(new NodeEdgeWritable(1,3,0,0,1,5))); } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesMapperTest.java index ced97d7..e8aab5c 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesMapperTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesMapperTest.java @@ -6,7 +6,14 @@ import java.util.List; -import org.apache.hadoop.io.LongWritable; +import nl.waredingen.graphs.neo.mapreduce.input.AbstractMetaData; +import nl.waredingen.graphs.neo.mapreduce.input.MetaData; +import nl.waredingen.graphs.neo.mapreduce.input.MetaDataFromConfigImpl; +import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeIdWritable; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.io.NullWritable; import 
org.apache.hadoop.io.Text; import org.apache.hadoop.mrunit.mapreduce.MapDriver; import org.apache.hadoop.mrunit.types.Pair; @@ -14,25 +21,52 @@ import org.junit.Test; public class GroupNodesAndEdgesMapperTest { - private MapDriver driver; - private List> output; + private MapDriver driver; + private List> output; @Before public void setUp() throws Exception { - driver = new MapDriver(new GroupNodesAndEdgesMapper()); + driver = new MapDriver(new GroupNodesAndEdgesMapper()); } @Test public void shouldOutputWithFromAndToNode() throws Exception { - output = driver.withInputKey(new Text("RB")).withInputValue(new Text("0 A B 0 A Aname 1 B Bname")).run(); + output = driver.withInputKey(NullWritable.get()).withInputValue(new Text("0 A B 5 0 1 1 3")).run(); assertThat(output.size(), is(2)); - assertThat(output.get(0).getFirst(), equalTo(new Text("0 A Aname;0"))); - assertThat(output.get(0).getSecond(), equalTo(new Text("0 0 1"))); + assertThat(output.get(0).getFirst(), equalTo(new NodeEdgeIdWritable(0,1,0))); + assertThat(output.get(0).getSecond(), equalTo(new EdgeWritable(0,0,1,5))); - assertThat(output.get(1).getFirst(), equalTo(new Text("1 B Bname;0"))); - assertThat(output.get(1).getSecond(), equalTo(new Text("0 0 1"))); + assertThat(output.get(1).getFirst(), equalTo(new NodeEdgeIdWritable(1,3,0))); + assertThat(output.get(1).getSecond(), equalTo(new EdgeWritable(0,0,1,5))); + + } + + @Test + public void shouldOutputWithFromAndToNodeAndSomeEdgeProperties() throws Exception { + Configuration config = new Configuration(); + config.setClass(AbstractMetaData.METADATA_CLASS, MetaDataFromConfigImpl.class, MetaData.class); + config.setStrings(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_NAMES, "identifier", "name"); + config.set(MetaDataFromConfigImpl.METADATA_NODE_ID_NAME, "identifier"); + config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "identifier", Long.class, Object.class); + config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "name", String.class, Object.class); + config.setStrings(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_NAMES, "from", "to", "prop1", "prop2"); + config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "from", Long.class, Object.class); + config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "to", Long.class, Object.class); + config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "prop1", String.class, Object.class); + config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "prop2", String.class, Object.class); + driver.setConfiguration(config); + + output = driver.withInputKey(NullWritable.get()).withInputValue(new Text("0 A B 5 0 1 1 3")).run(); + + assertThat(output.size(), is(2)); + + assertThat(output.get(0).getFirst(), equalTo(new NodeEdgeIdWritable(0,1,0))); + assertThat(output.get(0).getSecond(), equalTo(new EdgeWritable(0,0,1,5))); + + assertThat(output.get(1).getFirst(), equalTo(new NodeEdgeIdWritable(1,3,0))); + assertThat(output.get(1).getSecond(), equalTo(new EdgeWritable(0,0,1,5))); } @@ -43,44 +77,44 @@ public void shouldOutputWithFromAndToNode() throws Exception { @Test public void shouldOutputForMultipleInputs() throws Exception { - output = driver.withInputKey(new Text("RA")).withInputValue(new Text("3 C A 2 C Cname 0 A Aname")).run(); + output = driver.withInputKey(NullWritable.get()).withInputValue(new Text("3 C A 5 2 1 0 3")).run(); assertThat(output.size(), is(2)); - assertThat(output.get(0).getFirst(), equalTo(new 
Text("2 C Cname;3"))); - assertThat(output.get(0).getSecond(), equalTo(new Text("3 2 0"))); + assertThat(output.get(0).getFirst(), equalTo(new NodeEdgeIdWritable(2,1,3))); + assertThat(output.get(0).getSecond(), equalTo(new EdgeWritable(3,2,0,5))); - assertThat(output.get(1).getFirst(), equalTo(new Text("0 A Aname;3"))); - assertThat(output.get(1).getSecond(), equalTo(new Text("3 2 0"))); + assertThat(output.get(1).getFirst(), equalTo(new NodeEdgeIdWritable(0,3,3))); + assertThat(output.get(1).getSecond(), equalTo(new EdgeWritable(3,2,0,5))); - output = driver.withInputKey(new Text("RB")).withInputValue(new Text("0 A B 0 A Aname 1 B Bname")).run(); + output = driver.withInputKey(NullWritable.get()).withInputValue(new Text("0 A B 5 0 1 1 3")).run(); assertThat(output.size(), is(2)); - assertThat(output.get(0).getFirst(), equalTo(new Text("0 A Aname;0"))); - assertThat(output.get(0).getSecond(), equalTo(new Text("0 0 1"))); + assertThat(output.get(0).getFirst(), equalTo(new NodeEdgeIdWritable(0,1,0))); + assertThat(output.get(0).getSecond(), equalTo(new EdgeWritable(0,0,1,5))); - assertThat(output.get(1).getFirst(), equalTo(new Text("1 B Bname;0"))); - assertThat(output.get(1).getSecond(), equalTo(new Text("0 0 1"))); + assertThat(output.get(1).getFirst(), equalTo(new NodeEdgeIdWritable(1,3,0))); + assertThat(output.get(1).getSecond(), equalTo(new EdgeWritable(0,0,1,5))); - output = driver.withInputKey(new Text("RC")).withInputValue(new Text("2 A C 0 A Aname 2 C Cname")).run(); + output = driver.withInputKey(NullWritable.get()).withInputValue(new Text("2 A C 5 0 1 2 3")).run(); assertThat(output.size(), is(2)); - assertThat(output.get(0).getFirst(), equalTo(new Text("0 A Aname;2"))); - assertThat(output.get(0).getSecond(), equalTo(new Text("2 0 2"))); + assertThat(output.get(0).getFirst(), equalTo(new NodeEdgeIdWritable(0,1,2))); + assertThat(output.get(0).getSecond(), equalTo(new EdgeWritable(2,0,2,5))); - assertThat(output.get(1).getFirst(), equalTo(new Text("2 C Cname;2"))); - assertThat(output.get(1).getSecond(), equalTo(new Text("2 0 2"))); + assertThat(output.get(1).getFirst(), equalTo(new NodeEdgeIdWritable(2,3,2))); + assertThat(output.get(1).getSecond(), equalTo(new EdgeWritable(2,0,2,5))); - output = driver.withInputKey(new Text("RC")).withInputValue(new Text("1 B C 1 B Bname 2 C Cname")).run(); + output = driver.withInputKey(NullWritable.get()).withInputValue(new Text("1 B C 5 1 2 2 3")).run(); assertThat(output.size(), is(2)); - assertThat(output.get(0).getFirst(), equalTo(new Text("1 B Bname;1"))); - assertThat(output.get(0).getSecond(), equalTo(new Text("1 1 2"))); + assertThat(output.get(0).getFirst(), equalTo(new NodeEdgeIdWritable(1,2,1))); + assertThat(output.get(0).getSecond(), equalTo(new EdgeWritable(1,1,2,5))); - assertThat(output.get(1).getFirst(), equalTo(new Text("2 C Cname;1"))); - assertThat(output.get(1).getSecond(), equalTo(new Text("1 1 2"))); + assertThat(output.get(1).getFirst(), equalTo(new NodeEdgeIdWritable(2,3,1))); + assertThat(output.get(1).getSecond(), equalTo(new EdgeWritable(1,1,2,5))); } } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesReducerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesReducerTest.java index bef3808..dc30d57 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesReducerTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/GroupNodesAndEdgesReducerTest.java @@ -6,30 +6,33 @@ import 
java.util.List; +import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeIdWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeWritable; + import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; import org.apache.hadoop.mrunit.mapreduce.ReduceDriver; import org.apache.hadoop.mrunit.types.Pair; import org.junit.Before; import org.junit.Test; public class GroupNodesAndEdgesReducerTest { - private ReduceDriver driver; - private List> output; + private ReduceDriver driver; + private List> output; @Before public void setUp() throws Exception { - driver = new ReduceDriver(new GroupNodesAndEdgesReducer()); + driver = new ReduceDriver(new GroupNodesAndEdgesReducer()); } @Test public void shouldOutputAsIs() throws Exception { - output = driver.withInputKey(new Text("0 A Aname;0")).withInputValue(new Text("0 0 1")).run(); + output = driver.withInputKey(new NodeEdgeIdWritable(0,1,0)).withInputValue(new EdgeWritable(0,0,1,5)).run(); assertThat(output.size(), is(1)); assertThat(output.get(0).getFirst(), equalTo(NullWritable.get())); - assertThat(output.get(0).getSecond(), equalTo(new Text("0 A Aname 0 0 1"))); + assertThat(output.get(0).getSecond(), equalTo(new NodeEdgeWritable(0, 1,0,0,1,5))); } } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyComparatorTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyComparatorTest.java index ea52c30..3b1b71b 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyComparatorTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyComparatorTest.java @@ -1,27 +1,18 @@ package nl.waredingen.graphs.neo.mapreduce.group; -import static org.junit.Assert.*; -import static org.hamcrest.CoreMatchers.*; +import static org.hamcrest.CoreMatchers.is; +import static org.junit.Assert.assertThat; + +import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeIdWritable; -import org.apache.hadoop.io.Text; import org.junit.Test; public class NodeAndEdgeIdKeyComparatorTest { @Test public void shouldSortNodeKeysOnEdgeIdAscKey() { - Text firstKey = new Text("0 A Aname;0"); - Text secondKey = new Text("0 A Aname;1"); - - NodeAndEdgeIdKeyComparator comp = new NodeAndEdgeIdKeyComparator(); - - assertThat(comp.compare(firstKey, secondKey), is(-1)); - } - - @Test - public void shouldSortNodeKeysOnEdgeIdDescKeyAlsoIfKeyContainsSplitCharacter() { - Text firstKey = new Text("0 A Ana;;me;0"); - Text secondKey = new Text("0 A Ana;;me;1"); + NodeEdgeIdWritable firstKey = new NodeEdgeIdWritable(0,1,0); + NodeEdgeIdWritable secondKey = new NodeEdgeIdWritable(0,1,1); NodeAndEdgeIdKeyComparator comp = new NodeAndEdgeIdKeyComparator(); @@ -31,8 +22,8 @@ public void shouldSortNodeKeysOnEdgeIdDescKeyAlsoIfKeyContainsSplitCharacter() { @Test public void shouldSortNodeKeysOnKeyIfDifferent() { - Text firstKey = new Text("0 A Aname;0"); - Text secondKey = new Text("1 B Bname;1"); + NodeEdgeIdWritable firstKey = new NodeEdgeIdWritable(0,1,0); + NodeEdgeIdWritable secondKey = new NodeEdgeIdWritable(1,1,1); NodeAndEdgeIdKeyComparator comp = new NodeAndEdgeIdKeyComparator(); diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyGroupingComparatorTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyGroupingComparatorTest.java index fc96c35..44bfb28 100644 --- 
a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyGroupingComparatorTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyGroupingComparatorTest.java @@ -1,17 +1,18 @@ package nl.waredingen.graphs.neo.mapreduce.group; -import static org.junit.Assert.*; -import static org.hamcrest.CoreMatchers.*; +import static org.hamcrest.CoreMatchers.is; +import static org.junit.Assert.assertThat; + +import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeIdWritable; -import org.apache.hadoop.io.Text; import org.junit.Test; public class NodeAndEdgeIdKeyGroupingComparatorTest { @Test public void shouldEqualNodeKeyAndEdgeKey() { - Text firstKey = new Text("0 A Aname;0"); - Text secondKey = new Text("0 A Aname;1"); + NodeEdgeIdWritable firstKey = new NodeEdgeIdWritable(0,1,0); + NodeEdgeIdWritable secondKey = new NodeEdgeIdWritable(0,1,1); NodeAndEdgeIdKeyGroupingComparator comp = new NodeAndEdgeIdKeyGroupingComparator(); diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitionerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitionerTest.java index 678eac3..6b171ce 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitionerTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/group/NodeAndEdgeIdKeyPartitionerTest.java @@ -4,30 +4,32 @@ import static org.junit.Assert.assertThat; import static org.junit.Assert.assertTrue; -import org.apache.hadoop.io.Text; +import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeIdWritable; + import org.junit.Test; public class NodeAndEdgeIdKeyPartitionerTest { @Test public void testSamePartitionForNodeAndEdgeIdKey() { - Text firstKey = new Text("0 A Aname;0"); - Text secondKey = new Text("0 A Aname;1"); + NodeEdgeIdWritable firstKey = new NodeEdgeIdWritable(0,1,0); + NodeEdgeIdWritable secondKey = new NodeEdgeIdWritable(0,1,1); NodeAndEdgeIdKeyPartitioner partitioner = new NodeAndEdgeIdKeyPartitioner(); - assertThat(partitioner.getPartition(firstKey, new Text(), 2), is(partitioner.getPartition(secondKey, new Text(), 2))); - assertThat(partitioner.getPartition(firstKey, new Text(), 50), is(partitioner.getPartition(secondKey, new Text(), 50))); + assertThat(partitioner.getPartition(firstKey, new EdgeWritable(), 2), is(partitioner.getPartition(secondKey, new EdgeWritable(), 2))); + assertThat(partitioner.getPartition(firstKey, new EdgeWritable(), 50), is(partitioner.getPartition(secondKey, new EdgeWritable(), 50))); } @Test public void testNonNegativePartitionForNodeAndEdgeKey() { - Text nodeKey = new Text("3663243826;1"); + NodeEdgeIdWritable nodeKey = new NodeEdgeIdWritable(3663243826L,1,1); NodeAndEdgeIdKeyPartitioner partitioner = new NodeAndEdgeIdKeyPartitioner(); - assertTrue(partitioner.getPartition(nodeKey, new Text(), 50) >= 0); + assertTrue(partitioner.getPartition(nodeKey, new EdgeWritable(), 50) >= 0); } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/input/InputTypeConversionTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/input/InputTypeConversionTest.java new file mode 100644 index 0000000..80647a2 --- /dev/null +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/input/InputTypeConversionTest.java @@ -0,0 +1,52 @@ +package nl.waredingen.graphs.neo.mapreduce.input; + +import static org.junit.Assert.assertEquals; + +import 
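The three comparator and partitioner tests above pin down a standard Hadoop secondary-sort setup: records are partitioned and grouped on the node part of a NodeEdgeIdWritable only, while the full (node id, edge id) ordering decides the order in which one node's edges reach the reducer. A minimal sketch of how these classes would be wired onto a job; the class names come from this patch, but the job driver itself is not shown here, so the surrounding code is an assumption:

    // Sketch only: registering the partitioner/comparators verified above
    // on a Hadoop job (org.apache.hadoop.mapreduce API assumed).
    Job job = new Job(conf, "group nodes and edges");
    job.setMapOutputKeyClass(NodeEdgeIdWritable.class);
    job.setMapOutputValueClass(EdgeWritable.class);
    job.setPartitionerClass(NodeAndEdgeIdKeyPartitioner.class);       // same node id -> same reducer
    job.setSortComparatorClass(NodeAndEdgeIdKeyComparator.class);     // order by (node id, edge id)
    job.setGroupingComparatorClass(NodeAndEdgeIdKeyGroupingComparator.class); // one reduce() call per node
    job.setReducerClass(GroupNodesAndEdgesReducer.class);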
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/input/InputTypeConversionTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/input/InputTypeConversionTest.java
new file mode 100644
index 0000000..80647a2
--- /dev/null
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/input/InputTypeConversionTest.java
@@ -0,0 +1,52 @@
+package nl.waredingen.graphs.neo.mapreduce.input;
+
+import static org.junit.Assert.assertEquals;
+
+import java.util.Scanner;
+
+import org.junit.Test;
+
+public class InputTypeConversionTest {
+
+    private Scanner getScanner(String s) {
+        Scanner scanner = new Scanner(s);
+        scanner.useDelimiter("\t");
+        scanner.useRadix(10);
+
+        return scanner;
+    }
+
+    @Test
+    public void testScanning() {
+        Scanner scanner = getScanner("This\tis\ta\ttest\twith\t12\t129\t260\t"+(Integer.MAX_VALUE - 3)+"\t"+(Long.MAX_VALUE - 3)+"\tand some extra fields:\teen\t\t\tvier\t\ttrue\tfalse");
+        while(scanner.hasNext()) {
+//            System.out.println(scanner.next());
+            if (scanner.hasNextByte()) {
+                System.out.println("Byte : "+scanner.nextByte());
+            }
+            else if (scanner.hasNextShort()) {
+                System.out.println("Short : "+scanner.nextShort());
+            }
+            else if (scanner.hasNextInt()) {
+                System.out.println("Int : "+scanner.nextInt());
+            }
+            else if (scanner.hasNextLong()) {
+                System.out.println("Long : "+scanner.nextLong());
+            }
+            else if (scanner.hasNextFloat()) {
+                System.out.println("Float : "+scanner.nextFloat());
+            }
+            else if (scanner.hasNextDouble()) {
+                System.out.println("Double : "+scanner.nextDouble());
+            }
+            else if (scanner.hasNextBoolean()) {
+                System.out.println("Boolean : "+scanner.nextBoolean());
+            }
+            else {
+                System.out.println("String : "+scanner.next());
+            }
+        }
+        assertEquals("Testing 123", "Testing 123");
+    }
+
+}
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/input/MetaDataTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/input/MetaDataTest.java
new file mode 100644
index 0000000..ea6ac26
--- /dev/null
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/input/MetaDataTest.java
@@ -0,0 +1,134 @@
+package nl.waredingen.graphs.neo.mapreduce.input;
+
+import static org.junit.Assert.*;
+
+import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
+
+import org.apache.hadoop.conf.Configuration;
+import org.junit.Before;
+import org.junit.Test;
+
+public class MetaDataTest {
+
+    private Configuration config;
+
+    @Before
+    public void setup() {
+        config = new Configuration();
+    }
+
+    @Test
+    public void testHardCodedMetaData() {
+        config.setClass(AbstractMetaData.METADATA_CLASS, HardCodedMetaDataImpl.class, MetaData.class);
+        MetaData md = Neo4JUtils.getMetaData(config);
+
+        assertEquals(0, md.getNumberOfNodes());
+        assertEquals(0, md.getNumberOfEdges());
+        assertEquals(2, md.getNodePropertySize());
+        assertEquals(0, md.getNodeIdIndex());
+        assertArrayEquals(new String[] {"identifier", "name"}, md.getNodePropertyNames());
+        assertEquals(1, md.getNodePropertyIndexForName("name"));
+        assertEquals(-1, md.getNodePropertyIndexForName("unknown"));
+        assertEquals(2, md.getEdgePropertySize());
+        assertEquals(0, md.getEdgeFromNodeIdIndex());
+        assertEquals(1, md.getEdgeToNodeIdIndex());
+        assertArrayEquals(new String[] { "from", "to" }, md.getEdgePropertyNames());
+        assertEquals(-1, md.getEdgePropertyIndexForName("unknown"));
+        assertEquals(Long.class, md.getNodePropertyTypeForName("identifier"));
+        assertEquals(String.class, md.getNodePropertyTypeForName("name"));
+        assertEquals(Long.class, md.getEdgePropertyTypeForName("from"));
+        assertEquals(Long.class, md.getEdgePropertyTypeForName("to"));
+
+    }
+
+    @Test
+    public void testHardCodedMetaDataWithNodesCountAndEdgesCountFromConfig() {
+        config.setClass(AbstractMetaData.METADATA_CLASS, HardCodedMetaDataImpl.class, MetaData.class);
+        config.setLong(AbstractMetaData.METADATA_NUMBER_OF_NODES, 42L);
+        config.setLong(AbstractMetaData.METADATA_NUMBER_OF_EDGES, 4242L);
+
+        MetaData md = Neo4JUtils.getMetaData(config);
+
+        assertEquals(42, md.getNumberOfNodes());
+        assertEquals(4242, md.getNumberOfEdges());
+        assertEquals(2, md.getNodePropertySize());
+        assertEquals(0, md.getNodeIdIndex());
+        assertArrayEquals(new String[] {"identifier", "name"}, md.getNodePropertyNames());
+        assertEquals(1, md.getNodePropertyIndexForName("name"));
+        assertEquals(-1, md.getNodePropertyIndexForName("unknown"));
+        assertEquals(2, md.getEdgePropertySize());
+        assertEquals(0, md.getEdgeFromNodeIdIndex());
+        assertEquals(1, md.getEdgeToNodeIdIndex());
+        assertArrayEquals(new String[] { "from", "to" }, md.getEdgePropertyNames());
+        assertEquals(-1, md.getEdgePropertyIndexForName("unknown"));
+        assertEquals(Long.class, md.getNodePropertyTypeForName("identifier"));
+        assertEquals(String.class, md.getNodePropertyTypeForName("name"));
+        assertEquals(Long.class, md.getEdgePropertyTypeForName("from"));
+        assertEquals(Long.class, md.getEdgePropertyTypeForName("to"));
+
+    }
+
+    @Test
+    public void testHardCodedMetaDataIsTheDefault() {
+        MetaData md = Neo4JUtils.getMetaData(config);
+
+        assertEquals(0, md.getNumberOfNodes());
+        assertEquals(0, md.getNumberOfEdges());
+        assertEquals(2, md.getNodePropertySize());
+        assertEquals(0, md.getNodeIdIndex());
+        assertArrayEquals(new String[] {"identifier", "name"}, md.getNodePropertyNames());
+        assertEquals(1, md.getNodePropertyIndexForName("name"));
+        assertEquals(-1, md.getNodePropertyIndexForName("unknown"));
+        assertEquals(2, md.getEdgePropertySize());
+        assertEquals(0, md.getEdgeFromNodeIdIndex());
+        assertEquals(1, md.getEdgeToNodeIdIndex());
+        assertArrayEquals(new String[] { "from", "to" }, md.getEdgePropertyNames());
+        assertEquals(-1, md.getEdgePropertyIndexForName("unknown"));
+        assertEquals(Long.class, md.getNodePropertyTypeForName("identifier"));
+        assertEquals(String.class, md.getNodePropertyTypeForName("name"));
+        assertEquals(Long.class, md.getEdgePropertyTypeForName("from"));
+        assertEquals(Long.class, md.getEdgePropertyTypeForName("to"));
+
+    }
+
+    @Test
+    public void testConfigPopulatedMetaData() {
+        populateConfigWithMetaData();
+
+        MetaData md = Neo4JUtils.getMetaData(config);
+
+        assertEquals(42, md.getNumberOfNodes());
+        assertEquals(4242, md.getNumberOfEdges());
+        assertEquals(3, md.getNodePropertySize());
+        assertEquals(0, md.getNodeIdIndex());
+        assertArrayEquals(new String[] {"id", "name", "accountnumber"}, md.getNodePropertyNames());
+        assertEquals(1, md.getNodePropertyIndexForName("name"));
+        assertEquals(-1, md.getNodePropertyIndexForName("unknown"));
+        assertEquals(3, md.getEdgePropertySize());
+        assertEquals(0, md.getEdgeFromNodeIdIndex());
+        assertEquals(1, md.getEdgeToNodeIdIndex());
+        assertEquals(2, md.getEdgePropertyIndexForName("amount"));
+        assertArrayEquals(new String[] { "from", "to", "amount" }, md.getEdgePropertyNames());
+        assertEquals(-1, md.getEdgePropertyIndexForName("unknown"));
+        assertEquals(Long.class, md.getNodePropertyTypeForName("id"));
+        assertEquals(String.class, md.getNodePropertyTypeForName("name"));
+        assertEquals(Integer.class, md.getEdgePropertyTypeForName("amount"));
+
+    }
+
+    private void populateConfigWithMetaData() {
+        config.setClass(AbstractMetaData.METADATA_CLASS, MetaDataFromConfigImpl.class, MetaData.class);
+        config.setStrings(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_NAMES, "id", "name", "accountnumber");
+        config.set(MetaDataFromConfigImpl.METADATA_NODE_ID_NAME, "id");
+        config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "id", Long.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "name", String.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "accountnumber", Integer.class, Object.class);
+        config.setStrings(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_NAMES, "from", "to", "amount");
+        config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "id", Long.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "name", Long.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "amount", Integer.class, Object.class);
+        config.setLong(AbstractMetaData.METADATA_NUMBER_OF_NODES, 42L);
+        config.setLong(AbstractMetaData.METADATA_NUMBER_OF_EDGES, 4242L);
+    }
+
+}
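MetaDataTest fixes the metadata contract: the concrete MetaData implementation is selected through METADATA_CLASS, and everything else (property names, the id column, per-property types, node/edge counts) is read back from the job Configuration, so any mapper or reducer can resolve the same schema. A condensed driver-side sketch using only configuration keys that appear in the tests above; the job submission around it is assumed:

    // Condensed sketch of driver-side metadata setup (keys taken from the
    // tests above; the surrounding job driver is not part of this patch).
    Configuration conf = new Configuration();
    conf.setClass(AbstractMetaData.METADATA_CLASS, MetaDataFromConfigImpl.class, MetaData.class);
    conf.setStrings(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_NAMES, "id", "name", "accountnumber");
    conf.set(MetaDataFromConfigImpl.METADATA_NODE_ID_NAME, "id");
    conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "id", Long.class, Object.class);
    conf.setLong(AbstractMetaData.METADATA_NUMBER_OF_NODES, 42L);
    conf.setLong(AbstractMetaData.METADATA_NUMBER_OF_EDGES, 4242L);

    // Any task can then rehydrate the same view of the schema:
    MetaData md = Neo4JUtils.getMetaData(conf);
    long numberOfNodes = md.getNumberOfNodes();              // 42
    int nameIndex = md.getNodePropertyIndexForName("name");  // 1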
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinFromEdgesMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinFromEdgesMapperTest.java
index 3a0986e..2c7cc2f 100644
--- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinFromEdgesMapperTest.java
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinFromEdgesMapperTest.java
@@ -1,13 +1,13 @@
 package nl.waredingen.graphs.neo.mapreduce.join;
 
-import static org.hamcrest.CoreMatchers.*;
-import static org.junit.Assert.*;
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
 
 import java.util.List;
 
-import nl.waredingen.graphs.neo.mapreduce.join.JoinFromEdgesMapper;
-
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mrunit.mapreduce.MapDriver;
 import org.apache.hadoop.mrunit.types.Pair;
@@ -15,22 +15,25 @@
 import org.junit.Test;
 
 public class JoinFromEdgesMapperTest {
-    private MapDriver<LongWritable, Text, Text, Text> driver;
+    private MapDriver<NullWritable, BytesWritable, Text, Text> driver;
     private List<Pair<Text, Text>> output;
 
     @Before
     public void setUp() throws Exception {
-        driver = new MapDriver<LongWritable, Text, Text, Text>(new JoinFromEdgesMapper());
+        driver = new MapDriver<NullWritable, BytesWritable, Text, Text>(new JoinFromEdgesMapper());
     }
 
     @Test
     public void shouldOutputAsFromNode() throws Exception {
-        output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("0 A B")).run();
+        Text val = new Text("0 A B 100");
+        BytesWritable bv = new BytesWritable(val.getBytes());
+        output = driver.withInputKey(NullWritable.get()).withInputValue(bv).run();
 
         assertThat(output.size(), is(1));
 
         assertThat(output.get(0).getFirst(), equalTo(new Text("EA")));
-        assertThat(output.get(0).getSecond(), equalTo(new Text("0 A B")));
+        assertThat(output.get(0).getSecond(), equalTo(new Text("0 A B 100")));
     }
+
 }
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinFromNodesAndEdgesMapReduceTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinFromNodesAndEdgesMapReduceTest.java
index e99418c..fbcc221 100644
--- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinFromNodesAndEdgesMapReduceTest.java
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinFromNodesAndEdgesMapReduceTest.java
@@ -8,7 +8,8 @@
 import nl.waredingen.graphs.neo.mapreduce.DualInputMapReduceDriver;
 
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mrunit.types.Pair;
 import org.junit.Before;
@@ -16,13 +17,13 @@
 public class JoinFromNodesAndEdgesMapReduceTest {
-    private DualInputMapReduceDriver driver;
-    private List<Pair<Text, Text>> output;
+    private DualInputMapReduceDriver driver;
+    private List<Pair<NullWritable, Text>> output;
 
     @SuppressWarnings("unchecked")
     @Before
     public void setUp() throws Exception {
-        driver = new DualInputMapReduceDriver();
+        driver = new DualInputMapReduceDriver();
         driver.setFirstMapper(new JoinNodesMapper());
         driver.setSecondMapper(new JoinFromEdgesMapper());
         driver.setReducer(new JoinNodesAndEdgesReducer());
@@ -32,11 +33,17 @@ public void setUp() throws Exception {
 
     @Test
     public void shouldjoinFromNodeAndEdge() throws Exception {
-        driver.withFirstInput(new LongWritable(0), new Text("0 A Aname")).addInput(new LongWritable(1), new Text("1 B Bname"));
-        output = driver.withSecondInput(new LongWritable(0), new Text("0 A B")).run();
+        Text nodeA = new Text("0 A 1");
+        BytesWritable nodeAInput = new BytesWritable(nodeA.getBytes());
+        Text nodeB = new Text("1 B 3");
+        BytesWritable nodeBInput = new BytesWritable(nodeB.getBytes());
+        Text edge = new Text("0 A B 5");
+        BytesWritable edgeInput = new BytesWritable(edge.getBytes());
+        driver.withFirstInput(NullWritable.get(), nodeAInput).addInput(NullWritable.get(), nodeBInput);
+        output = driver.withSecondInput(NullWritable.get(), edgeInput).run();
 
         assertThat(output.size(), is(1));
-        assertThat(output.get(0).getFirst(), equalTo(new Text("RB")));
-        assertThat(output.get(0).getSecond(), equalTo(new Text("0 A B 0 A Aname")));
+        assertThat(output.get(0).getFirst(), equalTo(NullWritable.get()));
+        assertThat(output.get(0).getSecond(), equalTo(new Text("0 A B 5 0 1")));
     }
 }
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesAndEdgesReducerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesAndEdgesReducerTest.java
index 7b0ab17..1931656 100644
--- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesAndEdgesReducerTest.java
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesAndEdgesReducerTest.java
@@ -7,6 +7,7 @@
 import java.util.ArrayList;
 import java.util.List;
 
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
 import org.apache.hadoop.mrunit.types.Pair;
@@ -14,25 +15,25 @@
 import org.junit.Test;
 
 public class JoinNodesAndEdgesReducerTest {
-    private ReduceDriver<Text, Text, Text, Text> driver;
-    private List<Pair<Text, Text>> output;
+    private ReduceDriver<Text, Text, NullWritable, Text> driver;
+    private List<Pair<NullWritable, Text>> output;
 
     @Before
     public void setUp() throws Exception {
-        driver = new ReduceDriver<Text, Text, Text, Text>(new JoinNodesAndEdgesReducer());
+        driver = new ReduceDriver<Text, Text, NullWritable, Text>(new JoinNodesAndEdgesReducer());
     }
 
     @Test
     public void shouldOutputAsNode() throws Exception {
         ArrayList<Text> values = new ArrayList<Text>();
-        values.add(new Text("0 A Aname"));
+        values.add(new Text("0"));
         values.add(new Text("0 A B"));
 
         output = driver.withInputKey(new Text("NA")).withInputValues(values).run();
 
         assertThat(output.size(), is(1));
-        assertThat(output.get(0).getFirst(), equalTo(new Text("RB")));
-        assertThat(output.get(0).getSecond(), equalTo(new Text("0 A B 0 A Aname")));
+        assertThat(output.get(0).getFirst(), equalTo(NullWritable.get()));
+        assertThat(output.get(0).getSecond(), equalTo(new Text("0 A B 0")));
     }
 }
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesMapperTest.java
index 74234bf..b0d2eff 100644
--- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesMapperTest.java
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinNodesMapperTest.java
@@ -1,13 +1,13 @@
 package nl.waredingen.graphs.neo.mapreduce.join;
 
-import static org.hamcrest.CoreMatchers.*;
-import static org.junit.Assert.*;
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
 
 import java.util.List;
 
-import nl.waredingen.graphs.neo.mapreduce.join.JoinNodesMapper;
-
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mrunit.mapreduce.MapDriver;
 import org.apache.hadoop.mrunit.types.Pair;
@@ -15,22 +15,24 @@
 import org.junit.Test;
 
 public class JoinNodesMapperTest {
-    private MapDriver<LongWritable, Text, Text, Text> driver;
+    private MapDriver<NullWritable, BytesWritable, Text, Text> driver;
     private List<Pair<Text, Text>> output;
 
     @Before
     public void setUp() throws Exception {
-        driver = new MapDriver<LongWritable, Text, Text, Text>(new JoinNodesMapper());
+        driver = new MapDriver<NullWritable, BytesWritable, Text, Text>(new JoinNodesMapper());
     }
 
     @Test
-    public void shouldOutputAsNode() throws Exception {
-        output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("0 A Aname")).run();
+    public void shouldOutputAsNodeIndex() throws Exception {
+        Text val = new Text("0 A 1");
+        BytesWritable bv = new BytesWritable(val.getBytes());
+        output = driver.withInputKey(NullWritable.get()).withInputValue(bv).run();
 
         assertThat(output.size(), is(1));
 
         assertThat(output.get(0).getFirst(), equalTo(new Text("NA")));
-        assertThat(output.get(0).getSecond(), equalTo(new Text("0 A Aname")));
+        assertThat(output.get(0).getSecond(), equalTo(new Text("0 1")));
     }
 }
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinToEdgesMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinToEdgesMapperTest.java
index 43800be..ac83575 100644
--- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinToEdgesMapperTest.java
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinToEdgesMapperTest.java
@@ -1,13 +1,12 @@
 package nl.waredingen.graphs.neo.mapreduce.join;
 
-import static org.hamcrest.CoreMatchers.*;
-import static org.junit.Assert.*;
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
 
 import java.util.List;
 
-import nl.waredingen.graphs.neo.mapreduce.join.JoinToEdgesMapper;
-
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mrunit.mapreduce.MapDriver;
 import org.apache.hadoop.mrunit.types.Pair;
@@ -15,22 +14,22 @@
 import org.junit.Test;
 
 public class JoinToEdgesMapperTest {
-    private MapDriver<Text, Text, Text, Text> driver;
+    private MapDriver<NullWritable, Text, Text, Text> driver;
     private List<Pair<Text, Text>> output;
 
     @Before
     public void setUp() throws Exception {
-        driver = new MapDriver<Text, Text, Text, Text>(new JoinToEdgesMapper());
+        driver = new MapDriver<NullWritable, Text, Text, Text>(new JoinToEdgesMapper());
     }
 
     @Test
     public void shouldOutputAsToNode() throws Exception {
-        output = driver.withInputKey(new Text("RB")).withInputValue(new Text("0 A B 0 A Aname")).run();
+        output = driver.withInputKey(NullWritable.get()).withInputValue(new Text("0 A B 0 1")).run();
 
         assertThat(output.size(), is(1));
 
         assertThat(output.get(0).getFirst(), equalTo(new Text("EB")));
-        assertThat(output.get(0).getSecond(), equalTo(new Text("0 A B 0 A Aname")));
+        assertThat(output.get(0).getSecond(), equalTo(new Text("0 A B 0 1")));
     }
 }
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinToNodesAndEdgesMapReduceTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinToNodesAndEdgesMapReduceTest.java
index e194a3d..0318bb0 100644
--- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinToNodesAndEdgesMapReduceTest.java
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/join/JoinToNodesAndEdgesMapReduceTest.java
@@ -1,35 +1,29 @@
 package nl.waredingen.graphs.neo.mapreduce.join;
 
-import static org.hamcrest.CoreMatchers.*;
-import static org.junit.Assert.*;
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
 
 import java.util.List;
 
 import nl.waredingen.graphs.neo.mapreduce.DualInputMapReduceDriver;
-import nl.waredingen.graphs.neo.mapreduce.join.JoinNodesAndEdgesReducer;
-import nl.waredingen.graphs.neo.mapreduce.join.JoinNodesMapper;
-import nl.waredingen.graphs.neo.mapreduce.join.JoinToEdgesMapper;
-import nl.waredingen.graphs.neo.mapreduce.join.NodeAndEdgeKeyComparator;
-import nl.waredingen.graphs.neo.mapreduce.join.NodeKeyGroupingComparator;
 
-import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
-import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
 import org.apache.hadoop.mrunit.types.Pair;
 import org.junit.Before;
 import org.junit.Test;
 
 public class JoinToNodesAndEdgesMapReduceTest {
-    private DualInputMapReduceDriver driver;
-    private List<Pair<Text, Text>> output;
+    private DualInputMapReduceDriver driver;
+    private List<Pair<NullWritable, Text>> output;
 
     @SuppressWarnings("unchecked")
     @Before
     public void setUp() throws Exception {
-        driver = new DualInputMapReduceDriver();
+        driver = new DualInputMapReduceDriver();
         driver.setFirstMapper(new JoinNodesMapper());
         driver.setSecondMapper(new JoinToEdgesMapper());
         driver.setReducer(new JoinNodesAndEdgesReducer());
@@ -39,11 +33,17 @@ public void setUp() throws Exception {
 
     @Test
     public void shouldjoinToNodeAndEdgeWithFromNode() throws Exception {
-        driver.withFirstInput(new LongWritable(0), new Text("0 A Aname")).addInput(new LongWritable(1), new Text("1 B Bname"));
-        output = driver.withSecondInput(new Text("RB"), new Text("0 A B 0 A Aname")).run();
+        Text nodeA = new Text("0 A 1");
+        BytesWritable nodeAInput = new BytesWritable(nodeA.getBytes());
+        Text nodeB = new Text("1 B 3");
+        BytesWritable nodeBInput = new BytesWritable(nodeB.getBytes());
+        Text edgeWithFromNodeJoined = new Text("0 A B 5 0 1");
+
+        driver.withFirstInput(NullWritable.get(), nodeAInput).addInput(NullWritable.get(), nodeBInput);
+        output = driver.withSecondInput(NullWritable.get(), edgeWithFromNodeJoined).run();
 
         assertThat(output.size(), is(1));
-        assertThat(output.get(0).getFirst(), equalTo(new Text("RB")));
-        assertThat(output.get(0).getSecond(), equalTo(new Text("0 A B 0 A Aname 1 B Bname")));
+        assertThat(output.get(0).getFirst(), equalTo(NullWritable.get()));
+        assertThat(output.get(0).getSecond(), equalTo(new Text("0 A B 5 0 1 1 3")));
     }
 }
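The six join tests above all exercise one tagged reduce-side join, run twice: once to stamp the from-node's id onto each edge, once for the to-node. Node records are emitted under an "N"-prefixed key and edge records under an "E"-prefixed key for the same join value, so the reducer receives the single node record (with its id and rownum) ahead of every edge that needs it. A condensed, illustrative sketch of the node side of that tagging; it hard-codes the tests' "id name rownum" field layout, whereas the real JoinNodesMapper derives field positions from the metadata:

    import java.io.IOException;

    import org.apache.hadoop.io.BytesWritable;
    import org.apache.hadoop.io.NullWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Mapper;

    // Sketch only: the tagging half of the reduce-side join the tests verify.
    public class SketchJoinNodesMapper extends Mapper<NullWritable, BytesWritable, Text, Text> {
        @Override
        protected void map(NullWritable key, BytesWritable value, Context context)
                throws IOException, InterruptedException {
            // e.g. "0 A 1" -> key "NA", value "0 1" (id and rownum)
            String[] f = new String(value.getBytes(), 0, value.getLength(), "UTF-8").split(" ");
            context.write(new Text("N" + f[1]), new Text(f[0] + " " + f[2]));
        }
    }

Edges are tagged "E" + join field by the edge mappers, and the reducer caches the node value and appends it to every edge in the group, which is exactly what shouldOutputAsNode asserts ("0 A B" plus the node's "0").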
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapperTest.java
index fca658b..75c238b 100644
--- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapperTest.java
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputMapperTest.java
@@ -1,47 +1,49 @@
 package nl.waredingen.graphs.neo.mapreduce.nodes;
 
-import static org.hamcrest.CoreMatchers.*;
-import static org.junit.Assert.*;
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
 
 import java.util.List;
 
-import nl.waredingen.graphs.neo.mapreduce.join.JoinNodesMapper;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeIdPropIdWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.NodeEdgeWritable;
 
 import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mrunit.mapreduce.MapDriver;
 import org.apache.hadoop.mrunit.types.Pair;
 import org.junit.Before;
 import org.junit.Test;
 
 public class NodeOutputMapperTest {
-    private MapDriver<LongWritable, Text, LongWritable, Text> driver;
-    private List<Pair<LongWritable, Text>> output;
+    private MapDriver<NullWritable, NodeEdgeWritable, LongWritable, EdgeIdPropIdWritable> driver;
+    private List<Pair<LongWritable, EdgeIdPropIdWritable>> output;
 
     @Before
     public void setUp() throws Exception {
-        driver = new MapDriver<LongWritable, Text, LongWritable, Text>(new NodeOutputMapper());
+        driver = new MapDriver<NullWritable, NodeEdgeWritable, LongWritable, EdgeIdPropIdWritable>(new NodeOutputMapper());
     }
 
     @Test
     public void shouldOutputAsNode() throws Exception {
-        output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("0 A Aname 3 2 0")).run();
+        output = driver.withInputKey(NullWritable.get()).withInputValue(new NodeEdgeWritable(0,1,3,2,0,5)).run();
 
         assertThat(output.size(), is(1));
 
         assertThat(output.get(0).getFirst(), equalTo(new LongWritable(0)));
-        assertThat(output.get(0).getSecond(), equalTo(new Text("3 0")));
+        assertThat(output.get(0).getSecond(), equalTo(new EdgeIdPropIdWritable(3,1)));
     }
 
     @Test
     public void shouldOutputAsNodeWhereNodeIdIsTheKey() throws Exception {
-        output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("11 A Aname 3 2 0")).run();
+        output = driver.withInputKey(NullWritable.get()).withInputValue(new NodeEdgeWritable(11,21,3,2,0,5)).run();
 
         assertThat(output.size(), is(1));
 
         assertThat(output.get(0).getFirst(), equalTo(new LongWritable(11)));
-        assertThat(output.get(0).getSecond(), equalTo(new Text("3 22")));
+        assertThat(output.get(0).getSecond(), equalTo(new EdgeIdPropIdWritable(3,21)));
     }
 }
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputReducerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputReducerTest.java
index 6a90886..b30f090 100644
--- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputReducerTest.java
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/nodes/NodeOutputReducerTest.java
@@ -6,29 +6,29 @@
 import java.util.List;
 
+import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgeIdPropIdWritable;
 import nl.waredingen.graphs.neo.neo4j.Neo4JUtils;
 
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
 import org.apache.hadoop.mrunit.types.Pair;
 import org.junit.Before;
 import org.junit.Test;
 
 public class NodeOutputReducerTest {
-    private ReduceDriver<LongWritable, Text, NullWritable, BytesWritable> driver;
+    private ReduceDriver<LongWritable, EdgeIdPropIdWritable, NullWritable, BytesWritable> driver;
     private List<Pair<NullWritable, BytesWritable>> output;
 
     @Before
     public void setUp() throws Exception {
-        driver = new ReduceDriver<LongWritable, Text, NullWritable, BytesWritable>(new NodeOutputReducer());
+        driver = new ReduceDriver<LongWritable, EdgeIdPropIdWritable, NullWritable, BytesWritable>(new NodeOutputReducer());
     }
 
     @Test
     public void shouldOutputAsSomeNode() throws Exception {
-        output = driver.withInputKey(new LongWritable(1)).withInputValue(new Text("3 0")).run();
+        output = driver.withInputKey(new LongWritable(1)).withInputValue(new EdgeIdPropIdWritable(3,0)).run();
 
         assertThat(output.size(), is(1));
 
@@ -39,7 +39,7 @@ public void shouldOutputAsSomeNode() throws Exception {
 
     @Test
     public void shouldOutputAsRootAndFirstNode() throws Exception {
-        output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("3 0")).run();
+        output = driver.withInputKey(new LongWritable(0)).withInputValue(new EdgeIdPropIdWritable(3,0)).run();
 
         assertThat(output.size(), is(2));
 
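NodeOutputReducer, exercised above, turns each (node id, first edge id, first property id) pair into one fixed-width record of neostore.nodestore.db; as shouldOutputAsRootAndFirstNode shows, it emits an extra record when it handles node 0, to account for Neo4j's reserved root. A sketch of the packing, under the assumption of the Neo4j 1.x layout (a 9-byte record: one in-use byte, a 4-byte relationship pointer, a 4-byte property pointer); the actual encoding lives in Neo4JUtils and may differ in detail:

    // Assumed Neo4j 1.x node record layout; the Record constants are from
    // org.neo4j.kernel.impl.nioneo.store, already used elsewhere in this patch.
    public static byte[] packNodeRecord(boolean inUse, long nextRel, long nextProp) {
        ByteBuffer record = ByteBuffer.allocate(9);
        record.put(inUse ? Record.IN_USE.byteValue() : Record.NOT_IN_USE.byteValue());
        record.putInt((int) nextRel);   // Record.NO_NEXT_RELATIONSHIP for a node without edges
        record.putInt((int) nextProp);  // Record.NO_NEXT_PROPERTY for a node without properties
        return record.array();
    }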
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePreparePropertiesMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePreparePropertiesMapperTest.java
new file mode 100644
index 0000000..e44ee9b
--- /dev/null
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePreparePropertiesMapperTest.java
@@ -0,0 +1,148 @@
+package nl.waredingen.graphs.neo.mapreduce.properties;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
+
+import java.util.List;
+
+import nl.waredingen.graphs.neo.mapreduce.input.AbstractMetaData;
+import nl.waredingen.graphs.neo.mapreduce.input.MetaData;
+import nl.waredingen.graphs.neo.mapreduce.input.MetaDataFromConfigImpl;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.FullEdgePropertiesWritable;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mrunit.mapreduce.MapDriver;
+import org.apache.hadoop.mrunit.types.Pair;
+import org.junit.Before;
+import org.junit.Test;
+
+public class EdgePreparePropertiesMapperTest {
+
+    private MapDriver<Text, Text, AscLongDescLongWritable, FullEdgePropertiesWritable> driver;
+    private List<Pair<AscLongDescLongWritable, FullEdgePropertiesWritable>> output;
+
+    @Before
+    public void setUp() throws Exception {
+        driver = new MapDriver<Text, Text, AscLongDescLongWritable, FullEdgePropertiesWritable>(new EdgePreparePropertiesMapper());
+    }
+
+    @Test
+    public void shouldOutputAsProperties() throws Exception {
+        driver.setConfiguration(populateConfigWithMetaData());
+        output = driver.withInputKey(new Text("0")).withInputValue(new Text("A B Prop3 Prop4 AndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrect")).run();
+
+        FullEdgePropertiesWritable edge1 = new FullEdgePropertiesWritable(0, "A", "B", 0, 0, -1, -1, 5, "Prop3");
+        edge1.add(6, "Prop4", 0);
+        FullEdgePropertiesWritable edge2 = new FullEdgePropertiesWritable(0, "A", "B", 1, 1, -1, -1, 7, "AndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrect");
+
+        assertThat(output.size(), is(2));
+
+        assertThat(output.get(0).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(0),new LongWritable(0))));
+        assertThat(output.get(0).getSecond(), equalTo(edge1));
+        assertThat(output.get(1).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(0),new LongWritable(1))));
+        assertThat(output.get(1).getSecond(), equalTo(edge2));
+
+    }
+
+    @Test
+    public void shouldOutputAsPropertiesWithVeryLongPropValue() throws Exception {
+        driver.setConfiguration(populateConfigWithMetaData());
+        output = driver.withInputKey(new Text("1")).withInputValue(new Text("B C Prop1 Prop2 AndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrectAndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrect")).run();
+
+        FullEdgePropertiesWritable edge1 = new FullEdgePropertiesWritable(1, "B", "C", 0, 0, -1, -1, 5, "Prop1");
+        edge1.add(6, "Prop2", 0);
+        FullEdgePropertiesWritable edge2 = new FullEdgePropertiesWritable(1, "B", "C", 1, 2, -1, -1, 7, "AndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrectAndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrect");
+
+        assertThat(output.size(), is(2));
+
+
+        assertThat(output.get(0).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(1),new LongWritable(0))));
+        assertThat(output.get(0).getSecond(), equalTo(edge1));
+        assertThat(output.get(1).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(1),new LongWritable(1))));
+        assertThat(output.get(1).getSecond(), equalTo(edge2));
+
+    }
+
+    @Test
+    public void shouldOutputAsPropertiesWithVeryLongPropValueAndLargeNodeSize() throws Exception {
+        driver.setConfiguration(populateConfigWithMetaDataWithLargeNodeSize());
+        output = driver.withInputKey(new Text("1")).withInputValue(new Text("B C Prop1 Prop2 AndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrectAndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrect")).run();
+
+        FullEdgePropertiesWritable edge1 = new FullEdgePropertiesWritable(1, "B", "C", 0, 0, -1, -1, 5, "Prop1");
+        edge1.add(6, "Prop2", 0);
+        FullEdgePropertiesWritable edge2 = new FullEdgePropertiesWritable(1, "B", "C", 1, 2, -1, -1, 7, "AndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrectAndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrect");
+
+        assertThat(output.size(), is(2));
+
+        assertThat(output.get(0).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(1),new LongWritable(0))));
+        assertThat(output.get(0).getSecond(), equalTo(edge1));
+        assertThat(output.get(1).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(1),new LongWritable(1))));
+        assertThat(output.get(1).getSecond(), equalTo(edge2));
+
+    }
+
+    @Test
+    public void shouldOutputAsPropertiesWithVeryLongPropValueAndLargeNodeSizeAndHighEdgeId() throws Exception {
+        driver.setConfiguration(populateConfigWithMetaDataWithLargeNodeSize());
+        output = driver.withInputKey(new Text("700000000")).withInputValue(new Text("B C Prop1 Prop2 AndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrectAndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrect")).run();
+
+        FullEdgePropertiesWritable edge1 = new FullEdgePropertiesWritable(700000000, "B", "C", 0, 0, -1, -1, 5, "Prop1");
+        edge1.add(6, "Prop2", 0);
+        FullEdgePropertiesWritable edge2 = new FullEdgePropertiesWritable(700000000, "B", "C", 1, 2, -1, -1, 7, "AndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrectAndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrect");
+
+        assertThat(output.size(), is(2));
+
+        assertThat(output.get(0).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(700000000L),new LongWritable(0))));
+        assertThat(output.get(0).getSecond(), equalTo(edge1));
+        assertThat(output.get(1).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(700000000L),new LongWritable(1))));
+        assertThat(output.get(1).getSecond(), equalTo(edge2));
+
+    }
+
+    private Configuration populateConfigWithMetaData() {
+        Configuration config = new Configuration();
+        config.setClass(AbstractMetaData.METADATA_CLASS, MetaDataFromConfigImpl.class, MetaData.class);
+        config.setStrings(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_NAMES, "id", "name", "prop3", "prop4", "longprop");
+        config.set(MetaDataFromConfigImpl.METADATA_NODE_ID_NAME, "id");
+        config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "id", Long.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "name", String.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "prop3", String.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "prop4", String.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "longprop", String.class, Object.class);
+        config.setStrings(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_NAMES, "from", "to", "prop1", "prop2", "longprop");
+        config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "from", String.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "to", String.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "prop1", String.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "prop2", String.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "longprop", String.class, Object.class);
+        config.setLong(AbstractMetaData.METADATA_NUMBER_OF_NODES, 0L);
+        config.setLong(AbstractMetaData.METADATA_NUMBER_OF_EDGES, 1L);
+        return config;
+    }
+
+    private Configuration populateConfigWithMetaDataWithLargeNodeSize() {
+        Configuration config = new Configuration();
+        config.setClass(AbstractMetaData.METADATA_CLASS, MetaDataFromConfigImpl.class, MetaData.class);
+        config.setStrings(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_NAMES, "id", "name", "prop3", "prop4", "longprop");
+        config.set(MetaDataFromConfigImpl.METADATA_NODE_ID_NAME, "id");
+        config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "id", Long.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "name", String.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "prop3", String.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "prop4", String.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "longprop", String.class, Object.class);
+        config.setStrings(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_NAMES, "from", "to", "prop1", "prop2", "longprop");
+        config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "from", String.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "to", String.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "prop1", String.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "prop2", String.class, Object.class);
+        config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "longprop", String.class, Object.class);
+        config.setLong(AbstractMetaData.METADATA_NUMBER_OF_NODES, 250000000L);
+        config.setLong(AbstractMetaData.METADATA_NUMBER_OF_EDGES, 1L);
+        return config;
+    }
+
+}
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePreparePropertiesReducerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePreparePropertiesReducerTest.java
new file mode 100644
index 0000000..a2e892f
--- /dev/null
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePreparePropertiesReducerTest.java
@@ -0,0 +1,48 @@
+package nl.waredingen.graphs.neo.mapreduce.properties;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.FullEdgePropertiesWritable;
+
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mrunit.mapreduce.ReduceDriver;
+import org.apache.hadoop.mrunit.types.Pair;
+import org.junit.Before;
+import org.junit.Test;
+
+public class EdgePreparePropertiesReducerTest {
+    private ReduceDriver<AscLongDescLongWritable, FullEdgePropertiesWritable, LongWritable, FullEdgePropertiesWritable> driver;
+    private List<Pair<LongWritable, FullEdgePropertiesWritable>> output;
+
+    @Before
+    public void setUp() throws Exception {
+        driver = new ReduceDriver<AscLongDescLongWritable, FullEdgePropertiesWritable, LongWritable, FullEdgePropertiesWritable>(new EdgePreparePropertiesReducer());
+    }
+
+    @Test
+    public void shouldOutputSurroundingProperties() throws Exception {
+        List<FullEdgePropertiesWritable> values = new ArrayList<FullEdgePropertiesWritable>();
+        values.add(new FullEdgePropertiesWritable(1, "A", "B", 2, 2, -1, -1, 0, "longblahorsomething"));
+        values.add(new FullEdgePropertiesWritable(1, "A", "B", 1, 0, -1, -1, 1, "otherblah"));
+        values.add(new FullEdgePropertiesWritable(1, "A", "B", 0, 0, -1, -1, 2, "blah"));
+        //Unfortunately no multiple keys can be added to this reducer, as is in real life.
+        output = driver.withInputKey(new AscLongDescLongWritable(new LongWritable(1), new LongWritable(3))).withInputValues(values).run();
+
+        assertThat(output.size(), is(3));
+
+        assertThat(output.get(0).getFirst(), equalTo(new LongWritable(1)));
+        assertThat(output.get(0).getSecond(), equalTo(new FullEdgePropertiesWritable(1, "A", "B", 2, 2, 1, -1, 0, "longblahorsomething")));
+        assertThat(output.get(1).getFirst(), equalTo(new LongWritable(1)));
+        assertThat(output.get(1).getSecond(), equalTo(new FullEdgePropertiesWritable(1, "A", "B", 1, 0, 0, 2, 1, "otherblah")));
+        assertThat(output.get(2).getFirst(), equalTo(new LongWritable(1)));
+        assertThat(output.get(2).getSecond(), equalTo(new FullEdgePropertiesWritable(1, "A", "B", 0, 0, -1, 1, 2, "blah")));
+
+    }
+
+}
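The prepare-properties reduce step only has to thread each owner's property records into the doubly linked chain Neo4j expects: values arrive sorted by property index, descending, and every record is given the index of its two neighbours, with -1 marking both chain ends. An illustrative re-implementation of the linking the reducer test asserts; getIndex/setPrev/setNext/getEdgeId are hypothetical accessors standing in for FullEdgePropertiesWritable's real fields:

    // Sketch of the chain threading over one reduce group; accessor names
    // are hypothetical, the linking logic mirrors the expected test output.
    List<FullEdgePropertiesWritable> group = new ArrayList<FullEdgePropertiesWritable>();
    for (FullEdgePropertiesWritable v : values) {
        group.add(WritableUtils.clone(v, conf));  // Hadoop reuses the value instance
    }
    for (int i = 0; i < group.size(); i++) {
        FullEdgePropertiesWritable rec = group.get(i);
        // descending order: the record after me in the list is my predecessor
        rec.setPrev(i + 1 < group.size() ? group.get(i + 1).getIndex() : -1);
        rec.setNext(i > 0 ? group.get(i - 1).getIndex() : -1);
        context.write(new LongWritable(rec.getEdgeId()), rec);  // keyed by owning edge
    }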
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertiesOutputMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertiesOutputMapperTest.java
new file mode 100644
index 0000000..dc74576
--- /dev/null
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertiesOutputMapperTest.java
@@ -0,0 +1,95 @@
+package nl.waredingen.graphs.neo.mapreduce.properties;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
+
+import java.util.List;
+
+import nl.waredingen.graphs.misc.RowNumberJob;
+import nl.waredingen.graphs.neo.mapreduce.input.AbstractMetaData;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.ByteMarkerIdPropIdWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.EdgePropertyOutputCountersAndValueWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.FullEdgePropertiesWritable;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mrunit.mapreduce.MapDriver;
+import org.apache.hadoop.mrunit.types.Pair;
+import org.junit.Before;
+import org.junit.Test;
+
+public class EdgePropertiesOutputMapperTest {
+
+    private MapDriver<LongWritable, FullEdgePropertiesWritable, ByteMarkerIdPropIdWritable, EdgePropertyOutputCountersAndValueWritable> driver;
+    private List<Pair<ByteMarkerIdPropIdWritable, EdgePropertyOutputCountersAndValueWritable>> output;
+
+    @Before
+    public void setUp() throws Exception {
+        driver = new MapDriver<LongWritable, FullEdgePropertiesWritable, ByteMarkerIdPropIdWritable, EdgePropertyOutputCountersAndValueWritable>(new EdgePropertyOutputMapper());
+        Configuration configuration = new Configuration();
+        configuration.setLong(AbstractMetaData.METADATA_NUMBER_OF_NODES, 1);
+        configuration.setInt("mapred.reduce.tasks", 3);
+        driver.setConfiguration(configuration);
+    }
+
+    @Test
+    public void shouldOutputAsProperties() throws Exception {
+        FullEdgePropertiesWritable edge = new FullEdgePropertiesWritable(0, "A", "B", 4, 0, -1, 1, 0, "AProp");
+        edge.add(1, "BProp", 0);
+        output = driver.withInputKey(new LongWritable(0)).withInputValue(edge).run();
+
+        assertThat(output.size(), is(3));
+
+        EdgePropertyOutputCountersAndValueWritable val = new EdgePropertyOutputCountersAndValueWritable();
+        val.setValues(new LongWritable(0), edge);
+
+        assertThat(output.get(0).getFirst(), equalTo(new ByteMarkerIdPropIdWritable(new ByteWritable(RowNumberJob.VALUE_MARKER), new LongWritable(0), new IntWritable(4))));
+        assertThat(output.get(0).getSecond(), equalTo(val));
+
+        val = new EdgePropertyOutputCountersAndValueWritable();
+        val.setValues(EdgePropertyOutputCountersAndValueWritable.EMPTY_ID, EdgePropertyOutputCountersAndValueWritable.EMPTY_VAL);
+        val.setCounter(1, 0, 1);
+
+        assertThat(output.get(1).getFirst(), equalTo(new ByteMarkerIdPropIdWritable(new ByteWritable(RowNumberJob.COUNTER_MARKER), new LongWritable(Long.MIN_VALUE), new IntWritable(Integer.MIN_VALUE))));
+        assertThat(output.get(1).getSecond(), equalTo(val));
+
+        val = new EdgePropertyOutputCountersAndValueWritable();
+        val.setValues(EdgePropertyOutputCountersAndValueWritable.EMPTY_ID, EdgePropertyOutputCountersAndValueWritable.EMPTY_VAL);
+        val.setCounter(2, 0, 1);
+
+        assertThat(output.get(2).getFirst(), equalTo(new ByteMarkerIdPropIdWritable(new ByteWritable(RowNumberJob.COUNTER_MARKER), new LongWritable(Long.MIN_VALUE), new IntWritable(Integer.MIN_VALUE))));
+        assertThat(output.get(2).getSecond(), equalTo(val));
+
+    }
+
+    @Test
+    public void shouldOutputAsPropertieswithPropertyIdAsTheKey() throws Exception {
+        output = driver.withInputKey(new LongWritable(1)).withInputValue(new FullEdgePropertiesWritable(1, "A", "B", 2, 40, 1, 2, 4, "AProp")).run();
+
+        assertThat(output.size(), is(3));
+
+        EdgePropertyOutputCountersAndValueWritable val = new EdgePropertyOutputCountersAndValueWritable();
+        val.setValues(new LongWritable(1), new FullEdgePropertiesWritable(1, "A", "B", 2, 40, 1, 2, 4, "AProp"));
+
+        assertThat(output.get(0).getFirst(), equalTo(new ByteMarkerIdPropIdWritable(new ByteWritable(RowNumberJob.VALUE_MARKER), new LongWritable(1), new IntWritable(2))));
+        assertThat(output.get(0).getSecond(), equalTo(val));
+
+        val = new EdgePropertyOutputCountersAndValueWritable();
+        val.setValues(EdgePropertyOutputCountersAndValueWritable.EMPTY_ID, EdgePropertyOutputCountersAndValueWritable.EMPTY_VAL);
+        val.setCounter(1, 0, 0);
+
+        assertThat(output.get(1).getFirst(), equalTo(new ByteMarkerIdPropIdWritable(new ByteWritable(RowNumberJob.COUNTER_MARKER), new LongWritable(Long.MIN_VALUE), new IntWritable(Integer.MIN_VALUE))));
+        assertThat(output.get(1).getSecond(), equalTo(val));
+
+        val = new EdgePropertyOutputCountersAndValueWritable();
+        val.setValues(EdgePropertyOutputCountersAndValueWritable.EMPTY_ID, EdgePropertyOutputCountersAndValueWritable.EMPTY_VAL);
+        val.setCounter(2, 40, 1);
+
+        assertThat(output.get(2).getFirst(), equalTo(new ByteMarkerIdPropIdWritable(new ByteWritable(RowNumberJob.COUNTER_MARKER), new LongWritable(Long.MIN_VALUE), new IntWritable(Integer.MIN_VALUE))));
+        assertThat(output.get(2).getSecond(), equalTo(val));
+
+    }
+}
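EdgePropertyOutputMapper pairs every real property record with a fan-out of counter records, the same trick RowNumberJob uses to number records globally without funnelling everything through one reducer: for each reducer partition p it reports how many records (and dynamic-store blocks) this record contributed to partitions below p, and the COUNTER_MARKER keys carry Long.MIN_VALUE/Integer.MIN_VALUE components so each reducer has consumed all counters, and therefore knows its global starting offset, before its first real value arrives. A sketch of the fan-out consistent with the two tests above; the partition arithmetic and the 'owner'/'blocks' variables are assumptions, not the mapper's actual code:

    // Sketch of the counter broadcast; 'owner' (the partition the value
    // lands in) and 'blocks' stand in for what the real mapper derives.
    int reducers = context.getNumReduceTasks();
    int owner = (int) (recordId % reducers);  // assumed partitioning of values
    for (int p = 1; p < reducers; p++) {
        EdgePropertyOutputCountersAndValueWritable counter = new EdgePropertyOutputCountersAndValueWritable();
        counter.setValues(EdgePropertyOutputCountersAndValueWritable.EMPTY_ID,
                EdgePropertyOutputCountersAndValueWritable.EMPTY_VAL);
        // reducer p learns what this record adds to partitions 0..p-1
        counter.setCounter(p, owner < p ? blocks : 0, owner < p ? 1 : 0);
        context.write(new ByteMarkerIdPropIdWritable(new ByteWritable(RowNumberJob.COUNTER_MARKER),
                new LongWritable(Long.MIN_VALUE), new IntWritable(Integer.MIN_VALUE)), counter);
    }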
HOP"); + node3.add(6, "2518CT", 0); + FullNodePropertiesWritable node4 = new FullNodePropertiesWritable(2, "384160409", 3, 0, 2, -1, 7, "'S-GRAVENHAGE"); + node4.add(8, "STORT", 0); + assertThat(output.size(), is(4)); + + assertThat(output.get(0).getFirst(), equalTo(new LongWritable(2))); + assertThat(output.get(0).getSecond(), equalTo(node4)); + assertThat(output.get(1).getFirst(), equalTo(new LongWritable(2))); + assertThat(output.get(1).getSecond(), equalTo(node3)); + assertThat(output.get(2).getFirst(), equalTo(new LongWritable(2))); + assertThat(output.get(2).getSecond(), equalTo(node2)); + assertThat(output.get(3).getFirst(), equalTo(new LongWritable(2))); + assertThat(output.get(3).getSecond(), equalTo(node1)); + } + + private Configuration populateConfigWithRealMetaData() { + Configuration conf = new Configuration(); + conf.set(AbstractMetaData.METADATA_NUMBER_OF_NODES, "" + 1); + conf.set(AbstractMetaData.METADATA_NUMBER_OF_EDGES, "" + 0); + + conf.setClass(AbstractMetaData.METADATA_CLASS, MetaDataFromConfigImpl.class, MetaData.class); + + conf.setStrings(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_NAMES, "rekening", "integraalklantnummer", "klantnummer", "cddklasse", "individu_organisatie_code", "naam", "postcode", "woonplaats", "label"); + conf.set(MetaDataFromConfigImpl.METADATA_NODE_ID_NAME, "rekening"); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "rekening", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "integraalklantnummer", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "klantnummer", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "cddklasse", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "individu_organisatie_code", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "naam", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "postcode", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "woonplaats", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "label", String.class, Object.class); + + conf.setStrings(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_NAMES, "from", "to", "netto", "eerste", "laatste", "aantal"); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "from", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "to", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "netto", Long.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "eerste", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "laatste", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "aantal", Long.class, Object.class); + return conf; + } + + +} diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesMapperTest.java index 6673694..f9029ac 100644 --- 
a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesMapperTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesMapperTest.java @@ -6,8 +6,13 @@ import java.util.List; -import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongWritable; +import nl.waredingen.graphs.neo.mapreduce.input.AbstractMetaData; +import nl.waredingen.graphs.neo.mapreduce.input.MetaData; +import nl.waredingen.graphs.neo.mapreduce.input.MetaDataFromConfigImpl; +import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.FullNodePropertiesWritable; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mrunit.mapreduce.MapDriver; @@ -17,43 +22,123 @@ public class NodePreparePropertiesMapperTest { - private MapDriver driver; - private List> output; + private MapDriver driver; + private List> output; @Before public void setUp() throws Exception { - driver = new MapDriver(new NodePreparePropertiesMapper()); + driver = new MapDriver(new NodePreparePropertiesMapper()); } @Test public void shouldOutputAsProperties() throws Exception { - output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("0 A Aname Prop3 Prop4 AndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrect")).run(); + driver.setConfiguration(populateConfigWithMetaData()); + output = driver.withInputKey(new Text("0")).withInputValue(new Text("A Aname Prop3 Prop4 AndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrect")).run(); - assertThat(output.size(), is(5)); + FullNodePropertiesWritable node1 = new FullNodePropertiesWritable(0, "A", 0, 0, -1, -1, 0, "A"); + node1.add(1, "Aname", 0); + FullNodePropertiesWritable node2 = new FullNodePropertiesWritable(0, "A", 1, 0, -1, -1, 2, "Prop3"); + node2.add(3, "Prop4", 0); + FullNodePropertiesWritable node3 = new FullNodePropertiesWritable(0, "A", 2, 1, -1, -1, 4, "AndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrect"); + assertThat(output.size(), is(3)); + assertThat(output.get(0).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(0),new LongWritable(0)))); - assertThat(output.get(0).getSecond(), equalTo(new Text("0 A 0"))); + assertThat(output.get(0).getSecond(), equalTo(node1)); assertThat(output.get(1).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(0),new LongWritable(1)))); - assertThat(output.get(1).getSecond(), equalTo(new Text("1 Aname 0"))); + assertThat(output.get(1).getSecond(), equalTo(node2)); assertThat(output.get(2).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(0),new LongWritable(2)))); - assertThat(output.get(2).getSecond(), equalTo(new Text("2 Prop3 0"))); - assertThat(output.get(3).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(0),new LongWritable(3)))); - assertThat(output.get(3).getSecond(), equalTo(new Text("3 Prop4 0"))); - assertThat(output.get(4).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(0),new LongWritable(4)))); - assertThat(output.get(4).getSecond(), equalTo(new Text("4 AndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrect 1"))); + assertThat(output.get(2).getSecond(), equalTo(node3)); } + @Test public void shouldOutputAsPropertiesWithVeryLongPropValue() throws Exception { - output = 
driver.withInputKey(new LongWritable(0)).withInputValue(new Text("1 Prop1 AndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrectAndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrect")).run(); + output = driver.withInputKey(new Text("1")).withInputValue(new Text("Prop1 AndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrectAndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrect")).run(); + + FullNodePropertiesWritable node1 = new FullNodePropertiesWritable(1, "Prop1", 0, 0, -1, -1, 0, "Prop1"); + node1.add(1, "AndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrectAndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrect", 2); + assertThat(output.size(), is(1)); + + + assertThat(output.get(0).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(1),new LongWritable(0)))); + assertThat(output.get(0).getSecond(), equalTo(node1)); + + } + + private Configuration populateConfigWithMetaData() { + Configuration config = new Configuration(); + config.setClass(AbstractMetaData.METADATA_CLASS, MetaDataFromConfigImpl.class, MetaData.class); + config.setStrings(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_NAMES, "id", "name", "prop3", "prop4", "longprop"); + config.set(MetaDataFromConfigImpl.METADATA_NODE_ID_NAME, "id"); + config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "id", Long.class, Object.class); + config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "name", String.class, Object.class); + config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "prop3", String.class, Object.class); + config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "prop4", String.class, Object.class); + config.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "longprop", String.class, Object.class); + config.setStrings(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_NAMES, "from", "to"); + config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "from", Long.class, Object.class); + config.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "to", Long.class, Object.class); + config.setLong(AbstractMetaData.METADATA_NUMBER_OF_NODES, 1L); + config.setLong(AbstractMetaData.METADATA_NUMBER_OF_EDGES, 0L); + return config; + } + + @Test + public void shouldOutputAsPropertiesWithRealValues() throws Exception { + driver.setConfiguration(populateConfigWithRealMetaData()); + output = driver.withInputKey(new Text("2")).withInputValue(new Text("384160409 1-3WH-21474 000000111509024 A I A.J. HOP 2518CT 'S-GRAVENHAGE STORT")).run(); - assertThat(output.size(), is(2)); + FullNodePropertiesWritable node1 = new FullNodePropertiesWritable(2, "384160409", 0, 0, -1, -1, 0, "384160409"); + node1.add(1, "1-3WH-21474", 0); + FullNodePropertiesWritable node2 = new FullNodePropertiesWritable(2, "384160409", 1, 0, -1, -1, 2, "000000111509024"); + node2.add(3, "A", 0); + node2.add(4, "I", 0); + FullNodePropertiesWritable node3 = new FullNodePropertiesWritable(2, "384160409", 2, 0, -1, -1, 5, "A.J. 
HOP"); + node3.add(6, "2518CT", 0); + FullNodePropertiesWritable node4 = new FullNodePropertiesWritable(2, "384160409", 3, 0, -1, -1, 7, "'S-GRAVENHAGE"); + node4.add(8, "STORT", 0); + assertThat(output.size(), is(4)); - assertThat(output.get(0).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(1),new LongWritable(2)))); - assertThat(output.get(0).getSecond(), equalTo(new Text("0 Prop1 0"))); - assertThat(output.get(1).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(1),new LongWritable(3)))); - assertThat(output.get(1).getSecond(), equalTo(new Text("1 AndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrectAndAPropWithAverryLongvalueBecauseWeKindOfNeedToFindOutIfABlockCountIsPresentAndCorrect 2"))); + assertThat(output.get(0).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(2),new LongWritable(0)))); + assertThat(output.get(0).getSecond(), equalTo(node1)); + assertThat(output.get(1).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(2),new LongWritable(1)))); + assertThat(output.get(1).getSecond(), equalTo(node2)); + assertThat(output.get(2).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(2),new LongWritable(2)))); + assertThat(output.get(2).getSecond(), equalTo(node3)); + assertThat(output.get(3).getFirst(), equalTo(new AscLongDescLongWritable(new LongWritable(2),new LongWritable(3)))); + assertThat(output.get(3).getSecond(), equalTo(node4)); } + + private Configuration populateConfigWithRealMetaData() { + Configuration conf = new Configuration(); + conf.set(AbstractMetaData.METADATA_NUMBER_OF_NODES, "" + 1); + conf.set(AbstractMetaData.METADATA_NUMBER_OF_EDGES, "" + 0); + + conf.setClass(AbstractMetaData.METADATA_CLASS, MetaDataFromConfigImpl.class, MetaData.class); + + conf.setStrings(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_NAMES, "rekening", "integraalklantnummer", "klantnummer", "cddklasse", "individu_organisatie_code", "naam", "postcode", "woonplaats", "label"); + conf.set(MetaDataFromConfigImpl.METADATA_NODE_ID_NAME, "rekening"); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "rekening", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "integraalklantnummer", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "klantnummer", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "cddklasse", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "individu_organisatie_code", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "naam", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "postcode", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "woonplaats", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_NODE_PROPERTY_TYPE_PREFIX + "label", String.class, Object.class); + + conf.setStrings(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_NAMES, "from", "to", "netto", "eerste", "laatste", "aantal"); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "from", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "to", String.class, Object.class); + 
conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "netto", Long.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "eerste", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "laatste", String.class, Object.class); + conf.setClass(MetaDataFromConfigImpl.METADATA_EDGE_PROPERTY_TYPE_PREFIX + "aantal", Long.class, Object.class); + return conf; + } + } diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesReducerTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesReducerTest.java index 4f415f3..6cd3637 100644 --- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesReducerTest.java +++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePreparePropertiesReducerTest.java @@ -4,52 +4,44 @@ import static org.hamcrest.CoreMatchers.is; import static org.junit.Assert.assertThat; -import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; -import nl.waredingen.graphs.neo.mapreduce.AscLongDescLongWritable; -import nl.waredingen.graphs.neo.mapreduce.join.JoinFromEdgesMapper; +import nl.waredingen.graphs.neo.mapreduce.input.writables.AscLongDescLongWritable; +import nl.waredingen.graphs.neo.mapreduce.input.writables.FullNodePropertiesWritable; -import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mrunit.mapreduce.MapDriver; import org.apache.hadoop.mrunit.mapreduce.ReduceDriver; import org.apache.hadoop.mrunit.types.Pair; import org.junit.Before; import org.junit.Test; -import org.neo4j.kernel.impl.nioneo.store.NodeRecord; -import org.neo4j.kernel.impl.nioneo.store.Record; public class NodePreparePropertiesReducerTest { - private ReduceDriver driver; - private List> output; + private ReduceDriver driver; + private List> output; @Before public void setUp() throws Exception { - driver = new ReduceDriver(new NodePreparePropertiesReducer()); + driver = new ReduceDriver(new NodePreparePropertiesReducer()); } @Test public void shouldOutputSurroundingProperties() throws Exception { - List values = new ArrayList(); - values.add(new Text("blah 0")); - values.add(new Text("otherblah 0")); - values.add(new Text("longblahorsomething 2")); + List values = new ArrayList(); + values.add(new FullNodePropertiesWritable(1, "A", 2, 2, -1, -1, 0, "longblahorsomething")); + values.add(new FullNodePropertiesWritable(1, "A", 1, 0, -1, -1, 1, "otherblah")); + values.add(new FullNodePropertiesWritable(1, "A", 0, 0, -1, -1, 2, "blah")); //Unfortunately no multiple keys can be added to this reducer, as is in real life. 
-        //This results in an incorrect relnum in this test
         output = driver.withInputKey(new AscLongDescLongWritable(new LongWritable(1), new LongWritable(3))).withInputValues(values).run();
 
         assertThat(output.size(), is(3));
 
-        assertThat(output.get(0).getFirst(), equalTo(NullWritable.get()));
-        assertThat(output.get(0).getSecond(), equalTo(new Text("1 3 blah 0 3 -1")));
-        assertThat(output.get(1).getFirst(), equalTo(NullWritable.get()));
-        assertThat(output.get(1).getSecond(), equalTo(new Text("1 3 otherblah 0 3 3")));
-        assertThat(output.get(2).getFirst(), equalTo(NullWritable.get()));
-        assertThat(output.get(2).getSecond(), equalTo(new Text("1 3 longblahorsomething 2 -1 3")));
+        assertThat(output.get(0).getFirst(), equalTo(new LongWritable(1)));
+        assertThat(output.get(0).getSecond(), equalTo(new FullNodePropertiesWritable(1, "A", 2, 2, 1, -1, 0, "longblahorsomething")));
+        assertThat(output.get(1).getFirst(), equalTo(new LongWritable(1)));
+        assertThat(output.get(1).getSecond(), equalTo(new FullNodePropertiesWritable(1, "A", 1, 0, 0, 2, 1, "otherblah")));
+        assertThat(output.get(2).getFirst(), equalTo(new LongWritable(1)));
+        assertThat(output.get(2).getSecond(), equalTo(new FullNodePropertiesWritable(1, "A", 0, 0, -1, 1, 2, "blah")));
     }
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertiesOutputMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertiesOutputMapperTest.java
new file mode 100644
index 0000000..f3e263e
--- /dev/null
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertiesOutputMapperTest.java
@@ -0,0 +1,95 @@
+package nl.waredingen.graphs.neo.mapreduce.properties;
+
+import static org.hamcrest.CoreMatchers.equalTo;
+import static org.hamcrest.CoreMatchers.is;
+import static org.junit.Assert.assertThat;
+
+import java.util.List;
+
+import nl.waredingen.graphs.misc.RowNumberJob;
+import nl.waredingen.graphs.neo.mapreduce.input.AbstractMetaData;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.ByteMarkerIdPropIdWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.FullNodePropertiesWritable;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.NodePropertyOutputCountersAndValueWritable;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.ByteWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.LongWritable;
+import org.apache.hadoop.mrunit.mapreduce.MapDriver;
+import org.apache.hadoop.mrunit.types.Pair;
+import org.junit.Before;
+import org.junit.Test;
+
+public class NodePropertiesOutputMapperTest {
+
+    private MapDriver<LongWritable, FullNodePropertiesWritable, ByteMarkerIdPropIdWritable, NodePropertyOutputCountersAndValueWritable> driver;
+    private List<Pair<ByteMarkerIdPropIdWritable, NodePropertyOutputCountersAndValueWritable>> output;
+
+    @Before
+    public void setUp() throws Exception {
+        driver = new MapDriver<LongWritable, FullNodePropertiesWritable, ByteMarkerIdPropIdWritable, NodePropertyOutputCountersAndValueWritable>(new NodePropertyOutputMapper());
+        Configuration configuration = new Configuration();
+        configuration.setLong(AbstractMetaData.METADATA_NUMBER_OF_NODES, 1);
+        configuration.setInt("mapred.reduce.tasks", 3);
+        driver.setConfiguration(configuration);
+    }
+
+    @Test
+    public void shouldOutputAsProperties() throws Exception {
+        FullNodePropertiesWritable node = new FullNodePropertiesWritable(0, "A", 0, 0, -1, 1, 0, "A");
+        node.add(1, "Aname", 0);
+        output = driver.withInputKey(new LongWritable(0)).withInputValue(node).run();
+
+        assertThat(output.size(), is(3));
+
+        NodePropertyOutputCountersAndValueWritable val = new NodePropertyOutputCountersAndValueWritable();
+        val.setValues(new LongWritable(0), node);
+
+        assertThat(output.get(0).getFirst(), equalTo(new ByteMarkerIdPropIdWritable(new ByteWritable(RowNumberJob.VALUE_MARKER), new LongWritable(0), new IntWritable(0))));
+        assertThat(output.get(0).getSecond(), equalTo(val));
+
+        val = new NodePropertyOutputCountersAndValueWritable();
+        val.setValues(NodePropertyOutputCountersAndValueWritable.EMPTY_ID, NodePropertyOutputCountersAndValueWritable.EMPTY_VAL);
+        val.setCounter(1, 0, 1);
+
+        assertThat(output.get(1).getFirst(), equalTo(new ByteMarkerIdPropIdWritable(new ByteWritable(RowNumberJob.COUNTER_MARKER), new LongWritable(Long.MIN_VALUE), new IntWritable(Integer.MIN_VALUE))));
+        assertThat(output.get(1).getSecond(), equalTo(val));
+
+        val = new NodePropertyOutputCountersAndValueWritable();
+        val.setValues(NodePropertyOutputCountersAndValueWritable.EMPTY_ID, NodePropertyOutputCountersAndValueWritable.EMPTY_VAL);
+        val.setCounter(2, 0, 1);
+
+        assertThat(output.get(2).getFirst(), equalTo(new ByteMarkerIdPropIdWritable(new ByteWritable(RowNumberJob.COUNTER_MARKER), new LongWritable(Long.MIN_VALUE), new IntWritable(Integer.MIN_VALUE))));
+        assertThat(output.get(2).getSecond(), equalTo(val));
+
+    }
+
+    @Test
+    public void shouldOutputAsPropertieswithPropertyIdAsTheKey() throws Exception {
+        output = driver.withInputKey(new LongWritable(1)).withInputValue(new FullNodePropertiesWritable(1, "A", 1, 40, 1, 2, 1, "Aname")).run();
+
+        assertThat(output.size(), is(3));
+
+        NodePropertyOutputCountersAndValueWritable val = new NodePropertyOutputCountersAndValueWritable();
+        val.setValues(new LongWritable(1), new FullNodePropertiesWritable(1, "A", 1, 40, 1, 2, 1, "Aname"));
+
+        assertThat(output.get(0).getFirst(), equalTo(new ByteMarkerIdPropIdWritable(new ByteWritable(RowNumberJob.VALUE_MARKER), new LongWritable(1), new IntWritable(1))));
+        assertThat(output.get(0).getSecond(), equalTo(val));
+
+        val = new NodePropertyOutputCountersAndValueWritable();
+        val.setValues(NodePropertyOutputCountersAndValueWritable.EMPTY_ID, NodePropertyOutputCountersAndValueWritable.EMPTY_VAL);
+        val.setCounter(1, 0, 0);
+
+        assertThat(output.get(1).getFirst(), equalTo(new ByteMarkerIdPropIdWritable(new ByteWritable(RowNumberJob.COUNTER_MARKER), new LongWritable(Long.MIN_VALUE), new IntWritable(Integer.MIN_VALUE))));
+        assertThat(output.get(1).getSecond(), equalTo(val));
+
+        val = new NodePropertyOutputCountersAndValueWritable();
+        val.setValues(NodePropertyOutputCountersAndValueWritable.EMPTY_ID, NodePropertyOutputCountersAndValueWritable.EMPTY_VAL);
+        val.setCounter(2, 40, 1);
+
+        assertThat(output.get(2).getFirst(), equalTo(new ByteMarkerIdPropIdWritable(new ByteWritable(RowNumberJob.COUNTER_MARKER), new LongWritable(Long.MIN_VALUE), new IntWritable(Integer.MIN_VALUE))));
+        assertThat(output.get(2).getSecond(), equalTo(val));
+
+    }
+}
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertiesOutputMapperTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertiesOutputMapperTest.java
deleted file mode 100644
index 4e09ccd..0000000
--- a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertiesOutputMapperTest.java
+++ /dev/null
@@ -1,69 +0,0 @@
-package nl.waredingen.graphs.neo.mapreduce.properties;
-
-import static org.hamcrest.CoreMatchers.equalTo;
-import static org.hamcrest.CoreMatchers.is;
-import static org.junit.Assert.assertThat;
-
-import java.util.List;
-
-import nl.waredingen.graphs.misc.RowNumberJob;
-import nl.waredingen.graphs.neo.mapreduce.PureMRNodesAndEdgesJob;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.ByteWritable;
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mrunit.mapreduce.MapDriver;
-import org.apache.hadoop.mrunit.types.Pair;
-import org.junit.Before;
-import org.junit.Test;
-
-public class PropertiesOutputMapperTest {
-
-    private MapDriver<LongWritable, Text, ByteMarkerPropertyIdWritable, PropertyOutputIdBlockcountValueWritable> driver;
-    private List<Pair<ByteMarkerPropertyIdWritable, PropertyOutputIdBlockcountValueWritable>> output;
-
-    @Before
-    public void setUp() throws Exception {
-        driver = new MapDriver<LongWritable, Text, ByteMarkerPropertyIdWritable, PropertyOutputIdBlockcountValueWritable>(new PropertyOutputMapper());
-        Configuration configuration = new Configuration();
-        configuration.setLong(PureMRNodesAndEdgesJob.NUMBEROFROWS_CONFIG, 1);
-        configuration.setInt("mapred.reduce.tasks", 3);
-        driver.setConfiguration(configuration);
-    }
-
-    @Test
-    public void shouldOutputAsProperties() throws Exception {
-        output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("0 0 0 A 0 -1 1")).run();
-
-        assertThat(output.size(), is(1));
-
-        PropertyOutputIdBlockcountValueWritable val = new PropertyOutputIdBlockcountValueWritable();
-        val.setValues(new LongWritable(0), new Text("0 A 0 -1 1"));
-
-        assertThat(output.get(0).getFirst(), equalTo(new ByteMarkerPropertyIdWritable(new ByteWritable(RowNumberJob.VALUE_MARKER), new LongWritable(0))));
-        assertThat(output.get(0).getSecond(), equalTo(val));
-
-    }
-
-    @Test
-    public void shouldOutputAsPropertieswithPropertyIdAsTheKey() throws Exception {
-        output = driver.withInputKey(new LongWritable(0)).withInputValue(new Text("0 1 1 Aname 40 1 2")).run();
-
-        assertThat(output.size(), is(2));
-
-        PropertyOutputIdBlockcountValueWritable val = new PropertyOutputIdBlockcountValueWritable();
-        val.setValues(new LongWritable(1), new Text("1 Aname 40 1 2"));
-
-        assertThat(output.get(0).getFirst(), equalTo(new ByteMarkerPropertyIdWritable(new ByteWritable(RowNumberJob.VALUE_MARKER), new LongWritable(1))));
-        assertThat(output.get(0).getSecond(), equalTo(val));
-
-        val = new PropertyOutputIdBlockcountValueWritable();
-        val.setValues(PropertyOutputIdBlockcountValueWritable.EMPTY_ID, PropertyOutputIdBlockcountValueWritable.EMPTY_STRING);
-        val.setCounter(2, 40);
-
-        assertThat(output.get(1).getFirst(), equalTo(new ByteMarkerPropertyIdWritable(new ByteWritable(RowNumberJob.COUNTER_MARKER), new LongWritable(Long.MIN_VALUE))));
-        assertThat(output.get(1).getSecond(), equalTo(val));
-
-    }
-}
diff --git a/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyListWritableComparatorTest.java b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyListWritableComparatorTest.java
new file mode 100644
index 0000000..f5b4845
--- /dev/null
+++ b/job/src/test/java/nl/waredingen/graphs/neo/mapreduce/properties/PropertyListWritableComparatorTest.java
@@ -0,0 +1,61 @@
+package nl.waredingen.graphs.neo.mapreduce.properties;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+import nl.waredingen.graphs.neo.mapreduce.input.writables.PropertyListWritable;
+
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Partitioner;
+import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
+import org.junit.Test;
+
+public class PropertyListWritableComparatorTest {
+
+    @Test
+    public void testPropertyListWritablesAreEqual() {
+        PropertyListWritable e1 = new PropertyListWritable();
+        e1.setKeys(0,1,2);
+        e1.setValues("prop3", "prop2", "prop0");
+        PropertyListWritable e2 = new PropertyListWritable();
+        e2.setKeys(0,1,2);
+        e2.setValues("prop3", "prop2", "prop0");
+
+        assertTrue(e1.equals(e2));
+        assertEquals(0, e1.compareTo(e2));
+        assertEquals(0, e2.compareTo(e1));
+        assertEquals(0, e1.compareTo(e1));
+        assertEquals(0, e2.compareTo(e2));
+    }
+
+    @Test
+    public void testPropertyListWritablesAreNonEqual() {
+        PropertyListWritable e1 = new PropertyListWritable();
+        e1.setKeys(0,1,2);
+        e1.setValues("prop3", "prop2", "prop0");
+        PropertyListWritable e2 = new PropertyListWritable();
+        e2.setKeys(0,1,2);
+        e2.setValues("prop3", "prop2", "prop1");
+
+        assertFalse(e1.equals(e2));
+        assertEquals(-1, e1.compareTo(e2));
+        assertEquals(1, e2.compareTo(e1));
+        assertEquals(0, e1.compareTo(e1));
+        assertEquals(0, e2.compareTo(e2));
+    }
+
+    @Test
+    @SuppressWarnings({ "rawtypes", "unchecked" })
+    public void testPropertyListWritablesAreEquallyPartitioned() {
+        PropertyListWritable e1 = new PropertyListWritable();
+        e1.setKeys(0,1,2);
+        e1.setValues("prop3", "prop2", "prop0");
+        PropertyListWritable e2 = new PropertyListWritable();
+        e2.setKeys(0,1,2);
+        e2.setValues("prop3", "prop2", "prop0");
+
+        Partitioner p = new HashPartitioner();
+        assertEquals(p.getPartition(e1, new Text(), 50), p.getPartition(e2, new Text(), 50));
+    }
+
+}

From 65a025301713dd6b358c6735c4c5ed262f591d8a Mon Sep 17 00:00:00 2001
From: Kris Geusebroek
Date: Wed, 24 Oct 2012 14:57:32 +0200
Subject: [PATCH 11/11] Fixed issue with propertyOffset

---
 .../EdgePropertyOutputCountersAndValueWritable.java | 12 ++++++------
 .../NodePropertyOutputCountersAndValueWritable.java | 12 ++++++------
 .../properties/EdgePropertyOutputReducer.java       |  2 +-
 .../properties/NodePropertyOutputReducer.java       |  2 +-
 4 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/EdgePropertyOutputCountersAndValueWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/EdgePropertyOutputCountersAndValueWritable.java
index d054115..e497be5 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/EdgePropertyOutputCountersAndValueWritable.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/EdgePropertyOutputCountersAndValueWritable.java
@@ -21,9 +21,9 @@ public class EdgePropertyOutputCountersAndValueWritable implements Writable {
     public void setValues(LongWritable id, FullEdgePropertiesWritable value) {
         this.id = id;
         this.value = value;
-        this.countBlockOffset = 0;
-        this.countIdOffset = 0;
-        this.partition = 0;
+        this.countBlockOffset = -1L;
+        this.countIdOffset = -1L;
+        this.partition = -1;
     }
 
     public void setCounter(int partition, long blockCount, long idOffsetCount) {
@@ -81,9 +81,9 @@ public void readFields(DataInput in) throws IOException {
             countBlockOffset = in.readLong();
             countIdOffset = in.readLong();
         } else {
-            partition = 0;
-            countBlockOffset = 0;
-            countIdOffset = 0;
+            partition = -1;
+            countBlockOffset = -1L;
+            countIdOffset = -1L;
         }
     }
 
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodePropertyOutputCountersAndValueWritable.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodePropertyOutputCountersAndValueWritable.java
index 6332d16..5a95e70 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodePropertyOutputCountersAndValueWritable.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/input/writables/NodePropertyOutputCountersAndValueWritable.java
@@ -21,9 +21,9 @@ public class NodePropertyOutputCountersAndValueWritable implements Writable {
     public void setValues(LongWritable id, FullNodePropertiesWritable value) {
         this.id = id;
         this.value = value;
-        this.countBlockOffset = 0;
-        this.countIdOffset = 0;
-        this.partition = 0;
+        this.countBlockOffset = -1L;
+        this.countIdOffset = -1L;
+        this.partition = -1;
     }
 
     public void setCounter(int partition, long blockCount, long idOffsetCount) {
@@ -81,9 +81,9 @@ public void readFields(DataInput in) throws IOException {
             countBlockOffset = in.readLong();
             countIdOffset = in.readLong();
         } else {
-            partition = 0;
-            countBlockOffset = 0;
-            countIdOffset = 0;
+            partition = -1;
+            countBlockOffset = -1L;
+            countIdOffset = -1L;
         }
     }
 
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertyOutputReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertyOutputReducer.java
index 7cf5344..a60b3cc 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertyOutputReducer.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/EdgePropertyOutputReducer.java
@@ -38,7 +38,7 @@ protected void reduce(ByteMarkerIdPropIdWritable key, Iterable<EdgePropertyOutputCountersAndValueWritable> values,
-        while (itr.hasNext() && (value.getBlockOffset() > 0 || value.getIdOffset() > 0)) {
+        while (itr.hasNext() && value.getBlockOffset() != -1L && value.getIdOffset() != -1L) {
             blockCountOffset += value.getBlockOffset();
             propCountOffset += value.getIdOffset();
             value = itr.next();
diff --git a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertyOutputReducer.java b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertyOutputReducer.java
index ad24285..ecabf3e 100644
--- a/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertyOutputReducer.java
+++ b/job/src/main/java/nl/waredingen/graphs/neo/mapreduce/properties/NodePropertyOutputReducer.java
@@ -38,7 +38,7 @@ protected void reduce(ByteMarkerIdPropIdWritable key, Iterable<NodePropertyOutputCountersAndValueWritable> values,
-        while (itr.hasNext() && (value.getBlockOffset() > 0 || value.getIdOffset() > 0)) {
+        while (itr.hasNext() && value.getBlockOffset() != -1L && value.getIdOffset() != -1L) {
             blockCountOffset += value.getBlockOffset();
             propCountOffset += value.getIdOffset();
             value = itr.next();
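
[Editor's note] PATCH 11/11 replaces the 0 initializer with an explicit -1L sentinel because 0 is itself a legitimate offset: an empty partition reports a block count and id offset of exactly 0, so the old "> 0" guard in the two output reducers could stop accumulating partition counters as soon as it met such a record. The following is a minimal, self-contained sketch of that failure mode; the Counter class and its blockOffset/idOffset fields are hypothetical stand-ins for the *PropertyOutputCountersAndValueWritable writables, not code from this repository.

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class SentinelDemo {

    static final class Counter {
        final long blockOffset; // -1L marks "plain value record, no counter attached"
        final long idOffset;

        Counter(long blockOffset, long idOffset) {
            this.blockOffset = blockOffset;
            this.idOffset = idOffset;
        }
    }

    // Mirrors the old reducer guard: any non-positive offset ends the scan,
    // so a counter record whose legitimate offset is 0 terminates it too early.
    // Assumes at least one record, as the reducers do; sums only blockOffset
    // for brevity (the real reducers accumulate both offsets).
    static long sumOld(List<Counter> records) {
        Iterator<Counter> itr = records.iterator();
        Counter value = itr.next();
        long total = 0;
        while (itr.hasNext() && (value.blockOffset > 0 || value.idOffset > 0)) {
            total += value.blockOffset;
            value = itr.next();
        }
        return total;
    }

    // Mirrors the fixed guard: only the explicit -1L sentinel stops the scan,
    // so counter records carrying a genuine 0 offset are still consumed.
    static long sumFixed(List<Counter> records) {
        Iterator<Counter> itr = records.iterator();
        Counter value = itr.next();
        long total = 0;
        while (itr.hasNext() && value.blockOffset != -1L && value.idOffset != -1L) {
            total += value.blockOffset;
            value = itr.next();
        }
        return total;
    }

    public static void main(String[] args) {
        // Two counter records from earlier partitions (the first from an empty
        // partition, whose offsets really are 0), then the value record itself,
        // marked with the -1L sentinel.
        List<Counter> records = Arrays.asList(
                new Counter(0L, 0L),
                new Counter(5L, 3L),
                new Counter(-1L, -1L));
        System.out.println(sumOld(records));   // prints 0: stops at the empty partition
        System.out.println(sumFixed(records)); // prints 5: accumulates both counters
    }
}

Under these assumptions the old guard under-counts the property offset whenever any preceding partition is empty, while the fixed comparison lets zero-offset counters pass through and still lets the value record terminate the scan.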