
I'm trying to test the GraphOfTheGodsFactory example using Spark/Scala/JanusGraph with HBase as the storage backend, but I'm unable to load the graph into JanusGraph from Spark/Scala.

My code: samplejanusloading.scala

import org.apache.spark.SparkConf 
import org.apache.spark.SparkContext 
import org.apache.spark.rdd.RDD.rddToPairRDDFunctions 
import org.apache.spark.sql.Row 
import org.apache.spark.sql.SQLContext 
import org.apache.spark.sql.types._ 
import org.apache.spark.graphx._ 
import org.apache.tinkerpop.gremlin.groovy.plugin.AbstractGremlinPlugin; 
import org.apache.tinkerpop.gremlin.groovy.plugin.IllegalEnvironmentException; 
import org.apache.tinkerpop.gremlin.groovy.plugin.PluginAcceptor; 
import org.apache.tinkerpop.gremlin.groovy.plugin.PluginInitializationException; 
import org.apache.tinkerpop.gremlin.tinkergraph.process.computer.TinkerGraphComputer; 
import org.apache.tinkerpop.gremlin.tinkergraph.structure.TinkerGraph; 
import org.apache.commons.configuration.ConfigurationUtils; 
import org.apache.commons.configuration.FileConfiguration; 
import org.apache.commons.configuration.PropertiesConfiguration; 
import org.apache.commons.lang3.concurrent.BasicThreadFactory; 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.FileSystem; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.mapreduce.InputFormat; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
import org.apache.spark.HashPartitioner; 
import org.apache.spark.Partitioner; 
import org.apache.spark.SparkConf; 
import org.apache.spark.SparkContext; 
import org.apache.spark.api.java.JavaPairRDD; 
import org.apache.spark.api.java.JavaSparkContext; 
import org.apache.spark.launcher.SparkLauncher; 
import org.apache.spark.serializer.KryoSerializer; 
import org.apache.spark.storage.StorageLevel; 
import org.apache.tinkerpop.gremlin.hadoop.Constants; 
import org.apache.tinkerpop.gremlin.hadoop.process.computer.AbstractHadoopGraphComputer; 
import org.apache.tinkerpop.gremlin.hadoop.process.computer.util.ComputerSubmissionHelper; 
import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopConfiguration; 
import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph; 
import org.apache.tinkerpop.gremlin.hadoop.structure.io.FileSystemStorage; 
import org.apache.tinkerpop.gremlin.hadoop.structure.io.GraphFilterAware; 
import org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopPoolShimService; 
import org.apache.tinkerpop.gremlin.hadoop.structure.io.VertexWritable; 
import org.apache.tinkerpop.gremlin.hadoop.structure.util.ConfUtil; 
import org.apache.tinkerpop.gremlin.process.computer.ComputerResult; 
import org.apache.tinkerpop.gremlin.process.computer.GraphComputer; 
import org.apache.tinkerpop.gremlin.process.computer.MapReduce; 
import org.apache.tinkerpop.gremlin.process.computer.Memory; 
import org.apache.tinkerpop.gremlin.process.computer.VertexProgram; 
import org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult; 
import org.apache.tinkerpop.gremlin.process.computer.util.MapMemory; 
import org.apache.tinkerpop.gremlin.process.traversal.TraversalStrategies; 
import org.apache.tinkerpop.gremlin.process.traversal.util.TraversalInterruptedException; 
import org.apache.tinkerpop.gremlin.spark.process.computer.payload.ViewIncomingPayload; 
import org.apache.tinkerpop.gremlin.spark.process.computer.traversal.strategy.SparkVertexProgramInterceptor; 
import org.apache.tinkerpop.gremlin.spark.process.computer.traversal.strategy.optimization.SparkInterceptorStrategy; 
import org.apache.tinkerpop.gremlin.spark.process.computer.traversal.strategy.optimization.SparkSingleIterationStrategy; 
import org.apache.tinkerpop.gremlin.spark.structure.Spark; 
import org.apache.tinkerpop.gremlin.spark.structure.io.InputFormatRDD; 
import org.apache.tinkerpop.gremlin.spark.structure.io.InputOutputHelper; 
import org.apache.tinkerpop.gremlin.spark.structure.io.InputRDD; 
import org.apache.tinkerpop.gremlin.spark.structure.io.OutputFormatRDD; 
import org.apache.tinkerpop.gremlin.spark.structure.io.OutputRDD; 
import org.apache.tinkerpop.gremlin.spark.structure.io.PersistedInputRDD; 
import org.apache.tinkerpop.gremlin.spark.structure.io.PersistedOutputRDD; 
import org.apache.tinkerpop.gremlin.spark.structure.io.SparkContextStorage; 
import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.GryoRegistrator; 
import org.apache.tinkerpop.gremlin.spark.structure.io.gryo.kryoshim.unshaded.UnshadedKryoShimService; 
import org.apache.tinkerpop.gremlin.structure.Direction; 
import org.apache.tinkerpop.gremlin.structure.io.IoRegistry; 
import org.apache.tinkerpop.gremlin.structure.io.Storage; 
import org.apache.tinkerpop.gremlin.structure.io.gryo.kryoshim.KryoShimServiceLoader; 
import org.janusgraph.core.JanusGraph._; 
import org.janusgraph.core.JanusGraph; 
import org.janusgraph.diskstorage; 
import org.janusgraph.graphdb; 
import org.janusgraph.util; 
import org.janusgraph.core.JanusGraphFactory._; 
import org.janusgraph.core.JanusGraphFactory; 
import org.janusgraph.core.attribute.Geo; 
import org.janusgraph.core.attribute.Geoshape; 
import org.janusgraph.example.GraphOfTheGodsFactory; 
import org.apache.tinkerpop.gremlin.structure.T; 
import org.apache.hadoop.hbase.HBaseConfiguration 
import org.apache.hadoop.hbase.client.{HBaseAdmin,HTable,Put,Get} 
import org.apache.hadoop.hbase.util.Bytes 
import org.apache.log4j.{Level, Logger} 
object samplejanusloading { 
    def main(args: Array[String]) = { 

/* Logger.getLogger("org").setLevel(Level.OFF)  
    Logger.getLogger("akka").setLevel(Level.OFF)*/ 
    //Start the Spark context 
    val conf = new SparkConf() 
     .setAppName("Janusgraph") 
     .setMaster("local") 
    val sc = new SparkContext(conf) 
val sqlcontext = new SQLContext(sc)  


val confg = new HBaseConfiguration() 
val admin = new HBaseAdmin(confg) 



val grap = JanusGraphFactory.open("c:\\janusgraph\\conf\\janusgraph-hbase.properties") 


println("factry open") 
val mgmt = grap.openManagement() 
println("factry opened") 
val name = mgmt.makePropertyKey("name") 
println("first key property ") 

val age = mgmt.makePropertyKey("age").make(); 
val reason = mgmt.makePropertyKey("reason").make(); 
val place = mgmt.makePropertyKey("place").make(); 


println("first edges open") 
mgmt.makeEdgeLabel("father").make() 
println("first edges open") 
     mgmt.makeEdgeLabel("mother").make() 
     mgmt.makeEdgeLabel("battled").make(); 
     // mgmt.buildEdgeIndex(battled, "battlesByTime") 
     mgmt.makeEdgeLabel("lives").make(); 
     mgmt.makeEdgeLabel("pet").make(); 
     mgmt.makeEdgeLabel("brother").make(); 


println("edges open") 
     mgmt.makeVertexLabel("titan").make(); 
     mgmt.makeVertexLabel("location").make(); 
     mgmt.makeVertexLabel("god").make(); 
     mgmt.makeVertexLabel("demigod").make(); 
     mgmt.makeVertexLabel("human").make(); 
     mgmt.makeVertexLabel("monster").make(); 
println("vrtx open") 
     mgmt.commit(); 
     println("cmt open") 
     val tx = grap.newTransaction(); 
val saturn = tx.addVertex(T.label, "titan", "name", "saturn", "age", "100"); 
     val sky = tx.addVertex(T.label, "location", "name", "sky"); 
     val sea = tx.addVertex(T.label, "location", "name", "sea"); 
     val jupiter = tx.addVertex(T.label, "god", "name", "jupiter", "age", "5000"); 
     val neptune = tx.addVertex(T.label, "god", "name", "neptune", "age", "4500"); 
     val hercules = tx.addVertex(T.label, "demigod", "name", "hercules", "age", "30"); 
     val alcmene = tx.addVertex(T.label, "human", "name", "alcmene", "age", "45"); 
     val pluto = tx.addVertex(T.label, "god", "name", "pluto", "age", "4000"); 
     val nemean = tx.addVertex(T.label, "monster", "name", "nemean"); 
     val hydra = tx.addVertex(T.label, "monster", "name", "hydra"); 
     val cerberus = tx.addVertex(T.label, "monster", "name", "cerberus"); 
     val tartarus = tx.addVertex(T.label, "location", "name", "tartarus"); 

     jupiter.addEdge("father", saturn); 
     jupiter.addEdge("lives", sky, "reason", "loves fresh breezes"); 
     jupiter.addEdge("brother", neptune); 
     jupiter.addEdge("brother", pluto); 

     neptune.addEdge("lives", sea).property("reason", "loves waves"); 
     neptune.addEdge("brother", jupiter); 
     neptune.addEdge("brother", pluto); 

     hercules.addEdge("father", jupiter); 
     hercules.addEdge("mother", alcmene); 
     hercules.addEdge("battled", nemean, "time", "1", "place", Geoshape.point(38.1f, 23.7f)); 
     hercules.addEdge("battled", hydra, "time", "2", "place", Geoshape.point(37.7f, 23.9f)); 
     hercules.addEdge("battled", cerberus, "time", "12", "place", Geoshape.point(39f, 22f)); 

     pluto.addEdge("brother", jupiter); 
     pluto.addEdge("brother", neptune); 
     pluto.addEdge("lives", tartarus, "reason", "no fear of death"); 
     pluto.addEdge("pet", cerberus); 

     cerberus.addEdge("lives", tartarus); 
println("done sir"); 

     // commit the transaction to disk 
     tx.commit(); 
tx.close() 




    sc.stop 
    } 
} 

c:\janusgraph\conf\janusgraph-hbase.properties:

storage.backend=hbase 
gremlin.graph=org.janusgraph.core.JanusGraphFactory 
gremlin.graph=org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph 
gremlin.hadoop.graphInputFormat=org.apache.tinkerpop.gremlin.hadoop.structure.io.gryo.GryoInputFormat 
gremlin.hadoop.graphOutputFormat=org.apache.hadoop.mapreduce.lib.output.NullOutputFormat 
gremlin.hadoop.outputLocation=output 
gremlin.hadoop.jarsInDistributedCache=true 
spark.executor.memory=1g 
spark.serializer=org.apache.spark.serializer.KryoSerializer 
storage.hostname=127.0.0.1 
cache.db-cache = true 
cache.db-cache-clean-wait = 20 
cache.db-cache-time = 180000 
cache.db-cache-size = 0.5 

But it errors out at the very second line of the schema-creation code after the graph is opened with JanusGraphFactory,

i.e. val name = mgmt.makePropertyKey("name")

Output: factry open factry opened

And I can see the janusgraph table created in HBase, but only a few graph configuration rows are loaded:

hbase(main):043:0> scan 'janusgraph' 
ROW      COLUMN+CELL                 
configuration    column=s:cache.db-cache, timestamp=1507455998304001, value=\x8F\x01   
configuration    column=s:cache.db-cache-clean-wait, timestamp=1507455998311001, value=\x8C\xA8 
configuration    column=s:cache.db-cache-size, timestamp=1507455998138001, value=\x94?\xE0\x00\x00\x00\x00\x00\x00 
configuration    column=s:cache.db-cache-time, timestamp=1507455998308001, value=\x8D\x80\x00\x00\x00\x00\x02\xBF 
configuration    column=s:graph.janusgraph-version, timestamp=1507455998362001, value=\x92\xA00.1.\xB1 
configuration    column=s:graph.timestamps, timestamp=1507455998395001, value=\xB6\x81 
configuration    column=s:hidden.frozen, timestamp=1507455998404001, value=\x8F\x01 
configuration    column=s:system-registration.c0a8ef013936-Praddy1.startup-time, timestamp=1507456041876001, value=\xC1\x80\x00\x00\x00Y\xD9\xF4)\x06C5L\x80 
1 row(s) in 6.1320 seconds 

hbase(main):044:0> 

The error is:

Exception in thread "main" java.lang.IllegalArgumentException: Need to specify a datatype

But when I try

val name = mgmt.makePropertyKey("name").dataType(String).make(), it doesn't help and throws the same error.

I need your help to understand what I'm doing wrong. Basically I'm trying to develop a Spark program that creates the relationships and saves them to JanusGraph on my local machine.

Answer:

You need to use classOf in Scala (reference):

val name = mgmt.makePropertyKey("name").dataType(classOf[String]).make() 
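For reference, here is a sketch of the schema-definition part of the question's code with that fix applied to every property key. The data types are assumptions based on how the rest of the code uses the keys (age and reason are passed as strings, place as a Geoshape), so adjust them if you want to store different types:

val mgmt = grap.openManagement() 
// every property key needs an explicit data type, passed via classOf in Scala 
val name = mgmt.makePropertyKey("name").dataType(classOf[String]).make() 
val age = mgmt.makePropertyKey("age").dataType(classOf[String]).make()       // the code passes age as "100", "5000", ... 
val reason = mgmt.makePropertyKey("reason").dataType(classOf[String]).make() 
val place = mgmt.makePropertyKey("place").dataType(classOf[Geoshape]).make() // Geoshape.point(...) is used for "place" 
// edge and vertex labels can stay as they are 
mgmt.makeEdgeLabel("father").make() 
mgmt.makeEdgeLabel("mother").make() 
// ... remaining labels unchanged ... 
mgmt.commit() 

With the data types declared, mgmt.commit() should go through and the vertex/edge loading code below it can run unchanged.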

I'll also note that you have a lot of extraneous properties in your janusgraph-hbase.properties, including two conflicting gremlin.graph entries and Hadoop/Spark OLAP settings that JanusGraphFactory does not use. You should start with a minimal set of properties similar to those found in the properties files shipped with the distribution:

gremlin.graph=org.janusgraph.core.JanusGraphFactory 
storage.backend=hbase 
storage.hostname=127.0.0.1 
storage.hbase.table=janusgraph 
cache.db-cache=true 
cache.db-cache-clean-wait=20 
cache.db-cache-time=180000 
cache.db-cache-size=0.5
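Once the property keys are created correctly and the transaction commits, a quick sanity check (hypothetical, reusing the same properties path as in the question) is to reopen the graph and count the vertices:

val g = JanusGraphFactory.open("c:\\janusgraph\\conf\\janusgraph-hbase.properties") 
// the question's code adds 12 vertices, so this should print 12 once the load succeeds 
println(g.traversal().V().count().next()) 
g.close()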