17/10/09 19:40:55 INFO input.FileInputFormat: Total input paths to process : 1 
17/10/09 19:40:55 INFO util.NativeCodeLoader: Loaded the native-hadoop library 
17/10/09 19:40:55 WARN snappy.LoadSnappy: Snappy native library not loaded 
17/10/09 19:40:56 INFO mapred.JobClient: Running job: job_201710090351_0026 
17/10/09 19:40:57 INFO mapred.JobClient: map 0% reduce 0% 
17/10/09 19:41:00 INFO mapred.JobClient: map 100% reduce 0% 
17/10/09 19:41:07 INFO mapred.JobClient: map 100% reduce 33% 
17/10/09 19:41:08 INFO mapred.JobClient: map 100% reduce 100% 
17/10/09 19:41:08 INFO mapred.JobClient: Job complete: job_201710090351_0026 
17/10/09 19:41:08 INFO mapred.JobClient: Counters: 28 
17/10/09 19:41:08 INFO mapred.JobClient: Map-Reduce Framework 
17/10/09 19:41:08 INFO mapred.JobClient:  Spilled Records=0 
17/10/09 19:41:08 INFO mapred.JobClient:  Map output materialized bytes=6 
17/10/09 19:41:08 INFO mapred.JobClient:  Reduce input records=0 
17/10/09 19:41:08 INFO mapred.JobClient:  Virtual memory (bytes) snapshot=3778863104 
17/10/09 19:41:08 INFO mapred.JobClient:  Map input records=8 
17/10/09 19:41:08 INFO mapred.JobClient:  SPLIT_RAW_BYTES=107 
17/10/09 19:41:08 INFO mapred.JobClient:  Map output bytes=0 
17/10/09 19:41:08 INFO mapred.JobClient:  Reduce shuffle bytes=6 
17/10/09 19:41:08 INFO mapred.JobClient:  Physical memory (bytes) snapshot=313819136 
17/10/09 19:41:08 INFO mapred.JobClient:  Reduce input groups=0 
17/10/09 19:41:08 INFO mapred.JobClient:  Combine output records=0 
17/10/09 19:41:08 INFO mapred.JobClient:  Reduce output records=0 
17/10/09 19:41:08 INFO mapred.JobClient:  Map output records=0 
17/10/09 19:41:08 INFO mapred.JobClient:  Combine input records=0 
17/10/09 19:41:08 INFO mapred.JobClient:  CPU time spent (ms)=890 
17/10/09 19:41:08 INFO mapred.JobClient:  Total committed heap usage (bytes)=302514176 
17/10/09 19:41:08 INFO mapred.JobClient: File Input Format Counters 
17/10/09 19:41:08 INFO mapred.JobClient:  Bytes Read=892 
17/10/09 19:41:08 INFO mapred.JobClient: FileSystemCounters 
17/10/09 19:41:08 INFO mapred.JobClient:  HDFS_BYTES_READ=999 
17/10/09 19:41:08 INFO mapred.JobClient:  FILE_BYTES_WRITTEN=109316 
17/10/09 19:41:08 INFO mapred.JobClient:  FILE_BYTES_READ=6 
17/10/09 19:41:08 INFO mapred.JobClient: Job Counters 
17/10/09 19:41:08 INFO mapred.JobClient:  Launched map tasks=1 
17/10/09 19:41:08 INFO mapred.JobClient:  Launched reduce tasks=1 
17/10/09 19:41:08 INFO mapred.JobClient:  SLOTS_MILLIS_REDUCES=8085 
17/10/09 19:41:08 INFO mapred.JobClient:  Total time spent by all reduces waiting after reserving slots (ms)=0 
17/10/09 19:41:08 INFO mapred.JobClient:  SLOTS_MILLIS_MAPS=2769 
17/10/09 19:41:08 INFO mapred.JobClient:  Total time spent by all maps waiting after reserving slots (ms)=0 
17/10/09 19:41:08 INFO mapred.JobClient:  Data-local map tasks=1 
17/10/09 19:41:08 INFO mapred.JobClient: File Output Format Counters 
17/10/09 19:41:08 INFO mapred.JobClient:  Bytes Written=0 

The above shows the logs of the MapReduce job that ran. Here is the Java code. The problem is with the reducer output: the part-r-00000 and _SUCCESS files are empty (0 KB) after the MapReduce job finishes in Hadoop.

package BigData; 
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class BusinessCategoryPA { 

/* 
* Mapper Class 
*/ 
public static class Map extends Mapper<LongWritable, Text, Text, NullWritable>{ 
    private Text businessCategory = new Text();  //Type of Output key 

    /* 
    * Map function that emits a business category as a key and null value as a value 
    */ 
    public void map(LongWritable key, Text value, Context context)throws IOException, InterruptedException{ 
     String[] business = value.toString().split("::"); 
     if(business[1].contains("Palo Alto")){ 
      String businessCategoryList = business[2]; 

      businessCategoryList = businessCategoryList.replace("(", ""); 
      businessCategoryList = businessCategoryList.replace(")", ""); 
      businessCategoryList = businessCategoryList.replace("List", ""); 
      businessCategoryList = businessCategoryList.replace(" ", ""); 
      String[] businessCategoryList1 = businessCategoryList.toString().split(","); 

      for(String item:businessCategoryList1){ 
       businessCategory.set(item); 
       context.write(businessCategory, NullWritable.get()); 
      } 

     } 
    } 
} 

/* 
* Reducer Class 
*/ 
public static class Reduce extends Reducer<Text, NullWritable, Text, NullWritable>{ 
    //private IntWritable outcome = new IntWritable(); 

    /* 
    * Reduce function 
    */ 
    public void reduce(Text key, Iterable<NullWritable> value, Context context) throws IOException, InterruptedException{ 

     context.write(key, NullWritable.get()); 
    } 
} 

/* 
* Driver program 
*/ 
public static void main(String[] args) throws Exception { 

    /* 
    * Configuration of a job 
    */ 
    Configuration conf = new Configuration(); 

    /* 
    * Getting all the arguments 
    */ 
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); 

    if (otherArgs.length != 2) { 
     System.err.println("Usage: BusinessCategoryPA <in> <out>"); 
     System.exit(2); 
    } 

    /* 
    * Create a job with name "BusinessCategoryPA" 
    */ 
    Job job = new Job(conf, "BusinessCategoryPA"); 
    job.setJarByClass(BusinessCategoryPA.class); 
    job.setMapperClass(Map.class); 
    job.setReducerClass(Reduce.class); 

    /* 
    * set output key type 
    */ 
    job.setOutputKeyClass(Text.class); 

    /* 
    * set output value type 
    */ 
    job.setOutputValueClass(NullWritable.class); 

    /* 
    * set the HDFS path of the input data 
    */ 
    FileInputFormat.addInputPath(job, new Path(otherArgs[0])); 

    /* 
    * set the HDFS path for the output 
    */ 
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); 

    /* 
    * Wait till job completion 
    */ 
    System.exit(job.waitForCompletion(true) ? 0 : 1); 


} 
} 

How can I generate a CSV file as output?
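One way to get CSV-shaped output, sketched below only as an assumption (the CsvReduce class and its count column are not part of the original job): TextOutputFormat writes each Text key as a plain text line, so if the reducer builds the comma-separated record itself, the part-r-00000 file already reads as CSV and can simply be copied out with a .csv extension. Note that the output path passed to the job is always a directory, so "/Yelp/output.csv/" names a directory, not a file.

package BigData;

import java.io.IOException;

import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/*
 * Illustrative sketch only: counts how many times each category was emitted
 * by the mapper and writes one "category,count" line per key. TextOutputFormat
 * prints the Text key as-is, so every output line is a two-column CSV record.
 */
public class CsvReduce extends Reducer<Text, NullWritable, Text, NullWritable> {
    @Override
    public void reduce(Text key, Iterable<NullWritable> values, Context context)
            throws IOException, InterruptedException {
        int count = 0;
        for (NullWritable ignored : values) {
            count++;
        }
        context.write(new Text(key.toString() + "," + count), NullWritable.get());
    }
}

If both the key and the value carried real columns, another option would be to change the TextOutputFormat key/value separator from a tab to a comma; on Hadoop 1.x (which the mapred.JobClient log lines suggest) the property is "mapred.textoutputformat.separator", on 2.x it is "mapreduce.output.textoutputformat.separator".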

INPUT FILE: The Business.csv file contains essential information about local businesses. It has the following columns:

"business_id"::"full_address"::"categories" 

'business_id': (a unique identifier for the business) (e.g. HIPGr2gSEN4T73tjz47hpw) 
'full_address': (localized address) (e.g. 1 Palmer Sq E Princeton, NJ 08542) 
'categories': [(localized category names)] (e.g. List(Pubs, Bars, American (Traditional), Nightlife, Restaurants)) 

I used this command to generate the output: hadoop jar '/home/hduser/Downloads/Hadoop/TopTenRatedBusiness.jar' bd.TopTenRatedBusiness /Yelp/input/business.csv /Yelp/output.csv/


Make sure your input does not contain empty strings. – lexicore


I have updated the question. Please help. –


Thanks! It works now. –

Answer


Please check the condition if (business[1].contains("Palo Alto")): verify once more that your input file really contains "Palo Alto" in exactly the same format as you wrote it here.
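A minimal standalone check of that condition, outside Hadoop, could look like the sketch below (the sample line is only a placeholder; paste a real line from your Business.csv). The counters above show Map input records=8 but Map output records=0, so either business[1] never contains "Palo Alto" exactly as written, or the "::" split does not put the address at index 1.

public class ParseCheck {
    public static void main(String[] args) {
        // Hypothetical sample line; replace it with a real line from Business.csv.
        String line = "\"HIPGr2gSEN4T73tjz47hpw\"::\"123 University Ave Palo Alto, CA 94301\"::\"List(Pubs, Bars)\"";
        String[] business = line.split("::");
        System.out.println("number of fields: " + business.length);
        if (business.length > 1) {
            System.out.println("business[1] = " + business[1]);
            System.out.println("contains \"Palo Alto\"? " + business[1].contains("Palo Alto"));
        }
    }
}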