Foro Formación Hadoop
Curso Online Desarrollador Hadoop
Ejemplo del WordCount en MapReduce:
1
2
3 import java.io.IOException;
4 import java.util.*;
5
6 import org.apache.hadoop.fs.Path;
7 import org.apache.hadoop.conf.*;
8 import org.apache.hadoop.io.*;
9 import org.apache.hadoop.mapreduce.*;
10 import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
11 import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
12 import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
13 import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
14
15 public class WordCount {
16
17 public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
18 private final static IntWritable one = new IntWritable(1);
19 private Text word = new Text();
20
21 public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
22 String line = value.toString();
23 StringTokenizer tokenizer = new StringTokenizer(line);
24 while (tokenizer.hasMoreTokens()) {
25 word.set(tokenizer.nextToken());
26 context.write(word, one);
27 }
28 }
29 }
30
31 public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
32
33 public void reduce(Text key, Iterable<IntWritable> values, Context context)
34 throws IOException, InterruptedException {
35 int sum = 0;
36 for (IntWritable val : values) {
37 sum += val.get();
38 }
39 context.write(key, new IntWritable(sum));
40 }
41 }
42
43 public static void main(String[] args) throws Exception {
44 Configuration conf = new Configuration();
45
46 Job job = new Job(conf, "wordcount");
47
48 job.setOutputKeyClass(Text.class);
49 job.setOutputValueClass(IntWritable.class);
50
51 job.setMapperClass(Map.class);
52 job.setReducerClass(Reduce.class);
53
54 job.setInputFormatClass(TextInputFormat.class);
55 job.setOutputFormatClass(TextOutputFormat.class);
56
57 FileInputFormat.addInputPath(job, new Path(args[0]));
58 FileOutputFormat.setOutputPath(job, new Path(args[1]));
59
60 job.waitForCompletion(true);
61 }
62
63 }
Redes sociales