4.2 Map

最后更新于:2022-04-01 20:43:16

## MapReduce - Mapper 主要是读取InputSplit的每一个Key,Value对并进行处理 ~~~ public class Mapper { /** * 预处理,仅在map task启动时运行一次 */ protected void setup(Context context) throws IOException, InterruptedException { } /** * 对于InputSplit中的每一对都会运行一次 */ @SuppressWarnings("unchecked") protected void map(KEYIN key, VALUEIN value, Context context) throws IOException, InterruptedException { context.write((KEYOUT) key, (VALUEOUT) value); } /** * 扫尾工作,比如关闭流等 */ protected void cleanup(Context context) throws IOException, InterruptedException { } /** * map task的驱动器 */ public void run(Context context) throws IOException, InterruptedException { setup(context); while (context.nextKeyValue()) { map(context.getCurrentKey(), context.getCurrentValue(), context); } cleanup(context); } } public class MapContext extends TaskInputOutputContext { private RecordReader reader; private InputSplit split; /** * Get the input split for this map. */ public InputSplit getInputSplit() { return split; } @Override public KEYIN getCurrentKey() throws IOException, InterruptedException { return reader.getCurrentKey(); } @Override public VALUEIN getCurrentValue() throws IOException, InterruptedException { return reader.getCurrentValue(); } @Override public boolean nextKeyValue() throws IOException, InterruptedException { return reader.nextKeyValue(); } } ~~~
';