-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmapreduce-mapper.cc
61 lines (57 loc) · 2.33 KB
/
mapreduce-mapper.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
/**
* File: mapreduce-mapper.cc
* -------------------------
* Presents the implementation of the MapReduceMapper class,
* which is charged with the responsibility of pressing a supplied
* input file through the provided executable and then splaying that
* output into a large number of intermediate files such that all keys
* that hash to the same value (modulo the number of hash codes) appear
* in the same intermediate file.
*/
#include "mapreduce-mapper.h"
#include "mr-names.h"
#include "string-utils.h"
#include <vector>
#include <fstream>
#include <iomanip>
#include <sstream>
#include <iostream>
#include <limits>
#include "ostreamlock.h"
using namespace std;
/**
 * Constructor: MapReduceMapper
 * ----------------------------
 * Forwards the server host and port, the working directory, the
 * executable, and the output path to the MapReduceWorker constructor,
 * and records numHashCodes in nHashCodes (used by map() as the modulus
 * that selects an intermediate file for each key).
 */
MapReduceMapper::MapReduceMapper(const string& serverHost,
                                 unsigned short serverPort,
                                 const string& cwd,
                                 const string& executable,
                                 const string& outputPath,
                                 const size_t numHashCodes)
  : MapReduceWorker(serverHost, serverPort, cwd, executable, outputPath),
    nHashCodes(numHashCodes) {
}
void MapReduceMapper::map() const {
while (true) {
string name;
string line;
if (!requestInput(name)) break;
alertServerOfProgress("About to process \"" + name + "\".");
string base = extractBase(name);
string output = outputPath + "/" + changeExtension(base, "input", "mapped");
bool success = processInput(name, output);
ifstream infile (output);
vector<ofstream> mapFileStreams;
for(size_t i = 0; i<nHashCodes; i++){
ofstream out;
mapFileStreams.push_back(move(out));
}
while(infile >> line ) {
size_t hashValue = hash<string>()(line);
size_t belongTo = hashValue % nHashCodes;
const string nameExtension = numberToString(belongTo, 5) + ".mapped";
string output = outputPath + "/" + changeExtension(base, "input", nameExtension);
if(!mapFileStreams[belongTo].is_open()) mapFileStreams[belongTo].open(output, ios::app);
if (!line.empty()) mapFileStreams[belongTo] << line << " 1"<< "\n";
infile.ignore(numeric_limits<streamsize>::max(), '\n');
}
for(ofstream& stream : mapFileStreams) stream.close();
infile.close();
remove(output.c_str());
notifyServer(name, success);
}
alertServerOfProgress("Server says no more input chunks, so shutting down.");
}