I have a functioning Flume / Spark Streaming app when running with only 1 receiver, but if I add more receivers, the app hangs. The streaming UI shows that records are received, but the number of waiting batches stays at 1 forever, and the records never get processed. My code:
// Number of parallel Flume polling receivers to create, taken from config.
int numStreams = Integer.parseInt(props.getProperty("numReceivers"));

// One polling stream per configured Spark-sink address; each stream runs
// its own receiver on the cluster.
// NOTE(review): every receiver permanently occupies one executor core. A
// hang when going from 1 to N receivers is the classic symptom of having
// fewer total cores than (receivers + 1) — e.g. master "local[2]" with two
// receivers leaves no core for batch processing. Verify the core count
// exceeds numStreams; this snippet cannot show that setting.
List<JavaDStream<SparkFlumeEvent>> pollingStreams =
        new java.util.ArrayList<JavaDStream<SparkFlumeEvent>>(numStreams);
for (int idx = 0; idx < numStreams; idx++) {
    pollingStreams.add(
            FlumeUtils.createPollingStream(
                    jssc,
                    sparkSinkAddresses[idx].getHostName(),
                    sparkSinkAddresses[idx].getPort(),
                    StorageLevel.MEMORY_AND_DISK_SER()));
}

// Merge all receiver streams into a single DStream for downstream processing.
JavaDStream<SparkFlumeEvent> flumeStream =
        jssc.union(pollingStreams.get(0), pollingStreams.subList(1, pollingStreams.size()));

// NOTE(review): count() is a lazy transformation; with its result unused it
// registers no output operation and has no observable effect here.
flumeStream.count();
// Output operation: for each micro-batch, log the body of every Flume event.
// FIX 1: foreachRDD replaces the deprecated DStream.foreach.
// FIX 2: decode only the readable slice of the body ByteBuffer —
//        ByteBuffer.array() returns the entire backing array, which can be
//        larger than the payload (position/limit are ignored), and the
//        original also relied on the platform-default charset.
flumeStream.foreachRDD(new Function<JavaRDD<SparkFlumeEvent>, Void>() {
    private static final long serialVersionUID = 2292859738871422941L;

    @Override
    public Void call(JavaRDD<SparkFlumeEvent> eventsRDD) throws Exception {
        // collect() pulls the whole batch to the driver; fine for logging
        // small batches, but will not scale to large ones.
        List<SparkFlumeEvent> eventsList = eventsRDD.collect();
        for (SparkFlumeEvent flumeEvent : eventsList) {
            AvroFlumeEvent avroEvent = flumeEvent.event();
            ByteBuffer bytePayload = avroEvent.getBody();
            // Copy exactly the remaining bytes; duplicate() leaves the
            // original buffer's position untouched in case it is reused.
            byte[] bodyBytes = new byte[bytePayload.remaining()];
            bytePayload.duplicate().get(bodyBytes);
            String message = new String(bodyBytes, "UTF-8");
            LOGGER.info("MESSAGE = " + message);
        }
        return null;
    }
});