Storm+Kafka+Reids WordCount代码示例

一:简介

Kafka作为消息源Spout,Redis作为Bolt存储实时计算的结果。

二:启动zookeeper、Kafka服务、Redis服务

<code># 启动redis
redis-sever

# 启动zookeeper
./zkServer.sh start

# 启动Kafka
sudo ./bin/kafka-server-start /usr/local/etc/kafka/server.properties

# 创建test主题
./bin/kafka-topics --create --zookeeper localhost:2181 --partitions 1 --replication-factor 1 --topic test

# 生产者控制台
./bin/kafka-console-producer --broker-list localhost:9092 --topic test
/<code>

三:示例

Storm+Kafka+Reids WordCount代码示例

1. pom.xm

<code><dependency>
<groupid>org.apache.storm/<groupid>
<artifactid>storm-redis/<artifactid>
<version>2.1.0/<version>
/<dependency>

<dependency>
<groupid>org.apache.storm/<groupid>
<artifactid>storm-kafka-client/<artifactid>
<version>2.1.0/<version>
/<dependency>

<dependency>
<groupid>org.apache.kafka/<groupid>
<artifactid>kafka_2.13/<artifactid>
<version>2.4.0/<version>
<exclusions>
<exclusion>
<groupid>org.apache.zookeeper/<groupid>
<artifactid>zookeeper/<artifactid>
/<exclusion>
<exclusion>
<groupid>log4j/<groupid>
<artifactid>log4j/<artifactid>
/<exclusion>
/<exclusions>
/<dependency>

<dependency>
<groupid>org.apache.storm/<groupid>
<artifactid>storm-core/<artifactid>
<version>2.1.0/<version>
/<dependency>

<dependency>
<groupid>org.projectlombok/<groupid>
<artifactid>lombok/<artifactid>
<version>1.18.12/<version>
/<dependency>

/<code>

2. SplitSentenceBolt

<code>import lombok.extern.slf4j.Slf4j;
import org.apache.storm.task.OutputCollector;

import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.Map;

/**
* 将句子分隔成单词
*/
@Slf4j
public class SplitSentenceBolt extends BaseRichBolt {
private OutputCollector collector;

@Override
public void prepare(Map<string> topoConf, TopologyContext context, OutputCollector collector) {
this.collector = collector;
}

@Override
public void execute(Tuple input) {
try {
String sentence = input.getStringByField("sentence");
String[] words = sentence.split(" ");

// 将每个单词流向到下一个Bolt
for (String word : words) {
// 发射时携带发射过来的input
collector.emit(input, new Values(word));
}

// 处理成功了给当前tuple做一个成功的标记,调用上游的ack方法
collector.ack(input);
} catch (Exception e) {
log.error("SplitSentenceBolt#execute exception", e);
// 异常做一个失败的标记,调用上游的fail方法
collector.fail(input);
}
}

@Override

public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("word"));
}
}
/<string>/<code>

3. WordCountBolt

<code>import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.tuple.Values;

import java.util.HashMap;
import java.util.Map;

public class WordCountBolt extends BaseRichBolt {
private OutputCollector collector;
private Map<string> wordCountMap = null;


/**
* 大部分示例变量通常在prepare中进行实例化
* @param topoConf
* @param context
* @param collector
*/
@Override
public void prepare(Map<string> topoConf, TopologyContext context, OutputCollector collector) {
this.collector = collector;
this.wordCountMap = new HashMap<>();
}

@Override
public void execute(Tuple input) {
String word = input.getStringByField("word");
Long count = wordCountMap.get(word);
if (count == null) {
count = 0L;
}
count++;
wordCountMap.put(word, count);

collector.emit(new Values(word, count));
collector.ack(input);
}


@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
declarer.declare(new Fields("word", "count"));
}
}
/<string>/<string>/<code>

4. WriteRedisBolt

<code>import org.apache.storm.redis.bolt.AbstractRedisBolt;
import org.apache.storm.redis.common.config.JedisPoolConfig;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Tuple;
import redis.clients.jedis.JedisCommands;

public class WriteRedisBolt extends AbstractRedisBolt {

public WriteRedisBolt(JedisPoolConfig config) {
super(config);
}


@Override
protected void process(Tuple tuple) {
String word = tuple.getStringByField("word");
Long count = tuple.getLongByField("count");

JedisCommands jedisCommands = getInstance();
jedisCommands.hset("wordcount", word, count.toString());
}

@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {

}
}
/<code>

5. WordCountTopology

<code>import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.StormSubmitter;
import org.apache.storm.generated.StormTopology;
import org.apache.storm.kafka.spout.ByTopicRecordTranslator;
import org.apache.storm.kafka.spout.KafkaSpout;
import org.apache.storm.kafka.spout.KafkaSpoutConfig;
import org.apache.storm.redis.common.config.JedisPoolConfig;

import org.apache.storm.topology.TopologyBuilder;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.example.demo.bolt.SplitSentenceBolt;
import org.example.demo.bolt.WordCountBolt;
import org.example.demo.bolt.WriteRedisBolt;


public class WordCountTopology {

public static void main(String[] args) throws Exception {
// Redis配置
JedisPoolConfig jedisPoolConfig = new JedisPoolConfig.Builder()
.setHost("127.0.0.1")
.setPort(6379)
.setPassword("123456")
.setTimeout(3000)
.build();

String topic = "test";
// 该类将传入的kafka记录转换为storm的tuple
ByTopicRecordTranslator<string> brt = new ByTopicRecordTranslator<>(
(r) -> new Values(r.value(), r.topic()),
new Fields("sentence", topic));
// 设置要消费的topic
brt.forTopic(topic, (r) -> new Values(r.value(), r.topic()), new Fields("sentence", topic));

KafkaSpoutConfig<string> kafkaSpoutConfig = KafkaSpoutConfig
.builder("localhost:9092", topic)
.setProp(ConsumerConfig.GROUP_ID_CONFIG, "test-group")
.setRecordTranslator(brt)
.build();
KafkaSpout<string> kafkaSpout = new KafkaSpout<>(kafkaSpoutConfig);

TopologyBuilder builder = new TopologyBuilder();
builder.setSpout("kafka-spout", kafkaSpout);
builder.setBolt("split-bolt", new SplitSentenceBolt()).shuffleGrouping("kafka-spout");
builder.setBolt("word-count-bolt", new WordCountBolt()).shuffleGrouping("split-bolt");
builder.setBolt("write-redis-bolt", new WriteRedisBolt(jedisPoolConfig)).globalGrouping("word-count-bolt");
StormTopology topology = builder.createTopology();

Config config = new Config();
if (args == null || args.length == 0) {
// 本地模式
config.setDebug(true);
LocalCluster cluster = new LocalCluster();
cluster.submitTopology("WordCountTopology", config, topology);
} else {

// 集群模式
StormSubmitter.submitTopology(args[0],config,builder.createTopology());
}
}
}
/<string>/<string>/<string>/<code>

6. 运行 WordCountTopology#main

7. Kafka生产消息

Storm+Kafka+Reids WordCount代码示例

本号主要用于发布工作中比较常用实用的技术,更侧重于干货,如有需要请关注本账号,本号将持续发布干货文章。


分享到:


相關文章: