Flink清理状态异常排查

2020-09-03  本文已影响0人  Jorvi

1. 异常信息

Exception in thread "main" org.apache.flink.runtime.client.JobExecutionException: Job execution failed.
    at org.apache.flink.runtime.jobmaster.JobResult.toJobExecutionResult(JobResult.java:146)
    at org.apache.flink.runtime.minicluster.MiniCluster.executeJobBlocking(MiniCluster.java:638)
    at org.apache.flink.streaming.api.environment.LocalStreamEnvironment.execute(LocalStreamEnvironment.java:123)
    at org.apache.flink.streaming.api.environment.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.java:1509)
    at org.apache.flink.streaming.api.scala.StreamExecutionEnvironment.execute(StreamExecutionEnvironment.scala:645)
    at org.learn.StateWordCount$.main(StateWordCount.scala:50)
    at org.learn.StateWordCount.main(StateWordCount.scala)
Caused by: TimerException{java.util.ConcurrentModificationException}
    at org.apache.flink.streaming.runtime.tasks.SystemProcessingTimeService$TriggerTask.run(SystemProcessingTimeService.java:288)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
    at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Caused by: java.util.ConcurrentModificationException
    at java.util.HashMap$HashIterator.nextNode(HashMap.java:1442)
    at java.util.HashMap$KeyIterator.next(HashMap.java:1466)
    at org.learn.function.WordCountProcessFunction.onTimer(WordCountProcessFunction.scala:43)
    at org.apache.flink.streaming.api.operators.KeyedProcessOperator.invokeUserFunction(KeyedProcessOperator.java:94)
    at org.apache.flink.streaming.api.operators.KeyedProcessOperator.onProcessingTime(KeyedProcessOperator.java:78)
    at org.apache.flink.streaming.api.operators.InternalTimerServiceImpl.onProcessingTime(InternalTimerServiceImpl.java:239)
    at org.apache.flink.streaming.runtime.tasks.SystemProcessingTimeService$TriggerTask.run(SystemProcessingTimeService.java:285)
    ... 7 more
    

报错位置是 org.learn.function.WordCountProcessFunction.onTimer(WordCountProcessFunction.scala:43)

报错原因是 java.util.ConcurrentModificationException。

2. 代码

package org.learn.function

import org.apache.flink.api.common.state.{MapState, MapStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.util.Collector

/**
 * Keyed word-count function used in this write-up to reproduce a
 * `java.util.ConcurrentModificationException` raised from `onTimer`.
 *
 * Input and output element types are both `(String, Int)` and the key type is
 * `String`. The function keeps a running `(word, count)` entry in `mapState`
 * and records timer timestamps in `timerState`. Nothing is ever written to the
 * `Collector` in the code shown here.
 *
 * NOTE(review): the removal loop in `onTimer` is intentionally left in its
 * failing form, because the stack trace and the analysis around this listing
 * refer to it.
 */
class WordCountProcessFunction extends KeyedProcessFunction[String, (String, Int), (String, Int)] {

  // Running (word, count) entries, looked up by word.
  private var mapState: MapState[String, (String, Int)] = _
  // Timer timestamps recorded by processElement; key and value are the same timestamp.
  private var timerState: MapState[Long, Long] = _

  /**
   * Builds the two map-state descriptors and obtains the state handles from
   * the runtime context.
   *
   * @param parameters configuration passed in by the runtime; not read here
   */
  override def open(parameters: Configuration): Unit = {
    var mapStateDesc = new MapStateDescriptor[String, (String, Int)]("valueStateDesc", classOf[String], classOf[(String, Int)])
    mapState = getRuntimeContext.getMapState(mapStateDesc)

    val timerStateDesc = new MapStateDescriptor[Long, Long]("timerStateDesc", classOf[Long], classOf[Long])
    timerState = getRuntimeContext.getMapState(timerStateDesc)
  }

  /**
   * Adds `value._2` to the count stored for the word `value._1`.
   *
   * When no entry exists yet for the word, the count starts at 0, a
   * processing-time timer is registered, and two timestamps are recorded in
   * `timerState`.
   *
   * @param value the incoming `(word, increment)` pair
   * @param ctx   context used to reach the timer service
   * @param out   output collector; unused in this method
   */
  override def processElement(value: (String, Int), ctx: KeyedProcessFunction[String, (String, Int), (String, Int)]#Context, out: Collector[(String, Int)]): Unit = {

    var currentState: (String, Int) = mapState.get(value._1)
    if (null == currentState) {
      currentState = (value._1, 0)

      // TTL time
      val ttlTime: Long = System.currentTimeMillis() - 30 * 1000 // set to a time in the past
      // NOTE(review): the timestamp is already 30 s in the past, presumably so
      // the timer fires right away for the reproduction — confirm.
      ctx.timerService().registerProcessingTimeTimer(ttlTime)
      timerState.put(ttlTime, ttlTime)
      // Second entry, 10 ms earlier; no timer is registered for it. It gives the
      // loop in onTimer more than one key to walk over.
      timerState.put(ttlTime - 10, ttlTime - 10)
    }

    var newState: (String, Int) = (currentState._1, currentState._2 + value._2)
    mapState.put(value._1, newState)
  }

  /**
   * Timer callback: prints the recorded timestamps, removes those earlier than
   * the current wall-clock time from `timerState`, then clears `mapState`.
   *
   * @param timestamp the timestamp of the timer that fired
   * @param ctx       timer context; unused in this method
   * @param out       output collector; unused in this method
   */
  override def onTimer(timestamp: Long, ctx: KeyedProcessFunction[String, (String, Int), (String, Int)]#OnTimerContext, out: Collector[(String, Int)]): Unit = {

    System.out.println("clear..." + " timestamp: " + timestamp + " currentTime: " + System.currentTimeMillis() + " timerState: ")
    val iter = timerState.keys().iterator()
    while (iter.hasNext) {
      // This is the statement the stack trace reports
      // (HashMap$KeyIterator.next -> WordCountProcessFunction.onTimer).
      val key = iter.next()
      System.out.println("key: " + key + " value: " + timerState.get(key))
      if (key < System.currentTimeMillis()) {
        // BUG (kept on purpose for the reproduction): removing through the
        // state object while the key iterator is still in use is what leads to
        // the ConcurrentModificationException on the following iter.next().
        // The fix recommended by this write-up is to call iter.remove() instead.
        timerState.remove(key)
      }
    }

    mapState.clear()
  }
}

堆栈中 WordCountProcessFunction.scala 的第 43 行,对应 onTimer 方法里的这一句:val key = iter.next()

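错误原因:利用迭代器遍历 map 时,如果同时调用 map.remove(Object key) 做移除操作(即代码中的 timerState.remove(key)),就会报 java.util.ConcurrentModificationException 异常。需要注意的是,异常并不是在 remove 那一行抛出的,而是在下一次调用 iter.next() 时抛出:HashMap 的迭代器是 fail-fast 的,每次 next() 都会检查 map 的修改次数(modCount)是否与迭代器创建时记录的 expectedModCount 一致,不一致就抛出该异常,所以堆栈指向的是 iter.next() 这一行。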

改正方法:把循环中的 timerState.remove(key) 换成迭代器自身的 remove 方法 iter.remove()。迭代器在移除元素后会同步更新自己记录的 expectedModCount,因此不会再抛出该异常。

3. 源码

以 HashMap 为例,看看源码。

上一篇下一篇

猜你喜欢

热点阅读