feat(metrics): 初步增加获取运行指标的功能(兼容 Prometheus).

添加兼容 Prometheus 的运行指标收集导出功能, 通过内置 HttpServer 将其导出.
默认关闭.
This commit is contained in:
LamGC 2022-02-17 19:06:28 +08:00
parent beb3fd8280
commit d85ee024cb
Signed by: LamGC
GPG Key ID: 6C5AE2A913941E1D
4 changed files with 105 additions and 0 deletions

View File

@ -23,6 +23,9 @@ dependencies {
implementation("org.telegram:telegrambots-abilities:5.6.0")
implementation("org.telegram:telegrambots:5.6.0")
implementation("io.prometheus:simpleclient:0.15.0")
implementation("io.prometheus:simpleclient_httpserver:0.15.0")
testImplementation(kotlin("test"))
}

View File

@ -60,6 +60,12 @@ internal data class ProxyConfig(
val port: Int = 1080
)
/**
 * Settings for the runtime-metrics exporter.
 *
 * @property enable Whether the metrics server should be started at launch. Off by default.
 * @property port Port handed to the metrics HTTP server. Defaults to `9386`.
 * @property bindAddress Hostname/address handed to the metrics HTTP server, or `null` (the default)
 * when none is configured.
 */
internal data class MetricsConfig(
    val enable: Boolean = false,
    val port: Int = 9386,
    val bindAddress: String? = null,
)
/**
* ScalaBot App 配置.
*
@ -68,6 +74,7 @@ internal data class ProxyConfig(
*/
internal data class AppConfig(
    // Proxy settings; falls back to a default-constructed ProxyConfig.
    val proxy: ProxyConfig = ProxyConfig(),
    // Metrics exporter settings; falls back to a default-constructed MetricsConfig.
    val metrics: MetricsConfig = MetricsConfig(),
)
/**

View File

@ -1,5 +1,6 @@
package net.lamgc.scalabot
import io.prometheus.client.exporter.HTTPServer
import kotlinx.coroutines.runBlocking
import mu.KotlinLogging
import net.lamgc.scalabot.util.registerShutdownHook
@ -18,11 +19,30 @@ fun main(args: Array<String>): Unit = runBlocking {
log.info { "ScalaBot 正在启动中..." }
log.debug { "启动参数: ${args.joinToString(prefix = "[", postfix = "]")}" }
initialFiles()
if (Const.config.metrics.enable) {
startMetricsServer()
}
if (!launcher.launch()) {
exitProcess(1)
}
}
/**
 * Starts the runtime-metrics server.
 *
 * Metrics are exposed in the Prometheus format through the client library's built-in [HTTPServer].
 * The port and hostname come from `Const.config.metrics`; the server uses daemon threads and has
 * `registerShutdownHook()` applied to it right after it is built.
 */
fun startMetricsServer() {
    val metricsConfig = Const.config.metrics
    val httpServer = HTTPServer.Builder()
        .withDaemonThreads(true)
        .withPort(metricsConfig.port)
        // bindAddress may be null; presumably the builder then binds the wildcard address —
        // confirm against HTTPServer.Builder.
        .withHostname(metricsConfig.bindAddress)
        .build()
        .registerShutdownHook()
    log.info { "运行指标服务器已启动. (Port: ${httpServer.port})" }
}
internal class Launcher : AutoCloseable {
companion object {

View File

@ -1,5 +1,8 @@
package net.lamgc.scalabot
import io.prometheus.client.Counter
import io.prometheus.client.Gauge
import io.prometheus.client.Summary
import mu.KotlinLogging
import org.eclipse.aether.artifact.Artifact
import org.telegram.abilitybots.api.bot.AbilityBot
@ -7,6 +10,7 @@ import org.telegram.abilitybots.api.db.DBContext
import org.telegram.abilitybots.api.toggle.BareboneToggle
import org.telegram.abilitybots.api.toggle.DefaultToggle
import org.telegram.telegrambots.bots.DefaultBotOptions
import org.telegram.telegrambots.meta.api.objects.Update
internal class ScalaBot(
name: String,
@ -22,6 +26,51 @@ internal class ScalaBot(
companion object {
    @JvmStatic
    private val log = KotlinLogging.logger { }

    // ------------- Metrics -------------
    // Every collector below lives in the "telegrambots" subsystem and is registered
    // through the builder's no-argument register() call.

    /** Per-bot counter, incremented once for each update passed to `onUpdateReceived`. */
    @JvmStatic
    private val botUpdateCounter = Counter.build()
        .subsystem("telegrambots")
        .name("updates_total")
        .labelNames("bot_name")
        .help("Total number of updates received by all bots.")
        .register()

    /** Per-bot gauge, raised when an update starts processing and lowered when it finishes. */
    @JvmStatic
    private val botUpdateGauge = Gauge.build()
        .subsystem("telegrambots")
        .name("updates_in_progress")
        .labelNames("bot_name")
        .help("Number of updates in process by all bots.")
        .register()

    /** Unlabelled gauge, raised in `onRegister` and lowered in `onClosing`. */
    @JvmStatic
    private val onlineBotGauge = Gauge.build()
        .subsystem("telegrambots")
        .name("bots_online")
        .help("Number of bots Online.")
        .register()

    /** Per-bot summary timing each `onUpdateReceived` call, including the superclass handling. */
    @JvmStatic
    private val updateProcessTime = Summary.build()
        .subsystem("telegrambots")
        .name("update_process_duration_seconds")
        .labelNames("bot_name")
        .help(
            "Time to process update. (This indicator includes the pre-processing of update by TelegrammBots, " +
                    "so it may be different from the actual execution time of ability. " +
                    "It is not recommended to use it as the accurate execution time of ability)"
        )
        .register()

    /** Per-bot counter, incremented when processing an update throws an [Exception]. */
    @JvmStatic
    private val exceptionHandlingCounter = Counter.build()
        .subsystem("telegrambots")
        .name("updates_exception_handling")
        .labelNames("bot_name")
        .help("Number of exceptions during processing.")
        .register()
}
private val extensionLoader = ExtensionLoader(this)
@ -38,4 +87,30 @@ internal class ScalaBot(
}
override fun creatorId(): Long = creatorId
/**
 * Delegates the update to the superclass while recording metrics for this bot:
 * total updates, updates currently in progress, processing duration and thrown exceptions.
 *
 * Any [Exception] raised by the superclass is counted and then rethrown unchanged.
 */
override fun onUpdateReceived(update: Update?) {
    val botName = botUsername

    botUpdateCounter.labels(botName).inc()
    val inProgress = botUpdateGauge.labels(botName)
    inProgress.inc()
    val durationTimer = updateProcessTime.labels(botName).startTimer()

    try {
        super.onUpdateReceived(update)
    } catch (failure: Exception) {
        // Resolved only on failure, so the labelled series is not created until an exception occurs.
        exceptionHandlingCounter.labels(botName).inc()
        throw failure
    } finally {
        // Runs on success and on failure: record the elapsed time, then mark the update as done.
        durationTimer.observeDuration()
        inProgress.dec()
    }
}
/**
 * Runs the superclass registration logic, then counts this bot as online.
 */
override fun onRegister() {
    // The gauge is only raised after super.onRegister() returns, so a registration
    // that throws is never counted.
    super.onRegister()
    onlineBotGauge.inc(1.0)
}
/**
 * Runs the superclass closing logic and removes this bot from the online-bot gauge.
 *
 * The gauge is decremented in `finally`, so it is lowered even when `super.onClosing()`
 * throws; the exception itself still propagates to the caller.
 */
override fun onClosing() {
    try {
        super.onClosing()
    } finally {
        // Same pattern as the in-progress gauge in onUpdateReceived: always undo the increment.
        onlineBotGauge.dec()
    }
}
}