Compare commits

..

10 commits

Author SHA1 Message Date
f62623af38
chore: Bump version to 1.0.3
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 11:49:20 +02:00
2c78fccd1e
doc: Add T-Rex API security
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 11:47:12 +02:00
15f3746f2e
chore: Move logging to info level
- Extracted values are set to info level
- Full API response is still set to debug level

Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 11:46:15 +02:00
9af285c49d
chore: Bump version to 1.0.2
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 11:32:03 +02:00
6f09301879
feat: Add GPU ID to temperatures
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 11:30:39 +02:00
98483e3279
fix: Typo in memory temperature management (#1)
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 11:26:52 +02:00
04446c9329
doc: Fix typo in usage
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 09:44:15 +02:00
7c5a197872
chore: Bump to 1.0.1
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 01:30:48 +02:00
4943442135
feat: Add memory temperature checks
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 01:29:54 +02:00
16c0f33b9e
doc: Add NRPE example
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 01:22:51 +02:00
3 changed files with 52 additions and 20 deletions

View file

@ -14,7 +14,3 @@ repos:
rev: 22.3.0 rev: 22.3.0
hooks: hooks:
- id: black - id: black
- repo: https://github.com/pycqa/isort
rev: 5.10.1
hooks:
- id: isort

View file

@ -2,6 +2,22 @@
Nagios check for [T-Rex miner](https://github.com/trexminer/T-Rex). Nagios check for [T-Rex miner](https://github.com/trexminer/T-Rex).
# Security
The T-Rex API must be exposed in a secure way:
* `--api-read-only`: the API is accessible in read-only mode only; no modifications are allowed
* `--api-bind-http 127.0.0.1:4067`: (default) accessible only to local connections
If the check is executed **remotely**, you should add a **firewall rule** to allow only the host running the check to
access the T-Rex API port.
**HTTPS** should be used:
* `--api-https`
* `--api-webserver-cert`
* `--api-webserver-pkey`
See full [list of options](https://github.com/trexminer/T-Rex#usage).
# Installation # Installation
Using pip: Using pip:
@ -21,7 +37,15 @@ sudo apt-get install python3-nagiosplugin python3-requests
# Usage # Usage
``` ```
./check_trex --help ./check_trex.py --help
```
# Examples
Nagios NRPE:
```
command[check_trex]=/opt/check_trex/check_trex.py --hashrate-warning 60000000 --hashrate-critical 50000000 --uptime-critical 300 --uptime-warning 600
``` ```
# Contributing # Contributing

View file

@ -5,8 +5,15 @@ import logging
import sys import sys
import requests import requests
from nagiosplugin import (Check, Context, Metric, Performance, Resource, from nagiosplugin import (
ScalarContext, Summary) Check,
Context,
Metric,
Performance,
Resource,
ScalarContext,
Summary,
)
from nagiosplugin.state import Critical, Ok, Unknown, Warn from nagiosplugin.state import Critical, Ok, Unknown, Warn
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -128,7 +135,7 @@ def setup_logging(args):
def show_version(): def show_version():
print("1.0.0") print("1.0.3")
class BelowThresholdContext(Context): class BelowThresholdContext(Context):
@ -195,29 +202,29 @@ class Trex(Resource):
if "hashrate" in data: if "hashrate" in data:
hashrate = data["hashrate"] hashrate = data["hashrate"]
logger.debug(f"Hashrate is {hashrate}") logger.info(f"Hashrate is {hashrate}")
metrics.append(Metric("hashrate", hashrate, context="hashrate")) metrics.append(Metric("hashrate", hashrate, context="hashrate"))
if "success" in data: if "success" in data:
success = bool(data["success"]) success = bool(data["success"])
if success: if success:
logger.debug("T-Rex is successfully started") logger.info("T-Rex is successfully started")
else: else:
logger.debug("T-Rex is not successfully started") logger.info("T-Rex is not successfully started")
metrics.append(Metric("success", success, context="success")) metrics.append(Metric("success", success, context="success"))
if "paused" in data: if "paused" in data:
paused = bool(data["paused"]) paused = bool(data["paused"])
if paused: if paused:
logger.debug("T-Rex is paused") logger.info("T-Rex is paused")
else: else:
logger.debug("T-Rex is not paused") logger.info("T-Rex is not paused")
metrics.append(Metric("paused", paused, context="paused")) metrics.append(Metric("paused", paused, context="paused"))
if "uptime" in data: if "uptime" in data:
uptime = data["uptime"] uptime = data["uptime"]
seconds = "seconds" if uptime > 1 else "second" seconds = "seconds" if uptime > 1 else "second"
logger.debug(f"Uptime is {uptime} {seconds}") logger.info(f"Uptime is {uptime} {seconds}")
metrics.append(Metric("uptime", uptime, context="uptime")) metrics.append(Metric("uptime", uptime, context="uptime"))
for gpu in data.get("gpus"): for gpu in data.get("gpus"):
@ -226,19 +233,19 @@ class Trex(Resource):
if "temperature" in gpu: if "temperature" in gpu:
temperature = gpu["temperature"] temperature = gpu["temperature"]
logger.debug(f"Temperature of {name} ({id}) is {temperature}C") logger.info(f"GPU {id} ({name}): temperature is {temperature}C")
metrics.append( metrics.append(
Metric("temperature", temperature, context="temperature") Metric(f"temperature_{id}", temperature, context="temperature")
) )
if "memory_temperature" in gpu: if "memory_temperature" in gpu:
temperature = gpu["memory_temperature"] memory_temperature = gpu["memory_temperature"]
logger.debug( logger.info(
f"Memory temperature of {name} ({id}) is {memory_temperature}C" f"GPU {id} ({name}): memory temperature is {memory_temperature}C"
) )
metrics.append( metrics.append(
Metric( Metric(
"memory_temperature", f"memory_temperature_{id}",
memory_temperature, memory_temperature,
context="memory_temperature", context="memory_temperature",
) )
@ -289,6 +296,11 @@ def main():
warning=args.temperature_warning, warning=args.temperature_warning,
critical=args.temperature_critical, critical=args.temperature_critical,
), ),
ScalarContext(
"memory_temperature",
warning=args.memory_temperature_warning,
critical=args.memory_temperature_critical,
),
TrexSummary(), TrexSummary(),
) )
check.main() check.main()