Compare commits

..

10 commits

Author SHA1 Message Date
f62623af38
chore: Bump version to 1.0.3
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 11:49:20 +02:00
2c78fccd1e
doc: Add T-Rex API security
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 11:47:12 +02:00
15f3746f2e
chore: Move logging to info level
- Extracted values are set to info level
- Full API response is still set to debug level

Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 11:46:15 +02:00
9af285c49d
chore: Bump version to 1.0.2
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 11:32:03 +02:00
6f09301879
feat: Add GPU ID to temperatures
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 11:30:39 +02:00
98483e3279
fix: Typo in memory temperature management (#1)
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 11:26:52 +02:00
04446c9329
doc: Fix typo in usage
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 09:44:15 +02:00
7c5a197872
chore: Bump to 1.0.1
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 01:30:48 +02:00
4943442135
feat: Add memory temperature checks
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 01:29:54 +02:00
16c0f33b9e
doc: Add NRPE example
Signed-off-by: Julien Riou <julien@riou.xyz>
2022-05-14 01:22:51 +02:00
3 changed files with 52 additions and 20 deletions

View file

@ -14,7 +14,3 @@ repos:
rev: 22.3.0
hooks:
- id: black
- repo: https://github.com/pycqa/isort
rev: 5.10.1
hooks:
- id: isort

View file

@ -2,6 +2,22 @@
Nagios check for [T-Rex miner](https://github.com/trexminer/T-Rex).
# Security
The T-Rex API must be exposed in a secure way:
* `--api-read-only`: the API is accessible in read-only mode; no modifications are allowed
* `--api-bind-http 127.0.0.1:4067`: (default) accessible only to local connections
If the check is executed **remotely**, you should add a **firewall rule** to allow only the host running the check to
access the T-Rex API port.
**HTTPS** should be used:
* `--api-https`
* `--api-webserver-cert`
* `--api-webserver-pkey`
See full [list of options](https://github.com/trexminer/T-Rex#usage).
# Installation
Using pip:
@ -21,7 +37,15 @@ sudo apt-get install python3-nagiosplugin python3-requests
# Usage
```
./check_trex --help
./check_trex.py --help
```
# Examples
Nagios NRPE:
```
command[check_trex]=/opt/check_trex/check_trex.py --hashrate-warning 60000000 --hashrate-critical 50000000 --uptime-critical 300 --uptime-warning 600
```
# Contributing

View file

@ -5,8 +5,15 @@ import logging
import sys
import requests
from nagiosplugin import (Check, Context, Metric, Performance, Resource,
ScalarContext, Summary)
from nagiosplugin import (
Check,
Context,
Metric,
Performance,
Resource,
ScalarContext,
Summary,
)
from nagiosplugin.state import Critical, Ok, Unknown, Warn
logger = logging.getLogger(__name__)
@ -128,7 +135,7 @@ def setup_logging(args):
def show_version():
print("1.0.0")
print("1.0.3")
class BelowThresholdContext(Context):
@ -195,29 +202,29 @@ class Trex(Resource):
if "hashrate" in data:
hashrate = data["hashrate"]
logger.debug(f"Hashrate is {hashrate}")
logger.info(f"Hashrate is {hashrate}")
metrics.append(Metric("hashrate", hashrate, context="hashrate"))
if "success" in data:
success = bool(data["success"])
if success:
logger.debug("T-Rex is successfully started")
logger.info("T-Rex is successfully started")
else:
logger.debug("T-Rex is not successfully started")
logger.info("T-Rex is not successfully started")
metrics.append(Metric("success", success, context="success"))
if "paused" in data:
paused = bool(data["paused"])
if paused:
logger.debug("T-Rex is paused")
logger.info("T-Rex is paused")
else:
logger.debug("T-Rex is not paused")
logger.info("T-Rex is not paused")
metrics.append(Metric("paused", paused, context="paused"))
if "uptime" in data:
uptime = data["uptime"]
seconds = "seconds" if uptime > 1 else "second"
logger.debug(f"Uptime is {uptime} {seconds}")
logger.info(f"Uptime is {uptime} {seconds}")
metrics.append(Metric("uptime", uptime, context="uptime"))
for gpu in data.get("gpus"):
@ -226,19 +233,19 @@ class Trex(Resource):
if "temperature" in gpu:
temperature = gpu["temperature"]
logger.debug(f"Temperature of {name} ({id}) is {temperature}C")
logger.info(f"GPU {id} ({name}): temperature is {temperature}C")
metrics.append(
Metric("temperature", temperature, context="temperature")
Metric(f"temperature_{id}", temperature, context="temperature")
)
if "memory_temperature" in gpu:
temperature = gpu["memory_temperature"]
logger.debug(
f"Memory temperature of {name} ({id}) is {memory_temperature}C"
memory_temperature = gpu["memory_temperature"]
logger.info(
f"GPU {id} ({name}): memory temperature is {memory_temperature}C"
)
metrics.append(
Metric(
"memory_temperature",
f"memory_temperature_{id}",
memory_temperature,
context="memory_temperature",
)
@ -289,6 +296,11 @@ def main():
warning=args.temperature_warning,
critical=args.temperature_critical,
),
ScalarContext(
"memory_temperature",
warning=args.memory_temperature_warning,
critical=args.memory_temperature_critical,
),
TrexSummary(),
)
check.main()