Compare commits

..

No commits in common. "f62623af386989db571fb65c79a105ea25cf5849" and "024d00c22d75bfdae5a42844d89613d4cbe88436" have entirely different histories.

3 changed files with 20 additions and 52 deletions

View file

@ -14,3 +14,7 @@ repos:
rev: 22.3.0 rev: 22.3.0
hooks: hooks:
- id: black - id: black
- repo: https://github.com/pycqa/isort
rev: 5.10.1
hooks:
- id: isort

View file

@ -2,22 +2,6 @@
Nagios check for [T-Rex miner](https://github.com/trexminer/T-Rex). Nagios check for [T-Rex miner](https://github.com/trexminer/T-Rex).
# Security
The T-Rex API must be exposed securely:
* `--api-read-only`: the API is accessible in read-only mode; no modifications are allowed
* `--api-bind-http 127.0.0.1:4067`: (default) accessible only to local connections
If the check is executed **remotely**, you should add a **firewall rule** to allow only the host running the check to
access the T-Rex API port.
**HTTPS** should be used:
* `--api-https`
* `--api-webserver-cert`
* `--api-webserver-pkey`
See the full [list of options](https://github.com/trexminer/T-Rex#usage).
# Installation # Installation
Using pip: Using pip:
@ -37,15 +21,7 @@ sudo apt-get install python3-nagiosplugin python3-requests
# Usage # Usage
``` ```
./check_trex.py --help ./check_trex --help
```
# Examples
Nagios NRPE:
```
command[check_trex]=/opt/check_trex/check_trex.py --hashrate-warning 60000000 --hashrate-critical 50000000 --uptime-critical 300 --uptime-warning 600
``` ```
# Contributing # Contributing

View file

@ -5,15 +5,8 @@ import logging
import sys import sys
import requests import requests
from nagiosplugin import ( from nagiosplugin import (Check, Context, Metric, Performance, Resource,
Check, ScalarContext, Summary)
Context,
Metric,
Performance,
Resource,
ScalarContext,
Summary,
)
from nagiosplugin.state import Critical, Ok, Unknown, Warn from nagiosplugin.state import Critical, Ok, Unknown, Warn
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -135,7 +128,7 @@ def setup_logging(args):
def show_version(): def show_version():
print("1.0.3") print("1.0.0")
class BelowThresholdContext(Context): class BelowThresholdContext(Context):
@ -202,29 +195,29 @@ class Trex(Resource):
if "hashrate" in data: if "hashrate" in data:
hashrate = data["hashrate"] hashrate = data["hashrate"]
logger.info(f"Hashrate is {hashrate}") logger.debug(f"Hashrate is {hashrate}")
metrics.append(Metric("hashrate", hashrate, context="hashrate")) metrics.append(Metric("hashrate", hashrate, context="hashrate"))
if "success" in data: if "success" in data:
success = bool(data["success"]) success = bool(data["success"])
if success: if success:
logger.info("T-Rex is successfully started") logger.debug("T-Rex is successfully started")
else: else:
logger.info("T-Rex is not successfully started") logger.debug("T-Rex is not successfully started")
metrics.append(Metric("success", success, context="success")) metrics.append(Metric("success", success, context="success"))
if "paused" in data: if "paused" in data:
paused = bool(data["paused"]) paused = bool(data["paused"])
if paused: if paused:
logger.info("T-Rex is paused") logger.debug("T-Rex is paused")
else: else:
logger.info("T-Rex is not paused") logger.debug("T-Rex is not paused")
metrics.append(Metric("paused", paused, context="paused")) metrics.append(Metric("paused", paused, context="paused"))
if "uptime" in data: if "uptime" in data:
uptime = data["uptime"] uptime = data["uptime"]
seconds = "seconds" if uptime > 1 else "second" seconds = "seconds" if uptime > 1 else "second"
logger.info(f"Uptime is {uptime} {seconds}") logger.debug(f"Uptime is {uptime} {seconds}")
metrics.append(Metric("uptime", uptime, context="uptime")) metrics.append(Metric("uptime", uptime, context="uptime"))
for gpu in data.get("gpus"): for gpu in data.get("gpus"):
@ -233,19 +226,19 @@ class Trex(Resource):
if "temperature" in gpu: if "temperature" in gpu:
temperature = gpu["temperature"] temperature = gpu["temperature"]
logger.info(f"GPU {id} ({name}): temperature is {temperature}C") logger.debug(f"Temperature of {name} ({id}) is {temperature}C")
metrics.append( metrics.append(
Metric(f"temperature_{id}", temperature, context="temperature") Metric("temperature", temperature, context="temperature")
) )
if "memory_temperature" in gpu: if "memory_temperature" in gpu:
memory_temperature = gpu["memory_temperature"] temperature = gpu["memory_temperature"]
logger.info( logger.debug(
f"GPU {id} ({name}): memory temperature is {memory_temperature}C" f"Memory temperature of {name} ({id}) is {memory_temperature}C"
) )
metrics.append( metrics.append(
Metric( Metric(
f"memory_temperature_{id}", "memory_temperature",
memory_temperature, memory_temperature,
context="memory_temperature", context="memory_temperature",
) )
@ -296,11 +289,6 @@ def main():
warning=args.temperature_warning, warning=args.temperature_warning,
critical=args.temperature_critical, critical=args.temperature_critical,
), ),
ScalarContext(
"memory_temperature",
warning=args.memory_temperature_warning,
critical=args.memory_temperature_critical,
),
TrexSummary(), TrexSummary(),
) )
check.main() check.main()