diff --git a/MANIFEST.in b/MANIFEST.in index 29c2c32..3e2939f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ include README.md include gpuview/views/index.tpl +include gpuview/views/body.tpl include gpuview/service.sh diff --git a/gpuview/app.py b/gpuview/app.py index 9377c07..bb24678 100644 --- a/gpuview/app.py +++ b/gpuview/app.py @@ -24,15 +24,19 @@ EXCLUDE_SELF = False # Do not report to `/gpustat` calls. +REFRESH_TIME = 5 @app.route('/') def index(): - gpustats = core.all_gpustats() - now = datetime.now().strftime('Updated at %Y-%m-%d %H-%M-%S') - return template('index', gpustats=gpustats, update_time=now) + return template('index', update_time=REFRESH_TIME) +@app.route('/update', method='GET') +def update(): + gpustats = core.all_gpustats(REFRESH_TIME) + now = datetime.now().strftime('Updated at %Y-%m-%d %H-%M-%S') + return template('body', gpustats=gpustats, refresh_time=now) -@app.route('/gpustat', methods=['GET']) +@app.route('/gpustat', method='GET') def report_gpustat(): """ Returns the gpustat of this host. @@ -59,14 +63,16 @@ def main(): if 'run' == args.action: core.safe_zone(args.safe_zone) - global EXCLUDE_SELF + global EXCLUDE_SELF, REFRESH_TIME EXCLUDE_SELF = args.exclude_self + REFRESH_TIME = args.refresh_time app.run(host=args.host, port=args.port, debug=args.debug) elif 'service' == args.action: core.install_service(host=args.host, port=args.port, safe_zone=args.safe_zone, - exclude_self=args.exclude_self) + exclude_self=args.exclude_self, + refresh_time=args.refresh_time) elif 'add' == args.action: core.add_host(args.url, args.name) elif 'remove' == args.action: diff --git a/gpuview/core.py b/gpuview/core.py index 58449ca..91d2f45 100644 --- a/gpuview/core.py +++ b/gpuview/core.py @@ -6,14 +6,14 @@ """ import os -import json import subprocess +import asyncio +import aiohttp try: from urllib.request import urlopen except ImportError: from urllib2 import urlopen - ABS_PATH = os.path.dirname(os.path.realpath(__file__)) HOSTS_DB = os.path.join(ABS_PATH, 'gpuhosts.db') SAFE_ZONE = False # Safe to report all details. @@ -80,8 +80,18 @@ def my_gpustat(): except Exception as e: return {'error': '%s!' % getattr(e, 'message', str(e))} +async def async_fetch_gpustat(session, url): + try: + async with session.get(url + '/gpustat') as response: + gpustat = await response.json() + if gpustat and 'gpus' in gpustat: + return gpustat + except Exception as e: + print('Error: %s getting gpustat from %s' % + (getattr(e, 'message', str(e)), url)) + -def all_gpustats(): +async def async_all_gpustats(int_timeout): """ Aggregates the gpustats of all registered hosts and this host. @@ -95,19 +105,16 @@ def all_gpustats(): gpustats.append(mystat) hosts = load_hosts() - for url in hosts: - try: - raw_resp = urlopen(url + '/gpustat') - gpustat = json.loads(raw_resp.read()) - raw_resp.close() - if not gpustat or 'gpus' not in gpustat: - continue + timeout = aiohttp.ClientTimeout(total=int_timeout*0.9) + async with aiohttp.ClientSession(timeout=timeout) as session: + tasks = [async_fetch_gpustat(session, url) for url in hosts] + results = await asyncio.gather(*tasks) + + for result, url in zip(results, hosts): + if result: if hosts[url] != url: - gpustat['hostname'] = hosts[url] - gpustats.append(gpustat) - except Exception as e: - print('Error: %s getting gpustat from %s' % - (getattr(e, 'message', str(e)), url)) + result['hostname'] = hosts[url] + gpustats.append(result) try: sorted_gpustats = sorted(gpustats, key=lambda g: g['hostname']) @@ -117,6 +124,8 @@ def all_gpustats(): print("Error: %s" % getattr(e, 'message', str(e))) return gpustats +def all_gpustats(timeout): + return asyncio.run(async_all_gpustats(timeout)) def load_hosts(): """ @@ -176,7 +185,8 @@ def print_hosts(): def install_service(host=None, port=None, - safe_zone=False, exclude_self=False): + safe_zone=False, exclude_self=False, + refresh_time=None): arg = '' if host is not None: arg += '--host %s ' % host @@ -186,5 +196,7 @@ def install_service(host=None, port=None, arg += '--safe-zone ' if exclude_self: arg += '--exclude-self ' + if refresh_time is not None: + arg += '--refresh-time %s' % refresh_time script = os.path.join(ABS_PATH, 'service.sh') subprocess.call('{} "{}"'.format(script, arg.strip()), shell=True) diff --git a/gpuview/utils.py b/gpuview/utils.py index a912e33..244c399 100644 --- a/gpuview/utils.py +++ b/gpuview/utils.py @@ -44,6 +44,8 @@ def arg_parser(): help="Report all details including usernames") base_parser.add_argument('--exclude-self', action='store_true', help="Don't report to others but self-dashboard") + base_parser.add_argument('--refresh-time', type=int, default=5, + help="Gpuview refresh time (default: 5 [sec])") run_parser = subparsers.add_parser("run", parents=[base_parser], help="Run gpuview server") diff --git a/gpuview/views/body.tpl b/gpuview/views/body.tpl new file mode 100644 index 0000000..814be4f --- /dev/null +++ b/gpuview/views/body.tpl @@ -0,0 +1,113 @@ + +
+
+
+ % for gpustat in gpustats: + % for gpu in gpustat.get('gpus', []): +
+
+
+
+
+ {{ gpustat.get('hostname', '-') }} +
+
[{{ gpu.get('index', '') }}] {{ gpu.get('name', '-') }}
+
+
+ +
+
+ % end + % end +
+ +
+
+ All Hosts and GPUs
+
+
+ + + + + + + + + + + + + + % for gpustat in gpustats: + % for gpu in gpustat.get('gpus', []): + + + + + + + + + + % end + % end + +
HostGPUTemp.Util.Memory Use/CapPower Use/CapUser Processes
{{ gpustat.get('hostname', '-') }} [{{ gpu.get('index', '') }}] {{ gpu.get('name', '-') }} {{ gpu.get('temperature.gpu', '-') }}℃ {{ gpu.get('utilization.gpu', '-') }}% {{ gpu.get('memory', '-') }}% ({{ gpu.get('memory.used', '') }}/{{ gpu.get('memory.total', '-') }}) {{ gpu.get('power.draw', '-') }} / {{ gpu.get('enforced.power.limit', '-') }} {{ gpu.get('user_processes', '-') }}
+
+
+ +
+ +
+ + + + +
\ No newline at end of file diff --git a/gpuview/views/index.tpl b/gpuview/views/index.tpl index b3b0f0b..fa34d7f 100644 --- a/gpuview/views/index.tpl +++ b/gpuview/views/index.tpl @@ -16,119 +16,30 @@ - -
-
-
- % for gpustat in gpustats: - % for gpu in gpustat.get('gpus', []): -
-
-
-
-
- {{ gpustat.get('hostname', '-') }} -
-
[{{ gpu.get('index', '') }}] {{ gpu.get('name', '-') }}
-
-
- -
-
- % end - % end -
- -
-
- All Hosts and GPUs
-
-
- - - - - - - - - - - - - - % for gpustat in gpustats: - % for gpu in gpustat.get('gpus', []): - - - - - - - - - - % end - % end - -
HostGPUTemp.Util.Memory Use/CapPower Use/CapUser Processes
{{ gpustat.get('hostname', '-') }} [{{ gpu.get('index', '') }}] {{ gpu.get('name', '-') }} {{ gpu.get('temperature.gpu', '-') }}℃ {{ gpu.get('utilization.gpu', '-') }}% {{ gpu.get('memory', '-') }}% ({{ gpu.get('memory.used', '') }}/{{ gpu.get('memory.total', '-') }}) {{ gpu.get('power.draw', '-') }} / {{ gpu.get('enforced.power.limit', '-') }} {{ gpu.get('user_processes', '-') }}
-
-
- -
- -
- - - - -
+ + + diff --git a/requirements.txt b/requirements.txt index cbfbabb..fae1e23 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ gpustat>=0.5.0 bottle>=0.12.14 +aiohttp flake8 pytest pytest-cov diff --git a/setup.py b/setup.py index dd23f06..f2140d7 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ def read_readme(): 'Topic :: System :: Monitoring', ], packages=['gpuview'], - install_requires=['gpustat>=0.5.0', 'bottle>=0.12.14'], + install_requires=['gpustat>=0.5.0', 'bottle>=0.12.14', 'aiohttp'], extras_require={'test': ['pytest']}, setup_requires=['pytest-runner'], tests_require=['pytest'],