From b50c08f81efd27f7d0b8b2dc61de9009d20adf20 Mon Sep 17 00:00:00 2001 From: KatakaiAkinori Date: Wed, 28 Feb 2024 21:25:01 +0900 Subject: [PATCH 1/7] Web page reload time and timeout for each host can be specified as arguments --- .idea/gpuview.iml | 12 ++++ gpuview/app.py | 18 ++++-- gpuview/core.py | 5 +- gpuview/views/body.tpl | 113 +++++++++++++++++++++++++++++++++ gpuview/views/index.tpl | 137 +++++++--------------------------------- 5 files changed, 164 insertions(+), 121 deletions(-) create mode 100644 .idea/gpuview.iml create mode 100644 gpuview/views/body.tpl diff --git a/.idea/gpuview.iml b/.idea/gpuview.iml new file mode 100644 index 0000000..8a05c6e --- /dev/null +++ b/.idea/gpuview.iml @@ -0,0 +1,12 @@ + + + + + + + + + + \ No newline at end of file diff --git a/gpuview/app.py b/gpuview/app.py index 9377c07..16a9279 100644 --- a/gpuview/app.py +++ b/gpuview/app.py @@ -24,15 +24,21 @@ EXCLUDE_SELF = False # Do not report to `/gpustat` calls. +UPDATE_TIME = 5 + +TIMEOUT = 5 @app.route('/') def index(): - gpustats = core.all_gpustats() - now = datetime.now().strftime('Updated at %Y-%m-%d %H-%M-%S') - return template('index', gpustats=gpustats, update_time=now) + return template('index', update_time=UPDATE_TIME) +@app.route('/update', method='GET') +def update(): + gpustats = core.all_gpustats(TIMEOUT) + now = datetime.now().strftime('Updated at %Y-%m-%d %H-%M-%S') + return template('body', gpustats=gpustats, update_time=now) -@app.route('/gpustat', methods=['GET']) +@app.route('/gpustat', method='GET') def report_gpustat(): """ Returns the gpustat of this host. @@ -59,8 +65,10 @@ def main(): if 'run' == args.action: core.safe_zone(args.safe_zone) - global EXCLUDE_SELF + global EXCLUDE_SELF, UPDATE_TIME, TIMEOUT EXCLUDE_SELF = args.exclude_self + UPDATE_TIME = args.update_time + TIMEOUT = args.timeout app.run(host=args.host, port=args.port, debug=args.debug) elif 'service' == args.action: core.install_service(host=args.host, diff --git a/gpuview/core.py b/gpuview/core.py index 58449ca..faee4f2 100644 --- a/gpuview/core.py +++ b/gpuview/core.py @@ -13,7 +13,6 @@ except ImportError: from urllib2 import urlopen - ABS_PATH = os.path.dirname(os.path.realpath(__file__)) HOSTS_DB = os.path.join(ABS_PATH, 'gpuhosts.db') SAFE_ZONE = False # Safe to report all details. @@ -81,7 +80,7 @@ def my_gpustat(): return {'error': '%s!' % getattr(e, 'message', str(e))} -def all_gpustats(): +def all_gpustats(timeout): """ Aggregates the gpustats of all registered hosts and this host. @@ -97,7 +96,7 @@ def all_gpustats(): hosts = load_hosts() for url in hosts: try: - raw_resp = urlopen(url + '/gpustat') + raw_resp = urlopen(url + '/gpustat', timeout=timeout) gpustat = json.loads(raw_resp.read()) raw_resp.close() if not gpustat or 'gpus' not in gpustat: diff --git a/gpuview/views/body.tpl b/gpuview/views/body.tpl new file mode 100644 index 0000000..23d2747 --- /dev/null +++ b/gpuview/views/body.tpl @@ -0,0 +1,113 @@ + +
+
+
+ % for gpustat in gpustats: + % for gpu in gpustat.get('gpus', []): +
+
+
+
+
+ {{ gpustat.get('hostname', '-') }} +
+
[{{ gpu.get('index', '') }}] {{ gpu.get('name', '-') }}
+
+
+ +
+
+ % end + % end +
+ +
+
+ All Hosts and GPUs
+
+
+ + + + + + + + + + + + + + % for gpustat in gpustats: + % for gpu in gpustat.get('gpus', []): + + + + + + + + + + % end + % end + +
HostGPUTemp.Util.Memory Use/CapPower Use/CapUser Processes
{{ gpustat.get('hostname', '-') }} [{{ gpu.get('index', '') }}] {{ gpu.get('name', '-') }} {{ gpu.get('temperature.gpu', '-') }}℃ {{ gpu.get('utilization.gpu', '-') }}% {{ gpu.get('memory', '-') }}% ({{ gpu.get('memory.used', '') }}/{{ gpu.get('memory.total', '-') }}) {{ gpu.get('power.draw', '-') }} / {{ gpu.get('enforced.power.limit', '-') }} {{ gpu.get('user_processes', '-') }}
+
+
+ +
+ +
+ + + + +
\ No newline at end of file diff --git a/gpuview/views/index.tpl b/gpuview/views/index.tpl index b3b0f0b..fa34d7f 100644 --- a/gpuview/views/index.tpl +++ b/gpuview/views/index.tpl @@ -16,119 +16,30 @@ - -
-
-
- % for gpustat in gpustats: - % for gpu in gpustat.get('gpus', []): -
-
-
-
-
- {{ gpustat.get('hostname', '-') }} -
-
[{{ gpu.get('index', '') }}] {{ gpu.get('name', '-') }}
-
-
- -
-
- % end - % end -
- -
-
- All Hosts and GPUs
-
-
- - - - - - - - - - - - - - % for gpustat in gpustats: - % for gpu in gpustat.get('gpus', []): - - - - - - - - - - % end - % end - -
HostGPUTemp.Util.Memory Use/CapPower Use/CapUser Processes
{{ gpustat.get('hostname', '-') }} [{{ gpu.get('index', '') }}] {{ gpu.get('name', '-') }} {{ gpu.get('temperature.gpu', '-') }}℃ {{ gpu.get('utilization.gpu', '-') }}% {{ gpu.get('memory', '-') }}% ({{ gpu.get('memory.used', '') }}/{{ gpu.get('memory.total', '-') }}) {{ gpu.get('power.draw', '-') }} / {{ gpu.get('enforced.power.limit', '-') }} {{ gpu.get('user_processes', '-') }}
-
-
- -
- -
- - - - -
+ + + From 697b398b4361997d64f657a9d0fb8db7f4fc6aed Mon Sep 17 00:00:00 2001 From: KatakaiAkinori Date: Wed, 28 Feb 2024 21:27:57 +0900 Subject: [PATCH 2/7] fix: Commit uncommitted files --- MANIFEST.in | 1 + gpuview/utils.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/MANIFEST.in b/MANIFEST.in index 29c2c32..3e2939f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,3 +1,4 @@ include README.md include gpuview/views/index.tpl +include gpuview/views/body.tpl include gpuview/service.sh diff --git a/gpuview/utils.py b/gpuview/utils.py index a912e33..6712487 100644 --- a/gpuview/utils.py +++ b/gpuview/utils.py @@ -44,6 +44,10 @@ def arg_parser(): help="Report all details including usernames") base_parser.add_argument('--exclude-self', action='store_true', help="Don't report to others but self-dashboard") + base_parser.add_argument('--update-time', type=int, default=5, + help="Gpuview update time (default: 5 [sec])") + base_parser.add_argument('--timeout', type=int, default=5, + help="Timeout when querying each server for gpu information (default: 5 [sec])") run_parser = subparsers.add_parser("run", parents=[base_parser], help="Run gpuview server") From 50281011d3e89b800959f939c4756f22bc70f212 Mon Sep 17 00:00:00 2001 From: KatakaiAkinori <49186510+akinoria@users.noreply.github.com> Date: Wed, 28 Feb 2024 21:31:17 +0900 Subject: [PATCH 3/7] Delete .idea directory --- .idea/gpuview.iml | 12 ------------ 1 file changed, 12 deletions(-) delete mode 100644 .idea/gpuview.iml diff --git a/.idea/gpuview.iml b/.idea/gpuview.iml deleted file mode 100644 index 8a05c6e..0000000 --- a/.idea/gpuview.iml +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - \ No newline at end of file From c7e07db44e40853dc4c3abfa4c43816c3ed1832c Mon Sep 17 00:00:00 2001 From: KatakaiAkinori Date: Thu, 29 Feb 2024 17:39:29 +0900 Subject: [PATCH 4/7] Asynchronize HTTP requests --- gpuview/app.py | 15 ++++++--------- gpuview/core.py | 38 ++++++++++++++++++++++++-------------- gpuview/utils.py | 6 ++---- gpuview/views/body.tpl | 2 +- requirements.txt | 1 + 5 files changed, 34 insertions(+), 28 deletions(-) diff --git a/gpuview/app.py b/gpuview/app.py index 16a9279..b46c130 100644 --- a/gpuview/app.py +++ b/gpuview/app.py @@ -24,19 +24,17 @@ EXCLUDE_SELF = False # Do not report to `/gpustat` calls. -UPDATE_TIME = 5 - -TIMEOUT = 5 +REFRESH_TIME = 5 @app.route('/') def index(): - return template('index', update_time=UPDATE_TIME) + return template('index', update_time=REFRESH_TIME) @app.route('/update', method='GET') def update(): - gpustats = core.all_gpustats(TIMEOUT) + gpustats = core.all_gpustats(REFRESH_TIME) now = datetime.now().strftime('Updated at %Y-%m-%d %H-%M-%S') - return template('body', gpustats=gpustats, update_time=now) + return template('body', gpustats=gpustats, refresh_time=now) @app.route('/gpustat', method='GET') def report_gpustat(): @@ -65,10 +63,9 @@ def main(): if 'run' == args.action: core.safe_zone(args.safe_zone) - global EXCLUDE_SELF, UPDATE_TIME, TIMEOUT + global EXCLUDE_SELF, REFRESH_TIME EXCLUDE_SELF = args.exclude_self - UPDATE_TIME = args.update_time - TIMEOUT = args.timeout + REFRESH_TIME = args.refresh_time app.run(host=args.host, port=args.port, debug=args.debug) elif 'service' == args.action: core.install_service(host=args.host, diff --git a/gpuview/core.py b/gpuview/core.py index faee4f2..93551d6 100644 --- a/gpuview/core.py +++ b/gpuview/core.py @@ -6,8 +6,9 @@ """ import os -import json import subprocess +import asyncio +import aiohttp try: from urllib.request import urlopen except ImportError: @@ -79,8 +80,18 @@ def my_gpustat(): except Exception as e: return {'error': '%s!' % getattr(e, 'message', str(e))} +async def async_fetch_gpustat(session, url): + try: + async with session.get(url + '/gpustat') as response: + gpustat = await response.json() + if gpustat and 'gpus' in gpustat: + return gpustat + except Exception as e: + print('Error: %s getting gpustat from %s' % + (getattr(e, 'message', str(e)), url)) -def all_gpustats(timeout): + +async def async_all_gpustats(int_timeout): """ Aggregates the gpustats of all registered hosts and this host. @@ -94,19 +105,16 @@ def all_gpustats(timeout): gpustats.append(mystat) hosts = load_hosts() - for url in hosts: - try: - raw_resp = urlopen(url + '/gpustat', timeout=timeout) - gpustat = json.loads(raw_resp.read()) - raw_resp.close() - if not gpustat or 'gpus' not in gpustat: - continue + timeout = aiohttp.ClientTimeout(total=int_timeout*0.9) + async with aiohttp.ClientSession(timeout=timeout) as session: + tasks = [async_fetch_gpustat(session, url) for url in hosts] + results = await asyncio.gather(*tasks) + + for result, url in zip(results, hosts): + if result: if hosts[url] != url: - gpustat['hostname'] = hosts[url] - gpustats.append(gpustat) - except Exception as e: - print('Error: %s getting gpustat from %s' % - (getattr(e, 'message', str(e)), url)) + result['hostname'] = hosts[url] + gpustats.append(result) try: sorted_gpustats = sorted(gpustats, key=lambda g: g['hostname']) @@ -116,6 +124,8 @@ def all_gpustats(timeout): print("Error: %s" % getattr(e, 'message', str(e))) return gpustats +def all_gpustats(timeout): + return asyncio.run(async_all_gpustats(timeout)) def load_hosts(): """ diff --git a/gpuview/utils.py b/gpuview/utils.py index 6712487..244c399 100644 --- a/gpuview/utils.py +++ b/gpuview/utils.py @@ -44,10 +44,8 @@ def arg_parser(): help="Report all details including usernames") base_parser.add_argument('--exclude-self', action='store_true', help="Don't report to others but self-dashboard") - base_parser.add_argument('--update-time', type=int, default=5, - help="Gpuview update time (default: 5 [sec])") - base_parser.add_argument('--timeout', type=int, default=5, - help="Timeout when querying each server for gpu information (default: 5 [sec])") + base_parser.add_argument('--refresh-time', type=int, default=5, + help="Gpuview refresh time (default: 5 [sec])") run_parser = subparsers.add_parser("run", parents=[base_parser], help="Run gpuview server") diff --git a/gpuview/views/body.tpl b/gpuview/views/body.tpl index 23d2747..814be4f 100644 --- a/gpuview/views/body.tpl +++ b/gpuview/views/body.tpl @@ -92,7 +92,7 @@ - +