diff --git a/Scripts/README.md b/Scripts/README.md new file mode 100644 index 0000000..9cc1ce0 --- /dev/null +++ b/Scripts/README.md @@ -0,0 +1,13 @@ +# Scripts + +## Description + +`get_all_links.py` : checks whether each link is live or dead (requires a network connection) + +## Usage + +`get_all_links.py` : + +``` +./get_all_links.py ../ +``` diff --git a/Scripts/get_all_links.py b/Scripts/get_all_links.py new file mode 100755 index 0000000..c8fdd79 --- /dev/null +++ b/Scripts/get_all_links.py @@ -0,0 +1,77 @@ +#!/usr/bin/env python + +from __future__ import print_function +from socket import timeout + +import os +import sys +import codecs +import re + +import markdown + +try: + # compatible for python2 + from urllib2 import urlopen + from urllib2 import HTTPError + from urllib2 import URLError +except ImportError: + # compatible for python3 + from urllib.request import urlopen + from urllib.error import HTTPError + from urllib.error import URLError + +def check_live_url(url): + + result = False + try: + ret = urlopen(url, timeout=2) + result = (ret.code == 200) + except HTTPError as e: + print(e, file=sys.stderr) + except URLError as e: + print(e, file=sys.stderr) + except timeout as e: + print(e, file=sys.stderr) + except Exception as e: + print(e, file=sys.stderr) + + return result + + +def main(path): + + filenames = [] + for (dirpath, dnames, fnames) in os.walk(path): + for fname in fnames: + if fname.endswith('.md'): + filenames.append(os.sep.join([dirpath, fname])) + + urls = [] + + for filename in filenames: + fd = codecs.open(filename, mode="r", encoding="utf-8") + for line in fd.readlines(): + refs = re.findall(r'(?<=