forked from lahwaacz/arch-wiki-docs
-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy patharch-wiki-docs.py
executable file
·42 lines (32 loc) · 1.63 KB
/
arch-wiki-docs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/bin/python3
import datetime
import argparse
from simplemediawiki import build_user_agent
import ArchWiki
if __name__ == "__main__":
    # Command-line entry point: parse the options, then download the wiki
    # pages, CSS and images into the output directory and optionally clean
    # that directory afterwards.
    aparser = argparse.ArgumentParser(description="Download pages from Arch Wiki and optimize them for offline browsing")
    aparser.add_argument(
        "--output-directory",
        type=str,
        required=True,
        help="Path where the downloaded pages should be stored.",
    )
    aparser.add_argument(
        "--force",
        action="store_true",
        help="Ignore timestamp, always download the page from the wiki.",
    )
    aparser.add_argument(
        "--clean",
        action="store_true",
        help="Clean the output directory after downloading, useful for removing pages deleted/moved on the wiki. Warning: any unknown files found in the output directory will be deleted!",
    )
    aparser.add_argument(
        "--variant",
        type=str,
        required=True,
        help="zh variant",
    )
    args = aparser.parse_args()

    if args.force:
        # --force: use the current UTC time as the epoch. The value is kept
        # timezone-naive on purpose so that it has the same shape as the
        # fixed epoch in the other branch; datetime.utcnow() produced the
        # same value but is deprecated since Python 3.12.
        # NOTE(review): presumably the Downloader re-fetches anything older
        # than the epoch -- confirm against ArchWiki.Downloader.
        epoch = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    else:
        # this should be the date of the latest incompatible change
        epoch = datetime.datetime(2016, 3, 3, 18, 0, 0)

    # The same --variant value is handed to both the API wrapper and the
    # downloader.
    user_agent = build_user_agent(__file__, ArchWiki.__version__, ArchWiki.__url__)
    aw = ArchWiki.ArchWiki(user_agent=user_agent, variant=args.variant)
    optimizer = ArchWiki.Optimizer(aw, args.output_directory)
    downloader = ArchWiki.Downloader(
        aw, args.output_directory, epoch,
        optimizer=optimizer,
        variant=args.variant,
    )

    downloader.download_css()
    aw.print_namespaces()

    # Namespace ids are passed as strings. In MediaWiki's built-in numbering
    # these are Main (0), Project (4), Help (12) and Category (14) -- compare
    # with the output of aw.print_namespaces() above to confirm.
    for ns in ["0", "4", "12", "14"]:
        downloader.process_namespace(ns)

    downloader.download_images()

    # Cleaning runs last, after all downloads; per the --clean help text it
    # deletes any unknown files found in the output directory.
    if args.clean:
        downloader.clean_output_directory()