Commit 83a5e612 authored by Petr Špaček

optimization: use forkserver instead of fork to save memory

By default, the multiprocessing module uses fork() on Linux,
which effectively doubles the amount of memory required for processing
big zones.

We are targeting the Linux platform anyway, so using forkserver should not
have any adverse effect.
parent 910e19bc
Pipeline #41254 passed with stage
in 1 minute and 2 seconds
#!/usr/bin/python3
import glob
import logging
import multiprocessing
import sys
import dns.name
......@@ -14,6 +15,7 @@ import testedns
import zone2pickle
def main():
multiprocessing.set_start_method('forkserver')
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
try:
......
......@@ -9,7 +9,7 @@ script sends queries to IP addresses from glue.
import collections
import enum
import logging
from multiprocessing import Pool
import multiprocessing
import pickle
from typing import Counter, Deque, Dict, Iterable, Set, Tuple
......@@ -158,7 +158,7 @@ def update_mapping(domain2nsset, nsname2ipset, netstats, domain2ipset):
#candidates = gen_candidates(domain2nsset, nsname2ipset, netstats, retry_queue, domain2ipset)
#logging.info('queue contains %s queries to be checked', count_candidates(candidates))
candidates = gen_candidates(domain2nsset, nsname2ipset, netstats, retry_queue, domain2ipset)
with Pool(processes = 30) as pool:
with multiprocessing.Pool(processes = 30) as pool:
for attempt, domain, ip, state in pool.imap_unordered(check_availability, candidates):
process_reply(attempt, domain, ip, state, netstats, retry_queue, domain2ipset)
if len(domain2ipset) % 1000 == 0 and len(domain2ipset.get(domain, [])) == 1:
......@@ -173,6 +173,7 @@ def update_mapping(domain2nsset, nsname2ipset, netstats, domain2ipset):
if __name__ == '__main__':
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
multiprocessing.set_start_method('forkserver')
domain2nsset, nsname2ipset, netstats, domain2ipset = load()
try:
update_mapping(domain2nsset, nsname2ipset, netstats, domain2ipset)
......
#!/usr/bin/python3
import logging
from multiprocessing.pool import Pool
import multiprocessing
import pickle
from typing import Dict, Set, Tuple
......@@ -60,7 +60,7 @@ def update_mapping(nsnames: Set[dns.name.Name],
dns.resolver.default_resolver.lifetime = 5 # seconds
#dns.resolver.default_resolver.nameservers = ['193.29.206.206']
with Pool(processes=128) as p:
with multiprocessing.Pool(processes=128) as p:
i = 0
logging.info('starting DNS query machinery')
for nsname, ipset in p.imap_unordered(get_ips, yield_ns_name(nsnames, mapping), chunksize=10):
......@@ -77,6 +77,7 @@ def save(mapping: Dict[dns.name.Name, Set[str]]) -> None:
if __name__ == "__main__":
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')
multiprocessing.set_start_method('forkserver')
nsnames = load_nsnames()
mapping = load_nsname2ipset()
try:
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment