/*
 * Decompiled with CFR 0.152.
 */
package edu.uci.ics.crawler4j.crawler;

import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;
import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.crawler.WebCrawler;
import edu.uci.ics.crawler4j.fetcher.PageFetcher;
import edu.uci.ics.crawler4j.frontier.DocIDServer;
import edu.uci.ics.crawler4j.frontier.Frontier;
import edu.uci.ics.crawler4j.parser.Parser;
import edu.uci.ics.crawler4j.robotstxt.RobotstxtServer;
import edu.uci.ics.crawler4j.url.TLDList;
import edu.uci.ics.crawler4j.url.URLCanonicalizer;
import edu.uci.ics.crawler4j.url.WebURL;
import edu.uci.ics.crawler4j.util.IO;
import java.io.File;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class CrawlController {
    /** Class-wide logger. */
    static final Logger logger = LoggerFactory.getLogger(CrawlController.class);

    /** Crawl configuration; assigned once in the constructor. */
    private final CrawlConfig config;

    /** Opaque user data exposed through the deprecated custom-data accessors. */
    protected Object customData;

    /** Per-crawler local data collected by the monitor thread when a crawl ends. */
    protected List<Object> crawlersLocalData = new ArrayList<Object>();

    /**
     * Set to {@code true} by the monitor thread after a normal cleanup.
     * Volatile because it is also read without holding {@code waitingLock}
     * (for example by {@link #isFinished()}).
     */
    protected volatile boolean finished;

    /** Failure recorded by the monitor thread; accessed only through the synchronized getter/setter. */
    private Throwable error;

    /**
     * Set by {@link #shutdown()} on the caller's thread and read by the monitor
     * thread. Volatile so the write is visible without a shared lock.
     */
    protected volatile boolean shuttingDown;

    protected PageFetcher pageFetcher;
    protected RobotstxtServer robotstxtServer;
    protected Frontier frontier;
    protected DocIDServer docIdServer;
    protected TLDList tldList;

    /** Monitor used by the monitor thread and {@link #waitUntilFinish()} for wait/notify. */
    protected final Object waitingLock = new Object();

    /** Berkeley DB environment opened on the {@code frontier} sub-folder of the storage folder. */
    protected final Environment env;

    protected Parser parser;

    /**
     * Creates a controller with the default parser and the default TLD list.
     *
     * @param config          crawl configuration
     * @param pageFetcher     fetcher shared by all crawlers
     * @param robotstxtServer robots.txt policy server
     * @throws Exception if the underlying full constructor fails
     */
    public CrawlController(CrawlConfig config, PageFetcher pageFetcher, RobotstxtServer robotstxtServer) throws Exception {
        // A null TLD list makes the next overload fall through to the same defaults.
        this(config, pageFetcher, robotstxtServer, null);
    }

    /**
     * Creates a controller with the default parser and the supplied TLD list.
     *
     * @param config          crawl configuration
     * @param pageFetcher     fetcher shared by all crawlers
     * @param robotstxtServer robots.txt policy server
     * @param tldList         TLD list to use, or {@code null} for a default one
     * @throws Exception if the underlying full constructor fails
     */
    public CrawlController(CrawlConfig config, PageFetcher pageFetcher, RobotstxtServer robotstxtServer, TLDList tldList) throws Exception {
        this(config, pageFetcher, (Parser) null, robotstxtServer, tldList);
    }

    /**
     * Creates a controller: validates the configuration, prepares the storage
     * folders, opens the Berkeley DB environment and wires up the collaborators.
     *
     * @param config          crawl configuration; {@code validate()} is called on it first
     * @param pageFetcher     fetcher shared by all crawlers
     * @param parser          parser to use, or {@code null} to create a default one
     * @param robotstxtServer robots.txt policy server; receives {@code config}
     * @param tldList         TLD list to use, or {@code null} to create a default one
     * @throws Exception if the storage folder or its {@code frontier} sub-folder
     *                   cannot be created, or if any collaborator fails to initialise
     */
    public CrawlController(CrawlConfig config, PageFetcher pageFetcher, Parser parser, RobotstxtServer robotstxtServer, TLDList tldList) throws Exception {
        config.validate();
        this.config = config;

        File folder = new File(config.getCrawlStorageFolder());
        if (!folder.exists()) {
            if (!folder.mkdirs()) {
                throw new Exception("couldn't create the storage folder: " + folder.getAbsolutePath() + " does it already exist ?");
            }
            logger.debug("Created folder: {}", folder.getAbsolutePath());
        }

        this.tldList = tldList == null ? new TLDList(config) : tldList;
        URLCanonicalizer.setHaltOnError(config.isHaltOnError());

        // Transactions and locking are only enabled for resumable crawls.
        boolean resumable = config.isResumableCrawling();
        EnvironmentConfig envConfig = new EnvironmentConfig();
        envConfig.setAllowCreate(true);
        envConfig.setTransactional(resumable);
        envConfig.setLocking(resumable);
        envConfig.setLockTimeout(config.getDbLockTimeout(), TimeUnit.MILLISECONDS);

        File envHome = new File(config.getCrawlStorageFolder() + "/frontier");
        if (!envHome.exists()) {
            if (!envHome.mkdir()) {
                throw new Exception("Failed creating the frontier folder: " + envHome.getAbsolutePath());
            }
            logger.debug("Created folder: {}", envHome.getAbsolutePath());
        }
        if (!resumable) {
            // A non-resumable crawl always starts from an empty frontier folder.
            IO.deleteFolderContents(envHome);
            logger.info("Deleted contents of: {} ( as you have configured resumable crawling to false )", envHome);
        }

        this.env = new Environment(envHome, envConfig);
        this.docIdServer = new DocIDServer(this.env, config);
        this.frontier = new Frontier(this.env, config);
        this.pageFetcher = pageFetcher;
        // Use the resolved field, not the raw parameter: when the caller passed a
        // null tldList the default parser must still receive the default TLD list.
        this.parser = parser == null ? new Parser(config, this.tldList) : parser;
        this.robotstxtServer = robotstxtServer;
        this.finished = false;
        this.shuttingDown = false;
        robotstxtServer.setCrawlConfig(config);
    }

    /**
     * @return the parser this controller was configured with
     */
    public Parser getParser() {
        return parser;
    }

    /**
     * Starts a crawl with crawlers instantiated from {@code clazz} and waits
     * for it to finish.
     *
     * @param clazz            crawler class; one instance is created per crawler thread
     * @param numberOfCrawlers number of crawler threads to start
     * @param <T>              concrete crawler type
     */
    public <T extends WebCrawler> void start(Class<T> clazz, int numberOfCrawlers) {
        WebCrawlerFactory<T> factory = new DefaultWebCrawlerFactory<T>(clazz);
        start(factory, numberOfCrawlers, true);
    }

    /**
     * Starts a crawl with one crawler thread running the given instance and
     * waits for it to finish.
     *
     * @param instance the crawler to run
     * @param <T>      concrete crawler type
     */
    public <T extends WebCrawler> void start(T instance) {
        WebCrawlerFactory<T> factory = new SingleInstanceFactory<T>(instance);
        start(factory, 1, true);
    }

    /**
     * Starts a crawl with crawlers produced by {@code crawlerFactory} and waits
     * for it to finish.
     *
     * @param crawlerFactory   supplier of crawler instances
     * @param numberOfCrawlers number of crawler threads to start
     * @param <T>              concrete crawler type
     */
    public <T extends WebCrawler> void start(WebCrawlerFactory<T> crawlerFactory, int numberOfCrawlers) {
        final boolean blocking = true;
        start(crawlerFactory, numberOfCrawlers, blocking);
    }

    /**
     * Starts a crawl with crawlers produced by {@code crawlerFactory} and
     * returns without waiting for it to finish.
     *
     * @param crawlerFactory   supplier of crawler instances
     * @param numberOfCrawlers number of crawler threads to start
     * @param <T>              concrete crawler type
     */
    public <T extends WebCrawler> void startNonBlocking(WebCrawlerFactory<T> crawlerFactory, int numberOfCrawlers) {
        final boolean blocking = false;
        start(crawlerFactory, numberOfCrawlers, blocking);
    }

    /**
     * Starts a crawl with crawlers instantiated from {@code clazz} and returns
     * without waiting for it to finish.
     *
     * @param clazz            crawler class; one instance is created per crawler thread
     * @param numberOfCrawlers number of crawler threads to start
     * @param <T>              concrete crawler type
     */
    public <T extends WebCrawler> void startNonBlocking(Class<T> clazz, int numberOfCrawlers) {
        WebCrawlerFactory<T> factory = new DefaultWebCrawlerFactory<T>(clazz);
        start(factory, numberOfCrawlers, false);
    }

    /**
     * Creates and starts {@code numberOfCrawlers} crawler threads, then starts a
     * monitor thread that supervises them and performs the final cleanup.
     *
     * @param crawlerFactory   supplier of crawler instances; also used by the
     *                         monitor to replace crawlers whose thread has died
     * @param numberOfCrawlers number of crawler threads to start
     * @param isBlocking       when {@code true}, {@link #waitUntilFinish()} is
     *                         called before returning
     * @param <T>              concrete crawler type
     * @throws RuntimeException if start-up fails and halt-on-error is configured;
     *                          otherwise the failure is only logged
     */
    protected <T extends WebCrawler> void start(final WebCrawlerFactory<T> crawlerFactory, int numberOfCrawlers, boolean isBlocking) {
        try {
            // Reset per-run state so the controller can be started again.
            this.finished = false;
            this.setError(null);
            this.crawlersLocalData.clear();
            // The two lists are index-aligned: threads.get(i) runs crawlers.get(i).
            final ArrayList<Thread> threads = new ArrayList<Thread>();
            final ArrayList<T> crawlers = new ArrayList<T>();
            // Crawler ids and thread names are 1-based.
            for (int i = 1; i <= numberOfCrawlers; ++i) {
                T crawler = crawlerFactory.newInstance();
                Thread thread = new Thread((Runnable)crawler, "Crawler " + i);
                ((WebCrawler)crawler).setThread(thread);
                ((WebCrawler)crawler).init(i, this);
                thread.start();
                crawlers.add(crawler);
                threads.add(thread);
                logger.info("Crawler {} started", (Object)i);
            }
            final CrawlController controller = this;
            Thread monitorThread = new Thread(new Runnable(){

                /*
                 * Monitor loop. Decompiler note: "Removed try catching itself -
                 * possible behaviour change."
                 */
                @Override
                public void run() {
                    try {
                        Object object = CrawlController.this.waitingLock;
                        // The lock is held for the monitor's whole run, including the sleeps below.
                        synchronized (object) {
                            while (true) {
                                CrawlController.sleep(CrawlController.this.config.getThreadMonitoringDelaySeconds());
                                boolean someoneIsWorking = false;
                                // First pass: revive dead threads and check whether anyone is busy.
                                for (int i = 0; i < threads.size(); ++i) {
                                    Throwable t;
                                    Thread thread = (Thread)threads.get(i);
                                    if (!thread.isAlive()) {
                                        // Dead threads are replaced unless we are shutting down or halting on errors.
                                        if (!CrawlController.this.shuttingDown && !CrawlController.this.config.isHaltOnError()) {
                                            logger.info("Thread {} was dead, I'll recreate it", (Object)i);
                                            Object crawler = crawlerFactory.newInstance();
                                            thread = new Thread((Runnable)crawler, "Crawler " + (i + 1));
                                            threads.remove(i);
                                            threads.add(i, thread);
                                            ((WebCrawler)crawler).setThread(thread);
                                            ((WebCrawler)crawler).init(i + 1, controller);
                                            thread.start();
                                            crawlers.remove(i);
                                            crawlers.add(i, crawler);
                                        }
                                    } else if (((WebCrawler)crawlers.get(i)).isNotWaitingForNewURLs()) {
                                        someoneIsWorking = true;
                                    }
                                    // With halt-on-error, a crawler error aborts the monitor via the catch block below.
                                    if ((t = ((WebCrawler)crawlers.get(i)).getError()) == null || !CrawlController.this.config.isHaltOnError()) continue;
                                    throw new RuntimeException("error on thread [" + ((Thread)threads.get(i)).getName() + "]", t);
                                }
                                boolean shutOnEmpty = CrawlController.this.config.isShutdownOnEmptyQueue();
                                // Keep monitoring while someone is busy or shutdown-on-empty-queue is disabled.
                                if (someoneIsWorking || !shutOnEmpty) continue;
                                logger.info("It looks like no thread is working, waiting for " + CrawlController.this.config.getThreadShutdownDelaySeconds() + " seconds to make sure...");
                                CrawlController.sleep(CrawlController.this.config.getThreadShutdownDelaySeconds());
                                // Second pass after the grace period: re-check that every crawler is idle.
                                someoneIsWorking = false;
                                for (int i = 0; i < threads.size(); ++i) {
                                    Thread thread = (Thread)threads.get(i);
                                    if (!thread.isAlive() || !((WebCrawler)crawlers.get(i)).isNotWaitingForNewURLs()) continue;
                                    someoneIsWorking = true;
                                }
                                if (someoneIsWorking) continue;
                                // An explicit shutdown skips the queue-length checks.
                                if (CrawlController.this.shuttingDown) break;
                                long queueLength = CrawlController.this.frontier.getQueueLength();
                                if (queueLength > 0L) continue;
                                logger.info("No thread is working and no more URLs are in queue waiting for another " + CrawlController.this.config.getThreadShutdownDelaySeconds() + " seconds to make sure...");
                                CrawlController.sleep(CrawlController.this.config.getThreadShutdownDelaySeconds());
                                // Leave the loop only if the queue is still empty after one more delay.
                                queueLength = CrawlController.this.frontier.getQueueLength();
                                if (queueLength <= 0L) break;
                            }
                            logger.info("All of the crawlers are stopped. Finishing the process...");
                            CrawlController.this.frontier.finish();
                            // Give each crawler its exit hook and collect its local data.
                            for (Object crawler : crawlers) {
                                ((WebCrawler)crawler).onBeforeExit();
                                CrawlController.this.crawlersLocalData.add(((WebCrawler)crawler).getMyLocalData());
                            }
                            logger.info("Waiting for " + CrawlController.this.config.getCleanupDelaySeconds() + " seconds before final clean up...");
                            CrawlController.sleep(CrawlController.this.config.getCleanupDelaySeconds());
                            CrawlController.this.frontier.close();
                            CrawlController.this.docIdServer.close();
                            CrawlController.this.pageFetcher.shutDown();
                            // Publish completion and wake waiters; they resume once this block releases the lock.
                            CrawlController.this.finished = true;
                            CrawlController.this.waitingLock.notifyAll();
                            CrawlController.this.env.close();
                            return;
                        }
                    }
                    catch (Throwable e) {
                        if (CrawlController.this.config.isHaltOnError()) {
                            // Record the failure, release resources and wake waiters; 'finished' is not set on this path.
                            CrawlController.this.setError(e);
                            Object object = CrawlController.this.waitingLock;
                            synchronized (object) {
                                CrawlController.this.frontier.finish();
                                CrawlController.this.frontier.close();
                                CrawlController.this.docIdServer.close();
                                CrawlController.this.pageFetcher.shutDown();
                                CrawlController.this.waitingLock.notifyAll();
                                CrawlController.this.env.close();
                            }
                        } else {
                            // NOTE(review): this path only logs. 'finished' stays false and no
                            // notifyAll() is issued, so a caller in waitUntilFinish() appears to
                            // remain blocked - confirm whether that is intended.
                            logger.error("Unexpected Error", e);
                        }
                        return;
                    }
                }
            });
            monitorThread.start();
            if (isBlocking) {
                this.waitUntilFinish();
            }
        }
        catch (Exception e) {
            // Start-up failures propagate only under halt-on-error; otherwise they are logged.
            if (this.config.isHaltOnError()) {
                if (e instanceof RuntimeException) {
                    throw (RuntimeException)e;
                }
                throw new RuntimeException("error running the monitor thread", e);
            }
            logger.error("Error happened", (Throwable)e);
        }
    }

    /**
     * Blocks the calling thread until the crawl is marked finished.
     *
     * <p>When halt-on-error is configured and the monitor thread recorded a
     * failure, that failure is rethrown here: {@link RuntimeException} and
     * {@link Error} as-is, anything else wrapped in a {@code RuntimeException}.
     *
     * <p>An interrupt does not end the wait. It is logged, the method keeps
     * waiting, and the thread's interrupt status is restored before returning
     * so callers can still observe it.
     *
     * @throws RuntimeException if the monitor thread failed and halt-on-error is set
     */
    public void waitUntilFinish() {
        boolean interrupted = false;
        try {
            while (!this.finished) {
                synchronized (this.waitingLock) {
                    if (this.config.isHaltOnError()) {
                        Throwable t = this.getError();
                        if (t != null) {
                            if (t instanceof RuntimeException) {
                                throw (RuntimeException) t;
                            }
                            if (t instanceof Error) {
                                throw (Error) t;
                            }
                            throw new RuntimeException("error on monitor thread", t);
                        }
                    }
                    // Re-check under the lock: the monitor sets the flag while holding it.
                    if (this.finished) {
                        return;
                    }
                    try {
                        this.waitingLock.wait();
                    } catch (InterruptedException e) {
                        // Re-interrupting now would make the next wait() throw at once,
                        // so remember the interrupt and restore it on the way out.
                        interrupted = true;
                        logger.error("Error occurred", (Throwable) e);
                    }
                }
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /**
     * @return the live list of per-crawler local data; it is cleared when a
     *         crawl starts and filled by the monitor thread when the crawl ends
     */
    public List<Object> getCrawlersLocalData() {
        return crawlersLocalData;
    }

    /**
     * Sleeps for the given number of seconds on a best-effort basis.
     *
     * <p>An interrupt cuts the sleep short and is deliberately not propagated:
     * the monitor loop relies on this method returning normally.
     *
     * @param seconds how long to sleep, in seconds
     * @throws IllegalArgumentException if {@code seconds} is negative
     */
    protected static void sleep(int seconds) {
        try {
            // Multiply as long: int arithmetic overflows beyond ~2.1 million
            // seconds and would hand Thread.sleep a negative duration.
            Thread.sleep(seconds * 1000L);
        }
        catch (InterruptedException ignored) {
            // Intentionally ignored: callers only need a best-effort delay.
        }
    }

    /**
     * Adds a seed URL and lets the controller pick its document id.
     *
     * @param pageUrl the seed URL
     * @throws IOException          propagated from {@link #addSeed(String, int)}
     * @throws InterruptedException propagated from {@link #addSeed(String, int)}
     */
    public void addSeed(String pageUrl) throws IOException, InterruptedException {
        // A negative id tells the two-argument overload to look up or allocate one.
        final int unassignedDocId = -1;
        addSeed(pageUrl, unassignedDocId);
    }

    /**
     * Adds a seed URL to the frontier at depth 0.
     *
     * <p>The URL is canonicalized first; if that yields {@code null} the seed
     * is rejected with an error log. With a negative {@code docId}, a URL that
     * already has a positive document id is skipped, otherwise a new id is
     * allocated. With a non-negative {@code docId}, the URL/id pair is
     * registered as given. The seed is scheduled only if robots.txt allows it.
     *
     * @param pageUrl the seed URL
     * @param docId   document id to use, or a negative value to assign one
     * @throws IOException          declared by the signature
     * @throws InterruptedException declared by the signature
     * @throws RuntimeException     if registering the given id fails and
     *                              halt-on-error is configured
     */
    public void addSeed(String pageUrl, int docId) throws IOException, InterruptedException {
        final String canonical = URLCanonicalizer.getCanonicalURL(pageUrl);
        if (canonical == null) {
            logger.error("Invalid seed URL: {}", pageUrl);
            return;
        }

        int seedDocId = docId;
        if (seedDocId >= 0) {
            // Caller supplied an id: register it; a failure is fatal only under halt-on-error.
            try {
                docIdServer.addUrlAndDocId(canonical, seedDocId);
            } catch (RuntimeException failure) {
                if (config.isHaltOnError()) {
                    throw failure;
                }
                logger.error("Could not add seed: {}", failure.getMessage());
            }
        } else {
            // No id supplied: skip URLs that already have one, otherwise allocate.
            seedDocId = docIdServer.getDocId(canonical);
            if (seedDocId > 0) {
                logger.trace("This URL is already seen.");
                return;
            }
            seedDocId = docIdServer.getNewDocID(canonical);
        }

        final WebURL seed = new WebURL();
        seed.setTldList(tldList);
        seed.setURL(canonical);
        seed.setDocid(seedDocId);
        seed.setDepth((short) 0);

        if (!robotstxtServer.allows(seed)) {
            logger.warn("Robots.txt does not allow this seed: {}", pageUrl);
            return;
        }
        frontier.schedule(seed);
    }

    /**
     * Registers a URL/document-id pair with the doc-id server without
     * scheduling the URL for crawling.
     *
     * @param url   the URL to register; it is canonicalized first
     * @param docId the document id to associate with the URL
     * @throws UnsupportedEncodingException declared by the signature
     * @throws RuntimeException             if registration fails and
     *                                      halt-on-error is configured
     */
    public void addSeenUrl(String url, int docId) throws UnsupportedEncodingException {
        final String canonical = URLCanonicalizer.getCanonicalURL(url);
        if (canonical == null) {
            logger.error("Invalid Url: {} (can't cannonicalize it!)", url);
            return;
        }

        try {
            docIdServer.addUrlAndDocId(canonical, docId);
        } catch (RuntimeException failure) {
            if (config.isHaltOnError()) {
                throw failure;
            }
            logger.error("Could not add seen url: {}", failure.getMessage());
        }
    }

    /**
     * @return the page fetcher currently held by this controller
     */
    public PageFetcher getPageFetcher() {
        return pageFetcher;
    }

    /**
     * Replaces the page fetcher held by this controller.
     *
     * @param pageFetcher the new page fetcher
     */
    public void setPageFetcher(PageFetcher pageFetcher) {
        this.pageFetcher = pageFetcher;
    }

    /**
     * @return the robots.txt server currently held by this controller
     */
    public RobotstxtServer getRobotstxtServer() {
        return robotstxtServer;
    }

    /**
     * Replaces the robots.txt server held by this controller.
     *
     * @param robotstxtServer the new robots.txt server
     */
    public void setRobotstxtServer(RobotstxtServer robotstxtServer) {
        this.robotstxtServer = robotstxtServer;
    }

    /**
     * @return the frontier currently held by this controller
     */
    public Frontier getFrontier() {
        return frontier;
    }

    /**
     * Replaces the frontier held by this controller.
     *
     * @param frontier the new frontier
     */
    public void setFrontier(Frontier frontier) {
        this.frontier = frontier;
    }

    /**
     * @return the doc-id server currently held by this controller
     */
    public DocIDServer getDocIdServer() {
        return docIdServer;
    }

    /**
     * Replaces the doc-id server held by this controller.
     *
     * @param docIdServer the new doc-id server
     */
    public void setDocIdServer(DocIDServer docIdServer) {
        this.docIdServer = docIdServer;
    }

    /**
     * @return the custom data object previously stored on this controller, or
     *         {@code null} if none was set
     */
    @Deprecated
    public Object getCustomData() {
        return customData;
    }

    /**
     * Stores an arbitrary custom data object on this controller.
     *
     * @param customData the object to store
     */
    @Deprecated
    public void setCustomData(Object customData) {
        this.customData = customData;
    }

    /**
     * @return {@code true} once the monitor thread has completed its normal
     *         cleanup for the current crawl
     */
    public boolean isFinished() {
        return finished;
    }

    /**
     * @return {@code true} after {@link #shutdown()} has been called
     */
    public boolean isShuttingDown() {
        return shuttingDown;
    }

    /**
     * Requests a shutdown: raises the shutting-down flag, shuts down the page
     * fetcher and tells the frontier to finish. Final cleanup is left to the
     * monitor thread.
     */
    public void shutdown() {
        logger.info("Shutting down...");
        shuttingDown = true;
        pageFetcher.shutDown();
        frontier.finish();
    }

    /**
     * @return the crawl configuration this controller was created with
     */
    public CrawlConfig getConfig() {
        return config;
    }

    /**
     * @return the failure recorded by the monitor thread, or {@code null} if
     *         none has been recorded since the last start
     */
    protected synchronized Throwable getError() {
        return error;
    }

    /**
     * Records the failure to report to waiters, or clears it.
     *
     * @param e the failure, or {@code null} to clear
     */
    private synchronized void setError(Throwable e) {
        error = e;
    }

    /**
     * @return the TLD list in use; either the one passed to the constructor or
     *         the default created there
     */
    public TLDList getTldList() {
        return tldList;
    }

    /**
     * Factory that creates a fresh crawler per call through the no-argument
     * constructor of a given class.
     *
     * @param <T> concrete crawler type
     */
    private static class DefaultWebCrawlerFactory<T extends WebCrawler>
    implements WebCrawlerFactory<T> {
        final Class<T> clazz;

        DefaultWebCrawlerFactory(Class<T> clazz) {
            this.clazz = clazz;
        }

        /**
         * Instantiates a new crawler.
         *
         * <p>Uses {@code getDeclaredConstructor().newInstance()} rather than the
         * deprecated {@code Class.newInstance()}, which rethrows checked
         * constructor exceptions without declaring them. Exceptions thrown by
         * the constructor now arrive wrapped in an
         * {@link java.lang.reflect.InvocationTargetException}.
         *
         * @return a new crawler instance
         * @throws Exception if the class has no accessible no-argument
         *                   constructor or the constructor itself fails
         */
        @Override
        public T newInstance() throws Exception {
            return this.clazz.getDeclaredConstructor().newInstance();
        }
    }

    /**
     * Factory that hands out the same pre-built crawler on every call.
     *
     * @param <T> concrete crawler type
     */
    private static class SingleInstanceFactory<T extends WebCrawler> implements WebCrawlerFactory<T> {
        final T instance;

        SingleInstanceFactory(T instance) {
            this.instance = instance;
        }

        /**
         * @return the instance supplied at construction time
         */
        @Override
        public T newInstance() throws Exception {
            return instance;
        }
    }

    public static interface WebCrawlerFactory<T extends WebCrawler> {
        public T newInstance() throws Exception;
    }
}

