package org.archive.crawler.prefetch;

import java.util.Iterator;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.httpclient.URIException;
import org.archive.crawler.reporting.CrawlerLoggerModule;
import org.archive.modules.CrawlMetadata;
import org.archive.modules.CrawlURI;
import org.archive.modules.ProcessResult;
import org.archive.modules.Processor;
import org.archive.modules.credential.Credential;
import org.archive.modules.credential.CredentialStore;
import org.archive.modules.net.CrawlHost;
import org.archive.modules.net.CrawlServer;
import org.archive.modules.net.ServerCache;
import org.archive.net.UURI;
import org.springframework.beans.factory.annotation.Autowired;

/* loaded from: input_file:org/archive/crawler/prefetch/PreconditionEnforcer.class */
/**
 * Processor that checks whether a URI's preconditions are met before it is
 * allowed to proceed: a usable DNS lookup for its host, a current robots.txt
 * for its server, and any credential prerequisite.
 *
 * <p>When a precondition is missing, the corresponding prerequisite URI is
 * marked on the {@link CrawlURI} (or a failure status is set) and processing
 * of the URI is finished for this pass.
 */
public class PreconditionEnforcer extends Processor {
    private static final long serialVersionUID = 3;
    private static final Logger logger = Logger.getLogger(PreconditionEnforcer.class.getName());

    /** Keys under which the configurable settings are stored in {@code kp}. */
    private static final String KEY_IP_VALIDITY = "ipValidityDurationSeconds";
    private static final String KEY_ROBOTS_VALIDITY = "robotsValidityDurationSeconds";
    private static final String KEY_CALCULATE_ROBOTS_ONLY = "calculateRobotsOnly";
    private static final String KEY_CREDENTIAL_STORE = "credentialStore";

    /** Default settings applied by the constructor. */
    private static final int DEFAULT_IP_VALIDITY_SECONDS = 6 * 60 * 60;
    private static final int DEFAULT_ROBOTS_VALIDITY_SECONDS = 24 * 60 * 60;

    private static final String ROBOTS_PATH = "/robots.txt";
    private static final String DNS_SCHEME = "dns";
    private static final String WHOIS_SCHEME = "whois";

    // Fetch status values set by this processor. The names describe the
    // situation in which this class sets each value.
    // NOTE(review): presumably these mirror the crawler's shared fetch status
    // code constants -- confirm before changing any value.
    private static final int STATUS_DOMAIN_PREREQUISITE_FAILURE = -6;
    private static final int STATUS_UNFETCHABLE_URI = -7;
    private static final int STATUS_ROBOTS_PREREQUISITE_FAILURE = -61;
    private static final int STATUS_ROBOTS_PRECLUDED = -9998;

    /** Host IP TTL value for which {@link #isIpExpired} never reports expiry. */
    private static final long IP_TTL_NEVER_EXPIRES = -1;

    protected CrawlMetadata metadata;
    protected ServerCache serverCache;
    protected CrawlerLoggerModule loggerModule;

    /**
     * @return minimum number of seconds a looked-up IP is considered valid;
     *         {@code 0} means a looked-up IP is never treated as expired
     */
    public int getIpValidityDurationSeconds() {
        return (Integer) this.kp.get(KEY_IP_VALIDITY);
    }

    /** @param i minimum IP validity in seconds; see {@link #getIpValidityDurationSeconds()} */
    public void setIpValidityDurationSeconds(int i) {
        this.kp.put(KEY_IP_VALIDITY, Integer.valueOf(i));
    }

    /** @return the validity duration, in seconds, passed to the server's robots expiry check */
    public int getRobotsValidityDurationSeconds() {
        return (Integer) this.kp.get(KEY_ROBOTS_VALIDITY);
    }

    /** @param i robots.txt validity in seconds; see {@link #getRobotsValidityDurationSeconds()} */
    public void setRobotsValidityDurationSeconds(int i) {
        this.kp.put(KEY_ROBOTS_VALIDITY, Integer.valueOf(i));
    }

    /**
     * @return {@code true} if URIs disallowed by the robots policy should only
     *         be annotated ({@code "robotExcluded"}) instead of being blocked
     */
    public boolean getCalculateRobotsOnly() {
        return (Boolean) this.kp.get(KEY_CALCULATE_ROBOTS_ONLY);
    }

    /** @param z see {@link #getCalculateRobotsOnly()} */
    public void setCalculateRobotsOnly(boolean z) {
        this.kp.put(KEY_CALCULATE_ROBOTS_ONLY, Boolean.valueOf(z));
    }

    /** @return the crawl metadata supplying the robots policy and user agent */
    public CrawlMetadata getMetadata() {
        return this.metadata;
    }

    @Autowired
    public void setMetadata(CrawlMetadata crawlMetadata) {
        this.metadata = crawlMetadata;
    }

    /** @return the credential store consulted by {@link #credentialPrecondition}; may be {@code null} */
    public CredentialStore getCredentialStore() {
        return (CredentialStore) this.kp.get(KEY_CREDENTIAL_STORE);
    }

    @Autowired(required = false)
    public void setCredentialStore(CredentialStore credentialStore) {
        this.kp.put(KEY_CREDENTIAL_STORE, credentialStore);
    }

    /** @return the cache used to look up the server and host of a URI */
    public ServerCache getServerCache() {
        return this.serverCache;
    }

    @Autowired
    public void setServerCache(ServerCache serverCache) {
        this.serverCache = serverCache;
    }

    /** @return the logger module used to report URI errors */
    public CrawlerLoggerModule getLoggerModule() {
        return this.loggerModule;
    }

    @Autowired
    public void setLoggerModule(CrawlerLoggerModule crawlerLoggerModule) {
        this.loggerModule = crawlerLoggerModule;
    }

    /**
     * Creates an enforcer with default settings: 6 hour IP validity, 24 hour
     * robots.txt validity, robots exclusions enforced, and an empty
     * {@link CredentialStore}.
     */
    public PreconditionEnforcer() {
        setIpValidityDurationSeconds(DEFAULT_IP_VALIDITY_SECONDS);
        setRobotsValidityDurationSeconds(DEFAULT_ROBOTS_VALIDITY_SECONDS);
        setCalculateRobotsOnly(false);
        setCredentialStore(new CredentialStore());
    }

    /**
     * @param crawlURI candidate URI
     * @return {@code true} for every non-null URI
     */
    protected boolean shouldProcess(CrawlURI crawlURI) {
        // The parameter is already statically a CrawlURI, so this is a null check.
        return crawlURI != null;
    }

    /**
     * Always throws; this class performs its work in
     * {@link #innerProcessResult(CrawlURI)}.
     *
     * @throws AssertionError always
     */
    protected void innerProcess(CrawlURI crawlURI) {
        throw new AssertionError();
    }

    /**
     * Runs the DNS, robots and credential precondition checks in that order.
     *
     * @param crawlURI URI being processed
     * @return {@link ProcessResult#FINISH} if any precondition deferred or
     *         failed the URI, otherwise {@link ProcessResult#PROCEED}
     */
    protected ProcessResult innerProcessResult(CrawlURI crawlURI) {
        if (considerDnsPreconditions(crawlURI)) {
            return ProcessResult.FINISH;
        }
        // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
        String scheme = crawlURI.getUURI().getScheme().toLowerCase(Locale.ROOT);
        if (!scheme.equals("http") && !scheme.equals("https")) {
            // Robots and credential checks only apply to http(s).
            logger.fine("PreconditionEnforcer doesn't understand uri's of type " + scheme + " (ignoring)");
            return ProcessResult.PROCEED;
        }
        if (considerRobotsPreconditions(crawlURI)) {
            return ProcessResult.FINISH;
        }
        // A URI that is itself a prerequisite skips the credential check.
        if (!crawlURI.isPrerequisite() && credentialPrecondition(crawlURI)) {
            return ProcessResult.FINISH;
        }
        return ProcessResult.PROCEED;
    }

    /**
     * Checks the robots.txt precondition for a URI.
     *
     * <p>A URI whose path is {@code /robots.txt} is flagged as a prerequisite
     * and let through. Otherwise, if the server's robots information has
     * expired, {@code /robots.txt} is marked as a prerequisite of the URI; if
     * it is invalid the URI is failed; and if the robots policy disallows the
     * URI it is either annotated (see {@link #getCalculateRobotsOnly()}) or
     * precluded.
     *
     * @param crawlURI URI being processed
     * @return {@code true} if processing of the URI must stop (deferred,
     *         failed or precluded), {@code false} if it may continue
     * @throws RuntimeException if the robots.txt URI cannot be resolved
     */
    protected boolean considerRobotsPreconditions(CrawlURI crawlURI) {
        UURI uuri = crawlURI.getUURI();
        if (uuri != null) {
            try {
                if (ROBOTS_PATH.equals(uuri.getPath())) {
                    // The robots.txt fetch itself must never wait on robots.txt.
                    crawlURI.setPrerequisite(true);
                    return false;
                }
            } catch (URIException e) {
                // Fall through to the normal checks, but keep the cause in the log.
                logger.log(Level.SEVERE, "Failed get of path for " + crawlURI, e);
            }
        }

        CrawlServer server = this.serverCache.getServerFor(uuri);
        if (server.isRobotsExpired(getRobotsValidityDurationSeconds())) {
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("No valid robots for " + server + "; deferring " + crawlURI);
            }
            try {
                crawlURI.markPrerequisite(uuri.resolve(ROBOTS_PATH).toString());
                return true;
            } catch (URIException e) {
                logger.severe("Failed resolve using " + crawlURI);
                throw new RuntimeException(e);
            }
        }

        if (!server.isValidRobots()) {
            crawlURI.setFetchStatus(STATUS_ROBOTS_PREREQUISITE_FAILURE);
            crawlURI.setError("robots.txt prerequisite failed");
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("robots.txt prerequisite failed " + crawlURI);
            }
            return true;
        }

        if (this.metadata.getRobotsPolicy().allows(this.metadata.getUserAgent(), crawlURI, server.getRobotstxt())) {
            return false;
        }
        if (getCalculateRobotsOnly()) {
            // Record the exclusion but do not enforce it.
            crawlURI.getAnnotations().add("robotExcluded");
            return false;
        }
        crawlURI.setFetchStatus(STATUS_ROBOTS_PRECLUDED);
        crawlURI.setError("robots.txt exclusion");
        logger.fine("robots.txt precluded " + crawlURI);
        return true;
    }

    /**
     * Checks the DNS precondition for a URI.
     *
     * <p>{@code dns:} URIs are flagged as prerequisites and let through, and
     * {@code whois:} URIs are let through unchanged. For any other scheme the
     * URI is failed when no server or host is known for it (or the host was
     * looked up without yielding an IP), and deferred behind a
     * {@code dns:<host>} prerequisite when the host's IP has expired.
     *
     * @param crawlURI URI being processed
     * @return {@code true} if processing of the URI must stop (failed or
     *         deferred), {@code false} if it may continue
     * @throws RuntimeException if the dns prerequisite URI cannot be created
     */
    protected boolean considerDnsPreconditions(CrawlURI crawlURI) {
        String scheme = crawlURI.getUURI().getScheme();
        if (scheme.equals(DNS_SCHEME)) {
            crawlURI.setPrerequisite(true);
            return false;
        }
        if (scheme.equals(WHOIS_SCHEME)) {
            return false;
        }

        if (this.serverCache.getServerFor(crawlURI.getUURI()) == null) {
            crawlURI.setFetchStatus(STATUS_UNFETCHABLE_URI);
            return true;
        }

        CrawlHost host = this.serverCache.getHostFor(crawlURI.getUURI());
        if (host == null || (host.hasBeenLookedUp() && host.getIP() == null)) {
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("no dns for " + host + " cancelling processing for CrawlURI " + crawlURI.toString());
            }
            crawlURI.setFetchStatus(STATUS_DOMAIN_PREREQUISITE_FAILURE);
            return true;
        }

        // The dns scheme has already returned above, so only expiry matters here.
        if (!isIpExpired(crawlURI)) {
            return false;
        }
        logger.fine("Deferring processing of CrawlURI " + crawlURI.toString() + " for dns lookup.");
        try {
            crawlURI.markPrerequisite("dns:" + host.getHostName());
            return true;
        } catch (URIException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Tells whether the cached IP for a URI's host needs a fresh lookup.
     *
     * @param crawlURI URI whose host is checked
     * @return {@code true} if no host is known, the host has not been looked
     *         up, or the larger of the host's IP TTL and
     *         {@link #getIpValidityDurationSeconds()} has elapsed since the IP
     *         was fetched; {@code false} if the TTL is {@code -1} or the
     *         configured validity is {@code 0}
     */
    public boolean isIpExpired(CrawlURI crawlURI) {
        CrawlHost host = this.serverCache.getHostFor(crawlURI.getUURI());
        // The host lookup can yield null (considerDnsPreconditions guards for
        // it); with no host record there is no valid IP, so report expiry
        // instead of failing with a NullPointerException.
        if (host == null || !host.hasBeenLookedUp()) {
            return true;
        }

        long ttlSeconds = host.getIpTTL();
        if (ttlSeconds == IP_TTL_NEVER_EXPIRES) {
            return false;
        }

        long configuredSeconds = getIpValidityDurationSeconds();
        if (configuredSeconds == 0) {
            return false;
        }

        // Use whichever is longer: the configured minimum or the record's TTL.
        long validity = Math.max(ttlSeconds, configuredSeconds);
        if (validity > 0) {
            // Seconds to milliseconds, to match System.currentTimeMillis().
            validity *= 1000;
        }
        return validity + host.getIpFetched() < System.currentTimeMillis();
    }

    /**
     * Checks the credential precondition for a URI.
     *
     * <p>If the URI is itself the prerequisite of a credential, that
     * credential is attached, the fetch type is set to HTTP POST and the scan
     * stops. Otherwise, for each credential that matches the URI's root, has a
     * prerequisite, and is not yet authenticated on the server, the
     * credential's prerequisite is marked on the URI.
     *
     * @param crawlURI URI being processed
     * @return {@code true} if a credential prerequisite was marked on the
     *         URI; {@code false} otherwise, including when there is no
     *         credential store or marking the prerequisite failed
     */
    protected boolean credentialPrecondition(CrawlURI crawlURI) {
        CredentialStore store = getCredentialStore();
        if (store == null) {
            logger.severe("No credential store for " + crawlURI);
            return false;
        }

        boolean prerequisiteQueued = false;
        for (Credential credential : store.getAll()) {
            if (credential.isPrerequisite(crawlURI)) {
                // This URI is the credential's own prerequisite: let it through.
                credential.attach(crawlURI);
                crawlURI.setFetchType(CrawlURI.FetchType.HTTP_POST);
                break;
            }
            if (!credential.rootUriMatch(this.serverCache, crawlURI)
                    || !credential.hasPrerequisite(crawlURI)
                    || authenticated(credential, crawlURI)) {
                continue;
            }

            String prerequisite = credential.getPrerequisite(crawlURI);
            if (prerequisite == null || prerequisite.isEmpty()) {
                logger.severe(this.serverCache.getServerFor(crawlURI.getUURI()).getName() + " has  credential(s) of type " + credential + " but prereq is null.");
                continue;
            }

            try {
                crawlURI.markPrerequisite(prerequisite);
            } catch (URIException e) {
                logger.severe("unable to set credentials prerequisite " + prerequisite);
                this.loggerModule.logUriError(e, crawlURI.getUURI(), prerequisite);
                return false;
            }
            // NOTE(review): the scan continues after a prerequisite has been
            // queued, so a later credential may mark another prerequisite on
            // the same URI -- confirm whether stopping here is intended.
            prerequisiteQueued = true;
            if (logger.isLoggable(Level.FINE)) {
                logger.fine("Queueing prereq " + prerequisite + " of type " + credential + " for " + crawlURI);
            }
        }
        return prerequisiteQueued;
    }

    /**
     * Tells whether the URI's server already holds a credential equivalent to
     * the given one.
     *
     * @param credential credential to look for
     * @param crawlURI URI whose server is inspected
     * @return {@code true} if the server has a credential with an equal key
     *         whose class the given credential is an instance of
     */
    protected boolean authenticated(Credential credential, CrawlURI crawlURI) {
        CrawlServer server = this.serverCache.getServerFor(crawlURI.getUURI());
        if (!server.hasCredentials()) {
            return false;
        }
        for (Credential attached : server.getCredentials()) {
            if (attached.getKey().equals(credential.getKey()) && attached.getClass().isInstance(credential)) {
                return true;
            }
        }
        return false;
    }
}
