package org.archive.crawler.postprocessor;

import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.httpclient.URIException;
import org.archive.modules.CrawlMetadata;
import org.archive.modules.CrawlURI;
import org.archive.modules.Processor;
import org.archive.modules.net.CrawlHost;
import org.archive.modules.net.CrawlServer;
import org.archive.modules.net.IgnoreRobotsPolicy;
import org.archive.modules.net.Robotstxt;
import org.archive.modules.net.ServerCache;
import org.springframework.beans.factory.annotation.Autowired;

/* loaded from: input_file:org/archive/crawler/postprocessor/DispositionProcessor.class */
public class DispositionProcessor extends Processor {
    private static final long serialVersionUID = -1072728147960180091L;
    private static final Logger logger = Logger.getLogger(DispositionProcessor.class.getName());
    protected ServerCache serverCache;
    protected CrawlMetadata metadata;

    public ServerCache getServerCache() {
        return this.serverCache;
    }

    @Autowired
    public void setServerCache(ServerCache serverCache) {
        this.serverCache = serverCache;
    }

    public float getDelayFactor() {
        return ((Float) this.kp.get("delayFactor")).floatValue();
    }

    public void setDelayFactor(float f) {
        this.kp.put("delayFactor", Float.valueOf(f));
    }

    public int getMinDelayMs() {
        return ((Integer) this.kp.get("minDelayMs")).intValue();
    }

    public void setMinDelayMs(int i) {
        this.kp.put("minDelayMs", Integer.valueOf(i));
    }

    public int getRespectCrawlDelayUpToSeconds() {
        return ((Integer) this.kp.get("respectCrawlDelayUpToSeconds")).intValue();
    }

    public void setRespectCrawlDelayUpToSeconds(int i) {
        this.kp.put("respectCrawlDelayUpToSeconds", Integer.valueOf(i));
    }

    public int getMaxDelayMs() {
        return ((Integer) this.kp.get("maxDelayMs")).intValue();
    }

    public void setMaxDelayMs(int i) {
        this.kp.put("maxDelayMs", Integer.valueOf(i));
    }

    public int getMaxPerHostBandwidthUsageKbSec() {
        return ((Integer) this.kp.get("maxPerHostBandwidthUsageKbSec")).intValue();
    }

    public void setMaxPerHostBandwidthUsageKbSec(int i) {
        this.kp.put("maxPerHostBandwidthUsageKbSec", Integer.valueOf(i));
    }

    public boolean getForceRetire() {
        return ((Boolean) this.kp.get("forceRetire")).booleanValue();
    }

    public void setForceRetire(boolean z) {
        this.kp.put("forceRetire", Boolean.valueOf(z));
    }

    public CrawlMetadata getMetadata() {
        return this.metadata;
    }

    @Autowired
    public void setMetadata(CrawlMetadata crawlMetadata) {
        this.metadata = crawlMetadata;
    }

    public DispositionProcessor() {
        setDelayFactor(5.0f);
        setMinDelayMs(3000);
        setRespectCrawlDelayUpToSeconds(300);
        setMaxDelayMs(30000);
        setMaxPerHostBandwidthUsageKbSec(0);
        setForceRetire(false);
    }

    protected boolean shouldProcess(CrawlURI crawlURI) {
        return crawlURI instanceof CrawlURI;
    }

    protected void innerProcess(CrawlURI crawlURI) {
        CrawlServer serverFor = this.serverCache.getServerFor(crawlURI.getUURI());
        String lowerCase = crawlURI.getUURI().getScheme().toLowerCase();
        if (lowerCase.equals("http") || (lowerCase.equals("https") && serverFor != null)) {
            if (crawlURI.getFetchStatus() == -2 || crawlURI.getFetchStatus() == -3) {
                serverFor.incrementConsecutiveConnectionErrors();
            } else if (crawlURI.getFetchStatus() > 0) {
                serverFor.resetConsecutiveConnectionErrors();
            }
            try {
                if ("/robots.txt".equals(crawlURI.getUURI().getPath()) && crawlURI.getFetchStatus() != -50) {
                    if ((this.metadata.getRobotsPolicy() instanceof IgnoreRobotsPolicy) && crawlURI.getFetchStatus() < 0 && crawlURI.getFetchStatus() != -50) {
                        crawlURI.setFetchStatus(-404);
                    }
                    serverFor.updateRobots(crawlURI);
                }
            } catch (URIException e) {
                logger.severe("Failed get path on " + crawlURI.getUURI());
            }
        }
        crawlURI.setPolitenessDelay(politenessDelayFor(crawlURI));
        if (getForceRetire()) {
            crawlURI.setForceRetire(true);
        }
    }

    protected long politenessDelayFor(CrawlURI crawlURI) {
        long j = 0;
        Map data = crawlURI.getData();
        if (data.containsKey("fetch-began-time") && data.containsKey("fetch-completed-time")) {
            j = getDelayFactor() * ((float) (crawlURI.getFetchCompletedTime() - crawlURI.getFetchBeginTime()));
            long minDelayMs = getMinDelayMs();
            if (minDelayMs > j) {
                j = minDelayMs;
            }
            long maxDelayMs = getMaxDelayMs();
            if (j > maxDelayMs) {
                j = maxDelayMs;
            }
            long respectCrawlDelayUpToSeconds = getRespectCrawlDelayUpToSeconds() * 1000;
            if (j < respectCrawlDelayUpToSeconds) {
                CrawlServer serverFor = getServerCache().getServerFor(crawlURI.getUURI());
                String userAgent = crawlURI.getUserAgent();
                if (userAgent == null) {
                    userAgent = this.metadata.getUserAgent();
                }
                Robotstxt robotstxt = serverFor.getRobotstxt();
                if (robotstxt != null) {
                    long crawlDelay = 1000.0f * robotstxt.getDirectivesFor(userAgent).getCrawlDelay();
                    long j2 = crawlDelay > respectCrawlDelayUpToSeconds ? respectCrawlDelayUpToSeconds : crawlDelay;
                    if (j2 > j) {
                        j = j2;
                    }
                }
            }
            long currentTimeMillis = System.currentTimeMillis();
            if (getMaxPerHostBandwidthUsageKbSec() > 0) {
                CrawlHost hostFor = getServerCache().getHostFor(crawlURI.getUURI());
                long earliestNextURIEmitTime = hostFor.getEarliestNextURIEmitTime() - currentTimeMillis;
                hostFor.setEarliestNextURIEmitTime((((float) crawlURI.getContentSize()) / (r0 * 1.024f)) + currentTimeMillis);
                if (earliestNextURIEmitTime > j) {
                    j = earliestNextURIEmitTime;
                }
            }
        }
        return j;
    }
}
