package org.archive.modules.extractor;

import groovy.text.SimpleTemplateEngine;
import groovy.text.Template;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import org.apache.commons.httpclient.URIException;
import org.archive.io.ReplayCharSequence;
import org.archive.modules.CrawlURI;
import org.archive.util.TextUtils;

/* loaded from: input_file:org/archive/modules/extractor/ExtractorMultipleRegex.class */
/**
 * Extractor that builds outlinks by filling a Groovy template with the groups
 * captured by several regular expressions.
 *
 * <p>Processing of a URI works as follows:
 * <ol>
 *   <li>{@code uriRegex} must match the entire URI, otherwise nothing is
 *       extracted.</li>
 *   <li>Every entry of {@code contentRegexes} (name -&gt; regex) is searched for
 *       in the recorded content. If any one of them finds no match, nothing is
 *       extracted.</li>
 *   <li>For every combination consisting of one match per regex, the template
 *       is rendered with the regex names bound to the groups of the chosen
 *       match, and the rendered string is added as an outlink relative to the
 *       base URI.</li>
 * </ol>
 */
public class ExtractorMultipleRegex extends Extractor {
    private static final Logger LOGGER = Logger.getLogger(ExtractorMultipleRegex.class.getName());

    /** Compiled templates, keyed by the template source text they were compiled from. */
    protected ConcurrentHashMap<String, Template> groovyTemplates;

    /**
     * The groups of a single regex match, in group order; element 0 is the
     * whole match (group 0).
     */
    protected class GroupList extends LinkedList<String> {
        private static final long serialVersionUID = 1;

        /**
         * Copies group 0 through {@code groupCount()} out of the given match.
         *
         * @param matchResult a successful match whose groups are copied
         */
        public GroupList(MatchResult matchResult) {
            for (int group = 0; group <= matchResult.groupCount(); group++) {
                add(matchResult.group(group));
            }
        }
    }

    /** All matches of one regex, each represented by its {@link GroupList}. */
    public class MatchList extends LinkedList<GroupList> {
        private static final long serialVersionUID = 1;

        /**
         * Collects every occurrence of the regex found in the given text.
         *
         * @param str the regular expression to search for
         * @param charSequence the text to search
         */
        public MatchList(String str, CharSequence charSequence) {
            Matcher matcher = TextUtils.getMatcher(str, charSequence);
            while (matcher.find()) {
                add(new GroupList(matcher));
            }
        }

        /**
         * Builds a list from already-extracted matches, preserving their order.
         *
         * @param groupListArr the matches to store
         */
        public MatchList(GroupList... groupListArr) {
            for (GroupList groups : groupListArr) {
                add(groups);
            }
        }
    }

    /**
     * Creates an extractor with an empty URI regex, no content regexes and an
     * empty template.
     */
    public ExtractorMultipleRegex() {
        setUriRegex("");
        setContentRegexes(new LinkedHashMap<String, String>());
        setTemplate("");
        this.groovyTemplates = new ConcurrentHashMap<>();
    }

    /**
     * @param str regex that the whole URI must match for extraction to happen
     */
    public void setUriRegex(String str) {
        this.kp.put("uriRegex", str);
    }

    /**
     * @return the regex that the whole URI must match for extraction to happen
     */
    public String getUriRegex() {
        return (String) this.kp.get("uriRegex");
    }

    /**
     * @param map regex name -&gt; regex; each regex is searched for in the content
     *            and its name becomes a template binding
     */
    public void setContentRegexes(Map<String, String> map) {
        this.kp.put("contentRegexes", map);
    }

    /**
     * @return the configured content regexes, keyed by name
     */
    @SuppressWarnings("unchecked") // the only writer is setContentRegexes(Map<String, String>)
    public Map<String, String> getContentRegexes() {
        return (Map<String, String>) this.kp.get("contentRegexes");
    }

    /**
     * @param str Groovy template source used to render each outlink
     */
    public void setTemplate(String str) {
        this.kp.put("template", str);
    }

    /**
     * @return the Groovy template source used to render each outlink
     */
    public String getTemplate() {
        return (String) this.kp.get("template");
    }

    /**
     * Returns the compiled form of the current template, compiling and caching
     * it on first use.
     *
     * @return the compiled template, or {@code null} if compilation failed
     *         (the failure is logged at SEVERE level)
     */
    protected Template groovyTemplate() {
        // Read the setting once so lookup, compilation and caching all refer
        // to the same template text.
        String source = getTemplate();
        Template compiled = this.groovyTemplates.get(source);
        if (compiled == null) {
            try {
                compiled = new SimpleTemplateEngine().createTemplate(source);
                this.groovyTemplates.put(source, compiled);
            } catch (Exception e) {
                LOGGER.log(Level.SEVERE, "problem with groovy template " + source, e);
            }
        }
        return compiled;
    }

    /**
     * Accepts URIs with a positive content length; 404 responses are accepted
     * only when the extractor parameters enable extraction from 404s.
     */
    @Override
    protected boolean shouldProcess(CrawlURI crawlURI) {
        if (crawlURI.getContentLength() <= 0) {
            return false;
        }
        return getExtractorParameters().getExtract404s() || crawlURI.getFetchStatus() != 404;
    }

    /**
     * Runs the URI regex and all content regexes and adds one outlink per
     * combination of matches. Does nothing if the URI regex does not match the
     * whole URI, if any content regex has no match, or if the template cannot
     * be compiled. An {@link IOException} while obtaining the content is
     * recorded on the URI as a non-fatal failure and logged.
     *
     * @param crawlURI the URI whose content is examined and which receives the outlinks
     */
    @Override
    public void extract(CrawlURI crawlURI) {
        Matcher uriMatcher = TextUtils.getMatcher(getUriRegex(), crawlURI.getURI());
        if (!uriMatcher.matches()) {
            return;
        }

        // regex name -> all matches of that regex, in configuration order
        Map<String, MatchList> matchLists = new LinkedHashMap<String, MatchList>();
        matchLists.put("uriRegex", new MatchList(new GroupList(uriMatcher)));

        try {
            ReplayCharSequence content = crawlURI.getRecorder().getContentReplayCharSequence();
            for (Map.Entry<String, String> regex : getContentRegexes().entrySet()) {
                MatchList matches = new MatchList(regex.getValue(), content);
                if (matches.isEmpty()) {
                    // every regex must match at least once
                    return;
                }
                matchLists.put(regex.getKey(), matches);
            }

            // Without a usable template no outlink can be rendered; the
            // compilation failure has been logged by groovyTemplate().
            if (groovyTemplate() == null) {
                return;
            }

            // Number of combinations = product of the per-regex match counts.
            // NOTE(review): this int product is not checked for overflow.
            int combinations = 1;
            for (MatchList matches : matchLists.values()) {
                combinations *= matches.size();
            }

            String[] regexNames = matchLists.keySet().toArray(new String[0]);
            for (int index = 0; index < combinations; index++) {
                buildAndAddOutlink(crawlURI, makeBindings(matchLists, regexNames, index));
            }
        } catch (IOException e) {
            crawlURI.getNonFatalFailures().add(e);
            LOGGER.log(Level.WARNING, "Failed get of replay char sequence in " + Thread.currentThread().getName(), e);
        }
    }

    /**
     * Selects one match per regex for the given combination index. The index is
     * decoded as a mixed-radix number: the first name in {@code strArr} is the
     * least significant digit and each digit's radix is the number of matches
     * of the corresponding regex.
     *
     * @param map regex name -&gt; matches of that regex; must contain every name in {@code strArr}
     * @param strArr regex names, in the order used to decode the index
     * @param i combination index
     * @return regex name -&gt; the {@link GroupList} chosen for this combination,
     *         in the order of {@code strArr}
     */
    protected Map<String, Object> makeBindings(Map<String, MatchList> map, String[] strArr, int i) {
        Map<String, Object> bindings = new LinkedHashMap<String, Object>();
        int remaining = i;
        for (String name : strArr) {
            MatchList matches = map.get(name);
            bindings.put(name, matches.get(remaining % matches.size()));
            remaining /= matches.size();
        }
        return bindings;
    }

    /**
     * Renders the template with the given bindings and adds the result as an
     * inferred outlink relative to the base URI. Does nothing when the template
     * could not be compiled. A malformed resulting URI is reported through
     * {@code logUriError}.
     *
     * @param crawlURI the URI that receives the outlink
     * @param map template bindings, as produced by {@link #makeBindings}
     */
    protected void buildAndAddOutlink(CrawlURI crawlURI, Map<String, Object> map) {
        Template template = groovyTemplate();
        if (template == null) {
            // groovyTemplate() already logged why compilation failed
            return;
        }
        String outlink = template.make(map).toString();
        try {
            Link.addRelativeToBase(crawlURI, getExtractorParameters().getMaxOutlinks(), outlink, HTMLLinkContext.INFERRED_MISC, Hop.INFERRED);
        } catch (URIException e) {
            logUriError(e, crawlURI.getUURI(), outlink);
        }
    }
}
