/*
 * Decompiled with CFR 0.152.
 */
package edu.uci.ics.crawler4j.parser;

import edu.uci.ics.crawler4j.crawler.CrawlConfig;
import edu.uci.ics.crawler4j.crawler.Page;
import edu.uci.ics.crawler4j.crawler.exceptions.ParseException;
import edu.uci.ics.crawler4j.parser.BinaryParseData;
import edu.uci.ics.crawler4j.parser.CssParseData;
import edu.uci.ics.crawler4j.parser.HtmlParseData;
import edu.uci.ics.crawler4j.parser.HtmlParser;
import edu.uci.ics.crawler4j.parser.NotAllowedContentException;
import edu.uci.ics.crawler4j.parser.TextParseData;
import edu.uci.ics.crawler4j.parser.TikaHtmlParser;
import edu.uci.ics.crawler4j.url.TLDList;
import edu.uci.ics.crawler4j.util.Net;
import edu.uci.ics.crawler4j.util.Util;
import org.apache.tika.language.LanguageIdentifier;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Dispatches a fetched {@link Page} to the parsing strategy that matches its content type
 * (binary, CSS, plain text, or anything else via the configured {@link HtmlParser}) and stores
 * the resulting parse data on the page.
 */
public class Parser {
    private static final Logger logger = LoggerFactory.getLogger(Parser.class);
    private final CrawlConfig config;
    private final HtmlParser htmlContentParser;
    private final Net net;

    /**
     * Creates a parser backed by a {@link TikaHtmlParser} built without a TLD list.
     *
     * @param config crawl configuration consulted while parsing
     * @throws IllegalAccessException declared for the {@link TikaHtmlParser} construction
     * @throws InstantiationException declared for the {@link TikaHtmlParser} construction
     * @deprecated passes {@code null} as the TLD list; prefer
     *             {@link #Parser(CrawlConfig, TLDList)}.
     */
    @Deprecated
    public Parser(CrawlConfig config) throws IllegalAccessException, InstantiationException {
        this(config, new TikaHtmlParser(config, null));
    }

    /**
     * Creates a parser backed by a {@link TikaHtmlParser} that shares the given TLD list.
     *
     * @param config crawl configuration consulted while parsing
     * @param tldList TLD list handed to both the HTML parser and the {@link Net} helper
     * @throws IllegalAccessException declared for the {@link TikaHtmlParser} construction
     * @throws InstantiationException declared for the {@link TikaHtmlParser} construction
     */
    public Parser(CrawlConfig config, TLDList tldList) throws IllegalAccessException, InstantiationException {
        this(config, new TikaHtmlParser(config, tldList), tldList);
    }

    /**
     * Creates a parser with a caller-supplied HTML parser and no TLD list.
     *
     * @param config crawl configuration consulted while parsing
     * @param htmlParser parser used for content that is neither binary, CSS nor plain text
     * @deprecated passes {@code null} as the TLD list; prefer
     *             {@link #Parser(CrawlConfig, HtmlParser, TLDList)}.
     */
    @Deprecated
    public Parser(CrawlConfig config, HtmlParser htmlParser) {
        this(config, htmlParser, null);
    }

    /**
     * Creates a parser with a caller-supplied HTML parser and TLD list.
     *
     * @param config crawl configuration consulted while parsing
     * @param htmlParser parser used for content that is neither binary, CSS nor plain text
     * @param tldList TLD list handed to the {@link Net} helper used for URL extraction
     */
    public Parser(CrawlConfig config, HtmlParser htmlParser, TLDList tldList) {
        this.config = config;
        this.htmlContentParser = htmlParser;
        this.net = new Net(config, tldList);
    }

    /**
     * Parses {@code page} according to its content type and attaches the result via
     * {@link Page#setParseData}.
     *
     * <p>The content type is tested in this order: binary, CSS, plain text; everything else is
     * delegated to the configured {@link HtmlParser}.
     *
     * @param page the fetched page to parse; receives the parse data (and, for the HTML path,
     *             language and possibly charset)
     * @param contextURL passed through unchanged to the {@link HtmlParser}
     * @throws NotAllowedContentException if the page is binary and the configuration does not
     *         include binary content in crawling
     * @throws ParseException if the content cannot be parsed
     */
    public void parse(Page page, String contextURL) throws NotAllowedContentException, ParseException {
        String contentType = page.getContentType();
        if (Util.hasBinaryContent(contentType)) {
            parseBinary(page);
        } else if (Util.hasCssTextContent(contentType)) {
            parseCss(page);
        } else if (Util.hasPlainTextContent(contentType)) {
            parsePlainText(page);
        } else {
            parseHtml(page, contextURL);
        }
    }

    /** Handles binary content: either processes the raw bytes or substitutes an empty HTML stub. */
    private void parseBinary(Page page) throws NotAllowedContentException, ParseException {
        // Reject early so no parse data is allocated for content that is not allowed.
        if (!config.isIncludeBinaryContentInCrawling()) {
            throw new NotAllowedContentException();
        }

        BinaryParseData parseData = new BinaryParseData();
        if (config.isProcessBinaryContentInCrawling()) {
            try {
                parseData.setBinaryContent(page.getContentData());
            } catch (Exception e) {
                if (config.isHaltOnError()) {
                    throw new ParseException(e);
                }
                // Best effort: keep going; the null-HTML check below reports the failure.
                logger.error("Error parsing file", e);
            }
        } else {
            parseData.setHtml("<html></html>");
        }

        // The parse data is attached before validation, so the page keeps it even on failure.
        page.setParseData(parseData);
        if (parseData.getHtml() == null) {
            throw new ParseException();
        }
        parseData.setOutgoingUrls(net.extractUrls(parseData.getHtml()));
    }

    /** Handles CSS content: decodes the text and lets {@link CssParseData} derive outgoing URLs. */
    private void parseCss(Page page) throws ParseException {
        try {
            CssParseData parseData = new CssParseData();
            parseData.setTextContent(decodeContent(page));
            parseData.setOutgoingUrls(page.getWebURL());
            page.setParseData(parseData);
        } catch (Exception e) {
            logger.error("{}, while parsing css: {}", e.getMessage(), page.getWebURL().getURL());
            // Chain the cause, as the plain-text path does, so callers can see what failed.
            throw new ParseException(e);
        }
    }

    /** Handles plain-text content: decodes the text and extracts outgoing URLs from it. */
    private void parsePlainText(Page page) throws ParseException {
        try {
            TextParseData parseData = new TextParseData();
            parseData.setTextContent(decodeContent(page));
            parseData.setOutgoingUrls(net.extractUrls(parseData.getTextContent()));
            page.setParseData(parseData);
        } catch (Exception e) {
            logger.error("{}, while parsing: {}", e.getMessage(), page.getWebURL().getURL());
            throw new ParseException(e);
        }
    }

    /**
     * Handles all remaining content through the configured {@link HtmlParser}, filling in the
     * page charset when it is not already set and recording the detected language.
     */
    private void parseHtml(Page page, String contextURL) throws NotAllowedContentException, ParseException {
        HtmlParseData parsedData = htmlContentParser.parse(page, contextURL);
        if (page.getContentCharset() == null) {
            page.setContentCharset(parsedData.getContentCharset());
        }
        LanguageIdentifier languageIdentifier = new LanguageIdentifier(parsedData.getText());
        page.setLanguage(languageIdentifier.getLanguage());
        page.setParseData(parsedData);
    }

    /**
     * Decodes the page's raw content bytes into text, using the page's content charset when one
     * is set and the platform default charset otherwise.
     */
    private static String decodeContent(Page page) throws java.io.UnsupportedEncodingException {
        if (page.getContentCharset() == null) {
            return new String(page.getContentData());
        }
        return new String(page.getContentData(), page.getContentCharset());
    }
}

