package com.facebook.presto.parquet.reader;

import com.facebook.presto.common.block.ArrayBlock;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.BlockBuilder;
import com.facebook.presto.common.block.BlockBuilderStatus;
import com.facebook.presto.common.block.IntArrayBlock;
import com.facebook.presto.common.block.LongArrayBlock;
import com.facebook.presto.common.block.RowBlock;
import com.facebook.presto.common.block.RunLengthEncodedBlock;
import com.facebook.presto.common.type.BigintType;
import com.facebook.presto.common.type.IntegerType;
import com.facebook.presto.common.type.MapType;
import com.facebook.presto.common.type.SmallintType;
import com.facebook.presto.common.type.TinyintType;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.memory.context.AggregatedMemoryContext;
import com.facebook.presto.parquet.ColumnReader;
import com.facebook.presto.parquet.ColumnReaderFactory;
import com.facebook.presto.parquet.Field;
import com.facebook.presto.parquet.GroupField;
import com.facebook.presto.parquet.ParquetCorruptionException;
import com.facebook.presto.parquet.ParquetDataSource;
import com.facebook.presto.parquet.ParquetResultVerifierUtils;
import com.facebook.presto.parquet.ParquetValidationUtils;
import com.facebook.presto.parquet.PrimitiveField;
import com.facebook.presto.parquet.RichColumnDescriptor;
import com.google.common.base.Preconditions;
import io.airlift.units.DataSize;
import it.unimi.dsi.fastutil.booleans.BooleanArrayList;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import java.io.Closeable;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import org.apache.parquet.column.ColumnDescriptor;
import org.apache.parquet.hadoop.metadata.BlockMetaData;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.ColumnPath;
import org.apache.parquet.io.MessageColumnIO;
import org.apache.parquet.io.PrimitiveColumnIO;
import org.apache.parquet.schema.PrimitiveType;

/* loaded from: input_file:com/facebook/presto/parquet/reader/ParquetReader.class */
/**
 * Reads the row groups of a Parquet file batch by batch and materializes requested
 * fields (primitives, arrays, maps and structs) as {@link Block}s.
 *
 * <p>Typical use: call {@link #nextBatch()} until it returns {@code -1}; after each
 * successful call, invoke {@link #readBlock(Field)} for every field of interest.
 * The batch size starts at {@code INITIAL_BATCH_SIZE}, is multiplied by
 * {@code BATCH_SIZE_GROWTH_FACTOR} after every batch up to {@code MAX_VECTOR_LENGTH},
 * and is additionally capped so that the estimated bytes per batch stay below the
 * configured maximum read block size.
 */
public class ParquetReader implements Closeable {
    /** Hard upper bound on the number of rows in one batch. */
    private static final int MAX_VECTOR_LENGTH = 1024;
    /** Number of rows requested for the very first batch. */
    private static final int INITIAL_BATCH_SIZE = 1;
    /** Multiplier applied to the batch size after each batch. */
    private static final int BATCH_SIZE_GROWTH_FACTOR = 2;
    private final List<BlockMetaData> blocks;
    private final List<PrimitiveColumnIO> columns;
    private final ParquetDataSource dataSource;
    private final AggregatedMemoryContext systemMemoryContext;
    private final boolean batchReadEnabled;
    private final boolean enableVerification;
    /** Index into {@link #blocks} of the next row group to open. */
    private int currentBlock;
    private BlockMetaData currentBlockMetadata;
    /** Total number of rows handed out by {@link #nextBatch()} so far. */
    private long currentPosition;
    private long currentGroupRowCount;
    private long nextRowInGroup;
    /** Row count of the batch most recently returned by {@link #nextBatch()}. */
    private int batchSize;
    private final ColumnReader[] columnReaders;
    /** Second set of readers used to cross-check results; {@code null} unless verification is enabled. */
    protected final ColumnReader[] verificationColumnReaders;
    /** Largest observed bytes-per-row for each primitive column, indexed by column id. */
    private final long[] maxBytesPerCell;
    /** Sum of {@link #maxBytesPerCell} over all columns read so far. */
    private long maxCombinedBytesPerRow;
    private final long maxReadBlockBytes;
    private AggregatedMemoryContext currentRowGroupMemoryContext;
    private int nextBatchSize = INITIAL_BATCH_SIZE;
    private int maxBatchSize = MAX_VECTOR_LENGTH;

    /**
     * Creates a reader over the given row groups.
     *
     * @param messageColumnIO column IO of the file schema; its leaves define the primitive columns
     * @param list metadata of the row groups to read, in read order
     * @param parquetDataSource source of the raw file bytes; closed by {@link #close()}
     * @param aggregatedMemoryContext parent memory context used to account for column chunk buffers
     * @param dataSize maximum read block size used to cap the batch size
     * @param z whether batch reads are enabled (forwarded to {@link ColumnReaderFactory#createReader})
     * @param z2 whether every column is additionally read by a verification reader and compared
     * @throws NullPointerException if the data source, memory context or data size is null
     */
    public ParquetReader(MessageColumnIO messageColumnIO, List<BlockMetaData> list, ParquetDataSource parquetDataSource, AggregatedMemoryContext aggregatedMemoryContext, DataSize dataSize, boolean z, boolean z2) {
        this.blocks = list;
        this.dataSource = Objects.requireNonNull(parquetDataSource, "dataSource is null");
        this.systemMemoryContext = Objects.requireNonNull(aggregatedMemoryContext, "systemMemoryContext is null");
        this.currentRowGroupMemoryContext = aggregatedMemoryContext.newAggregatedMemoryContext();
        this.maxReadBlockBytes = Objects.requireNonNull(dataSize, "maxReadBlockSize is null").toBytes();
        this.batchReadEnabled = z;
        this.columns = messageColumnIO.getLeaves();
        this.columnReaders = new ColumnReader[this.columns.size()];
        this.enableVerification = z2;
        this.verificationColumnReaders = z2 ? new ColumnReader[this.columns.size()] : null;
        this.maxBytesPerCell = new long[this.columns.size()];
    }

    /**
     * Releases the memory context of the current row group and closes the data source.
     * The data source is closed even if releasing the memory context fails.
     */
    @Override // java.io.Closeable, java.lang.AutoCloseable
    public void close() throws IOException {
        try {
            currentRowGroupMemoryContext.close();
        } finally {
            dataSource.close();
        }
    }

    /** Returns the total number of rows handed out by {@link #nextBatch()} so far. */
    public long getPosition() {
        return currentPosition;
    }

    /**
     * Advances to the next batch of rows, moving to the next row group when the current one
     * is exhausted, and tells every column reader how many rows to prepare.
     *
     * @return the number of rows in the new batch, or {@code -1} when no row groups remain
     */
    public int nextBatch() {
        if (nextRowInGroup >= currentGroupRowCount && !advanceToNextRowGroup()) {
            return -1;
        }
        batchSize = Math.min(nextBatchSize, maxBatchSize);
        nextBatchSize = Math.min(batchSize * BATCH_SIZE_GROWTH_FACTOR, MAX_VECTOR_LENGTH);
        // Never read past the end of the current row group.
        batchSize = Math.toIntExact(Math.min(batchSize, currentGroupRowCount - nextRowInGroup));
        nextRowInGroup += batchSize;
        currentPosition += batchSize;
        for (ColumnReader reader : columnReaders) {
            reader.prepareNextRead(batchSize);
        }
        if (enableVerification) {
            for (ColumnReader verifier : verificationColumnReaders) {
                verifier.prepareNextRead(batchSize);
            }
        }
        return batchSize;
    }

    /**
     * Opens the next row group: replaces the per-row-group memory context, resets the
     * in-group row cursor and creates fresh column readers.
     *
     * @return {@code false} if all row groups have already been consumed
     */
    private boolean advanceToNextRowGroup() {
        currentRowGroupMemoryContext.close();
        currentRowGroupMemoryContext = systemMemoryContext.newAggregatedMemoryContext();
        if (currentBlock == blocks.size()) {
            return false;
        }
        currentBlockMetadata = blocks.get(currentBlock);
        currentBlock++;
        nextRowInGroup = 0L;
        currentGroupRowCount = currentBlockMetadata.getRowCount();
        initializeColumnReaders();
        return true;
    }

    /**
     * Reads an array field by reading its element field and wrapping the element block
     * in an {@link ArrayBlock} using offsets derived from the definition/repetition levels.
     */
    private ColumnChunk readArray(GroupField groupField) throws IOException {
        List<Type> typeParameters = groupField.getType().getTypeParameters();
        // Guava's checkArgument only substitutes %s placeholders.
        Preconditions.checkArgument(typeParameters.size() == 1, "Arrays must have a single type parameter, found %s", typeParameters.size());
        ColumnChunk elementChunk = readColumnChunk(groupField.getChildren().get(0).get());
        IntArrayList offsets = new IntArrayList();
        BooleanArrayList collectionIsNull = new BooleanArrayList();
        ListColumnReader.calculateCollectionOffsets(groupField, offsets, collectionIsNull, elementChunk.getDefinitionLevels(), elementChunk.getRepetitionLevels());
        Block arrayBlock = ArrayBlock.fromElementBlock(collectionIsNull.size(), Optional.of(collectionIsNull.toBooleanArray()), offsets.toIntArray(), elementChunk.getBlock());
        return new ColumnChunk(arrayBlock, elementChunk.getDefinitionLevels(), elementChunk.getRepetitionLevels());
    }

    /**
     * Reads a map field by reading its key and value fields and combining them into a map
     * block. Offsets and null flags are derived from the key column's levels.
     */
    private ColumnChunk readMap(GroupField groupField) throws IOException {
        List<Type> typeParameters = groupField.getType().getTypeParameters();
        // Guava's checkArgument only substitutes %s placeholders.
        Preconditions.checkArgument(typeParameters.size() == 2, "Maps must have two type parameters, found %s", typeParameters.size());
        ColumnChunk keyChunk = readColumnChunk(groupField.getChildren().get(0).get());
        Block keyBlock = keyChunk.getBlock();
        Block valueBlock = readColumnChunk(groupField.getChildren().get(1).get()).getBlock();
        IntArrayList offsets = new IntArrayList();
        BooleanArrayList collectionIsNull = new BooleanArrayList();
        ListColumnReader.calculateCollectionOffsets(groupField, offsets, collectionIsNull, keyChunk.getDefinitionLevels(), keyChunk.getRepetitionLevels());
        // readColumnChunk only dispatches here for fields whose type base is "map".
        MapType mapType = (MapType) groupField.getType();
        Block mapBlock = mapType.createBlockFromKeyValue(offsets.size() - 1, Optional.of(collectionIsNull.toBooleanArray()), offsets.toIntArray(), keyBlock, valueBlock);
        return new ColumnChunk(mapBlock, keyChunk.getDefinitionLevels(), keyChunk.getRepetitionLevels());
    }

    /**
     * Reads a struct (row) field. Every present child field is read; children that are
     * absent are filled with all-null run-length-encoded blocks of the child's own type.
     *
     * @throws ParquetCorruptionException if none of the struct's children is present,
     *         because the struct's levels are taken from the last child that was read
     */
    private ColumnChunk readStruct(GroupField groupField) throws IOException {
        Type structType = groupField.getType();
        int fieldCount = structType.getTypeSignature().getParameters().size();
        List<Type> fieldTypes = structType.getTypeParameters();
        List<Optional<Field>> children = groupField.getChildren();
        Block[] fieldBlocks = new Block[fieldCount];
        ColumnChunk lastChunk = null;
        for (int i = 0; i < fieldCount; i++) {
            Optional<Field> child = children.get(i);
            if (child.isPresent()) {
                lastChunk = readColumnChunk(child.get());
                fieldBlocks[i] = lastChunk.getBlock();
            }
        }
        if (lastChunk == null) {
            throw new ParquetCorruptionException("Struct field does not have any children: %s", groupField);
        }
        int positionCount = lastChunk.getBlock().getPositionCount();
        for (int i = 0; i < fieldCount; i++) {
            if (fieldBlocks[i] == null) {
                // The placeholder must carry the missing child's type, not the enclosing struct's type.
                fieldBlocks[i] = RunLengthEncodedBlock.create(fieldTypes.get(i), (Object) null, positionCount);
            }
        }
        boolean[] structIsNull = StructColumnReader.calculateStructOffsets(groupField, lastChunk.getDefinitionLevels(), lastChunk.getRepetitionLevels()).toBooleanArray();
        Block rowBlock = RowBlock.fromFieldBlocks(structIsNull.length, Optional.of(structIsNull), fieldBlocks);
        return new ColumnChunk(rowBlock, lastChunk.getDefinitionLevels(), lastChunk.getRepetitionLevels());
    }

    /**
     * Reads the current batch of a primitive column. On first use within a row group the
     * whole column chunk is loaded into memory and the reader (plus the verification reader,
     * if enabled) is initialized from it. The result is coerced to the requested type and
     * used to update the per-row size estimate that caps future batch sizes.
     */
    private ColumnChunk readPrimitive(PrimitiveField primitiveField) throws IOException {
        RichColumnDescriptor descriptor = primitiveField.getDescriptor();
        int id = primitiveField.getId();
        ColumnReader columnReader = columnReaders[id];
        if (!columnReader.isInitialized()) {
            ParquetValidationUtils.validateParquet(currentBlockMetadata.getRowCount() > 0, "Row group has 0 rows");
            ColumnChunkMetaData chunkMetadata = getColumnChunkMetaData(descriptor);
            long startingPosition = chunkMetadata.getStartingPos();
            int totalSize = Math.toIntExact(chunkMetadata.getTotalSize());
            byte[] buffer = allocateBlock(totalSize);
            dataSource.readFully(startingPosition, buffer);
            ColumnChunkDescriptor chunkDescriptor = new ColumnChunkDescriptor(descriptor, chunkMetadata, totalSize);
            columnReader.init(new ParquetColumnChunk(chunkDescriptor, buffer, 0).readAllPages(), primitiveField);
            if (enableVerification) {
                // The verification reader gets its own page reader over the same buffer.
                verificationColumnReaders[id].init(new ParquetColumnChunk(chunkDescriptor, buffer, 0).readAllPages(), primitiveField);
            }
        }
        ColumnChunk result = typeCoercion(columnReader.readNext(), descriptor.getPrimitiveType().getPrimitiveTypeName(), primitiveField.getType());
        if (enableVerification) {
            // A path longer than one element means the column is nested inside a group.
            boolean nested = descriptor.getPath().length > 1;
            ParquetResultVerifierUtils.verifyColumnChunks(result, verificationColumnReaders[id].readNext(), nested, primitiveField, dataSource.getId());
        }
        long bytesPerCell = result.getBlock().getSizeInBytes() / batchSize;
        if (maxBytesPerCell[id] < bytesPerCell) {
            // Replace this column's old contribution to the per-row estimate with the new maximum,
            // then shrink the batch cap so one batch stays within maxReadBlockBytes (at least 1 row).
            maxCombinedBytesPerRow = (maxCombinedBytesPerRow - maxBytesPerCell[id]) + bytesPerCell;
            maxBatchSize = Math.toIntExact(Math.min(maxBatchSize, Math.max(1L, maxReadBlockBytes / maxCombinedBytesPerRow)));
            maxBytesPerCell[id] = bytesPerCell;
        }
        return result;
    }

    /** Allocates a buffer of {@code i} bytes and records its size in the current row-group memory context. */
    private byte[] allocateBlock(int i) {
        byte[] buffer = new byte[i];
        currentRowGroupMemoryContext.newLocalMemoryContext(ParquetReader.class.getSimpleName()).setBytes(buffer.length);
        return buffer;
    }

    /**
     * Finds the column chunk metadata of the current row group whose path matches the descriptor.
     *
     * @throws ParquetCorruptionException if the current row group has no chunk for the column
     */
    private ColumnChunkMetaData getColumnChunkMetaData(ColumnDescriptor columnDescriptor) throws IOException {
        ColumnPath targetPath = ColumnPath.get(columnDescriptor.getPath());
        for (ColumnChunkMetaData chunkMetadata : currentBlockMetadata.getColumns()) {
            if (chunkMetadata.getPath().equals(targetPath)) {
                return chunkMetadata;
            }
        }
        throw new ParquetCorruptionException("Metadata is missing for column: %s", columnDescriptor);
    }

    /** Creates a fresh reader (and verification reader, if enabled) for every primitive column. */
    private void initializeColumnReaders() {
        for (PrimitiveColumnIO columnIO : columns) {
            RichColumnDescriptor descriptor = new RichColumnDescriptor(columnIO.getColumnDescriptor(), columnIO.getType().asPrimitiveType());
            columnReaders[columnIO.getId()] = ColumnReaderFactory.createReader(descriptor, batchReadEnabled);
            if (enableVerification) {
                // Verification always uses the non-batch reader.
                verificationColumnReaders[columnIO.getId()] = ColumnReaderFactory.createReader(descriptor, false);
            }
        }
    }

    /**
     * Reads the given field for the batch selected by the last {@link #nextBatch()} call.
     *
     * @param field the field to materialize
     * @return the block holding the field's values for the current batch
     * @throws IOException if the underlying data cannot be read or is corrupt
     */
    public Block readBlock(Field field) throws IOException {
        return readColumnChunk(field).getBlock();
    }

    /** Dispatches to the struct, map, array or primitive reader based on the field type's base name. */
    private ColumnChunk readColumnChunk(Field field) throws IOException {
        String base = field.getType().getTypeSignature().getBase();
        if ("row".equals(base)) {
            return readStruct((GroupField) field);
        }
        if ("map".equals(base)) {
            return readMap((GroupField) field);
        }
        if ("array".equals(base)) {
            return readArray((GroupField) field);
        }
        return readPrimitive((PrimitiveField) field);
    }

    /** Returns the data source this reader reads from. */
    public ParquetDataSource getDataSource() {
        return dataSource;
    }

    /** Returns the parent memory context supplied at construction. */
    public AggregatedMemoryContext getSystemMemoryContext() {
        return systemMemoryContext;
    }

    /**
     * Rewrites an int- or long-array block so that it matches the requested integral type when
     * the two differ: any int/long array read for SMALLINT or TINYINT, INT64 data read as
     * INTEGER, and INT32 data read as BIGINT. In all other cases the chunk is returned unchanged.
     */
    private static ColumnChunk typeCoercion(ColumnChunk columnChunk, PrimitiveType.PrimitiveTypeName primitiveTypeName, Type type) {
        Block source = columnChunk.getBlock();
        Block coerced = null;
        if (SmallintType.SMALLINT.equals(type) || TinyintType.TINYINT.equals(type)) {
            if (source instanceof IntArrayBlock) {
                coerced = rewriteIntegerArrayBlock((IntArrayBlock) source, type);
            } else if (source instanceof LongArrayBlock) {
                coerced = rewriteLongArrayBlock((LongArrayBlock) source, type);
            }
        } else if (IntegerType.INTEGER.equals(type) && primitiveTypeName == PrimitiveType.PrimitiveTypeName.INT64) {
            if (source instanceof LongArrayBlock) {
                coerced = rewriteLongArrayBlock((LongArrayBlock) source, type);
            }
        } else if (BigintType.BIGINT.equals(type) && primitiveTypeName == PrimitiveType.PrimitiveTypeName.INT32 && source instanceof IntArrayBlock) {
            coerced = rewriteIntegerArrayBlock((IntArrayBlock) source, type);
        }
        if (coerced == null) {
            return columnChunk;
        }
        return new ColumnChunk(coerced, columnChunk.getDefinitionLevels(), columnChunk.getRepetitionLevels());
    }

    /** Copies an int-array block into a new block of the given type, preserving nulls. */
    private static Block rewriteIntegerArrayBlock(IntArrayBlock intArrayBlock, Type type) {
        int positionCount = intArrayBlock.getPositionCount();
        BlockBuilder builder = type.createBlockBuilder((BlockBuilderStatus) null, positionCount);
        for (int position = 0; position < positionCount; position++) {
            if (intArrayBlock.isNull(position)) {
                builder.appendNull();
            } else {
                type.writeLong(builder, intArrayBlock.getInt(position));
            }
        }
        return builder.build();
    }

    /** Copies a long-array block into a new block of the given type, preserving nulls. */
    private static Block rewriteLongArrayBlock(LongArrayBlock longArrayBlock, Type type) {
        int positionCount = longArrayBlock.getPositionCount();
        BlockBuilder builder = type.createBlockBuilder((BlockBuilderStatus) null, positionCount);
        for (int position = 0; position < positionCount; position++) {
            if (longArrayBlock.isNull(position)) {
                builder.appendNull();
            } else {
                type.writeLong(builder, longArrayBlock.getLong(position, 0));
            }
        }
        return builder.build();
    }
}
