summaryrefslogtreecommitdiffstats
path: root/venv/lib/python3.9/site-packages/pyarrow/includes/libarrow_dataset_parquet.pxd
blob: cc753d66a94f70475dd94ef80a25007abb8819c7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# distutils: language = c++

from pyarrow.includes.libarrow_dataset cimport *
from pyarrow._parquet cimport *


cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:

    cdef cppclass CParquetFileWriter \
            "arrow::dataset::ParquetFileWriter"(CFileWriter):
        const shared_ptr[FileWriter]& parquet_writer() const

    cdef cppclass CParquetFileWriteOptions \
            "arrow::dataset::ParquetFileWriteOptions"(CFileWriteOptions):
        shared_ptr[WriterProperties] writer_properties
        shared_ptr[ArrowWriterProperties] arrow_writer_properties

    cdef cppclass CParquetFileFragment "arrow::dataset::ParquetFileFragment"(
            CFileFragment):
        const vector[int]& row_groups() const
        shared_ptr[CFileMetaData] metadata() const
        CResult[vector[shared_ptr[CFragment]]] SplitByRowGroup(
            CExpression predicate)
        CResult[shared_ptr[CFragment]] SubsetWithFilter "Subset"(
            CExpression predicate)
        CResult[shared_ptr[CFragment]] SubsetWithIds "Subset"(
            vector[int] row_group_ids)
        CStatus EnsureCompleteMetadata()

    cdef cppclass CParquetFileFormatReaderOptions \
            "arrow::dataset::ParquetFileFormat::ReaderOptions":
        unordered_set[c_string] dict_columns
        TimeUnit coerce_int96_timestamp_unit

    cdef cppclass CParquetFileFormat "arrow::dataset::ParquetFileFormat"(
            CFileFormat):
        CParquetFileFormatReaderOptions reader_options
        CResult[shared_ptr[CFileFragment]] MakeFragment(
            CFileSource source,
            CExpression partition_expression,
            shared_ptr[CSchema] physical_schema,
            vector[int] row_groups)

    cdef cppclass CParquetFragmentScanOptions \
            "arrow::dataset::ParquetFragmentScanOptions"(CFragmentScanOptions):
        shared_ptr[CReaderProperties] reader_properties
        shared_ptr[ArrowReaderProperties] arrow_reader_properties

    cdef cppclass CParquetFactoryOptions \
            "arrow::dataset::ParquetFactoryOptions":
        CPartitioningOrFactory partitioning
        c_string partition_base_dir
        c_bool validate_column_chunk_paths

    cdef cppclass CParquetDatasetFactory \
            "arrow::dataset::ParquetDatasetFactory"(CDatasetFactory):
        @staticmethod
        CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataPath "Make"(
            const c_string& metadata_path,
            shared_ptr[CFileSystem] filesystem,
            shared_ptr[CParquetFileFormat] format,
            CParquetFactoryOptions options
        )

        @staticmethod
        CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataSource "Make"(
            const CFileSource& metadata_path,
            const c_string& base_path,
            shared_ptr[CFileSystem] filesystem,
            shared_ptr[CParquetFileFormat] format,
            CParquetFactoryOptions options
        )