1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# distutils: language = c++
from pyarrow.includes.libarrow_dataset cimport *
from pyarrow._parquet cimport *
cdef extern from "arrow/dataset/api.h" namespace "arrow::dataset" nogil:
    # Cython-side declarations for the Parquet-related classes exposed by
    # "arrow/dataset/api.h".  Every cppclass below pairs a Cython name with
    # the C++ name given in the quoted string, and the whole block is
    # declared ``nogil``.

    # Binds arrow::dataset::ParquetFileWriter (base: CFileWriter).
    cdef cppclass CParquetFileWriter "arrow::dataset::ParquetFileWriter"(
            CFileWriter):
        # Returns a const reference to a shared_ptr[FileWriter].
        const shared_ptr[FileWriter]& parquet_writer() const

    # Binds arrow::dataset::ParquetFileWriteOptions (base: CFileWriteOptions).
    cdef cppclass CParquetFileWriteOptions \
            "arrow::dataset::ParquetFileWriteOptions"(CFileWriteOptions):
        # Two data members, each a shared pointer to a properties object.
        shared_ptr[WriterProperties] writer_properties
        shared_ptr[ArrowWriterProperties] arrow_writer_properties

    # Binds arrow::dataset::ParquetFileFragment (base: CFileFragment).
    cdef cppclass CParquetFileFragment \
            "arrow::dataset::ParquetFileFragment"(CFileFragment):
        const vector[int]& row_groups() const
        shared_ptr[CFileMetaData] metadata() const

        # Takes a predicate expression and yields a vector of fragments
        # wrapped in a CResult.
        CResult[vector[shared_ptr[CFragment]]] SplitByRowGroup(
            CExpression predicate
        )

        # SubsetWithFilter and SubsetWithIds both resolve to the C++ member
        # named "Subset"; they differ only in argument type (an expression
        # versus a vector of row group ids).
        # NOTE(review): the separate Cython names presumably exist to make
        # overload selection explicit at call sites -- confirm.
        CResult[shared_ptr[CFragment]] SubsetWithFilter "Subset"(
            CExpression predicate
        )
        CResult[shared_ptr[CFragment]] SubsetWithIds "Subset"(
            vector[int] row_group_ids
        )

        # Reports its outcome as a CStatus.
        CStatus EnsureCompleteMetadata()

    # Binds the nested C++ type arrow::dataset::ParquetFileFormat::
    # ReaderOptions; declared here without a base class.
    cdef cppclass CParquetFileFormatReaderOptions \
            "arrow::dataset::ParquetFileFormat::ReaderOptions":
        unordered_set[c_string] dict_columns
        TimeUnit coerce_int96_timestamp_unit

    # Binds arrow::dataset::ParquetFileFormat (base: CFileFormat).
    cdef cppclass CParquetFileFormat \
            "arrow::dataset::ParquetFileFormat"(CFileFormat):
        # Reader options are held by value as a data member.
        CParquetFileFormatReaderOptions reader_options

        # Builds a file fragment from a source, a partition expression, a
        # physical schema and a list of row group ids; the fragment comes
        # back wrapped in a CResult.
        CResult[shared_ptr[CFileFragment]] MakeFragment(
            CFileSource source,
            CExpression partition_expression,
            shared_ptr[CSchema] physical_schema,
            vector[int] row_groups
        )

    # Binds arrow::dataset::ParquetFragmentScanOptions
    # (base: CFragmentScanOptions).
    cdef cppclass CParquetFragmentScanOptions \
            "arrow::dataset::ParquetFragmentScanOptions"(CFragmentScanOptions):
        shared_ptr[CReaderProperties] reader_properties
        shared_ptr[ArrowReaderProperties] arrow_reader_properties

    # Binds arrow::dataset::ParquetFactoryOptions; declared here without a
    # base class.
    cdef cppclass CParquetFactoryOptions \
            "arrow::dataset::ParquetFactoryOptions":
        CPartitioningOrFactory partitioning
        c_string partition_base_dir
        c_bool validate_column_chunk_paths

    # Binds arrow::dataset::ParquetDatasetFactory (base: CDatasetFactory).
    cdef cppclass CParquetDatasetFactory \
            "arrow::dataset::ParquetDatasetFactory"(CDatasetFactory):

        # Both static methods below resolve to the C++ static member named
        # "Make"; the first takes a metadata path string, the second takes a
        # CFileSource plus a base path string.

        @staticmethod
        CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataPath "Make"(
            const c_string& metadata_path,
            shared_ptr[CFileSystem] filesystem,
            shared_ptr[CParquetFileFormat] format,
            CParquetFactoryOptions options
        )

        @staticmethod
        CResult[shared_ptr[CDatasetFactory]] MakeFromMetaDataSource "Make"(
            const CFileSource& metadata_path,
            const c_string& base_path,
            shared_ptr[CFileSystem] filesystem,
            shared_ptr[CParquetFileFormat] format,
            CParquetFactoryOptions options
        )
|