blob: a34ea5412e14a8e5e7433a8b640f31fd41be1107 (
plain) (
tree)
|
|
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
cdef class StringBuilder(_Weakrefable):
"""
Builder class for UTF8 strings.
This class exposes facilities for incrementally adding string values and
building the null bitmap for a pyarrow.Array (type='string').
"""
cdef:
unique_ptr[CStringBuilder] builder
def __cinit__(self, MemoryPool memory_pool=None):
cdef CMemoryPool* pool = maybe_unbox_memory_pool(memory_pool)
self.builder.reset(new CStringBuilder(pool))
def append(self, value):
"""
Append a single value to the builder.
The value can either be a string/bytes object or a null value
(np.nan or None).
Parameters
----------
value : string/bytes or np.nan/None
The value to append to the string array builder.
"""
if value is None or value is np.nan:
self.builder.get().AppendNull()
elif isinstance(value, (bytes, str)):
self.builder.get().Append(tobytes(value))
else:
raise TypeError('StringBuilder only accepts string objects')
def append_values(self, values):
"""
Append all the values from an iterable.
Parameters
----------
values : iterable of string/bytes or np.nan/None values
The values to append to the string array builder.
"""
for value in values:
self.append(value)
def finish(self):
"""
Return result of builder as an Array object; also resets the builder.
Returns
-------
array : pyarrow.Array
"""
cdef shared_ptr[CArray] out
with nogil:
self.builder.get().Finish(&out)
return pyarrow_wrap_array(out)
@property
def null_count(self):
return self.builder.get().null_count()
def __len__(self):
return self.builder.get().length()
|