import vaex.serialize
import json
import numpy as np
from vaex import column
from vaex.column import _to_string_sequence, _to_string_list_sequence
import re
import vaex.expression
import functools

# @vaex.serialize.register_function
# class Function(FunctionSerializable):

scopes = {
    'str': vaex.expression.StringOperations,
    'str_pandas': vaex.expression.StringOperationsPandas,
    'dt': vaex.expression.DateTime
}

def register_function(scope=None, as_property=False, name=None):
    """Decorator to register a new function with vaex.

    Example:

    >>> import vaex
    >>> df = vaex.example()
    >>> @vaex.register_function()
    >>> def invert(x):
    >>>     return 1/x
    >>> df.x.invert()


    >>> import numpy as np
    >>> df = vaex.from_arrays(departure=np.arange('2015-01-01', '2015-12-05', dtype='datetime64'))
    >>> @vaex.register_function(as_property=True, scope='dt')
    >>> def dt_relative_day(x):
    >>>     return vaex.functions.dt_dayofyear(x)/365.
    >>> df.departure.dt.relative_day
    """
    prefix = ''
    if scope:
        prefix = scope + "_"
        if scope not in scopes:
            raise KeyError("unknown scope")
    def wrapper(f, name=name):
        name = name or f.__name__
        # remove possible prefix
        if name.startswith(prefix):
            name = name[len(prefix):]
        full_name = prefix + name
        if scope:
            def closure(name=name, full_name=full_name, function=f):
                def wrapper(self, *args, **kwargs):
                    lazy_func = getattr(self.expression.ds.func, full_name)
                    args = (self.expression, ) + args
                    return lazy_func(*args, **kwargs)
                return functools.wraps(function)(wrapper)
            if as_property:
                setattr(scopes[scope], name, property(closure()))
            else:
                setattr(scopes[scope], name, closure())
        else:
            def closure(name=name, full_name=full_name, function=f):
                def wrapper(self, *args, **kwargs):
                    lazy_func = getattr(self.ds.func, full_name)
                    args = (self, ) + args
                    return lazy_func(*args, **kwargs)
                return functools.wraps(function)(wrapper)
            setattr(vaex.expression.Expression, name, closure())
        vaex.expression.expression_namespace[prefix + name] = f
        return f  # we leave the original function as is
    return wrapper

# name maps to numpy function
# <vaex name>:<numpy name>
numpy_function_mapping = [name.strip().split(":") if ":" in name else (name, name) for name in """
sinc
sin
cos
tan
arcsin
arccos
arctan
arctan2
sinh
cosh
tanh
arcsinh
arccosh
arctanh
log
log10
log1p
exp
expm1
sqrt
abs
where
rad2deg
deg2rad
minimum
maximum
clip
searchsorted
""".strip().split()]
for name, numpy_name in numpy_function_mapping:
    if not hasattr(np, numpy_name):
        raise SystemError("numpy does not have: %s" % numpy_name)
    else:
        function = getattr(np, numpy_name)
        def f(function=function):
            def wrapper(*args, **kwargs):
                return function(*args, **kwargs)
            return wrapper
        try:
            function = functools.wraps(function)(f())
        except AttributeError:
            function = f()  # python 2 case
        register_function(name=name)(function)


@register_function()
def fillna(ar, value, fill_nan=True, fill_masked=True):
    '''Returns an array where missing values are replaced by value.

    If the dtype is object, nan values and 'nan' string values
    are replaced by value when fill_nan==True.
    '''
    ar = ar if not isinstance(ar, column.Column) else ar.to_numpy()
    if ar.dtype.kind in 'O' and fill_nan:
        strings = ar.astype(str)
        mask = strings == 'nan'
        ar = ar.copy()
        ar[mask] = value
    elif ar.dtype.kind in 'f' and fill_nan:
        mask = np.isnan(ar)
        if np.any(mask):
            ar = ar.copy()
            ar[mask] = value
    if fill_masked and np.ma.isMaskedArray(ar):
        mask = ar.mask
        if np.any(mask):
            ar = ar.data.copy()
            ar[mask] = value
    return ar


########## datetime operations ##########

@register_function(scope='dt', as_property=True)
def dt_dayofweek(x):
    """Obtain the day of the week with Monday=0 and Sunday=6

    :returns: an expression containing the day of week.

    Example:

    >>> import vaex
    >>> import numpy as np
    >>> date = np.array(['2009-10-12T03:31:00', '2016-02-11T10:17:34', '2015-11-12T11:34:22'], dtype=np.datetime64)
    >>> df = vaex.from_arrays(date=date)
    >>> df
      #  date
      0  2009-10-12 03:31:00
      1  2016-02-11 10:17:34
      2  2015-11-12 11:34:22

    >>> df.date.dt.dayofweek
    Expression = dt_dayofweek(date)
    Length: 3 dtype: int64 (expression)
    -----------------------------------
    0  0
    1  3
    2  3
    """
    import pandas as pd
    return pd.Series(x).dt.dayofweek.values

@register_function(scope='dt', as_property=True)
def dt_dayofyear(x):
    """The ordinal day of the year.

    :returns: an expression containing the ordinal day of the year.

    Example:

    >>> import vaex
    >>> import numpy as np
    >>> date = np.array(['2009-10-12T03:31:00', '2016-02-11T10:17:34', '2015-11-12T11:34:22'], dtype=np.datetime64)
    >>> df = vaex.from_arrays(date=date)
    >>> df
      #  date
      0  2009-10-12 03:31:00
      1  2016-02-11 10:17:34
      2  2015-11-12 11:34:22

    >>> df.date.dt.dayofyear
    Expression = dt_dayofyear(date)
    Length: 3 dtype: int64 (expression)
    -----------------------------------
    0  285
    1   42
    2  316
    """
    import pandas as pd
    return pd.Series(x).dt.dayofyear.values

@register_function(scope='dt', as_property=True)
def dt_is_leap_year(x):
    """Check whether a year is a leap year.

    :returns: an expression which evaluates to True if a year is a leap year, and to False otherwise.

    Example:

    >>> import vaex
    >>> import numpy as np
    >>> date = np.array(['2009-10-12T03:31:00', '2016-02-11T10:17:34', '2015-11-12T11:34:22'], dtype=np.datetime64)
    >>> df = vaex.from_arrays(date=date)
    >>> df
      #  date
      0  2009-10-12 03:31:00
      1  2016-02-11 10:17:34
      2  2015-11-12 11:34:22

    >>> df.date.dt.is_leap_year
    Expression = dt_is_leap_year(date)
    Length: 3 dtype: bool (expression)
    ----------------------------------
    0  False
    1   True
    2  False
    """
    import pandas as pd
    return pd.Series(x).dt.is_leap_year.values

@register_function(scope='dt', as_property=True)
def dt_year(x):
    """Extracts the year out of a datetime sample.

    :returns: an expression containing the year extracted from a datetime column.

    Example:

    >>> import vaex
    >>> import numpy as np
    >>> date = np.array(['2009-10-12T03:31:00', '2016-02-11T10:17:34', '2015-11-12T11:34:22'], dtype=np.datetime64)
    >>> df = vaex.from_arrays(date=date)
    >>> df
      #  date
      0  2009-10-12 03:31:00
      1  2016-02-11 10:17:34
      2  2015-11-12 11:34:22

    >>> df.date.dt.year
    Expression = dt_year(date)
    Length: 3 dtype: int64 (expression)
    -----------------------------------
    0  2009
    1  2016
    2  2015
    """
    import pandas as pd
    return pd.Series(x).dt.year.values

@register_function(scope='dt', as_property=True)
def dt_month(x):
    """Extracts the month out of a datetime sample.

    :returns: an expression containing the month extracted from a datetime column.

    Example:

    >>> import vaex
    >>> import numpy as np
    >>> date = np.array(['2009-10-12T03:31:00', '2016-02-11T10:17:34', '2015-11-12T11:34:22'], dtype=np.datetime64)
    >>> df = vaex.from_arrays(date=date)
    >>> df
      #  date
      0  2009-10-12 03:31:00
      1  2016-02-11 10:17:34
      2  2015-11-12 11:34:22

    >>> df.date.dt.month
    Expression = dt_month(date)
    Length: 3 dtype: int64 (expression)
    -----------------------------------
    0  10
    1   2
    2  11
    """
    import pandas as pd
    return pd.Series(x).dt.month.values

@register_function(scope='dt', as_property=True)
def dt_month_name(x):
    """Returns the month names of a datetime sample in English.

    :returns: an expression containing the month names extracted from a datetime column.

    Example:

    >>> import vaex
    >>> import numpy as np
    >>> date = np.array(['2009-10-12T03:31:00', '2016-02-11T10:17:34', '2015-11-12T11:34:22'], dtype=np.datetime64)
    >>> df = vaex.from_arrays(date=date)
    >>> df
      #  date
      0  2009-10-12 03:31:00
      1  2016-02-11 10:17:34
      2  2015-11-12 11:34:22

    >>> df.date.dt.month_name
    Expression = dt_month_name(date)
    Length: 3 dtype: str (expression)
    ---------------------------------
    0   October
    1  February
    2  November
    """
    import pandas as pd
    return pd.Series(x).dt.month_name().values.astype(str)

@register_function(scope='dt', as_property=True)
def dt_day(x):
    """Extracts the day from a datetime sample.

    :returns: an expression containing the day extracted from a datetime column.

    Example:

    >>> import vaex
    >>> import numpy as np
    >>> date = np.array(['2009-10-12T03:31:00', '2016-02-11T10:17:34', '2015-11-12T11:34:22'], dtype=np.datetime64)
    >>> df = vaex.from_arrays(date=date)
    >>> df
      #  date
      0  2009-10-12 03:31:00
      1  2016-02-11 10:17:34
      2  2015-11-12 11:34:22

    >>> df.date.dt.day
    Expression = dt_day(date)
    Length: 3 dtype: int64 (expression)
    -----------------------------------
    0  12
    1  11
    2  12
    """
    import pandas as pd
    return pd.Series(x).dt.day.values

@register_function(scope='dt', as_property=True)
def dt_day_name(x):
    """Returns the day names of a datetime sample in English.

    :returns: an expression containing the day names extracted from a datetime column.

    Example:

    >>> import vaex
    >>> import numpy as np
    >>> date = np.array(['2009-10-12T03:31:00', '2016-02-11T10:17:34', '2015-11-12T11:34:22'], dtype=np.datetime64)
    >>> df = vaex.from_arrays(date=date)
    >>> df
      #  date
      0  2009-10-12 03:31:00
      1  2016-02-11 10:17:34
      2  2015-11-12 11:34:22

    >>> df.date.dt.day_name
    Expression = dt_day_name(date)
    Length: 3 dtype: str (expression)
    ---------------------------------
    0    Monday
    1  Thursday
    2  Thursday
    """
    import pandas as pd
    return pd.Series(x).dt.day_name().values.astype(str)

@register_function(scope='dt', as_property=True)
def dt_weekofyear(x):
    """Returns the week ordinal of the year.

    :returns: an expression containing the week ordinal of the year, extracted from a datetime column.

    Example:

    >>> import vaex
    >>> import numpy as np
    >>> date = np.array(['2009-10-12T03:31:00', '2016-02-11T10:17:34', '2015-11-12T11:34:22'], dtype=np.datetime64)
    >>> df = vaex.from_arrays(date=date)
    >>> df
      #  date
      0  2009-10-12 03:31:00
      1  2016-02-11 10:17:34
      2  2015-11-12 11:34:22

    >>> df.date.dt.weekofyear
    Expression = dt_weekofyear(date)
    Length: 3 dtype: int64 (expression)
    -----------------------------------
    0  42
    1   6
    2  46
    """
    import pandas as pd
    return pd.Series(x).dt.weekofyear.values

@register_function(scope='dt', as_property=True)
def dt_hour(x):
    """Extracts the hour out of a datetime samples.

    :returns: an expression containing the hour extracted from a datetime column.

    Example:

    >>> import vaex
    >>> import numpy as np
    >>> date = np.array(['2009-10-12T03:31:00', '2016-02-11T10:17:34', '2015-11-12T11:34:22'], dtype=np.datetime64)
    >>> df = vaex.from_arrays(date=date)
    >>> df
      #  date
      0  2009-10-12 03:31:00
      1  2016-02-11 10:17:34
      2  2015-11-12 11:34:22

    >>> df.date.dt.hour
    Expression = dt_hour(date)
    Length: 3 dtype: int64 (expression)
    -----------------------------------
    0   3
    1  10
    2  11
    """
    import pandas as pd
    return pd.Series(x).dt.hour.values

@register_function(scope='dt', as_property=True)
def dt_minute(x):
    """Extracts the minute out of a datetime samples.

    :returns: an expression containing the minute extracted from a datetime column.

    Example:

    >>> import vaex
    >>> import numpy as np
    >>> date = np.array(['2009-10-12T03:31:00', '2016-02-11T10:17:34', '2015-11-12T11:34:22'], dtype=np.datetime64)
    >>> df = vaex.from_arrays(date=date)
    >>> df
      #  date
      0  2009-10-12 03:31:00
      1  2016-02-11 10:17:34
      2  2015-11-12 11:34:22

    >>> df.date.dt.minute
    Expression = dt_minute(date)
    Length: 3 dtype: int64 (expression)
    -----------------------------------
    0  31
    1  17
    2  34
    """
    import pandas as pd
    return pd.Series(x).dt.minute.values

@register_function(scope='dt', as_property=True)
def dt_second(x):
    """Extracts the second out of a datetime samples.

    :returns: an expression containing the second extracted from a datetime column.

    Example:

    >>> import vaex
    >>> import numpy as np
    >>> date = np.array(['2009-10-12T03:31:00', '2016-02-11T10:17:34', '2015-11-12T11:34:22'], dtype=np.datetime64)
    >>> df = vaex.from_arrays(date=date)
    >>> df
      #  date
      0  2009-10-12 03:31:00
      1  2016-02-11 10:17:34
      2  2015-11-12 11:34:22

    >>> df.date.dt.second
    Expression = dt_second(date)
    Length: 3 dtype: int64 (expression)
    -----------------------------------
    0   0
    1  34
    2  22
    """
    import pandas as pd
    return pd.Series(x).dt.second.values


########## string operations ##########

@register_function(scope='str')
def str_capitalize(x):
    """Capitalize the first letter of a string sample.

    :returns: an expression containing the capitalized strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.capitalize()
    Expression = str_capitalize(text)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    Something
    1  Very pretty
    2    Is coming
    3          Our
    4         Way.
    """
    sl = _to_string_sequence(x).capitalize()
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)

@register_function(scope='str')
def str_cat(x, other):
    """Concatenate two string columns on a row-by-row basis.

    :param expression other: The expression of the other column to be concatenated.
    :returns: an expression containing the concatenated columns.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.cat(df.text)
    Expression = str_cat(text, text)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0      SomethingSomething
    1  very prettyvery pretty
    2      is comingis coming
    3                  ourour
    4                way.way.
    """
    sl1 = _to_string_sequence(x)
    sl2 = _to_string_sequence(other)
    sl = sl1.concat(sl2)
    return column.ColumnStringArrow.from_string_sequence(sl)

@register_function(scope='str')
def str_center(x, width, fillchar=' '):
    """ Fills the left and right side of the strings with additional characters, such that the sample has a total of `width`
    characters.

    :param int width: The total number of characters of the resulting string sample.
    :param str fillchar: The character used for filling.
    :returns: an expression containing the filled strings.

    Example:
    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.center(width=11, fillchar='!')
    Expression = str_center(text, width=11, fillchar='!')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0  !Something!
    1  very pretty
    2  !is coming!
    3  !!!!our!!!!
    4  !!!!way.!!!
    """
    sl = _to_string_sequence(x).pad(width, fillchar, True, True)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)

@register_function(scope='str')
def str_contains(x, pattern, regex=True):
    """Check if a string pattern or regex is contained within a sample of a string column.

    :param str pattern: A string or regex pattern
    :param bool regex: If True,
    :returns: an expression which is evaluated to True if the pattern is found in a given sample, and it is False otherwise.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.contains('very')
    Expression = str_contains(text, 'very')
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0  False
    1   True
    2  False
    3  False
    4  False
    """
    return _to_string_sequence(x).search(pattern, regex)

# TODO: default regex is False, which breaks with pandas
@register_function(scope='str')
def str_count(x, pat, regex=False):
    """Count the occurences of a pattern in sample of a string column.

    :param str pat: A string or regex pattern
    :param bool regex: If True,
    :returns: an expression containing the number of times a pattern is found in each sample.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.count(pat="et", regex=False)
    Expression = str_count(text, pat='et', regex=False)
    Length: 5 dtype: int64 (expression)
    -----------------------------------
    0  1
    1  1
    2  0
    3  0
    4  0
    """
    return _to_string_sequence(x).count(pat, regex)

# TODO: what to do with decode and encode

@register_function(scope='str')
def str_endswith(x, pat):
    """Check if the end of each string sample matches the specified pattern.

    :param str pat: A string pattern or a regex
    :returns: an expression evaluated to True if the pattern is found at the end of a given sample, False otherwise.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.endswith(pat="ing")
    Expression = str_endswith(text, pat='ing')
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0   True
    1  False
    2   True
    3  False
    4  False
    """
    return _to_string_sequence(x).endswith(pat)

# TODO: what to do with extract/extractall

# def str_extract(x, pat, flags=0, expand=True):
#     """Wraps pandas"""
#     import pandas
#     x = x.to_numpy()
#     series = pandas.Series(x)
#     return series.str.extract(pat, flags, expand).values

# def str_extractall(x, pat, flags=0, expand=True):
#     """Wraps pandas"""
#     import pandas
#     x = x.to_numpy()
#     series = pandas.Series(x)
#     return series.str.str_extractall(pat, flags, expand).values

# TODO: extract/extractall

@register_function(scope='str')
def str_find(x, sub, start=0, end=None):
    """Returns the lowest indices in each string in a column, where the provided substring is fully contained between within a
    sample. If the substring is not found, -1 is returned.

    :param str sub: A substring to be found in the samples
    :param int start:
    :param int end:
    :returns: an expression containing the lowest indices specifying the start of the substring.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.find(sub="et")
    Expression = str_find(text, sub='et')
    Length: 5 dtype: int64 (expression)
    -----------------------------------
    0   3
    1   7
    2  -1
    3  -1
    4  -1
    """
    return _to_string_sequence(x).find(sub, start, 0 if end is None else end, end is None, True)

# TODO: findall (not sure what the use/behaviour is)

# TODO get/index/join

@register_function(scope='str')
def str_get(x, i):
    """Extract a character from each sample at the specified position from a string column.
    Note that if the specified position is out of bound of the string sample, this method returns '', while pandas retunrs nan.

    :param int i: The index location, at which to extract the character.
    :returns: an expression containing the extracted characters.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.get(5)
    Expression = str_get(text, 5)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    h
    1    p
    2    m
    3
    4
    """
    x = _to_string_sequence(x)
    if i == -1:
        sl = x.slice_string_end(-1)
    else:
        sl = x.slice_string(i, i+1)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)

@register_function(scope='str')
def str_index(x, sub, start=0, end=None):
    """Returns the lowest indices in each string in a column, where the provided substring is fully contained between within a
    sample. If the substring is not found, -1 is returned. It is the same as `str.find`.

    :param str sub: A substring to be found in the samples
    :param int start:
    :param int end:
    :returns: an expression containing the lowest indices specifying the start of the substring.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.index(sub="et")
    Expression = str_find(text, sub='et')
    Length: 5 dtype: int64 (expression)
    -----------------------------------
    0   3
    1   7
    2  -1
    3  -1
    4  -1
    """
    return str_find(x, sub, start, end)

@register_function(scope='str')
def str_join(x, sep):
    """Same as find (difference with pandas is that it does not raise a ValueError)"""
    sl = _to_string_list_sequence(x).join(sep)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)

@register_function(scope='str')
def str_len(x):
    """Returns the length of a string sample.

    :returns: an expression contains the length of each sample of a string column.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.len()
    Expression = str_len(text)
    Length: 5 dtype: int64 (expression)
    -----------------------------------
    0   9
    1  11
    2   9
    3   3
    4   4
    """
    return _to_string_sequence(x).len()

@register_function(scope='str')
def str_byte_length(x):
    """Returns the number of bytes in a string sample.

    :returns: an expression contains the number of bytes in each sample of a string column.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.byte_length()
    Expression = str_byte_length(text)
    Length: 5 dtype: int64 (expression)
    -----------------------------------
    0   9
    1  11
    2   9
    3   3
    4   4
    """
    return _to_string_sequence(x).byte_length()

@register_function(scope='str')
def str_ljust(x, width, fillchar=' '):
    """Fills the right side of string samples with a specified character such that the strings are right-hand justified.

    :param int width: The minimal width of the strings.
    :param str fillchar: The character used for filling.
    :returns: an expression containing the filled strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.ljust(width=10, fillchar='!')
    Expression = str_ljust(text, width=10, fillchar='!')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0   Something!
    1  very pretty
    2   is coming!
    3   our!!!!!!!
    4   way.!!!!!!
    """
    sl = _to_string_sequence(x).pad(width, fillchar, False, True)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)

@register_function(scope='str')
def str_lower(x):
    """Converts string samples to lower case.

    :returns: an expression containing the converted strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.lower()
    Expression = str_lower(text)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    something
    1  very pretty
    2    is coming
    3          our
    4         way.
    """
    sl = _to_string_sequence(x).lower()
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)

@register_function(scope='str')
def str_lstrip(x, to_strip=None):
    """Remove leading characters from a string sample.

    :param str to_strip: The string to be removed
    :returns: an expression containing the modified string column.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.lstrip(to_strip='very ')
    Expression = str_lstrip(text, to_strip='very ')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0  Something
    1     pretty
    2  is coming
    3        our
    4       way.
    """
    # in c++ we give empty string the same meaning as None
    sl = _to_string_sequence(x).lstrip('' if to_strip is None else to_strip) if to_strip != '' else x
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)

@register_function(scope='str')
def str_match(x, pattern):
    """Check if a string sample matches a given regular expression.

    :param str pattern: a string or regex to match to a string sample.
    :returns: an expression which is evaluated to True if a match is found, False otherwise.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.match(pattern='our')
    Expression = str_match(text, pattern='our')
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0  False
    1  False
    2  False
    3   True
    4  False
    """
    return _to_string_sequence(x).match(pattern)

# TODO: normalize, partition

@register_function(scope='str')
def str_pad(x, width, side='left', fillchar=' '):
    """Pad strings in a given column.

    :param int width: The total width of the string
    :param str side: If 'left' than pad on the left, if 'right' than pad on the right side the string.
    :param str fillchar: The character used for padding.
    :returns: an expression containing the padded strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.pad(width=10, side='left', fillchar='!')
    Expression = str_pad(text, width=10, side='left', fillchar='!')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0   !Something
    1  very pretty
    2   !is coming
    3   !!!!!!!our
    4   !!!!!!way.
    """
    sl = _to_string_sequence(x).pad(width, fillchar, side in ['left', 'both'], side in ['right', 'both'])
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)

@register_function(scope='str')
def str_repeat(x, repeats):
    """Duplicate each string in a column.

    :param int repeats: number of times each string sample is to be duplicated.
    :returns: an expression containing the duplicated strings

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.repeat(3)
    Expression = str_repeat(text, 3)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0        SomethingSomethingSomething
    1  very prettyvery prettyvery pretty
    2        is comingis comingis coming
    3                          ourourour
    4                       way.way.way.
    """
    sl = _to_string_sequence(x).repeat(repeats)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)

@register_function(scope='str')
def str_replace(x, pat, repl, n=-1, flags=0, regex=False):
    """Replace occurences of a pattern/regex in a column with some other string.

    :param str pattern: string or a regex pattern
    :param str replace: a replacement string
    :param int n: number of replacements to be made from the start. If -1 make all replacements.
    :param int flags: ??
    :param bool regex: If True, ...?
    :returns: an expression containing the string replacements.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.replace(pat='et', repl='__')
    Expression = str_replace(text, pat='et', repl='__')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    Som__hing
    1  very pr__ty
    2    is coming
    3          our
    4         way.
    """
    sl = _to_string_sequence(x).replace(pat, repl, n, flags, regex)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)

@register_function(scope='str')
def str_rfind(x, sub, start=0, end=None):
    """Returns the highest indices in each string in a column, where the provided substring is fully contained between within a
    sample. If the substring is not found, -1 is returned.

    :param str sub: A substring to be found in the samples
    :param int start:
    :param int end:
    :returns: an expression containing the highest indices specifying the start of the substring.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.rfind(sub="et")
    Expression = str_rfind(text, sub='et')
    Length: 5 dtype: int64 (expression)
    -----------------------------------
    0   3
    1   7
    2  -1
    3  -1
    4  -1
    """
    return _to_string_sequence(x).find(sub, start, 0 if end is None else end, end is None, False)

@register_function(scope='str')
def str_rindex(x, sub, start=0, end=None):
    """Returns the highest indices in each string in a column, where the provided substring is fully contained between within a
    sample. If the substring is not found, -1 is returned. Same as `str.rfind`.

    :param str sub: A substring to be found in the samples
    :param int start:
    :param int end:
    :returns: an expression containing the highest indices specifying the start of the substring.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.rindex(sub="et")
    Expression = str_rindex(text, sub='et')
    Length: 5 dtype: int64 (expression)
    -----------------------------------
    0   3
    1   7
    2  -1
    3  -1
    4  -1
    """
    return str_rfind(x, sub, start, end)

@register_function(scope='str')
def str_rjust(x, width, fillchar=' '):
    """Fills the left side of string samples with a specified character such that the strings are left-hand justified.

    :param int width: The minimal width of the strings.
    :param str fillchar: The character used for filling.
    :returns: an expression containing the filled strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.rjust(width=10, fillchar='!')
    Expression = str_rjust(text, width=10, fillchar='!')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0   !Something
    1  very pretty
    2   !is coming
    3   !!!!!!!our
    4   !!!!!!way.
    """
    sl = _to_string_sequence(x).pad(width, fillchar, True, False)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)

# TODO: rpartition

@register_function(scope='str')
def str_rstrip(x, to_strip=None):
    """Remove trailing characters from a string sample.

    :param str to_strip: The string to be removed
    :returns: an expression containing the modified string column.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.rstrip(to_strip='ing')
    Expression = str_rstrip(text, to_strip='ing')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0       Someth
    1  very pretty
    2       is com
    3          our
    4         way.
    """
    # in c++ we give empty string the same meaning as None
    sl = _to_string_sequence(x).rstrip('' if to_strip is None else to_strip) if to_strip != '' else x
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)

@register_function(scope='str')
def str_slice(x, start=0, stop=None):  # TODO: support n
    """Slice substrings from each string element in a column.

    :param int start: The start position for the slice operation.
    :param int end: The stop position for the slice operation.
    :returns: an expression containing the sliced substrings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.slice(start=2, stop=5)
    Expression = str_pandas_slice(text, start=2, stop=5)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0  met
    1   ry
    2   co
    3    r
    4   y.
    """
    if stop is None:
        sll = _to_string_sequence(x).slice_string_end(start)
    else:
        sll = _to_string_sequence(x).slice_string(start, stop)
    return sll

# TODO: slice_replace (not sure it this makes sense)
# TODO: n argument and rsplit
@register_function(scope='str')
def str_split(x, pattern=None):  # TODO: support n
    x = _to_string_sequence(x)
    if isinstance(x, vaex.strings.StringArray):
        x = x.to_arrow()
    if pattern == '':
        raise ValueError('empty separator')
    sll = x.split('' if pattern is None else pattern)
    return sll

@register_function(scope='str')
def str_startswith(x, pat):
    """Check if a start of a string matches a pattern.

    :param str pat: A string pattern. Regular expressions are not supported.
    :returns: an expression which is evaluated to True if the pattern is found at the start of a string sample, False otherwise.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.startswith(pat='is')
    Expression = str_startswith(text, pat='is')
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0  False
    1  False
    2   True
    3  False
    4  False
    """
    return _to_string_sequence(x).startswith(pat)

@register_function(scope='str')
def str_strip(x, to_strip=None):
    """Removes leading and trailing characters.

    Strips whitespaces (including new lines), or a set of specified
    characters from each string saple in a column, both from the left
    right sides.

    :param str to_strip: The characters to be removed. All combinations of the characters will be removed.
                         If None, it removes whitespaces.
    :param returns: an expression containing the modified string samples.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.strip(to_strip='very')
    Expression = str_strip(text, to_strip='very')
    Length: 5 dtype: str (expression)
    ---------------------------------
    0  Something
    1      prett
    2  is coming
    3         ou
    4       way.
    """
    # in c++ we give empty string the same meaning as None
    sl = _to_string_sequence(x).strip('' if to_strip is None else to_strip) if to_strip != '' else x
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)

# TODO: swapcase, translate

@register_function(scope='str')
def str_title(x):
    """Converts all string samples to titlecase.

    :returns: an expression containing the converted strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.title()
    Expression = str_title(text)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    Something
    1  Very Pretty
    2    Is Coming
    3          Our
    4         Way.
    """
    sl = _to_string_sequence(x).title()
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)


@register_function(scope='str')
def str_upper(x):
    """Converts all strings in a column to uppercase.

    :returns: an expression containing the converted strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.


    >>> df.text.str.upper()
    Expression = str_upper(text)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0    SOMETHING
    1  VERY PRETTY
    2    IS COMING
    3          OUR
    4         WAY.

    """
    sl = _to_string_sequence(x).upper()
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)


# TODO: wrap, is*, get_dummies(maybe?)

@register_function(scope='str')
def str_zfill(x, width):
    """Pad strings in a column by prepanding "0" characters.

    :param int width: The minimum length of the resulting string. Strings shorter less than `width` will be prepended with zeros.
    :returns: an expression containing the modified strings.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.zfill(width=12)
    Expression = str_zfill(text, width=12)
    Length: 5 dtype: str (expression)
    ---------------------------------
    0  000Something
    1  0very pretty
    2  000is coming
    3  000000000our
    4  00000000way.
    """
    sl = _to_string_sequence(x).pad(width, '0', True, False)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)


@register_function(scope='str')
def str_isalnum(x):
    """Check if all characters in a string sample are alphanumeric.

    :returns: an expression evaluated to True if a sample contains only alphanumeric characters, otherwise False.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.isalnum()
    Expression = str_isalnum(text)
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0   True
    1  False
    2  False
    3   True
    4  False
    """
    return _to_string_sequence(x).isalnum()

@register_function(scope='str')
def str_isalpha(x):
    """Check if all characters in a string sample are alphabetic.

    :returns: an expression evaluated to True if a sample contains only alphabetic characters, otherwise False.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.isalpha()
    Expression = str_isalpha(text)
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0   True
    1  False
    2  False
    3   True
    4  False
    """
    return _to_string_sequence(x).isalpha()

@register_function(scope='str')
def str_isdigit(x):
    """Check if all characters in a string sample are digits.

    :returns: an expression evaluated to True if a sample contains only digits, otherwise False.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', '6']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  6

    >>> df.text.str.isdigit()
    Expression = str_isdigit(text)
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0  False
    1  False
    2  False
    3  False
    4   True
    """
    return _to_string_sequence(x).isdigit()

@register_function(scope='str')
def str_isspace(x):
    """Check if all characters in a string sample are whitespaces.

    :returns: an expression evaluated to True if a sample contains only whitespaces, otherwise False.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', '      ', ' ']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3
      4

    >>> df.text.str.isspace()
    Expression = str_isspace(text)
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0  False
    1  False
    2  False
    3   True
    4   True
    """
    return _to_string_sequence(x).isspace()

@register_function(scope='str')
def str_islower(x):
    """Check if all characters in a string sample are lowercase characters.

    :returns: an expression evaluated to True if a sample contains only lowercase characters, otherwise False.

    Example:

    >>> import vaex
    >>> text = ['Something', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  Something
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.islower()
    Expression = str_islower(text)
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0  False
    1   True
    2   True
    3   True
    4   True
    """
    return _to_string_sequence(x).islower()

@register_function(scope='str')
def str_isupper(x):
    """Check if all characters in a string sample are lowercase characters.

    :returns: an expression evaluated to True if a sample contains only lowercase characters, otherwise False.

    Example:

    >>> import vaex
    >>> text = ['SOMETHING', 'very pretty', 'is coming', 'our', 'way.']
    >>> df = vaex.from_arrays(text=text)
    >>> df
      #  text
      0  SOMETHING
      1  very pretty
      2  is coming
      3  our
      4  way.

    >>> df.text.str.isupper()
    Expression = str_isupper(text)
    Length: 5 dtype: bool (expression)
    ----------------------------------
    0   True
    1  False
    2  False
    3  False
    4  False
    """
    return _to_string_sequence(x).isupper()

# @register_function(scope='str')
# def str_istitle(x):
#     return _to_string_sequence(x).istitle()

# @register_function(scope='str')
# def str_isnumeric(x):
#     sl = _to_string_sequence(x).isnumeric()

# @register_function(scope='str')
# def str_isdecimal(x):
#     sl = _to_string_sequence(x).isnumeric()

@register_function()
def to_string(x):
    # don't change the dtype, otherwise for each block the dtype may be different (string length)
    sl = vaex.strings.to_string(x)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)

@register_function()
def format(x, format):
    """Uses http://www.cplusplus.com/reference/string/to_string/ for formatting"""
    # don't change the dtype, otherwise for each block the dtype may be different (string length)
    sl = vaex.strings.format(x, format)
    return column.ColumnStringArrow(sl.bytes, sl.indices, sl.length, sl.offset, string_sequence=sl)


for name in dir(scopes['str']):
    if name.startswith('__'):
        continue
    def pandas_wrapper(name=name):
        def wrapper(*args, **kwargs):
            import pandas
            def fix_arg(x):
                if isinstance(x, column.Column):
                    x = x.to_numpy()
                return x
            args = list(map(fix_arg, args))
            x = args[0]
            args = args[1:]
            series = pandas.Series(x)
            method = getattr(series.str, name)
            return method(*args, **kwargs)
        return wrapper
    wrapper = pandas_wrapper()
    wrapper.__doc__ = "Wrapper around pandas.Series.%s" % name
    register_function(scope='str_pandas')(wrapper, name=name)
    # expression_namespace['str_pandas_' + name] = wrapper
    # _scopes['str_pandas'][name] = wrapper

# expression_namespace['str_strip'] = str_strip

@register_function(name='float')
def _float(x):
    return x.astype(np.float64)

@register_function(name='astype')
def _astype(x, dtype):
    return x.astype(dtype)


def add_geo_json(ds, json_or_file, column_name, longitude_expression, latitude_expresion, label=None, persist=True, overwrite=False, inplace=False, mapping=None):
    ds = ds if inplace else ds.copy()
    if not isinstance(json_or_file, (list, tuple)):
        with open(json_or_file) as f:
            geo_json = json.load(f)
    else:
        geo_json = json_or_file
    def default_label(properties):
        return " - ".join(properties.values())
    label = label or default_label
    features = geo_json['features']
    list_of_polygons = []
    labels = []
    if mapping:
        mapping_dict = {}
    for i, feature in enumerate(features[:]):
        geo = feature['geometry']
        properties = feature['properties']
        if mapping:
            mapping_dict[properties[mapping]] = i
    #     print(properties)
        # label = f"{properties['borough']} - {properties['zone']}'"
        labels.append(label(properties))#roperties[label_key])
        list_of_polygons.append([np.array(polygon_set[0]).T for polygon_set in geo['coordinates']])
        M = np.sum([polygon_set.shape[1] for polygon_set in list_of_polygons[-1]])
        # print(M)
        # N += M
    ds[column_name] = ds.func.inside_which_polygons(longitude_expression, latitude_expresion, list_of_polygons)
    if persist:
        ds.persist(column_name, overwrite=overwrite)
    ds.categorize(column_name, labels=labels, check=False)
    if mapping:
        return ds, mapping_dict
    else:
        return ds
