Nested Structures

Nested Accessor

import pandas as pd
from colassigner import ColAccessor
class GrandChildCols(ColAccessor):
    """Innermost accessor level: declares the leaf columns ``x`` and ``y``, both typed ``str``."""
    x = str
    y = str


class ChildCols(ColAccessor):
    """Middle accessor level: two plain columns plus two nested column groups.

    ``grandchild_a`` and ``grandchild_b`` both reuse ``GrandChildCols``, so the
    same leaf names are reachable under two different attribute paths.
    """
    a = int
    b = float
    grandchild_a = GrandChildCols
    grandchild_b = GrandChildCols


class Cols(ColAccessor):
    """Top-level accessor showing two ways of nesting a child accessor.

    ``assigned_child`` attaches ``ChildCols`` by plain assignment, while
    ``InheritedChild`` nests it by subclassing inside the class body. In the
    example output that follows, paths through them appear as the column
    names ``assigned_child__grandchild_a__y`` and ``inherited_child__b``.
    """

    fing = int
    assigned_child = ChildCols

    class InheritedChild(ChildCols):
        # No overrides: every column comes from ChildCols.
        pass
# Accessor attributes are used directly as dictionary keys; the nested
# attribute paths become the column names shown in the output below.
pd.DataFrame(
    {
        Cols.fing: [2, 3, 4],
        Cols.assigned_child.grandchild_a.y: ["a", "b", "c"],
        Cols.InheritedChild.b: [0.1, 0.2, 0.3],
    }
)
fing assigned_child__grandchild_a__y inherited_child__b
0 2 a 0.1
1 3 b 0.2
2 4 c 0.3

Nested Assigner

from colassigner import ColAssigner
class SourceCols(ColAccessor):
    """Columns of the input frame that the assigners in this section read from."""

    x = float
    b = bool

class SepChild(ColAssigner):
    """Reusable assigner that derives two columns from a single source column.

    The source column is selected by ``_col``; a subclass can override it to
    run the same computations on a different column.
    """

    _col = SourceCols.x

    def neg(self, df):
        """Return the source column with unary minus applied."""
        source = df[self._col]
        return -source

    def double(self, df):
        """Return the source column multiplied by two."""
        source = df[self._col]
        return 2 * source

class Cols(ColAssigner):
    """Top-level assigner combining inline, nested and reused child assigners.

    Children are attached in three ways: nested class definitions
    (``SubCol``), assignment of an existing assigner (``sep_child``), and a
    nested subclass that overrides configuration (``SepChildB``).
    """

    def col_one(self, df):
        """Return the constant 1 for the whole column."""
        return 1

    class SubCol(ColAssigner):
        """Nested assigner holding one column and two deeper levels."""

        def fing(self, df):
            """Return the row-wise sum over the columns of ``df``."""
            row_totals = df.sum(axis=1)
            return row_totals

        class SubSubCol(ColAssigner):
            """Deepest level; ``_prefix`` configures the text built by ``sub_y``."""

            _prefix = "pref_"

            def sub_x(self, df):
                """Return the constant 0 for the whole column."""
                return 0

            def sub_y(self, df):
                """Return ``_prefix`` followed by the ``col_one`` values as text."""
                ones_as_text = df[Cols.col_one].astype(str)
                return self._prefix + ones_as_text

        class SubSubCol2(SubSubCol):
            """Same columns as ``SubSubCol``, built with a different prefix."""

            _prefix = "pref2_"

    # An assigner defined elsewhere is attached by plain assignment.
    sep_child = SepChild

    class SepChildB(SepChild):
        """``SepChild`` computations applied to the boolean source column."""

        _col = SourceCols.b
# Build the raw input frame first, keyed by the source accessors ...
df = pd.DataFrame(
    {
        SourceCols.x: [1.5, 3.4, 9.1],
        SourceCols.b: [False, True, True],
    }
)
# ... then let a Cols instance add every assigned column.
df = df.pipe(Cols())
df.T
0 1 2
x 1.5 3.4 9.1
b False True True
col_one 1 1 1
sub_col__fing 2.5 5.4 11.1
sub_col__sub_sub_col__sub_x 0 0 0
sub_col__sub_sub_col__sub_y pref_1 pref_1 pref_1
sub_col__sub_sub_col_2__sub_x 0 0 0
sub_col__sub_sub_col_2__sub_y pref2_1 pref2_1 pref2_1
sep_child__neg -1.5 -3.4 -9.1
sep_child__double 3.0 6.8 18.2
sep_child_b__neg True False False
sep_child_b__double 0 2 2
# Nested assigner attributes resolve to the generated column names, so they
# can be used directly as column selectors.
df.loc[:, [Cols.sep_child.double, Cols.SubCol.SubSubCol2.sub_x]]
sep_child__double sub_col__sub_sub_col_2__sub_x
0 3.0 0
1 6.8 0
2 18.2 0

Designated Child Assigner

Designated child assigners are designed for sharing information among assigners. Their methods do not take the dataframe as an argument; instead, their __init__ takes both the dataframe and the parent assigner as parameters.

import numpy as np

from colassigner import ChildColAssigner
class RawCols(ColAccessor):
    """Columns of the base frame: a category label and a numeric value."""

    cat = str
    num = int

class RawCols2(ColAccessor):
    """Columns of the second frame; ``GbB`` and ``GbC`` use them as ``main_col``."""
    b = str
    c = str

class IntSides(ChildColAssigner):
    """Child assigner giving the integer bounds around the parent's array."""

    # The signature matters: the dataframe comes first, then the parent
    # assigner, whose precomputed array is shared with this child.
    def __init__(self, df, parent_assigner: "GbReindex") -> None:
        self.arr = parent_assigner.arr

    # Column methods of a child assigner take no parameters besides self.
    def lower(self):
        """Return the shared array rounded down, as integers."""
        floored = np.floor(self.arr)
        return floored.astype(int)

    def upper(self):
        """Return the shared array rounded up, as integers."""
        ceiled = np.ceil(self.arr)
        return ceiled.astype(int)

class GbReindex(ChildColAssigner):
    """Child assigner that looks up the parent's grouped values once.

    ``main_col`` is a placeholder here; each concrete subclass sets it to the
    column whose values are used as lookup keys.
    """

    main_col = ...

    def __init__(self, df, bc: "BaseCols"):
        # The reindex is done a single time here; the resulting array is
        # then reused by this assigner and by its child assigners.
        lookup_keys = df[self.main_col]
        self.arr = bc.base_gb.reindex(lookup_keys).values

    def values(self):
        """Return the reindexed array unchanged."""
        return self.arr

    # Nested child assigner that reads ``arr`` from this instance.
    sides = IntSides

class BaseCols(ColAssigner):
    """Top-level assigner that shares a grouped aggregate with its children."""

    def __init__(self, base_df):
        # Mean of RawCols.num per RawCols.cat, computed once and read by the
        # GbReindex children through the parent instance.
        grouped = base_df.groupby(RawCols.cat)
        self.base_gb = grouped[RawCols.num].mean()

    class GbB(GbReindex):
        """Lookup keyed by the ``b`` column."""

        main_col = RawCols2.b

    class GbC(GbReindex):
        """Lookup keyed by the ``c`` column."""

        main_col = RawCols2.c

    def prod(self, df):
        """Return the row-wise product of GbB's lower side and GbC's values."""
        factor_cols = [BaseCols.GbB.sides.lower, BaseCols.GbC.values]
        return df.loc[:, factor_cols].prod(axis=1)
# Base frame: BaseCols.__init__ computes its per-category means once.
df1 = pd.DataFrame({RawCols.cat: ["x", "y", "y"], RawCols.num: [2, 3, 4]})
assigner = BaseCols(df1)
# Key the second frame through the RawCols2 accessors, the same names GbB and
# GbC read via main_col, instead of repeating the literals "b" and "c".
df2 = pd.DataFrame(
    {RawCols2.b: ["x", "y", "x"], RawCols2.c: ["y", "y", "x"]}
).pipe(assigner)
df2
b c gb_b__values gb_b__sides__lower gb_b__sides__upper gb_c__values gb_c__sides__lower gb_c__sides__upper prod
0 x y 2.0 2 2 3.5 3 4 7.0
1 y y 3.5 3 4 3.5 3 4 10.5
2 x x 2.0 2 2 2.0 2 2 4.0