Nested Structures
Nested Accessor
import pandas as pd
from colassigner import ColAccessor
class GrandChildCols(ColAccessor):
    # Innermost accessor of the example: two string-typed columns.
    x = str
    y = str
class ChildCols(ColAccessor):
    a = int
    b = float
    # The same accessor class is attached twice; the attribute name, not the
    # class name, ends up in the column name (e.g. "...grandchild_a__y").
    grandchild_a = GrandChildCols
    grandchild_b = GrandChildCols
class Cols(ColAccessor):
    fing = int
    # Attached by attribute: its columns are prefixed "assigned_child__".
    assigned_child = ChildCols

    # Declared inside Cols so that it is reachable as Cols.InheritedChild;
    # its columns are prefixed with the snake-cased class name
    # ("inherited_child__").
    class InheritedChild(ChildCols):
        pass
# Accessor attributes are used directly as column keys; a nested path yields
# a "__"-joined column name such as "assigned_child__grandchild_a__y".
pd.DataFrame(
    {
        Cols.fing: [2, 3, 4],
        Cols.assigned_child.grandchild_a.y: ["a", "b", "c"],
        Cols.InheritedChild.b: [0.1, 0.2, 0.3],
    }
)
| | fing | assigned_child__grandchild_a__y | inherited_child__b |
|---|---|---|---|
| 0 | 2 | a | 0.1 |
| 1 | 3 | b | 0.2 |
| 2 | 4 | c | 0.3 |
Nested Assigner
from colassigner import ColAssigner
class SourceCols(ColAccessor):
    # Columns of the input frame that the assigners below read from.
    x = float
    b = bool
class SepChild(ColAssigner):
    # Column that both methods read; a subclass can override it to apply the
    # same computations to a different source column.
    _col = SourceCols.x

    def neg(self, df):
        return -df[self._col]

    def double(self, df):
        return 2 * df[self._col]
class Cols(ColAssigner):
    def col_one(self, df):
        # A scalar return value fills the whole column.
        return 1

    # Nested assigner: its columns are prefixed "sub_col__".
    class SubCol(ColAssigner):
        def fing(self, df):
            # Row-wise sum over the columns present when this method runs.
            return df.sum(axis=1)

        # Second nesting level: columns are prefixed "sub_col__sub_sub_col__".
        class SubSubCol(ColAssigner):
            _prefix = "pref_"

            def sub_x(self, df):
                return 0

            def sub_y(self, df):
                return self._prefix + df[Cols.col_one].astype(str)

        # Inherits both methods and only swaps the string prefix;
        # its columns are prefixed "sub_col__sub_sub_col_2__".
        class SubSubCol2(SubSubCol):
            _prefix = "pref2_"

    # An assigner defined outside can be attached by attribute; the
    # attribute name becomes the column prefix ("sep_child__").
    sep_child = SepChild

    # Same computations as SepChild, pointed at the boolean column.
    class SepChildB(SepChild):
        _col = SourceCols.b
# Build the two source columns, then pipe the frame through a Cols instance
# to add the derived columns.
df = pd.DataFrame(
    {
        SourceCols.x: [1.5, 3.4, 9.1],
        SourceCols.b: [False, True, True],
    }
).pipe(Cols())
# Transposed so that each column is displayed as a row.
df.T
| | 0 | 1 | 2 |
|---|---|---|---|
| x | 1.5 | 3.4 | 9.1 |
| b | False | True | True |
| col_one | 1 | 1 | 1 |
| sub_col__fing | 2.5 | 5.4 | 11.1 |
| sub_col__sub_sub_col__sub_x | 0 | 0 | 0 |
| sub_col__sub_sub_col__sub_y | pref_1 | pref_1 | pref_1 |
| sub_col__sub_sub_col_2__sub_x | 0 | 0 | 0 |
| sub_col__sub_sub_col_2__sub_y | pref2_1 | pref2_1 | pref2_1 |
| sep_child__neg | -1.5 | -3.4 | -9.1 |
| sep_child__double | 3.0 | 6.8 | 18.2 |
| sep_child_b__neg | True | False | False |
| sep_child_b__double | 0 | 2 | 2 |
# The assigner's attribute paths double as accessors for the created columns.
df.loc[
    :,
    [
        Cols.sep_child.double,
        Cols.SubCol.SubSubCol2.sub_x,
    ],
]
| | sep_child__double | sub_col__sub_sub_col_2__sub_x |
|---|---|---|
| 0 | 3.0 | 0 |
| 1 | 6.8 | 0 |
| 2 | 18.2 | 0 |
Designated Child Assigner
These are designed for sharing information among assigners. Their methods do not take the dataframe as an argument; instead, both the dataframe and the parent assigner are passed as parameters to their `__init__`.
import numpy as np
from colassigner import ChildColAssigner
class RawCols(ColAccessor):
    # Columns of the base frame that gets grouped (by cat) and averaged (num).
    cat = str
    num = int
class RawCols2(ColAccessor):
    # Columns of the second frame; their values are looked up in the
    # grouped means of the base frame.
    b = str
    c = str
class IntSides(ChildColAssigner):
    # note the type and order of the parameters:
    def __init__(self, df, parent_assigner: "GbReindex") -> None:
        # Reuse the array the parent already computed instead of redoing
        # the lookup here.
        self.arr = parent_assigner.arr

    # note the absence of parameters
    def lower(self):
        return np.floor(self.arr).astype(int)

    def upper(self):
        return np.ceil(self.arr).astype(int)
class GbReindex(ChildColAssigner):
    # Placeholder: each concrete subclass sets the column to look up.
    main_col = ...

    def __init__(self, df, bc: "BaseCols"):
        # note that this reindex needs to be done only once
        # and can be used in many child assigners
        self.arr = bc.base_gb.reindex(df[self.main_col]).values

    def values(self):
        return self.arr

    # Child assigner that reads self.arr; its columns are prefixed "sides__".
    sides = IntSides
class BaseCols(ColAssigner):
    def __init__(self, base_df):
        # Per-category mean of num, computed once and shared with the
        # child assigners through this instance.
        self.base_gb = base_df.groupby(RawCols.cat)[RawCols.num].mean()

    # Same lookup logic, applied to column b ("gb_b__" prefix).
    class GbB(GbReindex):
        main_col = RawCols2.b

    # Same lookup logic, applied to column c ("gb_c__" prefix).
    class GbC(GbReindex):
        main_col = RawCols2.c

    def prod(self, df):
        # Declared after GbB and GbC because it reads columns they create.
        return df.loc[
            :, [BaseCols.GbB.sides.lower, BaseCols.GbC.values]
        ].prod(axis=1)
# The grouped means are taken from df1 when the assigner is constructed;
# piping df2 through it then looks those means up for df2's rows.
df1 = pd.DataFrame(
    {
        RawCols.cat: ["x", "y", "y"],
        RawCols.num: [2, 3, 4],
    }
)
assigner = BaseCols(df1)
df2 = pd.DataFrame(
    {
        "b": ["x", "y", "x"],
        "c": ["y", "y", "x"],
    }
).pipe(assigner)
df2
| | b | c | gb_b__values | gb_b__sides__lower | gb_b__sides__upper | gb_c__values | gb_c__sides__lower | gb_c__sides__upper | prod |
|---|---|---|---|---|---|---|---|---|---|
| 0 | x | y | 2.0 | 2 | 2 | 3.5 | 3 | 4 | 7.0 |
| 1 | y | y | 3.5 | 3 | 4 | 3.5 | 3 | 4 | 10.5 |
| 2 | x | x | 2.0 | 2 | 2 | 2.0 | 2 | 2 | 4.0 |