Data Pipelines Dependency

Screen Link: Learn data science with Python and R projects

My Code:

class Pipeline(DQ):
    """Pipeline skeleton that stores its tasks in a ``DAG``.

    The ``task`` decorator factory is still a stub here: it registers nothing.
    """

    def __init__(self):
        self.tasks = DAG()

    def task(self, depends_on=None):
        """Return a placeholder decorator.

        ``depends_on`` is accepted but not used yet. The returned decorator
        does nothing and returns ``None``, so a function decorated with it
        would be rebound to ``None``.
        """
        def inner(func):
            # Stub: no task is registered and no edge is added.
            return None

        return inner

# Instantiate the pipeline; Pipeline.__init__ stores a new DAG() in `self.tasks`.
pipeline = Pipeline()

def first():
    """Return the constant starting value, 20."""
    start_value = 20
    return start_value

def second(x):
    """Return ``x`` multiplied by 2."""
    doubled = x * 2
    return doubled

def third(x):
    """Return ``x`` floor-divided by 3."""
    quotient = x // 3
    return quotient

def fourth(x):
    """Return ``x`` floor-divided by 4."""
    quotient = x // 4
    return quotient
class Pipeline(DQ):
    """Registry of task functions and the dependency edges between them.

    Tasks are stored in a ``DAG`` held in ``self.tasks``. ``task`` is a
    decorator factory that adds the decorated function to that DAG and links
    it to each function it depends on.
    """

    def __init__(self):
        self.tasks = DAG()

    def task(self, depends_on=None):
        """Return a decorator that registers a function as a pipeline task.

        Args:
            depends_on: Nothing (``None`` or any falsy value) for a task with
                no dependency, a single task function, or an iterable (list,
                tuple, ...) of task functions the decorated function depends
                on. For example ``@pipeline.task(depends_on=[first, second])``.

        Returns:
            A decorator that adds the function to ``self.tasks``, adds one
            ``dependency -> function`` edge per dependency, and returns the
            function unchanged so its name stays bound to the original.
        """
        # Normalise to a tuple so the decorator can treat zero, one, or many
        # dependencies uniformly. A lone function is callable, not iterable.
        if not depends_on:
            dependencies = ()
        elif callable(depends_on):
            dependencies = (depends_on,)
        else:
            dependencies = tuple(depends_on)

        def inner(f):
            # Register the node first so tasks without dependencies still
            # appear in the DAG.
            self.tasks.add(f)
            for dependency in dependencies:
                self.tasks.add(dependency, f)
            return f

        return inner

# Instantiate the pipeline that the @pipeline.task decorators below register functions on.
pipeline = Pipeline()
@pipeline.task()
def first():
    """Return the constant starting value, 20 (registered with no dependency)."""
    start_value = 20
    return start_value

@pipeline.task(depends_on=first)
def second(x):
    """Return ``x`` multiplied by 2 (registered as depending on ``first``)."""
    doubled = x * 2
    return doubled

@pipeline.task(depends_on=second)
def third(x):
    """Return ``x`` floor-divided by 3 (registered as depending on ``second``)."""
    quotient = x // 3
    return quotient

@pipeline.task(depends_on=second)
def fourth(x):
    """Return ``x`` floor-divided by 4 (registered as depending on ``second``)."""
    quotient = x // 4
    return quotient

# Read the `graph` attribute of the pipeline's DAG.
# NOTE(review): presumably the adjacency mapping built up by DAG.add — confirm against the DAG class.
graph = pipeline.tasks.graph

My question is how to rewrite this if:

  1. function first is not dependent on any other
  2. function second is not dependent on any other
  3. but third is dependent on both first and second

What happens if there are multiple dependencies, something like (1,3), (2,3), (3,4), (4,5), (4,6)? How can I edit the `pipeline.task` decorator to handle this?