/*
  This file is a part of Qosmos ixEngine.

   Copyright  Qosmos 2022 - All rights reserved

  This computer program and all its components are protected by
  authors' rights and copyright law and by international treaties.
  Any representation, reproduction, distribution or modification
  of this program or any portion of it is forbidden without
  Qosmos explicit and written agreement and may result in severe
  civil and criminal penalties, and will be prosecuted
  to the maximum extent possible under the law.
*/

#include <vppinfra/dlist.h>
#include <vppinfra/types.h>
#include <vppinfra/vec.h>
#include <vnet/ip/ip4_packet.h>
#include <vnet/tcp/tcp_packet.h>
#include <vnet/udp/udp_packet.h>

#include "flowtable.h"
#include "flowtable_tcp.h"
#include "packet_helper.h"
#include <qmdpi.h>

/* libpcap header for capture */
#include <vnet/vnet.h>

/*
 * Upper bound used by the node function's ASSERT(CPT_PACKETS < MAX_PKTS).
 * That check runs *before* the packet counter is incremented, so the bound
 * has to be at least the largest frame size: with the former value of 255 the
 * last packet of a full 256-packet frame tripped the assertion in debug
 * builds.  Follow VLIB_FRAME_SIZE when the VPP headers provide it.
 */
#ifdef VLIB_FRAME_SIZE
#define MAX_PKTS VLIB_FRAME_SIZE
#else
#define MAX_PKTS 256
#endif

/* Debug trace helper: forwards to printf() when DEBUG >= 1, otherwise it is
 * compiled out entirely. */
#if (defined DEBUG) && (DEBUG >= 1)
#define DBG_PRINT(...) printf(__VA_ARGS__)
#else
#define DBG_PRINT(...)
#endif

/* Byte type consumed by __murmur_hash64(). */
typedef unsigned char ctb_uint8;

/* 64-bit unsigned type used for the hash state; MSVC spells it __int64. */
#ifdef _MSC_VER
typedef unsigned __int64 ctb_uint64;
#else
typedef unsigned long long ctb_uint64;
#endif

/* Per-packet trace record: filled by the node function for traced buffers
 * and rendered by format_get_flowinfo(). */
typedef struct {
    u32 sw_if_index;    /* RX software interface index of the traced buffer */
    u32 next_index;     /* next node index selected for the packet */
    u32 offloaded;      /* copy of the flow's infos.data.offloaded flag */
} flow_trace_t;

/* Node registration object; it is initialised by VLIB_REGISTER_NODE() at the
 * end of this file. */
vlib_node_registration_t flowtable_input_node;

/**
 * Trace formatter of the flowtable-input node.
 *
 * Expects three variadic arguments: a vlib_main_t pointer and a vlib_node_t
 * pointer (both consumed but unused) followed by the flow_trace_t to print.
 *
 * @param s     format vector to append to
 * @param args  variadic argument list as described above
 * @return the vector returned by format()
 */
static u8 *
format_get_flowinfo(u8 *s, va_list *args)
{
    CLIB_UNUSED(vlib_main_t *vm) = va_arg(*args, vlib_main_t *);
    CLIB_UNUSED(vlib_node_t *node) = va_arg(*args, vlib_node_t *);
    flow_trace_t *trace = va_arg(*args, flow_trace_t *);

    return format(s, "FlowInfo - sw_if_index %d, next_index = %d, offload = %d",
                  trace->sw_if_index, trace->next_index, trace->offloaded);
}

/**
 * 64-bit MurmurHash2-style hash of a byte buffer, using a fixed seed.
 *
 * The input is consumed as native-endian 64-bit words followed by a tail of
 * up to 7 bytes.  Words are loaded with memcpy() so that the buffer may have
 * any alignment and no object is accessed through an incompatible pointer
 * type; the produced values are the same as with a direct 64-bit load.
 *
 * The prototype is spelled with the built-in types that ctb_uint8 and
 * ctb_uint64 alias, so it is unchanged for callers.
 *
 * @param data  bytes to hash (may be unaligned; not dereferenced if len == 0)
 * @param len   number of bytes in @p data
 * @return the 64-bit hash value
 */
static
unsigned long long __murmur_hash64(const unsigned char *data, unsigned long len)
{
    const unsigned long long m = 0xc6a4a7935bd1e995ULL;
    const int r = 47;
    const unsigned int seed = 0x9747b28c;
    /* word count kept in the same width as len so it cannot be truncated */
    const unsigned long nwords = len / sizeof(unsigned long long);
    const unsigned char *tail;
    unsigned long long h = seed ^ (len * m);
    unsigned long i;

    for (i = 0; i < nwords; i++) {
        unsigned long long k;

        /* alignment- and aliasing-safe load of one 64-bit word */
        memcpy(&k, data + i * sizeof(k), sizeof(k));

        k *= m;
        k ^= k >> r;
        k *= m;

        h ^= k;
        h *= m;
    }

    /* mix in the 0..7 bytes left after the last full word */
    tail = data + nwords * sizeof(unsigned long long);

    switch (len % sizeof(unsigned long long)) {
        case 7:
            h ^= (unsigned long long)(tail[6]) << 48;
        /* fallthrough */
        case 6:
            h ^= (unsigned long long)(tail[5]) << 40;
        /* fallthrough */
        case 5:
            h ^= (unsigned long long)(tail[4]) << 32;
        /* fallthrough */
        case 4:
            h ^= (unsigned long long)(tail[3]) << 24;
        /* fallthrough */
        case 3:
            h ^= (unsigned long long)(tail[2]) << 16;
        /* fallthrough */
        case 2:
            h ^= (unsigned long long)(tail[1]) << 8;
        /* fallthrough */
        case 1:
            h ^= (unsigned long long)(tail[0]);
            h *= m;
            break;
        default:
            /* length is a multiple of the word size: no tail */
            break;
    }

    /* final avalanche */
    h ^= h >> r;
    h *= m;
    h ^= h >> r;
    return h;
}

/**
 * Hash a flow signature.
 *
 * Only the IPv4 or the IPv6 member of the signature is hashed, depending on
 * what flow_signature_is_ip4() reports.
 *
 * @param sig  signature to hash
 * @return the value produced by __murmur_hash64() over the selected member
 */
always_inline u64
hash_signature(flow_signature_t const *sig)
{
    const ctb_uint8 *bytes;
    unsigned long nbytes;

    if (flow_signature_is_ip4(sig)) {
        bytes = (const ctb_uint8 *)&sig->s.ip4;
        nbytes = sizeof(struct ip4_sig);
    } else {
        bytes = (const ctb_uint8 *)&sig->s.ip6;
        nbytes = sizeof(struct ip6_sig);
    }

    return __murmur_hash64(bytes, nbytes);
}

/**
 * Build the direction-independent IPv4 part of a flow signature.
 *
 * The two endpoints are stored in a canonical order: the address for which
 * ip4_address_compare(src, dst) < 0 holds is stored as "src".  When the
 * packet's endpoints had to be swapped to reach that order, *is_reverse is
 * set to 1 (0 otherwise) and the L4 ports are swapped the same way.
 *
 * @param ip4         IPv4 header of the packet
 * @param is_reverse  out: 1 if the endpoints were swapped, else 0
 * @param ip4_sig     out: protocol, addresses and ports of the signature
 * @param sig         unused
 * @param pkt_sig     in/out: data_offset is increased by the IPv4 header
 *                    length plus the TCP header length or the UDP header
 *                    size; l4hdr is set to the L4 header, or NULL when the
 *                    protocol is neither TCP nor UDP
 */
always_inline void
parse_ip4_packet(ip4_header_t *ip4, uword *is_reverse, struct ip4_sig *ip4_sig,
                 flow_signature_t *sig,
                 packet_signature_t *pkt_sig)
{
    const uword swapped =
        !(ip4_address_compare(&ip4->src_address, &ip4->dst_address) < 0);
    udp_header_t *l4;

    ip4_sig->proto = ip4->protocol;
    ip4_sig->src = swapped ? ip4->dst_address : ip4->src_address;
    ip4_sig->dst = swapped ? ip4->src_address : ip4->dst_address;
    *is_reverse = swapped;

    pkt_sig->data_offset += ip4_header_bytes(ip4);

    if (ip4_sig->proto != IP_PROTOCOL_UDP && ip4_sig->proto != IP_PROTOCOL_TCP) {
        /* no ports for other protocols */
        ip4_sig->port_src = 0;
        ip4_sig->port_dst = 0;
        pkt_sig->l4hdr = NULL;
        return;
    }

    /* tcp and udp keep their ports at the same offset, so the UDP view is
     * enough to read them for both protocols */
    l4 = (udp_header_t *) ip4_next_header(ip4);
    ip4_sig->port_src = swapped ? l4->dst_port : l4->src_port;
    ip4_sig->port_dst = swapped ? l4->src_port : l4->dst_port;

    if (ip4_sig->proto == IP_PROTOCOL_TCP) {
        pkt_sig->data_offset += tcp_header_bytes((tcp_header_t *) l4);
    } else {
        pkt_sig->data_offset += sizeof(udp_header_t);
    }
    pkt_sig->l4hdr = l4;
}

/**
 * Build the direction-independent IPv6 part of a flow signature.
 *
 * Same contract as the IPv4 variant: the endpoint for which
 * ip6_address_compare(src, dst) < 0 holds is stored as "src"; *is_reverse
 * tells whether the packet's endpoints were swapped to get there.  The L4
 * header is taken from ip6_next_header(), i.e. this code does not walk
 * extension headers itself.
 *
 * @param ip6         IPv6 header of the packet
 * @param is_reverse  out: 1 if the endpoints were swapped, else 0
 * @param ip6_sig     out: protocol, addresses and ports of the signature
 * @param sig         unused
 * @param pkt_sig     in/out: data_offset is increased by the fixed IPv6
 *                    header size plus the TCP header length or the UDP header
 *                    size; l4hdr is set to the L4 header, or NULL when the
 *                    protocol is neither TCP nor UDP
 */
always_inline void
parse_ip6_packet(ip6_header_t *ip6, uword *is_reverse, struct ip6_sig *ip6_sig,
                 flow_signature_t *sig,
                 packet_signature_t *pkt_sig)
{
    const uword swapped =
        !(ip6_address_compare(&ip6->src_address, &ip6->dst_address) < 0);
    udp_header_t *l4;

    ip6_sig->proto = ip6->protocol;
    pkt_sig->data_offset += sizeof(ip6_header_t);

    ip6_sig->src = swapped ? ip6->dst_address : ip6->src_address;
    ip6_sig->dst = swapped ? ip6->src_address : ip6->dst_address;
    *is_reverse = swapped;

    if (ip6_sig->proto != IP_PROTOCOL_UDP && ip6_sig->proto != IP_PROTOCOL_TCP) {
        /* no ports for other protocols */
        ip6_sig->port_src = 0;
        ip6_sig->port_dst = 0;
        pkt_sig->l4hdr = NULL;
        return;
    }

    /* tcp and udp keep their ports at the same offset, so the UDP view is
     * enough to read them for both protocols */
    l4 = (udp_header_t *) ip6_next_header(ip6);
    ip6_sig->port_src = swapped ? l4->dst_port : l4->src_port;
    ip6_sig->port_dst = swapped ? l4->src_port : l4->dst_port;

    if (ip6_sig->proto == IP_PROTOCOL_TCP) {
        pkt_sig->data_offset += tcp_header_bytes((tcp_header_t *) l4);
    } else {
        pkt_sig->data_offset += sizeof(udp_header_t);
    }
    pkt_sig->l4hdr = l4;
}

/**
 * Parse a packet starting at its Ethernet header and hash its flow signature.
 *
 * For IPv4 and IPv6 ethertypes the buffer is advanced past the Ethernet
 * header (it is left advanced on return), the signature is filled by the
 * matching parser and its hash is returned.  Any other ethertype leaves the
 * buffer untouched, sets sig->len to 0 and returns 0.
 *
 * @param buffer      packet buffer whose current data is the Ethernet header
 * @param is_reverse  out: set by the IP parser (untouched for non-IP)
 * @param sig         out: flow signature; len is always written
 * @param pkt_sig     in/out: per-packet offsets; l3hdr is set to @p buffer
 * @return hash of the signature, or 0 for non-IP packets
 */
static inline u64
compute_packet_hash(vlib_buffer_t       *buffer,
                    uword               *is_reverse,
                    flow_signature_t    *sig,
                    packet_signature_t  *pkt_sig)
{
    ethernet_header_t *eth = (ethernet_header_t *)(buffer->data +
                                                   buffer->current_data);
    const u16 ethertype = eth->type;

    if (PREDICT_TRUE(ethertype == clib_host_to_net_u16(ETHERNET_TYPE_IP4))) {
        sig->len = sizeof(struct ip4_sig);
        vlib_buffer_advance(buffer, sizeof(ethernet_header_t));
        parse_ip4_packet(vlib_buffer_get_current(buffer),
                         is_reverse, (struct ip4_sig *) sig, sig, pkt_sig);
        pkt_sig->l3hdr = buffer;
        return hash_signature(sig);
    }

    if (ethertype == clib_host_to_net_u16(ETHERNET_TYPE_IP6)) {
        sig->len = sizeof(struct ip6_sig);
        vlib_buffer_advance(buffer, sizeof(ethernet_header_t));
        parse_ip6_packet(vlib_buffer_get_current(buffer),
                         is_reverse, (struct ip6_sig *) sig, sig, pkt_sig);
        pkt_sig->l3hdr = buffer;
        return hash_signature(sig);
    }

    /* neither IPv4 nor IPv6: no signature */
    sig->len = 0;
    return 0;
}

/**
 * Refill the per-CPU flow cache from the shared flow pool.
 *
 * Up to FLOW_CACHE_SZ entries are taken from fm->flows under fm->flows_lock,
 * limited so that fm->flows_cpt never exceeds fm->flows_max.  When the lock
 * cannot be taken the cache is left as is; when no entry may be allocated the
 * FLOW_POOL_EXHAUSTED counter is bumped.
 *
 * @param fm   global flowtable state (pool, lock, counters)
 * @param fmt  per-CPU state owning the cache to refill
 */
always_inline void
flow_entry_cache_fill(flowtable_main_t *fm, flowtable_main_per_cpu_t *fmt)
{
    u64 budget = FLOW_CACHE_SZ;
    u64 remaining;
    flow_entry_t *entry;

    if (pthread_spin_lock(&fm->flows_lock) != 0) {
        DBG_PRINT("Flows Lock failed\n");
        return;
    }

    /* clamp the request to what is left below flows_max */
    if (PREDICT_FALSE(fm->flows_cpt + budget > fm->flows_max)) {
        budget = fm->flows_cpt >= fm->flows_max ? 0 : fm->flows_max -
                 fm->flows_cpt;
    }

    if (PREDICT_FALSE(budget == 0)) {
        FLOWTABLE_COUNTER_INC(fm->vlib_main, FLOW_POOL_EXHAUSTED, 1);
        pthread_spin_unlock(&fm->flows_lock);
        return;
    }

    /* the cache stores pool indices, not pointers */
    for (remaining = budget; remaining > 0; remaining--) {
        pool_get_aligned(fm->flows, entry, CLIB_CACHE_LINE_BYTES);
        vec_add1(fmt->flow_cache, entry - fm->flows);
    }

    fm->flows_cpt += budget;
    pthread_spin_unlock(&fm->flows_lock);
}

/**
 * Trim the per-CPU flow cache and return the surplus entries to the shared
 * flow pool.
 *
 * Called by flow_entry_free() once the cache holds more than
 * 2 * FLOW_CACHE_SZ indices.  Nothing is done when the spin lock cannot be
 * taken.
 *
 * @param fm   global flowtable state (pool, lock, flow counter)
 * @param fmt  per-CPU state owning the cache to trim
 */
always_inline void
flow_entry_cache_empty(flowtable_main_t *fm, flowtable_main_per_cpu_t *fmt)
{
    int i;

    /* pool_put_index() and the flows_cpt update are done under flows_lock */
    if (pthread_spin_lock(&fm->flows_lock) == 0) {
        /* i starts at the last valid position and the loop stops once it
         * reaches FLOW_CACHE_SZ, so FLOW_CACHE_SZ + 1 indices stay cached.
         * NOTE(review): presumably the intended target is FLOW_CACHE_SZ
         * entries (`i >= FLOW_CACHE_SZ`) — confirm before changing. */
        for (i = vec_len(fmt->flow_cache) - 1; i > FLOW_CACHE_SZ; i--) {
            u32 f_index = vec_pop(fmt->flow_cache);
            pool_put_index(fm->flows, f_index);
            fm->flows_cpt --;
        }

        pthread_spin_unlock(&fm->flows_lock);
    }
}

/**
 * Take one flow entry out of the per-CPU cache, refilling the cache from the
 * shared pool when it is empty.
 *
 * @param fm   global flowtable state
 * @param fmt  per-CPU state owning the cache
 * @return the flow entry, or NULL when the cache could not be refilled or
 *         when the popped index turns out to be free in the pool
 */
always_inline flow_entry_t *
flow_entry_alloc(flowtable_main_t *fm, flowtable_main_per_cpu_t *fmt)
{
    u32 flow_index;

    if (vec_len(fmt->flow_cache) == 0) {
        flow_entry_cache_fill(fm, fmt);

        /* still empty: lock failure or pool exhausted */
        if (PREDICT_FALSE((vec_len(fmt->flow_cache) == 0))) {
            return NULL;
        }
    }

    flow_index = vec_pop(fmt->flow_cache);
    if (!pool_is_free_index(fm->flows, flow_index)) {
        return pool_elt_at_index(fm->flows, flow_index);
    }

    /* a cached index must reference an allocated pool element */
    DBG_PRINT("flow_entry_alloc: pool_is_free_index()\n");
    return NULL;
}

/**
 * Give a flow entry back to the per-CPU cache.
 *
 * The entry's pool index is pushed onto the cache; once the cache holds more
 * than 2 * FLOW_CACHE_SZ indices it is trimmed by flow_entry_cache_empty().
 *
 * @param fm   global flowtable state (owner of the flow pool)
 * @param fmt  per-CPU state owning the cache
 * @param f    flow entry being released; must belong to fm->flows
 */
always_inline void
flow_entry_free(flowtable_main_t *fm, flowtable_main_per_cpu_t *fmt,
                flow_entry_t *f)
{
    vec_add1(fmt->flow_cache, f - fm->flows);

    if (vec_len(fmt->flow_cache) <= 2 * FLOW_CACHE_SZ) {
        return;
    }

    /* cache grew too large: hand the surplus back to the shared pool */
    flow_entry_cache_empty(fm, fmt);
}

/**
 * Unlink a flow from the per-CPU hash table.
 *
 * The flow's list node is removed from its hash line and released.  When
 * that leaves the line empty, the line head is released as well and the
 * bihash entry keyed by the flow's signature hash is deleted.
 *
 * @param fmt  per-CPU state owning the hash table and the line pool
 * @param f    flow to unlink (ht_index, ht_line_index and sig_hash are read)
 */
always_inline void
flowtable_entry_remove(flowtable_main_per_cpu_t *fmt, flow_entry_t *f)
{
    /* drop the flow's own node */
    clib_dlist_remove(fmt->ht_lines, f->ht_index);
    pool_put_index(fmt->ht_lines, f->ht_index);

    if (!dlist_is_empty(fmt->ht_lines, f->ht_line_index)) {
        /* other flows still hash to this line */
        return;
    }

    /* last flow of the line: free the head and forget the hash key */
    pool_put_index(fmt->ht_lines, f->ht_line_index);

    BVT(clib_bihash_kv) line_kv = {.key = f->sig_hash};
    BV(clib_bihash_add_del)(&fmt->flows_ht, &line_kv, 0  /* is_add */);
}

/**
 * Tear down one flow: DPI context, timer node, hash-table link, then the
 * flow entry itself.
 *
 * @param fm   global flowtable state (DPI workers, flow pool)
 * @param fmt  per-CPU state (timers, hash table, flow cache)
 * @param f    flow to destroy
 * @param e    the flow's node in fmt->timers; must match f->timer_index
 */
always_inline void
expire_single_flow(flowtable_main_t *fm, flowtable_main_per_cpu_t *fmt,
                   flow_entry_t *f, dlist_elt_t *e)
{
    ASSERT(f->timer_index == (e - fmt->timers));

    /* release the qmdpi flow; the call is repeated for as long as the engine
     * answers QMDPI_PROCESS_MORE */
    if (f->infos.data.dpi_flow) {
        struct qmdpi_worker *dpi_worker = fm->workers_table[os_get_thread_index()];

        while (qmdpi_flow_destroy(dpi_worker, f->infos.data.dpi_flow,
                                  NULL) == QMDPI_PROCESS_MORE) {
            /* keep draining */
        }
        f->infos.data.dpi_flow = NULL;
    }

    /* detach from the timer wheel and free the timer node */
    clib_dlist_remove(fmt->timers, e - fmt->timers);
    pool_put(fmt->timers, e);

    /* detach from the hash table */
    flowtable_entry_remove(fmt, f);

    /* finally return the entry to the flow cache / pool */
    flow_entry_free(fm, fmt, f);
}

/**
 * Expire flows queued on the current timer-wheel slot.
 *
 * Walks the list of the slot designated by fmt->time_index and destroys every
 * flow whose expiry time is strictly before @p now, stopping after
 * TIMER_MAX_EXPIRE flows have been expired in this call.
 *
 * @param fm   global flowtable state
 * @param fmt  per-CPU state (timer wheel, timers pool)
 * @param now  current time, in the unit used for flow_entry_t::expire
 * @return number of flows expired by this call
 */
static u64
flowtable_timer_expire(flowtable_main_t *fm, flowtable_main_per_cpu_t *fmt,
                       u32 now)
{
    u64 expire_cpt;
    flow_entry_t *f;
    u32 *time_slot_curr_index;
    dlist_elt_t *time_slot_curr;
    u32 index;

    /* the wheel stores, per slot, the index of the list head in fmt->timers */
    time_slot_curr_index = vec_elt_at_index(fmt->timer_wheel, fmt->time_index);

    if (PREDICT_FALSE(dlist_is_empty(fmt->timers, *time_slot_curr_index))) {
        return 0;
    }

    expire_cpt = 0;
    time_slot_curr = pool_elt_at_index(fmt->timers, *time_slot_curr_index);

    /* circular list: stop when we are back on the head */
    index = time_slot_curr->next;
    while (index != *time_slot_curr_index && expire_cpt < TIMER_MAX_EXPIRE) {
        dlist_elt_t *e = pool_elt_at_index(fmt->timers, index);
        /* a timer node referencing a free flow is inconsistent: give up */
        if (pool_is_free_index(fm->flows, e->value)) {
            return expire_cpt;
        }
        f = pool_elt_at_index(fm->flows, e->value);

        /* read the successor first: expire_single_flow() releases e */
        index = e->next;
        if (f->expire < now) {
            expire_single_flow(fm, fmt, f, e);
            expire_cpt++;
        }
    }

    return expire_cpt;
}

/**
 * Queue a flow's timer node on the wheel slot matching its lifetime.
 *
 * The target slot is (time_index + lifetime) modulo TIMER_MAX_LIFETIME.  The
 * list head of that slot is resolved through fmt->timer_wheel, exactly as
 * flowtable_timer_expire() and timer_wheel_index_update() do; the slot number
 * itself used to be passed as the head index, which is only correct when
 * timer_wheel[i] == i for every slot.
 *
 * @param fmt  per-CPU state (timer wheel and timers pool)
 * @param f    flow whose timer node (f->timer_index) is appended to the slot
 */
always_inline void
timer_wheel_insert_flow(flowtable_main_per_cpu_t *fmt, flow_entry_t *f)
{
    u32 slot;
    u32 *timer_slot_head_index;

    slot = (fmt->time_index + f->lifetime) % TIMER_MAX_LIFETIME;
    timer_slot_head_index = vec_elt_at_index(fmt->timer_wheel, slot);
    clib_dlist_addtail(fmt->timers, *timer_slot_head_index, f->timer_index);
}

/**
 * Re-arm a flow's timer: compute its new expiry time from @p now and its
 * current lifetime, and move its timer node to the matching wheel slot.
 *
 * @param fmt  per-CPU state (timer wheel and timers pool)
 * @param f    flow to reschedule
 * @param now  current time, in the unit used for flow_entry_t::expire
 */
always_inline void
timer_wheel_resched_flow(flowtable_main_per_cpu_t *fmt, flow_entry_t *f,
                         u32 now)
{
    f->expire = now + f->lifetime;

    /* unlink from the old slot, then queue on the new one */
    clib_dlist_remove(fmt->timers, f->timer_index);
    timer_wheel_insert_flow(fmt, f);
}

/**
 * Create the qmdpi flow matching a flow signature.
 *
 * The signature stores its endpoints in canonical order; @p is_reverse tells
 * whether the packet that created the flow travelled in the opposite
 * direction, in which case the signature's "dst" endpoint is passed first to
 * qmdpi_flow_create().  Every L4 protocol other than TCP is announced as
 * Q_PROTO_UDP.
 *
 * @param vm          vlib main, used for error counters
 * @param worker      DPI worker that will own the flow
 * @param sig         flow signature (IPv4 or IPv6, selected by sig->len)
 * @param is_reverse  non-zero when the endpoints must be passed swapped
 * @return the new DPI flow, or NULL.  On NULL, CREATION_FAILED is counted
 *         for a valid signature and UNHANDLED for an unknown signature length.
 */
static struct qmdpi_flow *flowtable_create_dpi_flow(vlib_main_t            *vm,
                                                    struct qmdpi_worker    *worker,
                                                    const flow_signature_t *sig,
                                                    int                     is_reverse)
{
    struct qmdpi_flow *dpi_flow;
    void const *addr_lo, *port_lo;  /* signature "src" endpoint */
    void const *addr_hi, *port_hi;  /* signature "dst" endpoint */

    if (sig->len == sizeof(struct ip4_sig)) {
        addr_lo = (void const *)&sig->s.ip4.src.as_u32;
        port_lo = (void const *)&sig->s.ip4.port_src;
        addr_hi = (void const *)&sig->s.ip4.dst.as_u32;
        port_hi = (void const *)&sig->s.ip4.port_dst;

        dpi_flow = qmdpi_flow_create(worker,
                                     Q_PROTO_IP,
                                     sig->s.ip4.proto == IP_PROTOCOL_TCP ? Q_PROTO_TCP : Q_PROTO_UDP,
                                     is_reverse ? addr_hi : addr_lo,
                                     is_reverse ? port_hi : port_lo,
                                     is_reverse ? addr_lo : addr_hi,
                                     is_reverse ? port_lo : port_hi);
    } else if (sig->len == sizeof(struct ip6_sig)) {
        addr_lo = (void const *)&sig->s.ip6.src;
        port_lo = (void const *)&sig->s.ip6.port_src;
        addr_hi = (void const *)&sig->s.ip6.dst;
        port_hi = (void const *)&sig->s.ip6.port_dst;

        dpi_flow = qmdpi_flow_create(worker,
                                     Q_PROTO_IP6,
                                     sig->s.ip6.proto == IP_PROTOCOL_TCP ? Q_PROTO_TCP : Q_PROTO_UDP,
                                     is_reverse ? addr_hi : addr_lo,
                                     is_reverse ? port_hi : port_lo,
                                     is_reverse ? addr_lo : addr_hi,
                                     is_reverse ? port_lo : port_hi);
    } else {
        /* unlikely - the signature length is checked before */
        FLOWTABLE_COUNTER_INC(vm, UNHANDLED, 1);
        return NULL;
    }

    if (!dpi_flow) {
        FLOWTABLE_COUNTER_INC(vm, CREATION_FAILED, 1);
    }

    return dpi_flow;
}


/* TODO: replace with a more appropriate hashtable */
/**
 * Find the flow matching a signature, creating it when it does not exist.
 *
 * The bihash maps a signature hash to the head of a "hash line": a
 * doubly-linked list (in fmt->ht_lines) of all flows sharing that hash.  The
 * line is searched with memcmp() on the signature; on a miss a new flow is
 * allocated, optionally given a DPI flow, and linked into both the timer
 * wheel and the hash line.
 *
 * @param vm          vlib main, used for error counters
 * @param fm          global flowtable state
 * @param fmt         per-CPU state (hash table, lines, timers)
 * @param kv          in: key = signature hash; its value is overwritten with
 *                    the line head index
 * @param sig         signature of the packet
 * @param is_reverse  packet direction, forwarded to the DPI flow creation
 * @param now         current time, used to arm the new flow's timer
 * @param created     out: set to 1 when a new flow was created (left
 *                    untouched otherwise)
 * @return the flow, or NULL when the key is 0, when an inconsistent line
 *         entry is met, or when no flow entry could be allocated
 */
static inline flow_entry_t *
flowtable_entry_lookup_create(vlib_main_t              *vm,
                              flowtable_main_t         *fm,
                              flowtable_main_per_cpu_t *fmt,
                              BVT(clib_bihash_kv)      *kv,
                              flow_signature_t const   *sig,
                              int                       is_reverse,
                              u32              const    now,
                              int                      *created)
{
    flow_entry_t *f;
    dlist_elt_t *ht_line;
    dlist_elt_t *timer_entry;
    dlist_elt_t *flow_entry;
    u32 ht_line_head_index;
    int ht_line_created;

    ht_line = NULL;
    ht_line_created = 0;

    /* a zero key is what compute_packet_hash() returns for non-IP packets;
     * an IP packet whose hash happens to be 0 is rejected too, uncounted */
    if (PREDICT_FALSE(kv->key == 0)) {
        if (sig->len == 0) { /* Non IPv4/IPv6 packet */
            FLOWTABLE_COUNTER_INC(vm, UNHANDLED, 1);
        }
        return NULL;
    }

    /* get hashtable line */
    if (PREDICT_TRUE(BV(clib_bihash_search)(&fmt->flows_ht, kv, kv) == 0)) {
        ht_line_head_index = (u32) kv->value;
        ht_line = pool_elt_at_index(fmt->ht_lines, ht_line_head_index);
        u32 index;

        /* The list CANNOT be a singleton: flowtable_entry_remove() deletes
         * the line as soon as its last flow is unlinked */
        index = ht_line->next;
        while (index != ht_line_head_index) {
            dlist_elt_t *e = pool_elt_at_index(fmt->ht_lines, index);
            /* a line node referencing a free flow is inconsistent: give up */
            if (pool_is_free_index(fm->flows, e->value)) {
                DBG_PRINT("pool_is_free_index()\n");
                return NULL;
            }

            f = pool_elt_at_index(fm->flows, e->value);
            /* compares the first sig->len bytes of both signatures */
            if (PREDICT_TRUE(memcmp(&f->sig, sig, sig->len) == 0)) {
                /* The flow already exists. Refresh the classified/offloaded
                 * flags from the DPI engine */
                if (f->infos.data.dpi_flow) {
                    struct qmdpi_flow_info *flow_info;
                    flow_info = qmdpi_flow_info_get(f->infos.data.dpi_flow);
                    f->infos.data.classified = (QMDPI_FLOW_INFO_CLASSIFIED(flow_info) != 0);
                    f->infos.data.offloaded  = (QMDPI_FLOW_INFO_OFFLOADED(flow_info) != 0);
                }
                return f;
            }

            index = e->next;
        }

        /* same hash, different signature: the new flow joins this line */
        FLOWTABLE_COUNTER_INC(vm, COLLISION, 1);
    } else {
        /* create a new line */
        pool_get(fmt->ht_lines, ht_line);

        ht_line_head_index = ht_line - fmt->ht_lines;
        clib_dlist_init(fmt->ht_lines, ht_line_head_index);
        kv->value = ht_line_head_index;
        BV(clib_bihash_add_del)(&fmt->flows_ht, kv, 1  /* is_add */);
        ht_line_created = 1;
    }

    /* create new flow */
    f = flow_entry_alloc(fm, fmt);
    if (PREDICT_FALSE(f == NULL)) {
        /* Flow allocation error: check the size of the flow pool */
        FLOWTABLE_COUNTER_INC(vm, FLOW_ALLOCATION, 1);

        /* undo the line creation so that no empty line stays registered */
        if (ht_line_created) {
            BV(clib_bihash_add_del)(&fmt->flows_ht, kv, 0  /* is_del */);
            pool_put(fmt->ht_lines, ht_line);
        }
        return NULL;
    }

    *created = 1;

    memset(f, 0, sizeof(*f));

    u32 cpu_index = os_get_thread_index();
    struct qmdpi_worker *worker = fm->workers_table[cpu_index];
    /* the DPI flow may legitimately stay NULL (DPI disabled or creation
     * failure); get_next_node() then forwards the flow without inspection */
    if (fm->dpi_enable) {
        f->infos.data.dpi_flow = flowtable_create_dpi_flow(vm, worker, sig, is_reverse);
    } else {
        f->infos.data.dpi_flow = NULL;
    }
    f->sig.len = sig->len;
    /* NOTE(review): copies sig->len bytes from the start of the structure;
     * this presumably relies on the address/port union being the first
     * member of flow_signature_t — confirm against flowtable.h */
    clib_memcpy(&f->sig, sig, sig->len);
    f->sig_hash = kv->key;
    f->lifetime = TIMER_DEFAULT_LIFETIME;
    f->expire = now + TIMER_DEFAULT_LIFETIME;

    /* insert in timer list */
    pool_get(fmt->timers, timer_entry);
    timer_entry->value = f - fm->flows;  /* index within the flow pool */
    f->timer_index = timer_entry - fmt->timers;  /* index within the timer pool */
    timer_wheel_insert_flow(fmt, f);

    /* insert in ht line */
    pool_get(fmt->ht_lines, flow_entry);
    f->ht_index = flow_entry - fmt->ht_lines;  /* index within the ht line pool */
    flow_entry->value = f - fm->flows;  /* index within the flow pool */
    f->ht_line_index = ht_line_head_index;
    clib_dlist_addhead(fmt->ht_lines, ht_line_head_index, f->ht_index);

    return f;
}

/**
 * Move the timer wheel to the slot matching @p now.
 *
 * The new slot is now % TIMER_MAX_LIFETIME.  When it differs from the current
 * one, every flow still queued on the current slot is spliced in front of the
 * new slot's list, so that it is examined by the next expiration pass, and
 * the current slot is reset to an empty list.
 *
 * @param fmt  per-CPU state (timer wheel, timers pool, time_index)
 * @param now  current time, in the unit used for flow_entry_t::expire
 */
static inline void
timer_wheel_index_update(flowtable_main_per_cpu_t *fmt, u32 now)
{
    u32 new_index = now % TIMER_MAX_LIFETIME;

    /* ~0 marks a wheel that has not been positioned yet */
    if (PREDICT_FALSE(fmt->time_index == ~0)) {
        fmt->time_index = new_index;
        return;
    }

    if (new_index != fmt->time_index) {
        /* reschedule all remaining flows on current time index
         * at the beginning of the next one */

        u32 *curr_slot_index = vec_elt_at_index(fmt->timer_wheel, fmt->time_index);
        dlist_elt_t *curr_head = pool_elt_at_index(fmt->timers, *curr_slot_index);

        u32 *next_slot_index = vec_elt_at_index(fmt->timer_wheel, new_index);
        dlist_elt_t *next_head = pool_elt_at_index(fmt->timers, *next_slot_index);

        /* nothing to carry over */
        if (PREDICT_FALSE(dlist_is_empty(fmt->timers, *curr_slot_index))) {
            fmt->time_index = new_index;
            return;
        }

        /* last and first element of the current slot's list */
        dlist_elt_t *curr_prev = pool_elt_at_index(fmt->timers, curr_head->prev);
        dlist_elt_t *curr_next = pool_elt_at_index(fmt->timers, curr_head->next);

        /* insert timer list of current time slot at the beginning of the next slot */
        if (PREDICT_FALSE(dlist_is_empty(fmt->timers, *next_slot_index))) {
            /* the new slot simply adopts the whole list */
            next_head->next = curr_head->next;
            next_head->prev = curr_head->prev;
            curr_prev->next = *next_slot_index;
            curr_next->prev = *next_slot_index;
        } else {
            /* chain: next_head -> current list -> former first of next slot */
            dlist_elt_t *next_next = pool_elt_at_index(fmt->timers, next_head->next);
            curr_prev->next = next_head->next;
            next_head->next = curr_head->next;
            next_next->prev = curr_head->prev;
            curr_next->prev = *next_slot_index;
        }

        /* reset current time slot as an empty list (all fields set to ~0) */
        memset(curr_head, 0xff, sizeof(*curr_head));

        fmt->time_index = new_index;
    }
}

/**
 * Advance a flow's TCP state machine with one segment and, on a state
 * change, adopt the lifetime associated with the new state.
 *
 * @param f    flow whose tcp_state / lifetime may be updated
 * @param hdr  TCP header of the segment; tcp_event() derives the event from it
 */
always_inline void
flow_tcp_update_lifetime(flow_entry_t *f, tcp_header_t *hdr)
{
    tcp_state_t current, next;

    ASSERT(f->tcp_state < TCP_STATE_MAX);

    current = f->tcp_state;
    next = tcp_trans[current][tcp_event(hdr)];

    if (next == current) {
        /* no transition: state and lifetime are kept */
        return;
    }

    f->tcp_state = next;
    f->lifetime = tcp_lifetime[next];
}

/**
 * Update a flow's lifetime from the packet currently held in @p buffer.
 *
 * The buffer is expected to point at the IP header (the Ethernet header has
 * already been skipped by the caller).  It is advanced past the IP header
 * and, for TCP flows, the TCP state machine is fed with the segment.
 *
 * The IPv4 header length is taken from the header itself
 * (ip4_header_bytes()), so that packets carrying IPv4 options are handled:
 * advancing by sizeof(ip4_header_t) would make the TCP header be read from
 * inside the options.  For IPv6 only the fixed header is skipped, which
 * matches how the signature parser locates the L4 header.
 *
 * CHECK-ME: nothing here asserts that the buffer holds enough well-formed
 * data to read the TCP header.
 *
 * @param f       flow to update
 * @param buffer  packet buffer, positioned on the IP header; left positioned
 *                after the IP header on return
 */
always_inline void
flow_update_lifetime(flow_entry_t *f, vlib_buffer_t *buffer)
{
    if (f->sig.len == sizeof(struct ip4_sig)) {
        ip4_header_t *ip4 = vlib_buffer_get_current(buffer);

        /* honour the IHL field: the header may carry options */
        vlib_buffer_advance(buffer, ip4_header_bytes(ip4));

        if (f->sig.s.ip4.proto == IP_PROTOCOL_TCP) {
            flow_tcp_update_lifetime(f, vlib_buffer_get_current(buffer));
        }
    } else if (f->sig.len == sizeof(struct ip6_sig)) {
        vlib_buffer_advance(buffer, sizeof(ip6_header_t));
        if (f->sig.s.ip6.proto == IP_PROTOCOL_TCP) {
            flow_tcp_update_lifetime(f, vlib_buffer_get_current(buffer));
        }
    }
}


/**
 * Store the per-packet signature information in the buffer's second plugin
 * opaque area (vnet_plugin_buffer2()).
 *
 * For IPv6 only byte offsets of the packet's source and destination
 * addresses are recorded (8 and 24, exchanged when the packet is in reverse
 * direction); for IPv4 the signature's addresses are copied.  Ports always
 * come from the signature.
 *
 * @param b           buffer receiving the information
 * @param is_reverse  direction flag computed while parsing the packet
 * @param flow        flow the packet belongs to
 * @param pkt_sig     per-packet offsets (data_offset, l4hdr)
 * @param sig         flow signature of the packet
 */
static void pack_sigs_into_opaque(vlib_buffer_t         *b,
                                  int                    is_reverse,
                                  flow_entry_t          *flow,
                                  packet_signature_t    *pkt_sig,
                                  flow_signature_t      *sig)
{
    sig_info_t *out = (sig_info_t *) vnet_plugin_buffer2(b);

    /* fields common to both address families */
    out->is_reverse  = is_reverse;
    out->is_ipv6     = (sig->len == sizeof(struct ip6_sig));
    out->data_offset = pkt_sig->data_offset;
    out->l4hdr       = pkt_sig->l4hdr;
    out->ft_flow     = flow;

    if (!out->is_ipv6) {
        out->ip_protocol = sig->s.ip4.proto;
        out->ipv4.src = sig->s.ip4.src;
        out->ipv4.dst = sig->s.ip4.dst;
        out->src_port = sig->s.ip4.port_src;
        out->dst_port = sig->s.ip4.port_dst;
        return;
    }

    out->ip_protocol = sig->s.ip6.proto;
    if (out->is_reverse) {
        out->ipv6.src_offset = 24;
        out->ipv6.dst_offset = 8;
    } else {
        out->ipv6.src_offset = 8;
        out->ipv6.dst_offset = 24;
    }
    out->src_port = sig->s.ip6.port_src;
    out->dst_port = sig->s.ip6.port_dst;
}

/**
 * Choose the next graph node for a packet of @p flow and count the decision.
 *
 * - DPI disabled, or flow without a DPI context: the flow is marked
 *   offloaded and the packet is forwarded to fm->next_node_index.
 * - Flow already offloaded: the packet is forwarded to fm->next_node_index.
 * - Otherwise the packet goes to FT_NEXT_DPI.
 *
 * @param vm    vlib main, used for the FORWARDED / DPI counters
 * @param fm    global flowtable state
 * @param flow  flow of the packet; its offloaded flag may be set
 * @return the next node index
 */
static u32 get_next_node(vlib_main_t *vm, flowtable_main_t *fm,
                         flow_entry_t *flow)
{
    if (!fm->dpi_enable || !flow->infos.data.dpi_flow) {
        /* nothing can inspect this flow: treat it as offloaded */
        flow->infos.data.offloaded = 1;
        FLOWTABLE_COUNTER_INC(vm, FORWARDED, 1);
        return fm->next_node_index;
    }

    if (flow->infos.data.offloaded) {
        /* The flow is offloaded. The packet can be directly forwarded */
        FLOWTABLE_COUNTER_INC(vm, FORWARDED, 1);
        return fm->next_node_index;
    }

    /* The packet has to be processed by the DPI */
    FLOWTABLE_COUNTER_INC(vm, DPI, 1);
    return FT_NEXT_DPI;
}

/**
 * flowtable-input node function.
 *
 * For every buffer of the frame: hash the packet (compute_packet_hash() skips
 * the Ethernet header, which is not part of flow identification in this
 * example, and builds a direction-independent IPv4/IPv6 signature), look the
 * flow up or create it, refresh its timer, then enqueue the buffer either to
 * FT_NEXT_DPI or to fm->next_node_index (see get_next_node()).  Buffers for
 * which no flow could be obtained use a local dummy flow that is marked
 * offloaded.  Once the frame is drained, expired flows of the current timer
 * slot are released and the per-frame counters are flushed.
 *
 * Returns the number of vectors of the frame.
 */


VLIB_NODE_FN(flowtable_input_node)(vlib_main_t         *vm,
                                   vlib_node_runtime_t *node,
                                   vlib_frame_t        *frame)
{
    u32 n_left_from, * from, next_index, * to_next, n_left_to_next;
    flowtable_main_t *fm = &flowtable_main;
    u32 cpu_index = os_get_thread_index();
    flowtable_main_per_cpu_t *fmt = &fm->per_cpu[cpu_index];

    /* one zeroed local counter (CPT_<sym>) per flowtable error symbol */
#define _(sym, str) u32 CPT_ ## sym = 0;
    foreach_flowtable_error
#undef _

    from = vlib_frame_vector_args(frame);
    n_left_from = frame->n_vectors;
    next_index = node->cached_next_index;

    /* node dispatch timestamp divided by the per-CPU clock rate */
    u32 current_time =
        (u32)((u64)(vm->cpu_time_last_node_dispatch /
                    fmt->clocks_per_second));

    timer_wheel_index_update(fmt, current_time);

    /* dummy flow used in case alloc fail */
    flow_entry_t offload_flow = {
        .infos.data.offloaded = 1,
    };

    while (n_left_from > 0) {
        vlib_get_next_frame(vm, node, next_index, to_next, n_left_to_next);
        /* Dual loop: two packets per iteration, while at least four remain
         * so that from[2] and from[3] can be prefetched */
        while (n_left_from >= 4 && n_left_to_next >= 2) {
            u32 bi0, bi1;
            vlib_buffer_t *b0, * b1;
            u32 next0, next1;
            BVT(clib_bihash_kv) kv0, kv1;
            int created0, created1, noflow0, noflow1;
            uword is_reverse0, is_reverse1;
            flow_signature_t sig0, sig1;
            packet_signature_t pkt_sig0 = { 0, 0, NULL};
            packet_signature_t pkt_sig1 = { 0, 0, NULL};
            flow_entry_t *flow0, * flow1;

            /* prefetch next iteration */
            {
                vlib_buffer_t *p2, * p3;

                p2 = vlib_get_buffer(vm, from[2]);
                p3 = vlib_get_buffer(vm, from[3]);

                vlib_prefetch_buffer_header(p2, LOAD);
                vlib_prefetch_buffer_header(p3, LOAD);
                CLIB_PREFETCH(p2->data, sizeof(ethernet_header_t) + sizeof(ip6_header_t), LOAD);
                CLIB_PREFETCH(p3->data, sizeof(ethernet_header_t) + sizeof(ip6_header_t), LOAD);
            }
            bi0 = to_next[0] = from[0];
            bi1 = to_next[1] = from[1];
            b0 = vlib_get_buffer(vm, bi0);
            b1 = vlib_get_buffer(vm, bi1);

            created0 = created1 = 0;
            is_reverse0 = is_reverse1 = 0;
            noflow0 = noflow1 = 0;

            /* frame mgmt */
            from += 2;
            to_next += 2;
            n_left_from -= 2;
            n_left_to_next -= 2;

            kv0.key = compute_packet_hash(b0, &is_reverse0, &sig0, &pkt_sig0);
            kv1.key = compute_packet_hash(b1, &is_reverse1, &sig1, &pkt_sig1);
            /* NOTE(review): checked before CPT_PACKETS is incremented, so
             * MAX_PKTS must be at least the frame size — confirm against
             * VLIB_FRAME_SIZE */
            ASSERT(CPT_PACKETS < MAX_PKTS);

            /* lookup/create flow */
            flow0 = flowtable_entry_lookup_create(vm, fm, fmt, &kv0, &sig0, is_reverse0,
                                                  current_time, &created0);
            if (PREDICT_FALSE(flow0 == NULL)) {
                flow0 = &offload_flow;
                noflow0 = 1;
            }

#if (defined DEBUG) && (DEBUG >= 1)
            if (fm->log_enable) {
                flowtable_dump_flow(vm, flow0);
            }
#endif /* DEBUG */

            if (fm->stats_enable) {
                flowtable_stats_add(vm, fm, fmt, flow0, b0);
            }

            /* timer management: only for pre-existing, real flows (a flow
             * created just now was armed on creation) */
            if (!created0 && (flow0 != &offload_flow)) {
                flow_update_lifetime(flow0, b0);
                timer_wheel_resched_flow(fmt, flow0, current_time);
            }

            flow1 = flowtable_entry_lookup_create(vm, fm, fmt, &kv1, &sig1, is_reverse1,
                                                  current_time, &created1);
            if (PREDICT_FALSE(flow1 == NULL)) {
                flow1 = &offload_flow;
                noflow1 = 1;
            }

#if (defined DEBUG) && (DEBUG >= 1)
            if (fm->log_enable) {
                flowtable_dump_flow(vm, flow1);
            }
#endif /* DEBUG */

            if (fm->stats_enable) {
                flowtable_stats_add(vm, fm, fmt, flow1, b1);
            }

            /* timer management, same rule as for the first packet */
            if (!created1 && (flow1 != &offload_flow)) {
                flow_update_lifetime(flow1, b1);
                timer_wheel_resched_flow(fmt, flow1, current_time);
            }

            next0 = get_next_node(vm, fm, flow0);
            if (next0 == FT_NEXT_DPI) {
                /* fill opaque buffer with flow data */
                flow0->infos.data.structure_addr = (u64)&flow0->infos;
                clib_memcpy(vnet_plugin_buffer(b0),
                            &flow0->infos, sizeof(flow0->infos));
                pack_sigs_into_opaque(b0, is_reverse0, flow0, &pkt_sig0, &sig0);
            }

            next1 = get_next_node(vm, fm, flow1);
            if (next1 == FT_NEXT_DPI) {
                /* fill opaque buffer with flow data */
                flow1->infos.data.structure_addr = (u64)&flow1->infos;
                clib_memcpy(vnet_plugin_buffer(b1),
                            &flow1->infos, sizeof(flow1->infos));
                pack_sigs_into_opaque(b1, is_reverse1, flow1, &pkt_sig1, &sig1);
            }

            /* flowtable counters: a hit is a packet matched to an existing
             * real flow */
            CPT_PACKETS += 2;
            CPT_MISS += created0 + created1;
            CPT_HIT += (!noflow0 && !created0) + (!noflow1 && !created1);

            if (b0->flags & VLIB_BUFFER_IS_TRACED) {
                flow_trace_t *t = vlib_add_trace(vm, node, b0, sizeof(*t));
                t->sw_if_index = vnet_buffer(b0)->sw_if_index[VLIB_RX];
                t->next_index = next0;
                /* flow0 is never NULL here (replaced by the dummy flow) */
                if (flow0) {
                    t->offloaded = flow0->infos.data.offloaded;
                } else {
                    t->offloaded = 0;
                }
            }
            if (b1->flags & VLIB_BUFFER_IS_TRACED) {
                flow_trace_t *t = vlib_add_trace(vm, node, b1, sizeof(*t));
                t->sw_if_index = vnet_buffer(b1)->sw_if_index[VLIB_RX];
                t->next_index = next1;
                /* flow1 is never NULL here (replaced by the dummy flow) */
                if (flow1) {
                    t->offloaded = flow1->infos.data.offloaded;
                } else {
                    t->offloaded = 0;
                }
            }

            /* parsing advanced the buffers: reset them before enqueueing to
             * the next node */
            vlib_buffer_reset(b0);
            vlib_buffer_reset(b1);

            vlib_validate_buffer_enqueue_x2(vm, node, next_index, to_next,
                                            n_left_to_next, bi0, bi1, next0, next1);
        }

        /* Single loop: remaining packets, one at a time */
        while (n_left_from > 0 && n_left_to_next > 0) {
            u32 bi0;
            u32 next0;
            vlib_buffer_t *b0;
            int created = 0, noflow = 0;
            flow_entry_t *flow = NULL;
            uword is_reverse = 0;
            BVT(clib_bihash_kv) kv;
            flow_signature_t sig;
            packet_signature_t pkt_sig = { 0, 0, NULL};

            bi0 = to_next[0] = from[0];
            b0 = vlib_get_buffer(vm, bi0);

            /* lookup/create flow */
            kv.key = compute_packet_hash(b0, &is_reverse, &sig, &pkt_sig);
            /* NOTE(review): checked before CPT_PACKETS is incremented, so
             * MAX_PKTS must be at least the frame size — confirm against
             * VLIB_FRAME_SIZE */
            ASSERT(CPT_PACKETS < MAX_PKTS);

            flow = flowtable_entry_lookup_create(vm, fm, fmt, &kv, &sig, is_reverse,
                                                 current_time, &created);
            if (PREDICT_FALSE(flow == NULL)) {
                flow = &offload_flow;
                noflow = 1;
            }

#if (defined DEBUG) && (DEBUG >= 1)
            if (fm->log_enable) {
                flowtable_dump_flow(vm, flow);
            }
#endif /* DEBUG */

            if (fm->stats_enable) {
                flowtable_stats_add(vm, fm, fmt, flow, b0);
            }

            /* timer management: only for pre-existing, real flows */
            if (!created && (flow != &offload_flow)) {
                flow_update_lifetime(flow, b0);
                timer_wheel_resched_flow(fmt, flow, current_time);
            }

            next0 = get_next_node(vm, fm, flow);
            if (next0 == FT_NEXT_DPI) {
                /* fill opaque buffer with flow data */
                flow->infos.data.structure_addr = (u64)&flow->infos;
                clib_memcpy(vnet_plugin_buffer(b0),
                            &flow->infos, sizeof(flow->infos));
                pack_sigs_into_opaque(b0, is_reverse, flow, &pkt_sig, &sig);
            }

            /* flowtable counters */
            CPT_PACKETS++;
            CPT_MISS += created;
            CPT_HIT += (!noflow && !created);

            /* frame mgmt */
            from++;
            to_next++;
            n_left_from--;
            n_left_to_next--;

            if (b0->flags & VLIB_BUFFER_IS_TRACED) {
                flow_trace_t *t = vlib_add_trace(vm, node, b0, sizeof(*t));
                t->sw_if_index =  vnet_buffer(b0)->sw_if_index[VLIB_RX];
                t->next_index = next0;
                /* flow is never NULL here (replaced by the dummy flow) */
                if (flow) {
                    t->offloaded = flow->infos.data.offloaded;
                } else {
                    t->offloaded = 0;
                }
            }

            /* parsing advanced the buffer: reset it before enqueueing */
            vlib_buffer_reset(b0);
            vlib_validate_buffer_enqueue_x1(vm, node, next_index, to_next,
                                            n_left_to_next, bi0, next0);
        }
        vlib_put_next_frame(vm, node, next_index, n_left_to_next);
    }

    /* handle expirations */
    CPT_TIMER_EXPIRE += flowtable_timer_expire(fm, fmt, current_time);

    /* flush every local CPT_<sym> counter into the node counters */
#define _(sym, str) \
    FLOWTABLE_COUNTER_INC(vm, sym, CPT_ ## sym);

    foreach_flowtable_error
#undef _

    return frame->n_vectors;
}

/* One display string per flowtable error/counter symbol, generated from the
 * foreach_flowtable_error list so that both stay in the same order. */
static char *flowtable_error_strings[] = {
#define _(sym, string) string,
    foreach_flowtable_error
#undef _
};

/* Registration of the "flowtable-input" internal graph node: u32 buffer
 * indices as vector elements, flow_trace_t traces rendered by
 * format_get_flowinfo(), and the flowtable error strings as counters. */
VLIB_REGISTER_NODE(flowtable_input_node) = {
    .name = "flowtable-input",
    .vector_size = sizeof(u32),
    .format_trace = format_get_flowinfo,
    .type = VLIB_NODE_TYPE_INTERNAL,
    .n_errors = FLOWTABLE_N_ERROR,
    .error_strings = flowtable_error_strings,
    .n_next_nodes = FT_NEXT_N_NEXT,
    /* next nodes, indexed by the FT_NEXT_* values */
    .next_nodes = {
        [FT_NEXT_ETHERNET_INPUT] = "ethernet-input",
        [FT_NEXT_DPI] = "dpi",
    }
};
