# StreamFifo

import spinal.core._
import spinal.lib._
import scala.language.postfixOps
/**
 * Minimal view shared by stream FIFO implementations: one stream per side plus
 * the fill level as observed from that side.
 *
 * @tparam T hardware type of the elements carried by the streams
 */
trait StreamReuseFifoInterface[T <: Data] {
  /** Stream on the push side of the FIFO. */
  def push: Stream[T]

  /** Fill level as observed from the push side. */
  def pushOccupancy: UInt

  /** Stream on the pop side of the FIFO. */
  def pop: Stream[T]

  /** Fill level as observed from the pop side. */
  def popOccupancy: UInt
}
/**
 * Factory that picks the [[StreamReuseFifo]] read configuration from a latency figure.
 *
 * Mapping applied to `latency`:
 *  - 2 : synchronous read, no bypass (`withAsyncRead = false`, `withBypass = false`)
 *  - 1 : asynchronous read, no bypass (`withAsyncRead = true`, `withBypass = false`)
 *  - 0 : asynchronous read with bypass (`withAsyncRead = true`, `withBypass = true`)
 */
object StreamReuseFifo{
  /**
   * @param dataType hardware type of the stored elements
   * @param depth    forwarded unchanged to the `depth` parameter of [[StreamReuseFifo]]
   * @param latency  requested latency in cycles, must be 0, 1 or 2 (checked with `assert`)
   * @param forFMax  forwarded unchanged to the `forFMax` parameter of [[StreamReuseFifo]]
   * @return a new [[StreamReuseFifo]] configured as described above
   */
  def apply[T <: Data](dataType: HardType[T],
                       depth: Int,
                       latency : Int = 2,
                       forFMax : Boolean = false): StreamReuseFifo[T] = {
    assert(latency >= 0 && latency <= 2)
    // Build the FIFO declared in this file. Referring to `StreamFifo` here would
    // silently pick up the library class through `import spinal.lib._`.
    new StreamReuseFifo(
      dataType,
      depth,
      withAsyncRead = latency < 2,
      withBypass = latency == 0,
      forFMax = forFMax
    )
  }
}
/**
 * Stream FIFO component.
 *
 * Fully redesigned in release 1.8.2 allowing improved timing closure.
 * - latency of 0, 1, 2 cycles
 *
 * Three structures are generated depending on `depth`:
 *  - depth == 0 : io.push is connected straight to io.pop
 *  - depth == 1 : a single m2sPipe register stage (optionally bypassed)
 *  - depth  > 1 : push/pop pointers over a Mem (or a Vec of registers when useVec is set)
 *
 * @param dataType Hardware type of the stored elements
 * @param depth Number of elements stored in the fifo. Note that if withAsyncRead==false, then one extra transaction can be stored
 * @param withAsyncRead Read the memory using an asynchronous read port (ex distributed ram). If false, add 1 cycle latency
 * @param withBypass Bypass the push port to the pop port when the fifo is empty. If false, add 1 cycle latency
 *                   Only available if withAsyncRead == true
 * @param allowExtraMsb When true and depth is a power of two, the push/pop pointers get one extra MSB which is used
 *                      for the full/empty/occupancy computation instead of the wentUp register
 * @param forFMax Tune the design to get the maximal clock frequency
 * @param useVec Use a Vec of registers instead of a Mem to store the content
 *               Only available if withAsyncRead == true
 */
class StreamReuseFifo[T <: Data](val dataType: HardType[T],
                            val depth: Int,
                            val withAsyncRead : Boolean = false,
                            val withBypass : Boolean = false,
                            val allowExtraMsb : Boolean = true,
                            val forFMax : Boolean = false,
                            val useVec : Boolean = false) extends Component {
  // Elaboration-time parameter checks.
  require(depth >= 0)
  if(withBypass) require(withAsyncRead)
  if(useVec) require (withAsyncRead)
  val io = new Bundle with StreamReuseFifoInterface[T]{
    val push = slave Stream (dataType)
    val pop = master Stream (dataType)
    // Flush request, defaults to False when left unconnected.
    val flush = in Bool() default(False)
    val occupancy    = out UInt (log2Up(depth + 1) bits)
    val availability = out UInt (log2Up(depth + 1) bits)
    // Both sides report the same occupancy output.
    override def pushOccupancy = occupancy
    override def popOccupancy = occupancy
  }
  // Up/down counter used by the forFMax variants.
  // `value` moves by one when exactly one of incr/decr is set and is reloaded with `init` on io.flush
  // (the flush assignment comes last, so it overrides the increment/decrement).
  // NOTE(review): plusOne/minusOne are wrapped in KeepAttribute, presumably so that the two
  // precomputed candidates are preserved by the synthesis tool - confirm.
  class CounterUpDownFmax(states : BigInt, init : BigInt) extends Area{
    val incr, decr = Bool()
    val value = Reg(UInt(log2Up(states) bits)) init(init)
    val plusOne = KeepAttribute(value + 1)
    val minusOne = KeepAttribute(value - 1)
    when(incr =/= decr){
      value := incr.mux(plusOne, minusOne)
    }
    when(io.flush) { value := init }
  }
  // The extra pointer MSB is only used for power-of-two depths.
  val withExtraMsb = allowExtraMsb && isPow2(depth)
  // depth == 0 : no storage at all, push is wired to pop and both counters read 0.
  val bypass = (depth == 0) generate new Area {
    io.push >> io.pop
    io.occupancy := 0
    io.availability := 0
  }
  // depth == 1 : a single register stage built from m2sPipe.
  val oneStage = (depth == 1) generate new Area {
    val doFlush = CombInit(io.flush)
    val buffer = io.push.m2sPipe(flush = doFlush)
    io.pop << buffer
    io.occupancy := U(buffer.valid)
    io.availability := U(!buffer.valid)
    if(withBypass){
      // While the stage is empty, present the pushed word directly on io.pop.
      when(!buffer.valid){
        io.pop.valid := io.push.valid
        io.pop.payload := io.push.payload
        doFlush setWhen(io.pop.ready)
        // doFlush is raised here so that the stage does not also capture the word that is being bypassed.
        // NOTE(review): per the original author's note, m2sPipe keeps its valid in
        // rValid = RegNextWhen(self.valid, self.ready) init(False) and clears it when `flush` is set;
        // since the word is bypassed, rValid must not sample the current self.valid - confirm against m2sPipe.
      }
    }
  }
  // depth > 1 : pointer based FIFO over a Mem or a Vec of registers.
  val logic = (depth > 1) generate new Area {
    // Exactly one of the two storages is generated.
    val vec = useVec generate Vec(Reg(dataType), depth)
    val ram = !useVec generate Mem(dataType, depth)
    val ptr = new Area{
      // Both strobes are relative to the internal storage. doPush is io.push.fire, except in the
      // bypass case where it is cleared (see the async read area below).
      val doPush, doPop = Bool()
      val full, empty = Bool()
      val push = Reg(UInt(log2Up(depth) + withExtraMsb.toInt bits)) init(0)
      val pop  = Reg(UInt(log2Up(depth) + withExtraMsb.toInt bits)) init(0)
      val occupancy = cloneOf(io.occupancy)
      val popOnIo = cloneOf(pop) // Used to track the global occupancy of the fifo (the extra buffer of !withAsyncRead)
      // Direction of the last unbalanced cycle (True = push, False = pop); used to tell full from
      // empty when the pointers are equal and there is no extra MSB.
      val wentUp = RegNextWhen(doPush, doPush =/= doPop) init(False) clearWhen (io.flush)
      // `pop` is the internal read pointer: it advances as soon as a read address is accepted (doPop).
      // With the synchronous read the data reaches io.pop later, so popOnIo tracks the pointer
      // as it stands once the word has actually left through io.pop.
      val arb = new Area {
        // full back-pressures io.push. `pop` is a read address generated ahead of time, while popOnIo
        // is the pointer sampled when the output fires; full is compared against popOnIo because the
        // entry `pop` has already moved past may not have left the FIFO yet
        // (presumably pop is at most one entry ahead of popOnIo - TODO confirm).
        // empty drives the valid of addressGen and is not exposed on io, so it only needs
        // the internal push/pop pointers.
        val area = !forFMax generate {
          withExtraMsb match {
            case true => { //as we have extra MSB, we don't need the "wentUp"
              // full when push and popOnIo differ exactly by the bit of weight `depth`
              // (the extra MSB, depth being a power of two in this branch)
              full := (push ^ popOnIo ^ depth) === 0
              empty := push === pop
            }
            case false => {
              full := push === popOnIo && wentUp
              empty := push === pop && !wentUp
            }
          }
        }
        val fmax = forFMax generate new Area {
          val counterWidth = log2Up(depth) + 1
          // empty: internal view (for the pop side).
          // Starts with only the MSB set; doPush decrements and doPop increments, and the MSB is read as empty.
          val emptyTracker = new CounterUpDownFmax(1 << counterWidth, 1 << (counterWidth - 1)) {
            incr := doPop
            decr := doPush
            empty := value.msb
          }
          // full: external view (for the push side).
          // Starts `depth` below the MSB; io.push.fire increments and io.pop.fire decrements, and the MSB is read as full.
          val fullTracker = new CounterUpDownFmax(1 << counterWidth, (1 << (counterWidth - 1)) - depth) {
            incr := io.push.fire
            decr := io.pop.fire
            full := value.msb
          }
        }
      }
      // Pointer updates; depths that are not a power of two wrap explicitly at depth - 1.
      when(doPush){
        push := push + 1
        if(!isPow2(depth)) when(push === depth - 1){ push := 0 }
      }
      when(doPop){
        pop := pop + 1
        if(!isPow2(depth)) when(pop === depth - 1){ pop := 0 }
      }
      // Flush is assigned last so it overrides the increments above.
      when(io.flush){
        push := 0
        pop := 0
      }
      // Occupancy, variant 1: pointer difference (extra MSB available).
      val forPow2 = (withExtraMsb && !forFMax) generate new Area{
        occupancy := push - popOnIo  //if no extra msb, could be U(full ## (push - popOnIo))
      }
      // Occupancy, variant 2: dedicated counter driven by the io-level handshakes.
      val notPow2 = (!withExtraMsb && !forFMax) generate new Area{
        val counter = Reg(UInt(log2Up(depth + 1) bits)) init(0)
        counter := counter + U(io.push.fire) - U(io.pop.fire)
        occupancy := counter
        when(io.flush) { counter := 0 }
      }
      // Occupancy, variant 3: forFMax up/down counter starting at 0.
      val fmax = forFMax generate new CounterUpDownFmax(depth + 1, 0){
        incr := io.push.fire
        decr := io.pop.fire
        occupancy := value
      }
    }
    // Push side: accept while not full and write the storage at the push pointer.
    // Note that the write strobe is io.push.fire, not ptr.doPush.
    val push = new Area {
      io.push.ready := !ptr.full
      ptr.doPush := io.push.fire
      val onRam = !useVec generate new Area {
        val write = ram.writePort()
        write.valid := io.push.fire
        write.address := ptr.push.resized
        write.data := io.push.payload
      }
      val onVec = useVec generate new Area {
        when(io.push.fire){
          vec.write(ptr.push.resized, io.push.payload)
        }
      }
    }
    // Pop side: a stream of read addresses, valid while the storage is not empty.
    val pop = new Area{
      val addressGen = Stream(UInt(log2Up(depth) bits))
      addressGen.valid := !ptr.empty
      addressGen.payload := ptr.pop.resized
      ptr.doPop := addressGen.fire
      // Synchronous read variant (Mem only).
      val sync = !withAsyncRead generate new Area{
        assert(!useVec)
        val readArbitration = addressGen.m2sPipe(flush = io.flush)   // valid and read address go through one register stage
        val readPort = ram.readSyncPort   // NOTE(review): synchronous read port, presumably also one cycle of delay - confirm
        readPort.cmd := addressGen.toFlowFire   // the read command carries no ready; presumably valid only when addressGen fires - confirm
        io.pop << readArbitration.translateWith(readPort.rsp)   // the staged stream with its payload replaced by the read data
        val popReg = RegNextWhen(ptr.pop, readArbitration.fire) init(0)
        ptr.popOnIo := popReg   // pop pointer sampled when the output stage fires
        when(io.flush){ popReg := 0 }
      }
      // Asynchronous read variant (Mem or Vec).
      val async = withAsyncRead generate new Area{
        val readed = useVec match {
          case true => vec.read(addressGen.payload)
          case false => ram.readAsync(addressGen.payload)
        }
        io.pop << addressGen.translateWith(readed)
        // No output stage here, so the io-level pointer is the internal one.
        ptr.popOnIo := ptr.pop
        if(withBypass){
          // While empty, forward the pushed word straight to io.pop; if it is taken in the
          // same cycle (io.pop.ready) the push pointer is not advanced.
          when(ptr.empty){
            io.pop.valid := io.push.valid
            io.pop.payload := io.push.payload
            ptr.doPush clearWhen(io.pop.ready)
          }
        }
      }
    }
    io.occupancy := ptr.occupancy
    if(!forFMax) io.availability := depth - ptr.occupancy
    // forFMax availability: up/down counter starting at depth.
    val fmaxAvail = forFMax generate new CounterUpDownFmax(depth + 1, depth){
      incr := io.pop.fire
      decr := io.push.fire
      io.availability := value
    }
  }
  // check a condition against all valid payloads in the FIFO RAM
  // Returns one Bool per storage slot: slot is inside the valid range AND cond(slot content).
  // NOTE(review): this and the helpers below dereference `logic`, which is only generated for depth > 1.
  def formalCheckRam(cond: T => Bool): Vec[Bool] = this rework new Composite(this){
    val condition = (0 until depth).map(x => cond(if (useVec) logic.vec(x) else logic.ram(x)))
    // create mask for all valid payloads in FIFO RAM
    // inclusive [pop_idx, push_idx) exclusive
    // assume FIFO RAM is full with valid payloads
    //           [ ...  push_idx ... ]
    //           [ ...  pop_idx  ... ]
    // mask      [ 1 1 1 1 1 1 1 1 1 ]
    val mask = Vec(True, depth)
    val push_idx = logic.ptr.push.resize(log2Up(depth))
    val pop_idx = logic.ptr.pop.resize(log2Up(depth))
    // popMask(i)==0 indicates location i was popped
    val popMask = (~((U(1) << pop_idx) - 1)).asBits
    // pushMask(i)==1 indicates location i was pushed
    val pushMask = ((U(1) << push_idx) - 1).asBits
    // no wrap   [ ... pop_idx  ... push_idx ... ]
    // popMask   [ 0 0 1 1 1 1  1 1 1 1 1 1 1 1 1]
    // pushMask  [ 1 1 1 1 1 1  1 1 0 0 0 0 0 0 0] &
    // mask      [ 0 0 1 1 1 1  1 1 0 0 0 0 0 0 0]
    when(pop_idx < push_idx) {
      mask.assignFromBits(pushMask & popMask)
      // (next branch) wrapped
      // wrapped   [ ... push_idx ... pop_idx  ... ]
      // popMask   [ 0 0 0 0 0 0  0 0 1 1 1 1 1 1 1]
      // pushMask  [ 1 1 0 0 0 0  0 0 0 0 0 0 0 0 0] |
      // mask      [ 1 1 0 0 0 0  0 0 1 1 1 1 1 1 1]
    }.elsewhen(pop_idx > push_idx) {
      mask.assignFromBits(pushMask | popMask)
      // (next branch) equal indexes and empty
      //           [ ...  push_idx ... ]
      //           [ ...  pop_idx  ... ]
      // mask      [ 0 0 0 0 0 0 0 0 0 ]
    }.elsewhen(logic.ptr.empty) {
      mask := mask.getZero
    }
    // Equal indexes and not empty: the default all-ones mask is kept.
    val check = mask.zipWithIndex.map{case (x, id) => x & condition(id)}
    val vec = Vec(check)
  }.vec
  // Condition evaluated on the word currently presented on io.pop; constant False when withAsyncRead is set.
  def formalCheckOutputStage(cond: T => Bool): Bool = this.rework {
    // only with sync RAM read, io.pop is directly connected to the m2sPipe() stage
    Bool(!withAsyncRead) & io.pop.valid & cond(io.pop.payload)
  }
  // verify this works, then we can simplify below
  //def formalCheck(cond: T => Bool): Vec[Bool] = this.rework {
  //  Vec(formalCheckOutputStage(cond) +: formalCheckRam(cond))
  //}
  // True when `word` equals at least one valid storage slot or the output stage word.
  def formalContains(word: T): Bool = this.rework {
    formalCheckRam(_ === word.pull()).reduce(_ || _) || formalCheckOutputStage(_ === word.pull())
  }
  // True when `cond` holds for at least one valid storage slot or for the output stage word.
  def formalContains(cond: T => Bool): Bool = this.rework {
    formalCheckRam(cond).reduce(_ || _) || formalCheckOutputStage(cond)
  }
  // Number of valid storage slots equal to `word`, plus one if the output stage word matches.
  def formalCount(word: T): UInt = this.rework {
    // occurrence count in RAM and in m2sPipe()
    CountOne(formalCheckRam(_ === word.pull())) +^ U(formalCheckOutputStage(_ === word.pull()))
  }
  // Number of valid storage slots satisfying `cond`, plus one if the output stage word satisfies it.
  def formalCount(cond: T => Bool): UInt = this.rework {
    // occurrence count in RAM and in m2sPipe()
    CountOne(formalCheckRam(cond)) +^ U(formalCheckOutputStage(cond))
  }
  // Cover statement: io.push.ready was low at some point (sticky was_full) and the storage is now empty.
  def formalFullToEmpty() = this.rework {
    val was_full = RegInit(False) setWhen(!io.push.ready)
    cover(was_full && logic.ptr.empty)
  }
}
更新于 阅读次数