我是 Gemini-2.0-flash-exp 打造的 AI 助手,我的小脑袋瓜可厉害啦,帮你咻咻咻地概括文章重点!✨

这段代码定义了一个名为`StreamReuseFifo`的SpinalHDL模块,实现了一个可复用的FIFO(先进先出)队列。该FIFO支持可配置的深度和延迟(0、1或2个时钟周期),并可选择异步读取以及在队列为空时将push端直接旁路到pop端。核心功能包括:数据压入(push)、弹出(pop)、清空(flush),以及查询队列的占用量(occupancy)和剩余可用空间(availability)。此外,该模块还提供了面向最大时钟频率(FMax)的优化选项,并提供了一组形式化验证辅助函数,例如检查FIFO中是否存在满足特定条件的数据,以及统计满足特定条件的数据个数。总之,`StreamReuseFifo`是一个高度可配置且适用于FPGA设计的流式FIFO。

StreamFifo

import spinal.core._
import spinal.lib._

import scala.language.postfixOps

/**
 * Minimal view of a stream FIFO: one input stream, one output stream and the
 * fill level as seen from each side.
 *
 * @tparam T payload type carried by both streams
 */
trait StreamReuseFifoInterface[T <: Data] {
  /** Stream through which elements enter the FIFO. */
  def push: Stream[T]

  /** Stream through which elements leave the FIFO. */
  def pop: Stream[T]

  /** Fill level reported for the push side. */
  def pushOccupancy: UInt

  /** Fill level reported for the pop side. */
  def popOccupancy: UInt
}

/**
 * Factory for [[StreamReuseFifo]] that derives the read/bypass configuration
 * from a single latency figure.
 */
object StreamReuseFifo{
  /**
   * Builds a [[StreamReuseFifo]] with the requested push-to-pop latency.
   *
   * Latency mapping:
   *  - 2 : withAsyncRead = false, withBypass = false
   *  - 1 : withAsyncRead = true,  withBypass = false
   *  - 0 : withAsyncRead = true,  withBypass = true
   *
   * @param dataType payload type of the fifo
   * @param depth    number of elements stored in the fifo
   * @param latency  0, 1 or 2 cycles; any other value fails the assertion
   * @param forFMax  forwarded unchanged to the component
   * @return the newly instantiated component
   */
  def apply[T <: Data](dataType: HardType[T],
                       depth: Int,
                       latency : Int = 2,
                       forFMax : Boolean = false): StreamReuseFifo[T] = {
    assert(latency >= 0 && latency <= 2)
    // Instantiate the component declared in this file. Previously this built
    // spinal.lib's StreamFifo, so the companion never produced a StreamReuseFifo.
    new StreamReuseFifo(
      dataType,
      depth,
      withAsyncRead = latency < 2,
      withBypass = latency == 0,
      forFMax = forFMax
    )
  }
}

/**
 * Stream FIFO component with a flush input and occupancy/availability outputs.
 *
 * Fully redesigned in release 1.8.2 allowing improved timing closure.
 * - latency of 0, 1, 2 cycles
 *
 * Three implementations are generated depending on depth:
 * - depth == 0 : push is connected straight to pop
 * - depth == 1 : a single m2sPipe stage
 * - depth  > 1 : pointer based storage (Mem or Vec of registers)
 *
 * @param dataType Payload type of the push and pop streams
 * @param depth Number of element stored in the fifo, Note that if withAsyncRead==false, then one extra transaction can be stored
 * @param withAsyncRead Read the memory using asynchronous read port (ex distributed ram). If false, add 1 cycle latency
 * @param withBypass Bypass the push port to the pop port when the fifo is empty. If false, add 1 cycle latency
 *                   Only available if withAsyncRead == true
 * @param allowExtraMsb When true and depth is a power of two, the push/pop pointers get one extra MSB which is
 *                      used for the full/empty/occupancy computation instead of the wentUp register
 * @param forFMax Tune the design to get the maximal clock frequency
 * @param useVec Use a Vec of register instead of a Mem to store the content
 *               Only available if withAsyncRead == true
 */
class StreamReuseFifo[T <: Data](val dataType: HardType[T],
                            val depth: Int,
                            val withAsyncRead : Boolean = false,
                            val withBypass : Boolean = false,
                            val allowExtraMsb : Boolean = true,
                            val forFMax : Boolean = false,
                            val useVec : Boolean = false) extends Component {
  require(depth >= 0)

  // Both options read the storage combinatorially, so they need the async read path
  if(withBypass) require(withAsyncRead)
  if(useVec) require (withAsyncRead)

  val io = new Bundle with StreamReuseFifoInterface[T]{
    val push = slave Stream (dataType)
    val pop = master Stream (dataType)
    val flush = in Bool() default(False)
    val occupancy    = out UInt (log2Up(depth + 1) bits)
    val availability = out UInt (log2Up(depth + 1) bits)
    // The same occupancy value is reported for both sides
    override def pushOccupancy = occupancy
    override def popOccupancy = occupancy
  }

  /**
   * Up/down counter used by the forFMax variants.
   *
   * The +1 and -1 candidates are built as separate signals tagged with KeepAttribute.
   * The register only changes when exactly one of incr/decr is set, and io.flush
   * reloads the init value (the flush assignment comes last, so it wins).
   *
   * @param states number of states, used to size the register
   * @param init   reset and flush value
   */
  class CounterUpDownFmax(states : BigInt, init : BigInt) extends Area{
    val incr, decr = Bool()
    val value = Reg(UInt(log2Up(states) bits)) init(init)
    val plusOne = KeepAttribute(value + 1)
    val minusOne = KeepAttribute(value - 1)
    when(incr =/= decr){
      value := incr.mux(plusOne, minusOne)
    }
    when(io.flush) { value := init }
  }

  // The extra pointer MSB is only used when depth is a power of two
  val withExtraMsb = allowExtraMsb && isPow2(depth)

  // depth == 0 : no storage, push drives pop directly and both counters read 0
  val bypass = (depth == 0) generate new Area {
    io.push >> io.pop
    io.occupancy := 0
    io.availability := 0
  }

  // depth == 1 : a single m2sPipe stage holds the element
  val oneStage = (depth == 1) generate new Area {
    val doFlush = CombInit(io.flush)
    val buffer = io.push.m2sPipe(flush = doFlush)
    io.pop << buffer
    io.occupancy := U(buffer.valid)
    io.availability := U(!buffer.valid)

    if(withBypass){
      // While the stage is empty, present the push transaction directly on pop
      when(!buffer.valid){
        io.pop.valid := io.push.valid
        io.pop.payload := io.push.payload
        doFlush setWhen(io.pop.ready)
        // doFlush is raised here because m2sPipe's rValid is RegNextWhen(self.valid, self.ready) init(False).
        // Because of the bypass, rValid must not capture the current self.valid, so doFlush is asserted
        // and rValid is cleared through its clearWhen(flush).
      }
    }
  }

  // depth > 1 : pointer based implementation
  val logic = (depth > 1) generate new Area {
    // Exactly one of the two storages is generated, selected by useVec
    val vec = useVec generate Vec(Reg(dataType), depth)
    val ram = !useVec generate Mem(dataType, depth)

    val ptr = new Area{
      val doPush, doPop = Bool()  // Both are relative to the internal storage; doPush is io.push.fire except in the bypass case
      val full, empty = Bool()
      val push = Reg(UInt(log2Up(depth) + withExtraMsb.toInt bits)) init(0)
      val pop  = Reg(UInt(log2Up(depth) + withExtraMsb.toInt bits)) init(0)
      val occupancy = cloneOf(io.occupancy)
      val popOnIo = cloneOf(pop) // Used to track the global occupancy of the fifo (the extra buffer of !withAsyncRead)
      val wentUp = RegNextWhen(doPush, doPush =/= doPop) init(False) clearWhen (io.flush)
      // pop only describes the internal side: it advances as soon as the storage pop is issued, but there is
      // latency between issuing the pop and the read completing, so popOnIo records the pointer of completed reads.

      val arb = new Area {
        // full back-pressures push. pop is effectively the read address generated ahead of time, whereas
        // popOnIo is the read address at the time the external side fires.
        // full therefore does not use pop, because the entry it points to may not have been read yet
        // (pop is usually one ahead of popOnIo).
        // empty drives the pop valid (more precisely it feeds addressGen directly). empty is not exposed
        // externally, so whether a pop can be issued only depends on the internal pop and push pointers.
        val area = !forFMax generate {
          withExtraMsb match {
            case true => { //as we have extra MSB, we don't need the "wentUp"
              full := (push ^ popOnIo ^ depth) === 0  // full when push XOR popOnIo equals depth, i.e. the pointers differ only in the extra MSB
              empty := push === pop
            }
            case false => {
              full := push === popOnIo && wentUp
              empty := push === pop && !wentUp
            }
          }
        }

        val fmax = forFMax generate new Area {
          val counterWidth = log2Up(depth) + 1
          // empty is tracked from the internal point of view (used for pop)
          val emptyTracker = new CounterUpDownFmax(1 << counterWidth, 1 << (counterWidth - 1)) {
            incr := doPop
            decr := doPush
            empty := value.msb
          }
          // full is tracked from the external point of view (used for push)
          val fullTracker = new CounterUpDownFmax(1 << counterWidth, (1 << (counterWidth - 1)) - depth) {
            incr := io.push.fire
            decr := io.pop.fire
            full := value.msb
          }
        }
      }


      // Pointer increments; for a non power of two depth the pointers wrap explicitly at depth - 1
      when(doPush){
        push := push + 1
        if(!isPow2(depth)) when(push === depth - 1){ push := 0 }
      }
      when(doPop){
        pop := pop + 1
        if(!isPow2(depth)) when(pop === depth - 1){ pop := 0 }
      }

      // Flush is assigned last so it overrides the increments above
      when(io.flush){
        push := 0
        pop := 0
      }


      // Occupancy computation: exactly one of the three areas below is generated
      val forPow2 = (withExtraMsb && !forFMax) generate new Area{
        occupancy := push - popOnIo  //if no extra msb, could be U(full ## (push - popOnIo))
      }

      val notPow2 = (!withExtraMsb && !forFMax) generate new Area{
        // Dedicated counter updated from the external push/pop handshakes
        val counter = Reg(UInt(log2Up(depth + 1) bits)) init(0)
        counter := counter + U(io.push.fire) - U(io.pop.fire)
        occupancy := counter

        when(io.flush) { counter := 0 }
      }
      val fmax = forFMax generate new CounterUpDownFmax(depth + 1, 0){
        incr := io.push.fire
        decr := io.pop.fire
        occupancy := value
      }
    }

    // Write side: store the payload at ptr.push on every accepted push
    val push = new Area {
      io.push.ready := !ptr.full
      ptr.doPush := io.push.fire
      val onRam = !useVec generate new Area {
        val write = ram.writePort()
        write.valid := io.push.fire
        write.address := ptr.push.resized
        write.data := io.push.payload
      }
      val onVec = useVec generate new Area {
        when(io.push.fire){
          vec.write(ptr.push.resized, io.push.payload)
        }
      }
    }

    // Read side: addressGen is a stream of read addresses, valid while the storage is not empty
    val pop = new Area{
      val addressGen = Stream(UInt(log2Up(depth) bits))
      addressGen.valid := !ptr.empty
      addressGen.payload := ptr.pop.resized
      ptr.doPop := addressGen.fire

      val sync = !withAsyncRead generate new Area{
        assert(!useVec)
        val readArbitration = addressGen.m2sPipe(flush = io.flush)   // valid and read address registered for one cycle
        val readPort = ram.readSyncPort   // likewise a 1 cycle delay
        readPort.cmd := addressGen.toFlowFire   // toFlowFire: the read command does not need a ready
        io.pop << readArbitration.translateWith(readPort.rsp)   // the delayed valid is paired with the read data as payload

        val popReg = RegNextWhen(ptr.pop, readArbitration.fire) init(0)
        ptr.popOnIo := popReg   // pop pointer sampled once the read has completed
        when(io.flush){ popReg := 0 }
      }

      val async = withAsyncRead generate new Area{
        val readed = useVec match {
          case true => vec.read(addressGen.payload)
          case false => ram.readAsync(addressGen.payload)
        }
        io.pop << addressGen.translateWith(readed)
        // No output stage here, so the io-side pointer is the internal pop pointer
        ptr.popOnIo := ptr.pop

        if(withBypass){
          // While empty, present the push transaction directly on pop; if pop accepts it,
          // doPush is cleared so the element is not counted as pushed into the storage
          when(ptr.empty){
            io.pop.valid := io.push.valid
            io.pop.payload := io.push.payload
            ptr.doPush clearWhen(io.pop.ready)
          }
        }
      }
    }

    io.occupancy := ptr.occupancy
    if(!forFMax) io.availability := depth - ptr.occupancy
    // forFMax: availability gets its own counter, starting at depth
    val fmaxAvail = forFMax generate new CounterUpDownFmax(depth + 1, depth){
      incr := io.pop.fire
      decr := io.push.fire
      io.availability := value
    }
  }



  /**
   * Evaluates cond on every storage location and masks out the locations that do not
   * currently hold a valid element.
   *
   * NOTE(review): this reads logic.vec / logic.ram / logic.ptr, and logic is only
   * generated when depth > 1 - presumably it must not be used with depth 0 or 1; confirm.
   *
   * @param cond predicate applied to the content of each storage location
   * @return one Bool per storage location, set when the location is in the valid range and cond holds
   */
  // check a condition against all valid payloads in the FIFO RAM
  def formalCheckRam(cond: T => Bool): Vec[Bool] = this rework new Composite(this){
    val condition = (0 until depth).map(x => cond(if (useVec) logic.vec(x) else logic.ram(x)))
    // create mask for all valid payloads in FIFO RAM
    // inclusive [pop_idx, push_idx) exclusive
    // assume FIFO RAM is full with valid payloads
    //           [ ...  push_idx ... ]
    //           [ ...  pop_idx  ... ]
    // mask      [ 1 1 1 1 1 1 1 1 1 ]
    val mask = Vec(True, depth)
    // Drop the extra MSB (if any) to get plain storage indexes
    val push_idx = logic.ptr.push.resize(log2Up(depth))
    val pop_idx = logic.ptr.pop.resize(log2Up(depth))
    // popMask(i)==0 indicates location i was popped
    val popMask = (~((U(1) << pop_idx) - 1)).asBits
    // pushMask(i)==1 indicates location i was pushed
    val pushMask = ((U(1) << push_idx) - 1).asBits
    // no wrap   [ ... pop_idx  ... push_idx ... ]
    // popMask   [ 0 0 1 1 1 1  1 1 1 1 1 1 1 1 1]
    // pushMask  [ 1 1 1 1 1 1  1 1 0 0 0 0 0 0 0] &
    // mask      [ 0 0 1 1 1 1  1 1 0 0 0 0 0 0 0]
    when(pop_idx < push_idx) {
      mask.assignFromBits(pushMask & popMask)
      // wrapped   [ ... push_idx ... pop_idx  ... ]
      // popMask   [ 0 0 0 0 0 0  0 0 1 1 1 1 1 1 1]
      // pushMask  [ 1 1 0 0 0 0  0 0 0 0 0 0 0 0 0] |
      // mask      [ 1 1 0 0 0 0  0 0 1 1 1 1 1 1 1]
    }.elsewhen(pop_idx > push_idx) {
      mask.assignFromBits(pushMask | popMask)
      // empty?
      //           [ ...  push_idx ... ]
      //           [ ...  pop_idx  ... ]
      // mask      [ 0 0 0 0 0 0 0 0 0 ]
    }.elsewhen(logic.ptr.empty) {
      mask := mask.getZero
    }
    // Remaining case (equal indexes, not empty) keeps the all-ones default mask
    val check = mask.zipWithIndex.map{case (x, id) => x & condition(id)}
    val vec = Vec(check)
  }.vec

  /**
   * Evaluates cond on the element currently presented on io.pop.
   *
   * @param cond predicate applied to io.pop.payload
   * @return io.pop.valid & cond(io.pop.payload) when withAsyncRead is false, constant false otherwise
   */
  def formalCheckOutputStage(cond: T => Bool): Bool = this.rework {
    // only with sync RAM read, io.pop is directly connected to the m2sPipe() stage
    Bool(!withAsyncRead) & io.pop.valid & cond(io.pop.payload)
  }

  // verify this works, then we can simplify below
  //def formalCheck(cond: T => Bool): Vec[Bool] = this.rework {
  //  Vec(formalCheckOutputStage(cond) +: formalCheckRam(cond))
  //}

  /**
   * @param word value to look for
   * @return true when word is found by formalCheckRam or formalCheckOutputStage
   */
  def formalContains(word: T): Bool = this.rework {
    formalCheckRam(_ === word.pull()).reduce(_ || _) || formalCheckOutputStage(_ === word.pull())
  }

  /**
   * @param cond predicate to look for
   * @return true when any element reported by formalCheckRam or formalCheckOutputStage satisfies cond
   */
  def formalContains(cond: T => Bool): Bool = this.rework {
    formalCheckRam(cond).reduce(_ || _) || formalCheckOutputStage(cond)
  }

  /**
   * @param word value to count
   * @return number of matches from formalCheckRam plus the match from formalCheckOutputStage
   */
  def formalCount(word: T): UInt = this.rework {
    // occurrence count in RAM and in m2sPipe()
    CountOne(formalCheckRam(_ === word.pull())) +^ U(formalCheckOutputStage(_ === word.pull()))
  }

  /**
   * @param cond predicate to count
   * @return number of matches from formalCheckRam plus the match from formalCheckOutputStage
   */
  def formalCount(cond: T => Bool): UInt = this.rework {
    // occurrence count in RAM and in m2sPipe()
    CountOne(formalCheckRam(cond)) +^ U(formalCheckOutputStage(cond))
  }

  /**
   * Adds a cover statement for "push was back-pressured at some point, and the internal
   * storage is now empty".
   */
  def formalFullToEmpty() = this.rework {
    // Sticky flag: set the first time io.push.ready is low, never cleared
    val was_full = RegInit(False) setWhen(!io.push.ready)
    cover(was_full && logic.ptr.empty)
  }
}