我是 Gemini-2.0-flash-exp 打造的 AI 助手,我的小脑袋瓜可厉害啦,帮你咻咻咻地概括文章重点!✨
这段代码定义了一个名为`StreamReuseFifo`的SpinalHDL模块,实现了一个可复用的FIFO(先进先出)队列。该FIFO支持可配置的深度、延迟(0、1或2个时钟周期)以及异步读取。核心功能包括:数据压入(push)、弹出(pop)、以及查询队列的占用率和可用空间。此外,该模块还考虑了最大时钟频率(FMax)优化,并提供了形式化验证的功能,如检查特定条件下的数据是否在FIFO中存在,以及计算满足特定条件的数据数量。总之,`StreamReuseFifo`是一个高度可配置且适用于FPGA设计的流式FIFO。
StreamFifo
import spinal.core._
import spinal.lib._
import scala.language.postfixOps
/**
 * Minimal view of a stream FIFO: its two stream endpoints plus the fill level
 * as observed from each of them.
 *
 * @tparam T payload type carried by the push and pop streams
 */
trait StreamReuseFifoInterface[T <: Data] {
  // Stream endpoints.
  /** Stream through which elements enter the FIFO. */
  def push: Stream[T]
  /** Stream through which elements leave the FIFO. */
  def pop: Stream[T]

  // Fill-level accessors.
  /** Occupancy as reported on the push side. */
  def pushOccupancy: UInt
  /** Occupancy as reported on the pop side. */
  def popOccupancy: UInt
}
/** Factory for [[StreamReuseFifo]] that selects the read/bypass options from a latency figure. */
object StreamReuseFifo {
  /**
   * Builds a [[StreamReuseFifo]] configured for the requested latency.
   *
   * The latency is mapped onto the component options as follows:
   *  - 2 : withAsyncRead = false, withBypass = false
   *  - 1 : withAsyncRead = true,  withBypass = false
   *  - 0 : withAsyncRead = true,  withBypass = true
   *
   * @param dataType payload type of the FIFO
   * @param depth    depth forwarded to the component
   * @param latency  requested latency in cycles; must be 0, 1 or 2 (checked with an assertion)
   * @param forFMax  forwarded to the component's forFMax option
   * @return the newly created FIFO component
   */
  def apply[T <: Data](dataType: HardType[T],
                       depth: Int,
                       latency: Int = 2,
                       forFMax: Boolean = false): StreamReuseFifo[T] = {
    assert(latency >= 0 && latency <= 2)
    // Instantiate the component declared in this file. The previous code built
    // spinal.lib's StreamFifo here, so this factory never produced a StreamReuseFifo.
    new StreamReuseFifo(
      dataType,
      depth,
      withAsyncRead = latency < 2,
      withBypass = latency == 0,
      forFMax = forFMax
    )
  }
}
/**
 * Stream FIFO with a configurable depth and a latency of 0, 1 or 2 cycles.
 * Fully redesigned in release 1.8.2 allowing improved timing closure.
 *
 * One of three implementations is generated depending on depth:
 *  - depth == 0 : push is connected directly to pop
 *  - depth == 1 : a single m2sPipe stage (optionally bypassed)
 *  - depth > 1  : a Mem (or Vec of registers) addressed by push/pop pointers
 *
 * @param dataType Payload type of the push and pop streams
 * @param depth Number of elements stored in the fifo. Note that if withAsyncRead==false, then one extra transaction can be stored
 * @param withAsyncRead Read the memory using an asynchronous read port (ex distributed ram). If false, add 1 cycle latency
 * @param withBypass Bypass the push port to the pop port when the fifo is empty. If false, add 1 cycle latency
 *                   Only available if withAsyncRead == true
 * @param allowExtraMsb Allow the push/pop pointers to carry one extra MSB when depth is a power of two,
 *                      so that full and empty can be told apart without the "wentUp" flag
 * @param forFMax Tune the design to get the maximal clock frequency
 * @param useVec Use a Vec of registers instead of a Mem to store the content
 *               Only available if withAsyncRead == true
 */
class StreamReuseFifo[T <: Data](val dataType: HardType[T],
val depth: Int,
val withAsyncRead : Boolean = false,
val withBypass : Boolean = false,
val allowExtraMsb : Boolean = true,
val forFMax : Boolean = false,
val useVec : Boolean = false) extends Component {
// Parameter sanity checks: bypass and Vec storage both rely on the asynchronous read path.
require(depth >= 0)
if(withBypass) require(withAsyncRead)
if(useVec) require (withAsyncRead)
// External interface: push/pop streams, a flush input (defaults to False) and the fill-level outputs.
val io = new Bundle with StreamReuseFifoInterface[T]{
val push = slave Stream (dataType)
val pop = master Stream (dataType)
val flush = in Bool() default(False)
val occupancy = out UInt (log2Up(depth + 1) bits)
val availability = out UInt (log2Up(depth + 1) bits)
// Both sides of the interface report the same occupancy signal.
override def pushOccupancy = occupancy
override def popOccupancy = occupancy
}
// Up/down counter used by the forFMax variants.
// value + 1 and value - 1 are both computed (and kept via KeepAttribute), then one of them is
// selected when exactly one of incr/decr is asserted. io.flush reloads the init value.
class CounterUpDownFmax(states : BigInt, init : BigInt) extends Area{
val incr, decr = Bool()
val value = Reg(UInt(log2Up(states) bits)) init(init)
val plusOne = KeepAttribute(value + 1)
val minusOne = KeepAttribute(value - 1)
when(incr =/= decr){
value := incr.mux(plusOne, minusOne)
}
// Declared after the update above, so the flush assignment takes priority.
when(io.flush) { value := init }
}
// The extra pointer MSB is only used when allowed and when depth is a power of two.
val withExtraMsb = allowExtraMsb && isPow2(depth)
// depth == 0: no storage at all, push is wired straight to pop.
val bypass = (depth == 0) generate new Area {
io.push >> io.pop
io.occupancy := 0
io.availability := 0
}
// depth == 1: a single m2sPipe stage acts as the storage element.
val oneStage = (depth == 1) generate new Area {
val doFlush = CombInit(io.flush)
val buffer = io.push.m2sPipe(flush = doFlush)
io.pop << buffer
io.occupancy := U(buffer.valid)
io.availability := U(!buffer.valid)
if(withBypass){
// While the stage is empty, forward push directly to pop.
when(!buffer.valid){
io.pop.valid := io.push.valid
io.pop.payload := io.push.payload
doFlush setWhen(io.pop.ready)
// doFlush is raised here because (per the original author's note, m2sPipe internals are not
// visible in this file) m2sPipe's rValid = RegNextWhen(self.valid, self.ready) init(False).
// With the bypass active, rValid must not sample the current self.valid, so doFlush is raised
// and rValid is cleared through clearWhen(flush).
}
}
}
// depth > 1: memory-based implementation.
val logic = (depth > 1) generate new Area {
// Storage: either a Vec of registers or a Mem, exactly one of the two is generated.
val vec = useVec generate Vec(Reg(dataType), depth)
val ram = !useVec generate Mem(dataType, depth)
val ptr = new Area{
val doPush, doPop = Bool() // Both signals are relative to the internal RAM; doPush is io.push.fire, differing only in the bypass case
val full, empty = Bool()
val push = Reg(UInt(log2Up(depth) + withExtraMsb.toInt bits)) init(0)
val pop = Reg(UInt(log2Up(depth) + withExtraMsb.toInt bits)) init(0)
val occupancy = cloneOf(io.occupancy)
val popOnIo = cloneOf(pop) // Used to track the global occupancy of the fifo (the extra buffer of !withAsyncRead)
// Remembers whether the last unbalanced transfer was a push; used below to tell full from
// empty when the pointers are equal and there is no extra MSB.
val wentUp = RegNextWhen(doPush, doPush =/= doPop) init(False) clearWhen (io.flush)
// pop is internal only: it advances when the RAM read is issued, but there is latency between
// issuing the read and the read completing, so popOnIo records the pointer of completed reads.
val arb = new Area {
// full exists to back-pressure push. pop effectively generates the read address ahead of time,
// whereas popOnIo is the read address at the moment the external side fires.
// full is derived from popOnIo rather than pop because the entry pop has moved past may not have
// actually been read yet (per the original note, pop is usually one ahead of popOnIo).
// empty drives the valid of the pop side (more precisely, it feeds addressGen). empty is not
// exposed externally, so whether a pop can be issued only depends on the internal pop and push pointers.
val area = !forFMax generate {
withExtraMsb match {
case true => { //as we have extra MSB, we don't need the "wentUp"
full := (push ^ popOnIo ^ depth) === 0 // full when push and popOnIo differ exactly by depth, i.e. only in the extra MSB
empty := push === pop
}
case false => {
full := push === popOnIo && wentUp
empty := push === pop && !wentUp
}
}
}
val fmax = forFMax generate new Area {
val counterWidth = log2Up(depth) + 1
// empty is from the internal point of view (used for pop).
// The counter starts with only its MSB set, so empty is asserted after reset.
val emptyTracker = new CounterUpDownFmax(1 << counterWidth, 1 << (counterWidth - 1)) {
incr := doPop
decr := doPush
empty := value.msb
}
// full is from the external point of view (used for push).
// The counter starts depth below the MSB-set value, so the MSB becomes set after depth net pushes.
val fullTracker = new CounterUpDownFmax(1 << counterWidth, (1 << (counterWidth - 1)) - depth) {
incr := io.push.fire
decr := io.pop.fire
full := value.msb
}
}
}
// Pointer updates. For a non power-of-two depth the pointers wrap explicitly at depth - 1.
when(doPush){
push := push + 1
if(!isPow2(depth)) when(push === depth - 1){ push := 0 }
}
when(doPop){
pop := pop + 1
if(!isPow2(depth)) when(pop === depth - 1){ pop := 0 }
}
// Declared last, so a flush overrides the increments above.
when(io.flush){
push := 0
pop := 0
}
// Occupancy computation: exactly one of the three variants below is generated.
val forPow2 = (withExtraMsb && !forFMax) generate new Area{
occupancy := push - popOnIo //if no extra msb, could be U(full ## (push - popOnIo))
}
val notPow2 = (!withExtraMsb && !forFMax) generate new Area{
// Dedicated counter driven by the external push/pop fires.
val counter = Reg(UInt(log2Up(depth + 1) bits)) init(0)
counter := counter + U(io.push.fire) - U(io.pop.fire)
occupancy := counter
when(io.flush) { counter := 0 }
}
val fmax = forFMax generate new CounterUpDownFmax(depth + 1, 0){
incr := io.push.fire
decr := io.pop.fire
occupancy := value
}
}
// Push side: accepts data while not full and writes it at the push pointer.
val push = new Area {
io.push.ready := !ptr.full
ptr.doPush := io.push.fire
val onRam = !useVec generate new Area {
val write = ram.writePort()
write.valid := io.push.fire
write.address := ptr.push.resized
write.data := io.push.payload
}
val onVec = useVec generate new Area {
when(io.push.fire){
vec.write(ptr.push.resized, io.push.payload)
}
}
}
// Pop side: generates read addresses while not empty and presents the read data on io.pop.
val pop = new Area{
val addressGen = Stream(UInt(log2Up(depth) bits))
addressGen.valid := !ptr.empty
addressGen.payload := ptr.pop.resized
ptr.doPop := addressGen.fire
// Synchronous read path (withAsyncRead == false).
val sync = !withAsyncRead generate new Area{
assert(!useVec)
val readArbitration = addressGen.m2sPipe(flush = io.flush) // valid and the read address are delayed by one cycle
val readPort = ram.readSyncPort // likewise one cycle of delay
readPort.cmd := addressGen.toFlowFire // toFlowFire: the read command does not need ready
io.pop << readArbitration.translateWith(readPort.rsp) // after valid is delayed by one cycle, the payload is replaced by the read data
val popReg = RegNextWhen(ptr.pop, readArbitration.fire) init(0)
ptr.popOnIo := popReg // sample the pop pointer once the read has completed
when(io.flush){ popReg := 0 }
}
// Asynchronous read path (withAsyncRead == true).
val async = withAsyncRead generate new Area{
val readed = useVec match {
case true => vec.read(addressGen.payload)
case false => ram.readAsync(addressGen.payload)
}
io.pop << addressGen.translateWith(readed)
// No output stage here, so the externally visible pop pointer is the internal one.
ptr.popOnIo := ptr.pop
if(withBypass){
// While empty, forward push directly to pop; if pop accepts the data, it is not stored.
when(ptr.empty){
io.pop.valid := io.push.valid
io.pop.payload := io.push.payload
ptr.doPush clearWhen(io.pop.ready)
}
}
}
}
io.occupancy := ptr.occupancy
if(!forFMax) io.availability := depth - ptr.occupancy
// forFMax variant of availability: a dedicated counter starting at depth.
val fmaxAvail = forFMax generate new CounterUpDownFmax(depth + 1, depth){
incr := io.pop.fire
decr := io.push.fire
io.availability := value
}
}
// check a condition against all valid payloads in the FIFO RAM
// Returns one Bool per storage location: True when that location is covered by the
// valid-entry mask and cond holds for its content.
// Reads logic.* directly, which is only generated when depth > 1.
def formalCheckRam(cond: T => Bool): Vec[Bool] = this rework new Composite(this){
val condition = (0 until depth).map(x => cond(if (useVec) logic.vec(x) else logic.ram(x)))
// create mask for all valid payloads in FIFO RAM
// inclusive [pop_idx, push_idx) exclusive
// assume FIFO RAM is full with valid payloads
// [ ... push_idx ... ]
// [ ... pop_idx ... ]
// mask [ 1 1 1 1 1 1 1 1 1 ]
val mask = Vec(True, depth)
// Pointers without the optional extra MSB.
val push_idx = logic.ptr.push.resize(log2Up(depth))
val pop_idx = logic.ptr.pop.resize(log2Up(depth))
// popMask(i)==0 indicates location i was popped
val popMask = (~((U(1) << pop_idx) - 1)).asBits
// pushMask(i)==1 indicates location i was pushed
val pushMask = ((U(1) << push_idx) - 1).asBits
// no wrap [ ... pop_idx ... push_idx ... ]
// popMask [ 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1]
// pushMask [ 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0] &
// mask [ 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0]
when(pop_idx < push_idx) {
mask.assignFromBits(pushMask & popMask)
// wrapped [ ... push_idx ... pop_idx ... ]
// popMask [ 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1]
// pushMask [ 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0] |
// mask [ 1 1 0 0 0 0 0 0 1 1 1 1 1 1 1]
}.elsewhen(pop_idx > push_idx) {
mask.assignFromBits(pushMask | popMask)
// empty?
// [ ... push_idx ... ]
// [ ... pop_idx ... ]
// mask [ 0 0 0 0 0 0 0 0 0 ]
}.elsewhen(logic.ptr.empty) {
mask := mask.getZero
}
// Remaining case (equal indices, not empty) keeps the all-ones default mask.
val check = mask.zipWithIndex.map{case (x, id) => x & condition(id)}
val vec = Vec(check)
}.vec
// Checks the condition against the payload currently presented on io.pop.
// The result is forced to False when withAsyncRead is true.
def formalCheckOutputStage(cond: T => Bool): Bool = this.rework {
// only with sync RAM read, io.pop is directly connected to the m2sPipe() stage
Bool(!withAsyncRead) & io.pop.valid & cond(io.pop.payload)
}
// verify this works, then we can simplify below
//def formalCheck(cond: T => Bool): Vec[Bool] = this.rework {
// Vec(formalCheckOutputStage(cond) +: formalCheckRam(cond))
//}
// True when some valid storage location or the output stage holds a payload equal to word.
def formalContains(word: T): Bool = this.rework {
formalCheckRam(_ === word.pull()).reduce(_ || _) || formalCheckOutputStage(_ === word.pull())
}
// True when some valid storage location or the output stage holds a payload satisfying cond.
def formalContains(cond: T => Bool): Bool = this.rework {
formalCheckRam(cond).reduce(_ || _) || formalCheckOutputStage(cond)
}
// Number of payloads equal to word, counted over the storage and the output stage.
def formalCount(word: T): UInt = this.rework {
// occurrence count in RAM and in m2sPipe()
CountOne(formalCheckRam(_ === word.pull())) +^ U(formalCheckOutputStage(_ === word.pull()))
}
// Number of payloads satisfying cond, counted over the storage and the output stage.
def formalCount(cond: T => Bool): UInt = this.rework {
// occurrence count in RAM and in m2sPipe()
CountOne(formalCheckRam(cond)) +^ U(formalCheckOutputStage(cond))
}
// Adds a cover statement for the scenario where push has been back-pressured at some point
// (io.push.ready low) and the internal empty flag is later asserted.
def formalFullToEmpty() = this.rework {
val was_full = RegInit(False) setWhen(!io.push.ready)
cover(was_full && logic.ptr.empty)
}
}