diff options
Diffstat (limited to 'plugins/dfsound')
-rw-r--r-- | plugins/dfsound/externals.h | 13 | ||||
-rw-r--r-- | plugins/dfsound/registers.c | 78 | ||||
-rw-r--r-- | plugins/dfsound/reverb.c | 147 | ||||
-rw-r--r-- | plugins/dfsound/spu.c | 124 | ||||
-rw-r--r-- | plugins/dfsound/spu_c64x.c | 55 | ||||
-rw-r--r-- | plugins/dfsound/spu_c64x.h | 8 | ||||
-rw-r--r-- | plugins/dfsound/spu_c64x_dspcode.c | 26 |
7 files changed, 247 insertions, 204 deletions
diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h index 4832fac..f6fc440 100644 --- a/plugins/dfsound/externals.h +++ b/plugins/dfsound/externals.h @@ -28,6 +28,11 @@ #define noinline
#define unlikely(x) x
#endif
+#if defined(__GNUC__) && !defined(_TMS320C6X)
+#define preload __builtin_prefetch
+#else
+#define preload(...)
+#endif
#define PSE_LT_SPU 4
#define PSE_SPU_ERR_SUCCESS 0
@@ -121,8 +126,6 @@ typedef struct int VolLeft;
int VolRight;
- int iRVBLeft;
- int iRVBRight;
int FB_SRC_A; // (offset)
int FB_SRC_B; // (offset)
@@ -224,9 +227,10 @@ typedef struct int iRightXAVol;
SPUCHAN * s_chan;
- int * SB;
+ REVERBInfo * rvb;
- int * RVB;
+ // buffers
+ int * SB;
int * SSumLR;
int pad[29];
@@ -240,7 +244,6 @@ typedef struct #ifndef _IN_SPU
extern SPUInfo spu;
-extern REVERBInfo rvb;
void do_samples(unsigned int cycles_to, int do_sync);
void schedule_next_irq(void);
diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c index 7a49b1c..4588fa7 100644 --- a/plugins/dfsound/registers.c +++ b/plugins/dfsound/registers.c @@ -146,14 +146,14 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, //-------------------------------------------------//
case H_SPUReverbAddr:
if(val==0xFFFF || val<=0x200)
- {rvb.StartAddr=rvb.CurrAddr=0;}
+ {spu.rvb->StartAddr=spu.rvb->CurrAddr=0;}
else
{
const long iv=(unsigned long)val<<2;
- if(rvb.StartAddr!=iv)
+ if(spu.rvb->StartAddr!=iv)
{
- rvb.StartAddr=(unsigned long)val<<2;
- rvb.CurrAddr=rvb.StartAddr;
+ spu.rvb->StartAddr=(unsigned long)val<<2;
+ spu.rvb->CurrAddr=spu.rvb->StartAddr;
}
}
goto rvbd;
@@ -163,11 +163,11 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, goto upd_irq;
//-------------------------------------------------//
case H_SPUrvolL:
- rvb.VolLeft=val;
+ spu.rvb->VolLeft=val;
break;
//-------------------------------------------------//
case H_SPUrvolR:
- rvb.VolRight=val;
+ spu.rvb->VolRight=val;
break;
//-------------------------------------------------//
@@ -246,38 +246,38 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val, ReverbOn(16,24,val);
break;
//-------------------------------------------------//
- case H_Reverb+0 : rvb.FB_SRC_A=val*4; goto rvbd;
- case H_Reverb+2 : rvb.FB_SRC_B=val*4; goto rvbd;
- case H_Reverb+4 : rvb.IIR_ALPHA=(short)val; goto rvbd;
- case H_Reverb+6 : rvb.ACC_COEF_A=(short)val; goto rvbd;
- case H_Reverb+8 : rvb.ACC_COEF_B=(short)val; goto rvbd;
- case H_Reverb+10 : rvb.ACC_COEF_C=(short)val; goto rvbd;
- case H_Reverb+12 : rvb.ACC_COEF_D=(short)val; goto rvbd;
- case H_Reverb+14 : rvb.IIR_COEF=(short)val; goto rvbd;
- case H_Reverb+16 : rvb.FB_ALPHA=(short)val; goto rvbd;
- case H_Reverb+18 : rvb.FB_X=(short)val; goto rvbd;
- case H_Reverb+20 : rvb.IIR_DEST_A0=val*4; goto rvbd;
- case H_Reverb+22 : rvb.IIR_DEST_A1=val*4; goto rvbd;
- case H_Reverb+24 : rvb.ACC_SRC_A0=val*4; goto rvbd;
- case H_Reverb+26 : rvb.ACC_SRC_A1=val*4; goto rvbd;
- case H_Reverb+28 : rvb.ACC_SRC_B0=val*4; goto rvbd;
- case H_Reverb+30 : rvb.ACC_SRC_B1=val*4; goto rvbd;
- case H_Reverb+32 : rvb.IIR_SRC_A0=val*4; goto rvbd;
- case H_Reverb+34 : rvb.IIR_SRC_A1=val*4; goto rvbd;
- case H_Reverb+36 : rvb.IIR_DEST_B0=val*4; goto rvbd;
- case H_Reverb+38 : rvb.IIR_DEST_B1=val*4; goto rvbd;
- case H_Reverb+40 : rvb.ACC_SRC_C0=val*4; goto rvbd;
- case H_Reverb+42 : rvb.ACC_SRC_C1=val*4; goto rvbd;
- case H_Reverb+44 : rvb.ACC_SRC_D0=val*4; goto rvbd;
- case H_Reverb+46 : rvb.ACC_SRC_D1=val*4; goto rvbd;
- case H_Reverb+48 : rvb.IIR_SRC_B1=val*4; goto rvbd;
- case H_Reverb+50 : rvb.IIR_SRC_B0=val*4; goto rvbd;
- case H_Reverb+52 : rvb.MIX_DEST_A0=val*4; goto rvbd;
- case H_Reverb+54 : rvb.MIX_DEST_A1=val*4; goto rvbd;
- case H_Reverb+56 : rvb.MIX_DEST_B0=val*4; goto rvbd;
- case H_Reverb+58 : rvb.MIX_DEST_B1=val*4; goto rvbd;
- case H_Reverb+60 : rvb.IN_COEF_L=(short)val; goto rvbd;
- case H_Reverb+62 : rvb.IN_COEF_R=(short)val; goto rvbd;
+ case H_Reverb+0 : spu.rvb->FB_SRC_A=val*4; goto rvbd;
+ case H_Reverb+2 : spu.rvb->FB_SRC_B=val*4; goto rvbd;
+ case H_Reverb+4 : spu.rvb->IIR_ALPHA=(short)val; goto rvbd;
+ case H_Reverb+6 : spu.rvb->ACC_COEF_A=(short)val; goto rvbd;
+ case H_Reverb+8 : spu.rvb->ACC_COEF_B=(short)val; goto rvbd;
+ case H_Reverb+10 : spu.rvb->ACC_COEF_C=(short)val; goto rvbd;
+ case H_Reverb+12 : spu.rvb->ACC_COEF_D=(short)val; goto rvbd;
+ case H_Reverb+14 : spu.rvb->IIR_COEF=(short)val; goto rvbd;
+ case H_Reverb+16 : spu.rvb->FB_ALPHA=(short)val; goto rvbd;
+ case H_Reverb+18 : spu.rvb->FB_X=(short)val; goto rvbd;
+ case H_Reverb+20 : spu.rvb->IIR_DEST_A0=val*4; goto rvbd;
+ case H_Reverb+22 : spu.rvb->IIR_DEST_A1=val*4; goto rvbd;
+ case H_Reverb+24 : spu.rvb->ACC_SRC_A0=val*4; goto rvbd;
+ case H_Reverb+26 : spu.rvb->ACC_SRC_A1=val*4; goto rvbd;
+ case H_Reverb+28 : spu.rvb->ACC_SRC_B0=val*4; goto rvbd;
+ case H_Reverb+30 : spu.rvb->ACC_SRC_B1=val*4; goto rvbd;
+ case H_Reverb+32 : spu.rvb->IIR_SRC_A0=val*4; goto rvbd;
+ case H_Reverb+34 : spu.rvb->IIR_SRC_A1=val*4; goto rvbd;
+ case H_Reverb+36 : spu.rvb->IIR_DEST_B0=val*4; goto rvbd;
+ case H_Reverb+38 : spu.rvb->IIR_DEST_B1=val*4; goto rvbd;
+ case H_Reverb+40 : spu.rvb->ACC_SRC_C0=val*4; goto rvbd;
+ case H_Reverb+42 : spu.rvb->ACC_SRC_C1=val*4; goto rvbd;
+ case H_Reverb+44 : spu.rvb->ACC_SRC_D0=val*4; goto rvbd;
+ case H_Reverb+46 : spu.rvb->ACC_SRC_D1=val*4; goto rvbd;
+ case H_Reverb+48 : spu.rvb->IIR_SRC_B1=val*4; goto rvbd;
+ case H_Reverb+50 : spu.rvb->IIR_SRC_B0=val*4; goto rvbd;
+ case H_Reverb+52 : spu.rvb->MIX_DEST_A0=val*4; goto rvbd;
+ case H_Reverb+54 : spu.rvb->MIX_DEST_A1=val*4; goto rvbd;
+ case H_Reverb+56 : spu.rvb->MIX_DEST_B0=val*4; goto rvbd;
+ case H_Reverb+58 : spu.rvb->MIX_DEST_B1=val*4; goto rvbd;
+ case H_Reverb+60 : spu.rvb->IN_COEF_L=(short)val; goto rvbd;
+ case H_Reverb+62 : spu.rvb->IN_COEF_R=(short)val; goto rvbd;
}
return;
@@ -287,7 +287,7 @@ upd_irq: return;
rvbd:
- rvb.dirty = 1; // recalculate on next update
+ spu.rvb->dirty = 1; // recalculate on next update
}
////////////////////////////////////////////////////////////////////////
diff --git a/plugins/dfsound/reverb.c b/plugins/dfsound/reverb.c index 7e32b8e..2ff6edc 100644 --- a/plugins/dfsound/reverb.c +++ b/plugins/dfsound/reverb.c @@ -50,41 +50,40 @@ INLINE int rvb2ram_offs(int curr, int space, int iOff) // get_buffer content helper: takes care about wraps
#define g_buffer(var) \
- ((int)(signed short)spu.spuMem[rvb2ram_offs(curr_addr, space, rvb.n##var)])
+ ((int)(signed short)spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->n##var)])
// saturate iVal and store it as var
#define s_buffer(var, iVal) \
ssat32_to_16(iVal); \
- spu.spuMem[rvb2ram_offs(curr_addr, space, rvb.n##var)] = iVal
+ spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->n##var)] = iVal
#define s_buffer1(var, iVal) \
ssat32_to_16(iVal); \
- spu.spuMem[rvb2ram_offs(curr_addr, space, rvb.n##var + 1)] = iVal
+ spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->n##var + 1)] = iVal
////////////////////////////////////////////////////////////////////////
// portions based on spu2-x from PCSX2
-static void MixREVERB(int *SSumLR, int *RVB, int ns_to)
+static void MixREVERB(int *SSumLR, int *RVB, int ns_to, int curr_addr)
{
- int l_old = rvb.iRVBLeft;
- int r_old = rvb.iRVBRight;
- int curr_addr = rvb.CurrAddr;
- int space = 0x40000 - rvb.StartAddr;
- int l = 0, r = 0, ns;
+ const REVERBInfo *rvb = spu.rvb;
+ int IIR_ALPHA = rvb->IIR_ALPHA;
+ int IIR_COEF = rvb->IIR_COEF;
+ int space = 0x40000 - rvb->StartAddr;
+ int l, r, ns;
for (ns = 0; ns < ns_to * 2; )
{
- int IIR_ALPHA = rvb.IIR_ALPHA;
int ACC0, ACC1, FB_A0, FB_A1, FB_B0, FB_B1;
int mix_dest_a0, mix_dest_a1, mix_dest_b0, mix_dest_b1;
- int input_L = RVB[ns] * rvb.IN_COEF_L;
- int input_R = RVB[ns+1] * rvb.IN_COEF_R;
+ int input_L = RVB[ns] * rvb->IN_COEF_L;
+ int input_R = RVB[ns+1] * rvb->IN_COEF_R;
- int IIR_INPUT_A0 = ((g_buffer(IIR_SRC_A0) * rvb.IIR_COEF) + input_L) >> 15;
- int IIR_INPUT_A1 = ((g_buffer(IIR_SRC_A1) * rvb.IIR_COEF) + input_R) >> 15;
- int IIR_INPUT_B0 = ((g_buffer(IIR_SRC_B0) * rvb.IIR_COEF) + input_L) >> 15;
- int IIR_INPUT_B1 = ((g_buffer(IIR_SRC_B1) * rvb.IIR_COEF) + input_R) >> 15;
+ int IIR_INPUT_A0 = ((g_buffer(IIR_SRC_A0) * IIR_COEF) + input_L) >> 15;
+ int IIR_INPUT_A1 = ((g_buffer(IIR_SRC_A1) * IIR_COEF) + input_R) >> 15;
+ int IIR_INPUT_B0 = ((g_buffer(IIR_SRC_B0) * IIR_COEF) + input_L) >> 15;
+ int IIR_INPUT_B1 = ((g_buffer(IIR_SRC_B1) * IIR_COEF) + input_R) >> 15;
int iir_dest_a0 = g_buffer(IIR_DEST_A0);
int iir_dest_a1 = g_buffer(IIR_DEST_A1);
@@ -96,30 +95,34 @@ static void MixREVERB(int *SSumLR, int *RVB, int ns_to) int IIR_B0 = iir_dest_b0 + ((IIR_INPUT_B0 - iir_dest_b0) * IIR_ALPHA >> 15);
int IIR_B1 = iir_dest_b1 + ((IIR_INPUT_B1 - iir_dest_b1) * IIR_ALPHA >> 15);
+ preload(SSumLR + ns + 64*2/4 - 4);
+
s_buffer1(IIR_DEST_A0, IIR_A0);
s_buffer1(IIR_DEST_A1, IIR_A1);
s_buffer1(IIR_DEST_B0, IIR_B0);
s_buffer1(IIR_DEST_B1, IIR_B1);
- ACC0 = (g_buffer(ACC_SRC_A0) * rvb.ACC_COEF_A +
- g_buffer(ACC_SRC_B0) * rvb.ACC_COEF_B +
- g_buffer(ACC_SRC_C0) * rvb.ACC_COEF_C +
- g_buffer(ACC_SRC_D0) * rvb.ACC_COEF_D) >> 15;
- ACC1 = (g_buffer(ACC_SRC_A1) * rvb.ACC_COEF_A +
- g_buffer(ACC_SRC_B1) * rvb.ACC_COEF_B +
- g_buffer(ACC_SRC_C1) * rvb.ACC_COEF_C +
- g_buffer(ACC_SRC_D1) * rvb.ACC_COEF_D) >> 15;
+ preload(RVB + ns + 64*2/4 - 4);
+
+ ACC0 = (g_buffer(ACC_SRC_A0) * rvb->ACC_COEF_A +
+ g_buffer(ACC_SRC_B0) * rvb->ACC_COEF_B +
+ g_buffer(ACC_SRC_C0) * rvb->ACC_COEF_C +
+ g_buffer(ACC_SRC_D0) * rvb->ACC_COEF_D) >> 15;
+ ACC1 = (g_buffer(ACC_SRC_A1) * rvb->ACC_COEF_A +
+ g_buffer(ACC_SRC_B1) * rvb->ACC_COEF_B +
+ g_buffer(ACC_SRC_C1) * rvb->ACC_COEF_C +
+ g_buffer(ACC_SRC_D1) * rvb->ACC_COEF_D) >> 15;
FB_A0 = g_buffer(FB_SRC_A0);
FB_A1 = g_buffer(FB_SRC_A1);
FB_B0 = g_buffer(FB_SRC_B0);
FB_B1 = g_buffer(FB_SRC_B1);
- mix_dest_a0 = ACC0 - ((FB_A0 * rvb.FB_ALPHA) >> 15);
- mix_dest_a1 = ACC1 - ((FB_A1 * rvb.FB_ALPHA) >> 15);
+ mix_dest_a0 = ACC0 - ((FB_A0 * rvb->FB_ALPHA) >> 15);
+ mix_dest_a1 = ACC1 - ((FB_A1 * rvb->FB_ALPHA) >> 15);
- mix_dest_b0 = FB_A0 + (((ACC0 - FB_A0) * rvb.FB_ALPHA - FB_B0 * rvb.FB_X) >> 15);
- mix_dest_b1 = FB_A1 + (((ACC1 - FB_A1) * rvb.FB_ALPHA - FB_B1 * rvb.FB_X) >> 15);
+ mix_dest_b0 = FB_A0 + (((ACC0 - FB_A0) * rvb->FB_ALPHA - FB_B0 * rvb->FB_X) >> 15);
+ mix_dest_b1 = FB_A1 + (((ACC1 - FB_A1) * rvb->FB_ALPHA - FB_B1 * rvb->FB_X) >> 15);
s_buffer(MIX_DEST_A0, mix_dest_a0);
s_buffer(MIX_DEST_A1, mix_dest_a1);
@@ -129,73 +132,60 @@ static void MixREVERB(int *SSumLR, int *RVB, int ns_to) l = (mix_dest_a0 + mix_dest_b0) / 2;
r = (mix_dest_a1 + mix_dest_b1) / 2;
- l = (l * rvb.VolLeft) >> 15; // 15?
- r = (r * rvb.VolRight) >> 15;
+ l = (l * rvb->VolLeft) >> 15; // 15?
+ r = (r * rvb->VolRight) >> 15;
- SSumLR[ns++] += (l + l_old) / 2;
- SSumLR[ns++] += (r + r_old) / 2;
SSumLR[ns++] += l;
SSumLR[ns++] += r;
-
- l_old = l;
- r_old = r;
+ SSumLR[ns++] += l;
+ SSumLR[ns++] += r;
curr_addr++;
- if (curr_addr >= 0x40000) curr_addr = rvb.StartAddr;
+ if (curr_addr >= 0x40000) curr_addr = rvb->StartAddr;
}
-
- rvb.iRVBLeft = l;
- rvb.iRVBRight = r;
- rvb.CurrAddr = curr_addr;
}
-static void MixREVERB_off(int *SSumLR, int ns_to)
+static void MixREVERB_off(int *SSumLR, int ns_to, int curr_addr)
{
- int l_old = rvb.iRVBLeft;
- int r_old = rvb.iRVBRight;
- int curr_addr = rvb.CurrAddr;
- int space = 0x40000 - rvb.StartAddr;
- int l = 0, r = 0, ns;
+ const REVERBInfo *rvb = spu.rvb;
+ int space = 0x40000 - rvb->StartAddr;
+ int l, r, ns;
for (ns = 0; ns < ns_to * 2; )
{
+ preload(SSumLR + ns + 64*2/4 - 4);
+
l = (g_buffer(MIX_DEST_A0) + g_buffer(MIX_DEST_B0)) / 2;
r = (g_buffer(MIX_DEST_A1) + g_buffer(MIX_DEST_B1)) / 2;
- l = (l * rvb.VolLeft) >> 15;
- r = (r * rvb.VolRight) >> 15;
+ l = (l * rvb->VolLeft) >> 15;
+ r = (r * rvb->VolRight) >> 15;
- SSumLR[ns++] += (l + l_old) / 2;
- SSumLR[ns++] += (r + r_old) / 2;
SSumLR[ns++] += l;
SSumLR[ns++] += r;
-
- l_old = l;
- r_old = r;
+ SSumLR[ns++] += l;
+ SSumLR[ns++] += r;
curr_addr++;
- if (curr_addr >= 0x40000) curr_addr = rvb.StartAddr;
+ if (curr_addr >= 0x40000) curr_addr = rvb->StartAddr;
}
-
- rvb.iRVBLeft = l;
- rvb.iRVBRight = r;
- rvb.CurrAddr = curr_addr;
}
-static void prepare_offsets(void)
+static void REVERBPrep(void)
{
- int space = 0x40000 - rvb.StartAddr;
+ REVERBInfo *rvb = spu.rvb;
+ int space = 0x40000 - rvb->StartAddr;
int t;
#define prep_offs(v) \
- t = rvb.v; \
+ t = rvb->v; \
while (t >= space) \
t -= space; \
- rvb.n##v = t
+ rvb->n##v = t
#define prep_offs2(d, v1, v2) \
- t = rvb.v1 - rvb.v2; \
+ t = rvb->v1 - rvb->v2; \
while (t >= space) \
t -= space; \
- rvb.n##d = t
+ rvb->n##d = t
prep_offs(IIR_SRC_A0);
prep_offs(IIR_SRC_A1);
@@ -224,37 +214,18 @@ static void prepare_offsets(void) #undef prep_offs
#undef prep_offs2
- rvb.dirty = 0;
+ rvb->dirty = 0;
}
-INLINE void REVERBDo(int *SSumLR, int *RVB, int ns_to)
+INLINE void REVERBDo(int *SSumLR, int *RVB, int ns_to, int curr_addr)
{
- if (!rvb.StartAddr) // reverb is off
- {
- rvb.iRVBLeft = rvb.iRVBRight = 0;
- return;
- }
-
if (spu.spuCtrl & 0x80) // -> reverb on? oki
{
- if (unlikely(rvb.dirty))
- prepare_offsets();
-
- MixREVERB(SSumLR, RVB, ns_to);
- }
- else if (rvb.VolLeft || rvb.VolRight)
- {
- if (unlikely(rvb.dirty))
- prepare_offsets();
-
- MixREVERB_off(SSumLR, ns_to);
+ MixREVERB(SSumLR, RVB, ns_to, curr_addr);
}
- else // -> reverb off
+ else if (spu.rvb->VolLeft || spu.rvb->VolRight)
{
- // reverb runs anyway
- rvb.CurrAddr += ns_to / 2;
- while (rvb.CurrAddr >= 0x40000)
- rvb.CurrAddr -= 0x40000 - rvb.StartAddr;
+ MixREVERB_off(SSumLR, ns_to, curr_addr);
}
}
diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c index ec31b0c..8681d35 100644 --- a/plugins/dfsound/spu.c +++ b/plugins/dfsound/spu.c @@ -73,11 +73,8 @@ static char * libraryInfo = N_("P.E.Op.S. Sound Driver V1.7\nCoded by Pete B SPUInfo spu; SPUConfig spu_config; -// MAIN infos struct for each channel - -REVERBInfo rvb; - static int iFMod[NSSIZE]; +static int RVB[NSSIZE * 2]; int ChanBuf[NSSIZE]; #define CDDA_BUFFER_SIZE (16384 * sizeof(uint32_t)) // must be power of 2 @@ -763,11 +760,13 @@ static void do_silent_chans(int ns_to, int silentch) static void do_channels(int ns_to) { unsigned int mask; + int do_rvb, ch, d; SPUCHAN *s_chan; int *SB, sinc; - int ch, d; - memset(spu.RVB, 0, ns_to * sizeof(spu.RVB[0]) * 2); + do_rvb = spu.rvb->StartAddr && spu_config.iUseReverb; + if (do_rvb) + memset(RVB, 0, ns_to * sizeof(RVB[0]) * 2); mask = spu.dwNewChannel & 0xffffff; for (ch = 0; mask != 0; ch++, mask >>= 1) { @@ -812,14 +811,27 @@ static void do_channels(int ns_to) if (s_chan->bFMod == 2) // fmod freq channel memcpy(iFMod, &ChanBuf, ns_to * sizeof(iFMod[0])); - if (s_chan->bRVBActive) - mix_chan_rvb(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume, spu.RVB); + if (s_chan->bRVBActive && do_rvb) + mix_chan_rvb(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume, RVB); else mix_chan(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume); } + + if (spu.rvb->StartAddr) { + if (do_rvb) { + if (unlikely(spu.rvb->dirty)) + REVERBPrep(); + + REVERBDo(spu.SSumLR, RVB, ns_to, spu.rvb->CurrAddr); + } + + spu.rvb->CurrAddr += ns_to / 2; + while (spu.rvb->CurrAddr >= 0x40000) + spu.rvb->CurrAddr -= 0x40000 - spu.rvb->StartAddr; + } } -static void do_samples_finish(int *SSumLR, int *RVB, int ns_to, +static void do_samples_finish(int *SSumLR, int ns_to, int silentch, int decode_pos); // optional worker thread handling @@ -833,8 +845,7 @@ static struct spu_worker { unsigned int exit_thread; unsigned int i_ready; unsigned int i_reaped; - unsigned int req_sent; // dsp - unsigned int last_boot_cnt; + unsigned int last_boot_cnt; // dsp }; // aligning for C64X_DSP unsigned int _pad0[128/4]; @@ -851,6 +862,7 @@ static struct spu_worker { int ns_to; int ctrl; int decode_pos; + int rvb_addr; unsigned int channels_new; unsigned int channels_on; unsigned int channels_silent; @@ -861,10 +873,11 @@ static struct spu_worker { int start; int loop; int ns_to; + short vol_l; + short vol_r; ADSRInfoEx adsr; - // might want to add vol and fmod flags.. + // might also want to add fmod flags.. } ch[24]; - int RVB[NSSIZE * 2]; int SSumLR[NSSIZE * 2]; } i[4]; } *worker; @@ -874,6 +887,7 @@ static struct spu_worker { static void thread_work_start(void); static void thread_work_wait_sync(struct work_item *work, int force); +static void thread_sync_caches(void); static int thread_get_i_done(void); static int decode_block_work(void *context, int ch, int *SB) @@ -936,6 +950,8 @@ static void queue_channel_work(int ns_to, unsigned int silentch) work->ch[ch].sbpos = s_chan->iSBPos; work->ch[ch].sinc = s_chan->sinc; work->ch[ch].adsr = s_chan->ADSRX; + work->ch[ch].vol_l = s_chan->iLeftVolume; + work->ch[ch].vol_r = s_chan->iRightVolume; work->ch[ch].start = s_chan->pCurr - spu.spuMemC; work->ch[ch].loop = s_chan->pLoop - spu.spuMemC; if (s_chan->prevflags & 1) @@ -952,6 +968,19 @@ static void queue_channel_work(int ns_to, unsigned int silentch) } } + work->rvb_addr = 0; + if (spu.rvb->StartAddr) { + if (spu_config.iUseReverb) { + if (unlikely(spu.rvb->dirty)) + REVERBPrep(); + work->rvb_addr = spu.rvb->CurrAddr; + } + + spu.rvb->CurrAddr += ns_to / 2; + while (spu.rvb->CurrAddr >= 0x40000) + spu.rvb->CurrAddr -= 0x40000 - spu.rvb->StartAddr; + } + worker->i_ready++; thread_work_start(); } @@ -965,7 +994,9 @@ static void do_channel_work(struct work_item *work) SPUCHAN *s_chan; ns_to = work->ns_to; - memset(work->RVB, 0, ns_to * sizeof(work->RVB[0]) * 2); + + if (work->rvb_addr) + memset(RVB, 0, ns_to * sizeof(RVB[0]) * 2); mask = work->channels_new; for (ch = 0; mask != 0; ch++, mask >>= 1) { @@ -1010,12 +1041,15 @@ static void do_channel_work(struct work_item *work) if (s_chan->bFMod == 2) // fmod freq channel memcpy(iFMod, &ChanBuf, ns_to * sizeof(iFMod[0])); - if (s_chan->bRVBActive) + if (s_chan->bRVBActive && work->rvb_addr) mix_chan_rvb(work->SSumLR, ns_to, - s_chan->iLeftVolume, s_chan->iRightVolume, work->RVB); + work->ch[ch].vol_l, work->ch[ch].vol_r, RVB); else - mix_chan(work->SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume); + mix_chan(work->SSumLR, ns_to, work->ch[ch].vol_l, work->ch[ch].vol_r); } + + if (work->rvb_addr) + REVERBDo(work->SSumLR, RVB, ns_to, work->rvb_addr); } static void sync_worker_thread(int force) @@ -1032,13 +1066,15 @@ static void sync_worker_thread(int force) work = &worker->i[worker->i_reaped & WORK_I_MASK]; thread_work_wait_sync(work, force); - do_samples_finish(work->SSumLR, work->RVB, work->ns_to, + do_samples_finish(work->SSumLR, work->ns_to, work->channels_silent, work->decode_pos); worker->i_reaped++; done = thread_get_i_done() - worker->i_reaped; used_space = worker->i_ready - worker->i_reaped; } + if (force) + thread_sync_caches(); } #else @@ -1116,7 +1152,7 @@ void do_samples(unsigned int cycles_to, int do_direct) if (do_direct || worker == NULL || !spu_config.iUseThread) { do_channels(ns_to); - do_samples_finish(spu.SSumLR, spu.RVB, ns_to, silentch, spu.decode_pos); + do_samples_finish(spu.SSumLR, ns_to, silentch, spu.decode_pos); } else { queue_channel_work(ns_to, silentch); @@ -1131,7 +1167,7 @@ void do_samples(unsigned int cycles_to, int do_direct) spu.decode_pos = (spu.decode_pos + ns_to) & 0x1ff; } -static void do_samples_finish(int *SSumLR, int *RVB, int ns_to, +static void do_samples_finish(int *SSumLR, int ns_to, int silentch, int decode_pos) { int volmult = spu_config.iVolume; @@ -1150,17 +1186,8 @@ static void do_samples_finish(int *SSumLR, int *RVB, int ns_to, spu.decode_dirty_ch &= ~(1<<3); } - //---------------------------------------------------// - // mix XA infos (if any) - MixXA(SSumLR, ns_to, decode_pos); - /////////////////////////////////////////////////////// - // mix all channels (including reverb) into one buffer - - if(spu_config.iUseReverb) - REVERBDo(SSumLR, RVB, ns_to); - if((spu.spuCtrl&0x4000)==0) // muted? (rare, don't optimize for this) { memset(spu.pS, 0, ns_to * 2 * sizeof(spu.pS[0])); @@ -1283,12 +1310,9 @@ void ClearWorkingState(void) } // SETUPSTREAMS: init most of the spu buffers -void SetupStreams(void) +static void SetupStreams(void) { - int i; - spu.pSpuBuffer = (unsigned char *)malloc(32768); // alloc mixing buffer - spu.RVB = calloc(NSSIZE * 2, sizeof(spu.RVB[0])); spu.SSumLR = calloc(NSSIZE * 2, sizeof(spu.SSumLR[0])); spu.XAStart = // alloc xa buffer @@ -1303,26 +1327,14 @@ void SetupStreams(void) spu.CDDAPlay = spu.CDDAStart; spu.CDDAFeed = spu.CDDAStart; - for(i=0;i<MAXCHAN;i++) // loop sound channels - { - spu.s_chan[i].ADSRX.SustainLevel = 0xf; // -> init sustain - spu.s_chan[i].ADSRX.SustainIncrease = 1; - spu.s_chan[i].pLoop=spu.spuMemC; - spu.s_chan[i].pCurr=spu.spuMemC; - } - ClearWorkingState(); - - spu.bSpuInit=1; // flag: we are inited } // REMOVESTREAMS: free most buffer -void RemoveStreams(void) +static void RemoveStreams(void) { free(spu.pSpuBuffer); // free mixing buffer spu.pSpuBuffer = NULL; - free(spu.RVB); // free reverb buffer - spu.RVB = NULL; free(spu.SSumLR); spu.SSumLR = NULL; free(spu.XAStart); // free XA buffer @@ -1365,6 +1377,10 @@ static int thread_get_i_done(void) return worker->i_done; } +static void thread_sync_caches(void) +{ +} + static void *spu_worker_thread(void *unused) { struct work_item *work; @@ -1446,11 +1462,13 @@ static void exit_spu_thread(void) // SPUINIT: this func will be called first by the main emu long CALLBACK SPUinit(void) { + int i; + spu.spuMemC = calloc(1, 512 * 1024); - memset((void *)&rvb, 0, sizeof(REVERBInfo)); InitADSR(); spu.s_chan = calloc(MAXCHAN+1, sizeof(spu.s_chan[0])); // channel + 1 infos (1 is security for fmod handling) + spu.rvb = calloc(1, sizeof(REVERBInfo)); spu.SB = calloc(MAXCHAN, sizeof(spu.SB[0]) * SB_SIZE); spu.spuAddr = 0; @@ -1464,6 +1482,16 @@ long CALLBACK SPUinit(void) init_spu_thread(); + for (i = 0; i < MAXCHAN; i++) // loop sound channels + { + spu.s_chan[i].ADSRX.SustainLevel = 0xf; // -> init sustain + spu.s_chan[i].ADSRX.SustainIncrease = 1; + spu.s_chan[i].pLoop = spu.spuMemC; + spu.s_chan[i].pCurr = spu.spuMemC; + } + + spu.bSpuInit=1; // flag: we are inited + return 0; } @@ -1504,6 +1532,8 @@ long CALLBACK SPUshutdown(void) spu.SB = NULL; free(spu.s_chan); spu.s_chan = NULL; + free(spu.rvb); + spu.rvb = NULL; RemoveStreams(); // no more streaming spu.bSpuInit=0; diff --git a/plugins/dfsound/spu_c64x.c b/plugins/dfsound/spu_c64x.c index 200ab38..be10a6b 100644 --- a/plugins/dfsound/spu_c64x.c +++ b/plugins/dfsound/spu_c64x.c @@ -43,6 +43,8 @@ static struct { dsp_mem_region_t region; dsp_component_id_t compid; + unsigned int stale_caches:1; + unsigned int req_sent:1; } f; static void thread_work_start(void) @@ -63,7 +65,7 @@ static void thread_work_start(void) // to start the DSP, dsp_rpc_send() must be used, // but before that, previous request must be finished - if (worker->req_sent) { + if (f.req_sent) { if (worker->boot_cnt == worker->last_boot_cnt) { // hopefully still booting //printf("booting?\n"); @@ -74,7 +76,7 @@ static void thread_work_start(void) if (ret != 0) { fprintf(stderr, "dsp_rpc_recv failed: %d\n", ret); f.dsp_logbuf_print(); - worker->req_sent = 0; + f.req_sent = 0; spu_config.iUseThread = 0; return; } @@ -94,7 +96,7 @@ static void thread_work_start(void) spu_config.iUseThread = 0; return; } - worker->req_sent = 1; + f.req_sent = 1; } static int thread_get_i_done(void) @@ -108,14 +110,13 @@ static void thread_work_wait_sync(struct work_item *work, int force) int limit = 1000; int ns_to; - ns_to = work->ns_to; - f.dsp_cache_inv_virt(work->RVB, sizeof(work->RVB[0]) * 2 * ns_to); - f.dsp_cache_inv_virt(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to); - __builtin_prefetch(work->RVB); - __builtin_prefetch(work->SSumLR); - while (worker->i_done == worker->i_reaped && limit-- > 0) { - if (!worker->active) { + if (!f.req_sent) { + printf("dsp: req not sent?\n"); + break; + } + + if (worker->boot_cnt != worker->last_boot_cnt && !worker->active) { printf("dsp: broken sync\n"); worker->last_boot_cnt = ~0; break; @@ -125,6 +126,13 @@ static void thread_work_wait_sync(struct work_item *work, int force) f.dsp_cache_inv_virt(&worker->i_done, 64); } + ns_to = work->ns_to; + f.dsp_cache_inv_virt(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to); + preload(work->SSumLR); + preload(work->SSumLR + 64/4); + + f.stale_caches = 1; // SB, spuMem + if (limit == 0) printf("dsp: wait timeout\n"); @@ -132,7 +140,7 @@ static void thread_work_wait_sync(struct work_item *work, int force) if (worker->i_reaped != worker->i_done - 1) return; - if (worker->req_sent && (force || worker->i_done == worker->i_ready)) { + if (f.req_sent && (force || worker->i_done == worker->i_ready)) { dsp_msg_t msg; int ret; @@ -142,12 +150,20 @@ static void thread_work_wait_sync(struct work_item *work, int force) f.dsp_logbuf_print(); spu_config.iUseThread = 0; } - worker->req_sent = 0; + f.req_sent = 0; } +} - if (force) { +static void thread_sync_caches(void) +{ + if (f.stale_caches) { f.dsp_cache_inv_virt(spu.SB, sizeof(spu.SB[0]) * SB_SIZE * 24); f.dsp_cache_inv_virt(spu.spuMemC + 0x800, 0x800); + if (spu.rvb->StartAddr) { + int left = 0x40000 - spu.rvb->StartAddr; + f.dsp_cache_inv_virt(spu.spuMem + spu.rvb->StartAddr, left * 2); + } + f.stale_caches = 0; } } @@ -220,9 +236,9 @@ static void init_spu_thread(void) mem->sizeof_region_mem, sizeof(*mem)); goto fail_init; } - if (mem->offsetof_s_chan1 != offsetof(typeof(*mem), s_chan[1])) { + if (mem->offsetof_s_chan1 != offsetof(typeof(*mem), in.s_chan[1])) { fprintf(stderr, "error: size mismatch 2: %d vs %zd\n", - mem->offsetof_s_chan1, offsetof(typeof(*mem), s_chan[1])); + mem->offsetof_s_chan1, offsetof(typeof(*mem), in.s_chan[1])); goto fail_init; } if (mem->offsetof_spos_3_20 != offsetof(typeof(*mem), worker.i[3].ch[20])) { @@ -237,7 +253,9 @@ static void init_spu_thread(void) free(spu.SB); spu.SB = mem->SB; free(spu.s_chan); - spu.s_chan = mem->s_chan; + spu.s_chan = mem->in.s_chan; + free(spu.rvb); + spu.rvb = &mem->in.rvb; worker = &mem->worker; printf("spu: C64x DSP ready (id=%d).\n", (int)f.compid); @@ -267,8 +285,10 @@ static void exit_spu_thread(void) if (worker == NULL) return; - if (worker->req_sent) + if (f.req_sent) { f.dsp_rpc_recv(&msg); + f.req_sent = 0; + } f.dsp_logbuf_print(); f.dsp_shm_free(f.region); @@ -277,6 +297,7 @@ static void exit_spu_thread(void) spu.spuMemC = NULL; spu.SB = NULL; spu.s_chan = NULL; + spu.rvb = NULL; worker = NULL; } diff --git a/plugins/dfsound/spu_c64x.h b/plugins/dfsound/spu_c64x.h index bb20cc3..8210e63 100644 --- a/plugins/dfsound/spu_c64x.h +++ b/plugins/dfsound/spu_c64x.h @@ -11,8 +11,12 @@ struct region_mem { // careful not to lose ARM writes by DSP overwriting // with old data when it's writing out neighbor cachelines int _pad1[128/4 - ((SB_SIZE * 24) & (128/4 - 1))]; - SPUCHAN s_chan[24 + 1]; - int _pad2[128/4 - ((sizeof(SPUCHAN) * 25 / 4) & (128/4 - 1))]; + struct spu_in { + // these are not to be modified by DSP + SPUCHAN s_chan[24 + 1]; + REVERBInfo rvb; + } in; + int _pad2[128/4 - ((sizeof(struct spu_in) / 4) & (128/4 - 1))]; struct spu_worker worker; SPUConfig spu_config; // init/debug diff --git a/plugins/dfsound/spu_c64x_dspcode.c b/plugins/dfsound/spu_c64x_dspcode.c index 97d3028..b0352a9 100644 --- a/plugins/dfsound/spu_c64x_dspcode.c +++ b/plugins/dfsound/spu_c64x_dspcode.c @@ -31,6 +31,7 @@ /* dummy deps, some bloat but avoids ifdef hell in SPU code.. */ static void thread_work_start(void) {} static void thread_work_wait_sync(struct work_item *work, int force) {} +static void thread_sync_caches(void) {} static int thread_get_i_done(void) { return 0; } struct out_driver *out_current; void SetupSound(void) {} @@ -38,7 +39,8 @@ void SetupSound(void) {} static void invalidate_cache(struct work_item *work) { - syscalls.cache_inv(work, offsetof(typeof(*work), RVB), 1); + // see comment in writeout_cache() + //syscalls.cache_inv(work, offsetof(typeof(*work), SSumLR), 1); syscalls.cache_inv(spu.s_chan, sizeof(spu.s_chan[0]) * 24, 0); syscalls.cache_inv(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * work->ns_to, 0); @@ -48,14 +50,16 @@ static void writeout_cache(struct work_item *work) { int ns_to = work->ns_to; - syscalls.cache_wb(work->RVB, sizeof(work->RVB[0]) * 2 * ns_to, 1); syscalls.cache_wb(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to, 1); + // have to invalidate now, otherwise there is a race between + // DSP evicting dirty lines and ARM writing new data to this area + syscalls.cache_inv(work, offsetof(typeof(*work), SSumLR), 0); } static void do_processing(void) { + int left, dirty = 0, had_rvb = 0; struct work_item *work; - int left, dirty = 0; while (worker->active) { @@ -70,6 +74,8 @@ static void do_processing(void) work = &worker->i[worker->i_done & WORK_I_MASK]; invalidate_cache(work); + had_rvb |= work->rvb_addr; + spu.spuCtrl = work->ctrl; do_channel_work(work); writeout_cache(work); @@ -82,6 +88,11 @@ static void do_processing(void) if (dirty) { syscalls.cache_wb(spu.spuMemC + 0x800, 0x800, 1); syscalls.cache_wb(spu.SB, sizeof(spu.SB[0]) * SB_SIZE * 24, 1); + if (had_rvb) { + left = 0x40000 - spu.rvb->StartAddr; + syscalls.cache_wb(spu.spuMem + spu.rvb->StartAddr, left * 2, 1); + had_rvb = 0; + } dirty = 0; continue; } @@ -105,12 +116,13 @@ static unsigned int exec(dsp_component_cmd_t cmd, spu.spuMemC = mem->spu_ram; spu.SB = mem->SB; - spu.s_chan = mem->s_chan; + spu.s_chan = mem->in.s_chan; + spu.rvb = &mem->in.rvb; worker = &mem->worker; memcpy(&spu_config, &mem->spu_config, sizeof(spu_config)); mem->sizeof_region_mem = sizeof(*mem); - mem->offsetof_s_chan1 = offsetof(typeof(*mem), s_chan[1]); + mem->offsetof_s_chan1 = offsetof(typeof(*mem), in.s_chan[1]); mem->offsetof_spos_3_20 = offsetof(typeof(*mem), worker.i[3].ch[20]); // seems to be unneeded, no write-alloc? but just in case.. syscalls.cache_wb(&mem->sizeof_region_mem, 3 * 4, 1); @@ -126,7 +138,9 @@ static unsigned int exec(dsp_component_cmd_t cmd, // c64_tools lib does BCACHE_wbInvAll() when it receives mailbox irq, // but invalidate anyway in case c64_tools is ever fixed.. - syscalls.cache_inv(mem, sizeof(mem->spu_ram) + sizeof(mem->SB), 0); + // XXX edit: don't bother as reverb is not handled, will fix if needed + //syscalls.cache_inv(mem, sizeof(mem->spu_ram) + sizeof(mem->SB), 0); + //syscalls.cache_inv(&mem->in, sizeof(mem->in), 0); break; default: |