about | summary | refs | log | tree | commit | diff
path: root/plugins/dfsound
diff options
context:
space:
mode:
Diffstat (limited to 'plugins/dfsound')
-rw-r--r-- plugins/dfsound/externals.h | 13
-rw-r--r-- plugins/dfsound/registers.c | 78
-rw-r--r-- plugins/dfsound/reverb.c | 147
-rw-r--r-- plugins/dfsound/spu.c | 124
-rw-r--r-- plugins/dfsound/spu_c64x.c | 55
-rw-r--r-- plugins/dfsound/spu_c64x.h | 8
-rw-r--r-- plugins/dfsound/spu_c64x_dspcode.c | 26
7 files changed, 247 insertions, 204 deletions
diff --git a/plugins/dfsound/externals.h b/plugins/dfsound/externals.h
index 4832fac..f6fc440 100644
--- a/plugins/dfsound/externals.h
+++ b/plugins/dfsound/externals.h
@@ -28,6 +28,11 @@
#define noinline
#define unlikely(x) x
#endif
+#if defined(__GNUC__) && !defined(_TMS320C6X)
+#define preload __builtin_prefetch
+#else
+#define preload(...)
+#endif
#define PSE_LT_SPU 4
#define PSE_SPU_ERR_SUCCESS 0
@@ -121,8 +126,6 @@ typedef struct
int VolLeft;
int VolRight;
- int iRVBLeft;
- int iRVBRight;
int FB_SRC_A; // (offset)
int FB_SRC_B; // (offset)
@@ -224,9 +227,10 @@ typedef struct
int iRightXAVol;
SPUCHAN * s_chan;
- int * SB;
+ REVERBInfo * rvb;
- int * RVB;
+ // buffers
+ int * SB;
int * SSumLR;
int pad[29];
@@ -240,7 +244,6 @@ typedef struct
#ifndef _IN_SPU
extern SPUInfo spu;
-extern REVERBInfo rvb;
void do_samples(unsigned int cycles_to, int do_sync);
void schedule_next_irq(void);
diff --git a/plugins/dfsound/registers.c b/plugins/dfsound/registers.c
index 7a49b1c..4588fa7 100644
--- a/plugins/dfsound/registers.c
+++ b/plugins/dfsound/registers.c
@@ -146,14 +146,14 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val,
//-------------------------------------------------//
case H_SPUReverbAddr:
if(val==0xFFFF || val<=0x200)
- {rvb.StartAddr=rvb.CurrAddr=0;}
+ {spu.rvb->StartAddr=spu.rvb->CurrAddr=0;}
else
{
const long iv=(unsigned long)val<<2;
- if(rvb.StartAddr!=iv)
+ if(spu.rvb->StartAddr!=iv)
{
- rvb.StartAddr=(unsigned long)val<<2;
- rvb.CurrAddr=rvb.StartAddr;
+ spu.rvb->StartAddr=(unsigned long)val<<2;
+ spu.rvb->CurrAddr=spu.rvb->StartAddr;
}
}
goto rvbd;
@@ -163,11 +163,11 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val,
goto upd_irq;
//-------------------------------------------------//
case H_SPUrvolL:
- rvb.VolLeft=val;
+ spu.rvb->VolLeft=val;
break;
//-------------------------------------------------//
case H_SPUrvolR:
- rvb.VolRight=val;
+ spu.rvb->VolRight=val;
break;
//-------------------------------------------------//
@@ -246,38 +246,38 @@ void CALLBACK SPUwriteRegister(unsigned long reg, unsigned short val,
ReverbOn(16,24,val);
break;
//-------------------------------------------------//
- case H_Reverb+0 : rvb.FB_SRC_A=val*4; goto rvbd;
- case H_Reverb+2 : rvb.FB_SRC_B=val*4; goto rvbd;
- case H_Reverb+4 : rvb.IIR_ALPHA=(short)val; goto rvbd;
- case H_Reverb+6 : rvb.ACC_COEF_A=(short)val; goto rvbd;
- case H_Reverb+8 : rvb.ACC_COEF_B=(short)val; goto rvbd;
- case H_Reverb+10 : rvb.ACC_COEF_C=(short)val; goto rvbd;
- case H_Reverb+12 : rvb.ACC_COEF_D=(short)val; goto rvbd;
- case H_Reverb+14 : rvb.IIR_COEF=(short)val; goto rvbd;
- case H_Reverb+16 : rvb.FB_ALPHA=(short)val; goto rvbd;
- case H_Reverb+18 : rvb.FB_X=(short)val; goto rvbd;
- case H_Reverb+20 : rvb.IIR_DEST_A0=val*4; goto rvbd;
- case H_Reverb+22 : rvb.IIR_DEST_A1=val*4; goto rvbd;
- case H_Reverb+24 : rvb.ACC_SRC_A0=val*4; goto rvbd;
- case H_Reverb+26 : rvb.ACC_SRC_A1=val*4; goto rvbd;
- case H_Reverb+28 : rvb.ACC_SRC_B0=val*4; goto rvbd;
- case H_Reverb+30 : rvb.ACC_SRC_B1=val*4; goto rvbd;
- case H_Reverb+32 : rvb.IIR_SRC_A0=val*4; goto rvbd;
- case H_Reverb+34 : rvb.IIR_SRC_A1=val*4; goto rvbd;
- case H_Reverb+36 : rvb.IIR_DEST_B0=val*4; goto rvbd;
- case H_Reverb+38 : rvb.IIR_DEST_B1=val*4; goto rvbd;
- case H_Reverb+40 : rvb.ACC_SRC_C0=val*4; goto rvbd;
- case H_Reverb+42 : rvb.ACC_SRC_C1=val*4; goto rvbd;
- case H_Reverb+44 : rvb.ACC_SRC_D0=val*4; goto rvbd;
- case H_Reverb+46 : rvb.ACC_SRC_D1=val*4; goto rvbd;
- case H_Reverb+48 : rvb.IIR_SRC_B1=val*4; goto rvbd;
- case H_Reverb+50 : rvb.IIR_SRC_B0=val*4; goto rvbd;
- case H_Reverb+52 : rvb.MIX_DEST_A0=val*4; goto rvbd;
- case H_Reverb+54 : rvb.MIX_DEST_A1=val*4; goto rvbd;
- case H_Reverb+56 : rvb.MIX_DEST_B0=val*4; goto rvbd;
- case H_Reverb+58 : rvb.MIX_DEST_B1=val*4; goto rvbd;
- case H_Reverb+60 : rvb.IN_COEF_L=(short)val; goto rvbd;
- case H_Reverb+62 : rvb.IN_COEF_R=(short)val; goto rvbd;
+ case H_Reverb+0 : spu.rvb->FB_SRC_A=val*4; goto rvbd;
+ case H_Reverb+2 : spu.rvb->FB_SRC_B=val*4; goto rvbd;
+ case H_Reverb+4 : spu.rvb->IIR_ALPHA=(short)val; goto rvbd;
+ case H_Reverb+6 : spu.rvb->ACC_COEF_A=(short)val; goto rvbd;
+ case H_Reverb+8 : spu.rvb->ACC_COEF_B=(short)val; goto rvbd;
+ case H_Reverb+10 : spu.rvb->ACC_COEF_C=(short)val; goto rvbd;
+ case H_Reverb+12 : spu.rvb->ACC_COEF_D=(short)val; goto rvbd;
+ case H_Reverb+14 : spu.rvb->IIR_COEF=(short)val; goto rvbd;
+ case H_Reverb+16 : spu.rvb->FB_ALPHA=(short)val; goto rvbd;
+ case H_Reverb+18 : spu.rvb->FB_X=(short)val; goto rvbd;
+ case H_Reverb+20 : spu.rvb->IIR_DEST_A0=val*4; goto rvbd;
+ case H_Reverb+22 : spu.rvb->IIR_DEST_A1=val*4; goto rvbd;
+ case H_Reverb+24 : spu.rvb->ACC_SRC_A0=val*4; goto rvbd;
+ case H_Reverb+26 : spu.rvb->ACC_SRC_A1=val*4; goto rvbd;
+ case H_Reverb+28 : spu.rvb->ACC_SRC_B0=val*4; goto rvbd;
+ case H_Reverb+30 : spu.rvb->ACC_SRC_B1=val*4; goto rvbd;
+ case H_Reverb+32 : spu.rvb->IIR_SRC_A0=val*4; goto rvbd;
+ case H_Reverb+34 : spu.rvb->IIR_SRC_A1=val*4; goto rvbd;
+ case H_Reverb+36 : spu.rvb->IIR_DEST_B0=val*4; goto rvbd;
+ case H_Reverb+38 : spu.rvb->IIR_DEST_B1=val*4; goto rvbd;
+ case H_Reverb+40 : spu.rvb->ACC_SRC_C0=val*4; goto rvbd;
+ case H_Reverb+42 : spu.rvb->ACC_SRC_C1=val*4; goto rvbd;
+ case H_Reverb+44 : spu.rvb->ACC_SRC_D0=val*4; goto rvbd;
+ case H_Reverb+46 : spu.rvb->ACC_SRC_D1=val*4; goto rvbd;
+ case H_Reverb+48 : spu.rvb->IIR_SRC_B1=val*4; goto rvbd;
+ case H_Reverb+50 : spu.rvb->IIR_SRC_B0=val*4; goto rvbd;
+ case H_Reverb+52 : spu.rvb->MIX_DEST_A0=val*4; goto rvbd;
+ case H_Reverb+54 : spu.rvb->MIX_DEST_A1=val*4; goto rvbd;
+ case H_Reverb+56 : spu.rvb->MIX_DEST_B0=val*4; goto rvbd;
+ case H_Reverb+58 : spu.rvb->MIX_DEST_B1=val*4; goto rvbd;
+ case H_Reverb+60 : spu.rvb->IN_COEF_L=(short)val; goto rvbd;
+ case H_Reverb+62 : spu.rvb->IN_COEF_R=(short)val; goto rvbd;
}
return;
@@ -287,7 +287,7 @@ upd_irq:
return;
rvbd:
- rvb.dirty = 1; // recalculate on next update
+ spu.rvb->dirty = 1; // recalculate on next update
}
////////////////////////////////////////////////////////////////////////
diff --git a/plugins/dfsound/reverb.c b/plugins/dfsound/reverb.c
index 7e32b8e..2ff6edc 100644
--- a/plugins/dfsound/reverb.c
+++ b/plugins/dfsound/reverb.c
@@ -50,41 +50,40 @@ INLINE int rvb2ram_offs(int curr, int space, int iOff)
// get_buffer content helper: takes care about wraps
#define g_buffer(var) \
- ((int)(signed short)spu.spuMem[rvb2ram_offs(curr_addr, space, rvb.n##var)])
+ ((int)(signed short)spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->n##var)])
// saturate iVal and store it as var
#define s_buffer(var, iVal) \
ssat32_to_16(iVal); \
- spu.spuMem[rvb2ram_offs(curr_addr, space, rvb.n##var)] = iVal
+ spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->n##var)] = iVal
#define s_buffer1(var, iVal) \
ssat32_to_16(iVal); \
- spu.spuMem[rvb2ram_offs(curr_addr, space, rvb.n##var + 1)] = iVal
+ spu.spuMem[rvb2ram_offs(curr_addr, space, rvb->n##var + 1)] = iVal
////////////////////////////////////////////////////////////////////////
// portions based on spu2-x from PCSX2
-static void MixREVERB(int *SSumLR, int *RVB, int ns_to)
+static void MixREVERB(int *SSumLR, int *RVB, int ns_to, int curr_addr)
{
- int l_old = rvb.iRVBLeft;
- int r_old = rvb.iRVBRight;
- int curr_addr = rvb.CurrAddr;
- int space = 0x40000 - rvb.StartAddr;
- int l = 0, r = 0, ns;
+ const REVERBInfo *rvb = spu.rvb;
+ int IIR_ALPHA = rvb->IIR_ALPHA;
+ int IIR_COEF = rvb->IIR_COEF;
+ int space = 0x40000 - rvb->StartAddr;
+ int l, r, ns;
for (ns = 0; ns < ns_to * 2; )
{
- int IIR_ALPHA = rvb.IIR_ALPHA;
int ACC0, ACC1, FB_A0, FB_A1, FB_B0, FB_B1;
int mix_dest_a0, mix_dest_a1, mix_dest_b0, mix_dest_b1;
- int input_L = RVB[ns] * rvb.IN_COEF_L;
- int input_R = RVB[ns+1] * rvb.IN_COEF_R;
+ int input_L = RVB[ns] * rvb->IN_COEF_L;
+ int input_R = RVB[ns+1] * rvb->IN_COEF_R;
- int IIR_INPUT_A0 = ((g_buffer(IIR_SRC_A0) * rvb.IIR_COEF) + input_L) >> 15;
- int IIR_INPUT_A1 = ((g_buffer(IIR_SRC_A1) * rvb.IIR_COEF) + input_R) >> 15;
- int IIR_INPUT_B0 = ((g_buffer(IIR_SRC_B0) * rvb.IIR_COEF) + input_L) >> 15;
- int IIR_INPUT_B1 = ((g_buffer(IIR_SRC_B1) * rvb.IIR_COEF) + input_R) >> 15;
+ int IIR_INPUT_A0 = ((g_buffer(IIR_SRC_A0) * IIR_COEF) + input_L) >> 15;
+ int IIR_INPUT_A1 = ((g_buffer(IIR_SRC_A1) * IIR_COEF) + input_R) >> 15;
+ int IIR_INPUT_B0 = ((g_buffer(IIR_SRC_B0) * IIR_COEF) + input_L) >> 15;
+ int IIR_INPUT_B1 = ((g_buffer(IIR_SRC_B1) * IIR_COEF) + input_R) >> 15;
int iir_dest_a0 = g_buffer(IIR_DEST_A0);
int iir_dest_a1 = g_buffer(IIR_DEST_A1);
@@ -96,30 +95,34 @@ static void MixREVERB(int *SSumLR, int *RVB, int ns_to)
int IIR_B0 = iir_dest_b0 + ((IIR_INPUT_B0 - iir_dest_b0) * IIR_ALPHA >> 15);
int IIR_B1 = iir_dest_b1 + ((IIR_INPUT_B1 - iir_dest_b1) * IIR_ALPHA >> 15);
+ preload(SSumLR + ns + 64*2/4 - 4);
+
s_buffer1(IIR_DEST_A0, IIR_A0);
s_buffer1(IIR_DEST_A1, IIR_A1);
s_buffer1(IIR_DEST_B0, IIR_B0);
s_buffer1(IIR_DEST_B1, IIR_B1);
- ACC0 = (g_buffer(ACC_SRC_A0) * rvb.ACC_COEF_A +
- g_buffer(ACC_SRC_B0) * rvb.ACC_COEF_B +
- g_buffer(ACC_SRC_C0) * rvb.ACC_COEF_C +
- g_buffer(ACC_SRC_D0) * rvb.ACC_COEF_D) >> 15;
- ACC1 = (g_buffer(ACC_SRC_A1) * rvb.ACC_COEF_A +
- g_buffer(ACC_SRC_B1) * rvb.ACC_COEF_B +
- g_buffer(ACC_SRC_C1) * rvb.ACC_COEF_C +
- g_buffer(ACC_SRC_D1) * rvb.ACC_COEF_D) >> 15;
+ preload(RVB + ns + 64*2/4 - 4);
+
+ ACC0 = (g_buffer(ACC_SRC_A0) * rvb->ACC_COEF_A +
+ g_buffer(ACC_SRC_B0) * rvb->ACC_COEF_B +
+ g_buffer(ACC_SRC_C0) * rvb->ACC_COEF_C +
+ g_buffer(ACC_SRC_D0) * rvb->ACC_COEF_D) >> 15;
+ ACC1 = (g_buffer(ACC_SRC_A1) * rvb->ACC_COEF_A +
+ g_buffer(ACC_SRC_B1) * rvb->ACC_COEF_B +
+ g_buffer(ACC_SRC_C1) * rvb->ACC_COEF_C +
+ g_buffer(ACC_SRC_D1) * rvb->ACC_COEF_D) >> 15;
FB_A0 = g_buffer(FB_SRC_A0);
FB_A1 = g_buffer(FB_SRC_A1);
FB_B0 = g_buffer(FB_SRC_B0);
FB_B1 = g_buffer(FB_SRC_B1);
- mix_dest_a0 = ACC0 - ((FB_A0 * rvb.FB_ALPHA) >> 15);
- mix_dest_a1 = ACC1 - ((FB_A1 * rvb.FB_ALPHA) >> 15);
+ mix_dest_a0 = ACC0 - ((FB_A0 * rvb->FB_ALPHA) >> 15);
+ mix_dest_a1 = ACC1 - ((FB_A1 * rvb->FB_ALPHA) >> 15);
- mix_dest_b0 = FB_A0 + (((ACC0 - FB_A0) * rvb.FB_ALPHA - FB_B0 * rvb.FB_X) >> 15);
- mix_dest_b1 = FB_A1 + (((ACC1 - FB_A1) * rvb.FB_ALPHA - FB_B1 * rvb.FB_X) >> 15);
+ mix_dest_b0 = FB_A0 + (((ACC0 - FB_A0) * rvb->FB_ALPHA - FB_B0 * rvb->FB_X) >> 15);
+ mix_dest_b1 = FB_A1 + (((ACC1 - FB_A1) * rvb->FB_ALPHA - FB_B1 * rvb->FB_X) >> 15);
s_buffer(MIX_DEST_A0, mix_dest_a0);
s_buffer(MIX_DEST_A1, mix_dest_a1);
@@ -129,73 +132,60 @@ static void MixREVERB(int *SSumLR, int *RVB, int ns_to)
l = (mix_dest_a0 + mix_dest_b0) / 2;
r = (mix_dest_a1 + mix_dest_b1) / 2;
- l = (l * rvb.VolLeft) >> 15; // 15?
- r = (r * rvb.VolRight) >> 15;
+ l = (l * rvb->VolLeft) >> 15; // 15?
+ r = (r * rvb->VolRight) >> 15;
- SSumLR[ns++] += (l + l_old) / 2;
- SSumLR[ns++] += (r + r_old) / 2;
SSumLR[ns++] += l;
SSumLR[ns++] += r;
-
- l_old = l;
- r_old = r;
+ SSumLR[ns++] += l;
+ SSumLR[ns++] += r;
curr_addr++;
- if (curr_addr >= 0x40000) curr_addr = rvb.StartAddr;
+ if (curr_addr >= 0x40000) curr_addr = rvb->StartAddr;
}
-
- rvb.iRVBLeft = l;
- rvb.iRVBRight = r;
- rvb.CurrAddr = curr_addr;
}
-static void MixREVERB_off(int *SSumLR, int ns_to)
+static void MixREVERB_off(int *SSumLR, int ns_to, int curr_addr)
{
- int l_old = rvb.iRVBLeft;
- int r_old = rvb.iRVBRight;
- int curr_addr = rvb.CurrAddr;
- int space = 0x40000 - rvb.StartAddr;
- int l = 0, r = 0, ns;
+ const REVERBInfo *rvb = spu.rvb;
+ int space = 0x40000 - rvb->StartAddr;
+ int l, r, ns;
for (ns = 0; ns < ns_to * 2; )
{
+ preload(SSumLR + ns + 64*2/4 - 4);
+
l = (g_buffer(MIX_DEST_A0) + g_buffer(MIX_DEST_B0)) / 2;
r = (g_buffer(MIX_DEST_A1) + g_buffer(MIX_DEST_B1)) / 2;
- l = (l * rvb.VolLeft) >> 15;
- r = (r * rvb.VolRight) >> 15;
+ l = (l * rvb->VolLeft) >> 15;
+ r = (r * rvb->VolRight) >> 15;
- SSumLR[ns++] += (l + l_old) / 2;
- SSumLR[ns++] += (r + r_old) / 2;
SSumLR[ns++] += l;
SSumLR[ns++] += r;
-
- l_old = l;
- r_old = r;
+ SSumLR[ns++] += l;
+ SSumLR[ns++] += r;
curr_addr++;
- if (curr_addr >= 0x40000) curr_addr = rvb.StartAddr;
+ if (curr_addr >= 0x40000) curr_addr = rvb->StartAddr;
}
-
- rvb.iRVBLeft = l;
- rvb.iRVBRight = r;
- rvb.CurrAddr = curr_addr;
}
-static void prepare_offsets(void)
+static void REVERBPrep(void)
{
- int space = 0x40000 - rvb.StartAddr;
+ REVERBInfo *rvb = spu.rvb;
+ int space = 0x40000 - rvb->StartAddr;
int t;
#define prep_offs(v) \
- t = rvb.v; \
+ t = rvb->v; \
while (t >= space) \
t -= space; \
- rvb.n##v = t
+ rvb->n##v = t
#define prep_offs2(d, v1, v2) \
- t = rvb.v1 - rvb.v2; \
+ t = rvb->v1 - rvb->v2; \
while (t >= space) \
t -= space; \
- rvb.n##d = t
+ rvb->n##d = t
prep_offs(IIR_SRC_A0);
prep_offs(IIR_SRC_A1);
@@ -224,37 +214,18 @@ static void prepare_offsets(void)
#undef prep_offs
#undef prep_offs2
- rvb.dirty = 0;
+ rvb->dirty = 0;
}
-INLINE void REVERBDo(int *SSumLR, int *RVB, int ns_to)
+INLINE void REVERBDo(int *SSumLR, int *RVB, int ns_to, int curr_addr)
{
- if (!rvb.StartAddr) // reverb is off
- {
- rvb.iRVBLeft = rvb.iRVBRight = 0;
- return;
- }
-
if (spu.spuCtrl & 0x80) // -> reverb on? oki
{
- if (unlikely(rvb.dirty))
- prepare_offsets();
-
- MixREVERB(SSumLR, RVB, ns_to);
- }
- else if (rvb.VolLeft || rvb.VolRight)
- {
- if (unlikely(rvb.dirty))
- prepare_offsets();
-
- MixREVERB_off(SSumLR, ns_to);
+ MixREVERB(SSumLR, RVB, ns_to, curr_addr);
}
- else // -> reverb off
+ else if (spu.rvb->VolLeft || spu.rvb->VolRight)
{
- // reverb runs anyway
- rvb.CurrAddr += ns_to / 2;
- while (rvb.CurrAddr >= 0x40000)
- rvb.CurrAddr -= 0x40000 - rvb.StartAddr;
+ MixREVERB_off(SSumLR, ns_to, curr_addr);
}
}
diff --git a/plugins/dfsound/spu.c b/plugins/dfsound/spu.c
index ec31b0c..8681d35 100644
--- a/plugins/dfsound/spu.c
+++ b/plugins/dfsound/spu.c
@@ -73,11 +73,8 @@ static char * libraryInfo = N_("P.E.Op.S. Sound Driver V1.7\nCoded by Pete B
SPUInfo spu;
SPUConfig spu_config;
-// MAIN infos struct for each channel
-
-REVERBInfo rvb;
-
static int iFMod[NSSIZE];
+static int RVB[NSSIZE * 2];
int ChanBuf[NSSIZE];
#define CDDA_BUFFER_SIZE (16384 * sizeof(uint32_t)) // must be power of 2
@@ -763,11 +760,13 @@ static void do_silent_chans(int ns_to, int silentch)
static void do_channels(int ns_to)
{
unsigned int mask;
+ int do_rvb, ch, d;
SPUCHAN *s_chan;
int *SB, sinc;
- int ch, d;
- memset(spu.RVB, 0, ns_to * sizeof(spu.RVB[0]) * 2);
+ do_rvb = spu.rvb->StartAddr && spu_config.iUseReverb;
+ if (do_rvb)
+ memset(RVB, 0, ns_to * sizeof(RVB[0]) * 2);
mask = spu.dwNewChannel & 0xffffff;
for (ch = 0; mask != 0; ch++, mask >>= 1) {
@@ -812,14 +811,27 @@ static void do_channels(int ns_to)
if (s_chan->bFMod == 2) // fmod freq channel
memcpy(iFMod, &ChanBuf, ns_to * sizeof(iFMod[0]));
- if (s_chan->bRVBActive)
- mix_chan_rvb(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume, spu.RVB);
+ if (s_chan->bRVBActive && do_rvb)
+ mix_chan_rvb(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume, RVB);
else
mix_chan(spu.SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume);
}
+
+ if (spu.rvb->StartAddr) {
+ if (do_rvb) {
+ if (unlikely(spu.rvb->dirty))
+ REVERBPrep();
+
+ REVERBDo(spu.SSumLR, RVB, ns_to, spu.rvb->CurrAddr);
+ }
+
+ spu.rvb->CurrAddr += ns_to / 2;
+ while (spu.rvb->CurrAddr >= 0x40000)
+ spu.rvb->CurrAddr -= 0x40000 - spu.rvb->StartAddr;
+ }
}
-static void do_samples_finish(int *SSumLR, int *RVB, int ns_to,
+static void do_samples_finish(int *SSumLR, int ns_to,
int silentch, int decode_pos);
// optional worker thread handling
@@ -833,8 +845,7 @@ static struct spu_worker {
unsigned int exit_thread;
unsigned int i_ready;
unsigned int i_reaped;
- unsigned int req_sent; // dsp
- unsigned int last_boot_cnt;
+ unsigned int last_boot_cnt; // dsp
};
// aligning for C64X_DSP
unsigned int _pad0[128/4];
@@ -851,6 +862,7 @@ static struct spu_worker {
int ns_to;
int ctrl;
int decode_pos;
+ int rvb_addr;
unsigned int channels_new;
unsigned int channels_on;
unsigned int channels_silent;
@@ -861,10 +873,11 @@ static struct spu_worker {
int start;
int loop;
int ns_to;
+ short vol_l;
+ short vol_r;
ADSRInfoEx adsr;
- // might want to add vol and fmod flags..
+ // might also want to add fmod flags..
} ch[24];
- int RVB[NSSIZE * 2];
int SSumLR[NSSIZE * 2];
} i[4];
} *worker;
@@ -874,6 +887,7 @@ static struct spu_worker {
static void thread_work_start(void);
static void thread_work_wait_sync(struct work_item *work, int force);
+static void thread_sync_caches(void);
static int thread_get_i_done(void);
static int decode_block_work(void *context, int ch, int *SB)
@@ -936,6 +950,8 @@ static void queue_channel_work(int ns_to, unsigned int silentch)
work->ch[ch].sbpos = s_chan->iSBPos;
work->ch[ch].sinc = s_chan->sinc;
work->ch[ch].adsr = s_chan->ADSRX;
+ work->ch[ch].vol_l = s_chan->iLeftVolume;
+ work->ch[ch].vol_r = s_chan->iRightVolume;
work->ch[ch].start = s_chan->pCurr - spu.spuMemC;
work->ch[ch].loop = s_chan->pLoop - spu.spuMemC;
if (s_chan->prevflags & 1)
@@ -952,6 +968,19 @@ static void queue_channel_work(int ns_to, unsigned int silentch)
}
}
+ work->rvb_addr = 0;
+ if (spu.rvb->StartAddr) {
+ if (spu_config.iUseReverb) {
+ if (unlikely(spu.rvb->dirty))
+ REVERBPrep();
+ work->rvb_addr = spu.rvb->CurrAddr;
+ }
+
+ spu.rvb->CurrAddr += ns_to / 2;
+ while (spu.rvb->CurrAddr >= 0x40000)
+ spu.rvb->CurrAddr -= 0x40000 - spu.rvb->StartAddr;
+ }
+
worker->i_ready++;
thread_work_start();
}
@@ -965,7 +994,9 @@ static void do_channel_work(struct work_item *work)
SPUCHAN *s_chan;
ns_to = work->ns_to;
- memset(work->RVB, 0, ns_to * sizeof(work->RVB[0]) * 2);
+
+ if (work->rvb_addr)
+ memset(RVB, 0, ns_to * sizeof(RVB[0]) * 2);
mask = work->channels_new;
for (ch = 0; mask != 0; ch++, mask >>= 1) {
@@ -1010,12 +1041,15 @@ static void do_channel_work(struct work_item *work)
if (s_chan->bFMod == 2) // fmod freq channel
memcpy(iFMod, &ChanBuf, ns_to * sizeof(iFMod[0]));
- if (s_chan->bRVBActive)
+ if (s_chan->bRVBActive && work->rvb_addr)
mix_chan_rvb(work->SSumLR, ns_to,
- s_chan->iLeftVolume, s_chan->iRightVolume, work->RVB);
+ work->ch[ch].vol_l, work->ch[ch].vol_r, RVB);
else
- mix_chan(work->SSumLR, ns_to, s_chan->iLeftVolume, s_chan->iRightVolume);
+ mix_chan(work->SSumLR, ns_to, work->ch[ch].vol_l, work->ch[ch].vol_r);
}
+
+ if (work->rvb_addr)
+ REVERBDo(work->SSumLR, RVB, ns_to, work->rvb_addr);
}
static void sync_worker_thread(int force)
@@ -1032,13 +1066,15 @@ static void sync_worker_thread(int force)
work = &worker->i[worker->i_reaped & WORK_I_MASK];
thread_work_wait_sync(work, force);
- do_samples_finish(work->SSumLR, work->RVB, work->ns_to,
+ do_samples_finish(work->SSumLR, work->ns_to,
work->channels_silent, work->decode_pos);
worker->i_reaped++;
done = thread_get_i_done() - worker->i_reaped;
used_space = worker->i_ready - worker->i_reaped;
}
+ if (force)
+ thread_sync_caches();
}
#else
@@ -1116,7 +1152,7 @@ void do_samples(unsigned int cycles_to, int do_direct)
if (do_direct || worker == NULL || !spu_config.iUseThread) {
do_channels(ns_to);
- do_samples_finish(spu.SSumLR, spu.RVB, ns_to, silentch, spu.decode_pos);
+ do_samples_finish(spu.SSumLR, ns_to, silentch, spu.decode_pos);
}
else {
queue_channel_work(ns_to, silentch);
@@ -1131,7 +1167,7 @@ void do_samples(unsigned int cycles_to, int do_direct)
spu.decode_pos = (spu.decode_pos + ns_to) & 0x1ff;
}
-static void do_samples_finish(int *SSumLR, int *RVB, int ns_to,
+static void do_samples_finish(int *SSumLR, int ns_to,
int silentch, int decode_pos)
{
int volmult = spu_config.iVolume;
@@ -1150,17 +1186,8 @@ static void do_samples_finish(int *SSumLR, int *RVB, int ns_to,
spu.decode_dirty_ch &= ~(1<<3);
}
- //---------------------------------------------------//
- // mix XA infos (if any)
-
MixXA(SSumLR, ns_to, decode_pos);
- ///////////////////////////////////////////////////////
- // mix all channels (including reverb) into one buffer
-
- if(spu_config.iUseReverb)
- REVERBDo(SSumLR, RVB, ns_to);
-
if((spu.spuCtrl&0x4000)==0) // muted? (rare, don't optimize for this)
{
memset(spu.pS, 0, ns_to * 2 * sizeof(spu.pS[0]));
@@ -1283,12 +1310,9 @@ void ClearWorkingState(void)
}
// SETUPSTREAMS: init most of the spu buffers
-void SetupStreams(void)
+static void SetupStreams(void)
{
- int i;
-
spu.pSpuBuffer = (unsigned char *)malloc(32768); // alloc mixing buffer
- spu.RVB = calloc(NSSIZE * 2, sizeof(spu.RVB[0]));
spu.SSumLR = calloc(NSSIZE * 2, sizeof(spu.SSumLR[0]));
spu.XAStart = // alloc xa buffer
@@ -1303,26 +1327,14 @@ void SetupStreams(void)
spu.CDDAPlay = spu.CDDAStart;
spu.CDDAFeed = spu.CDDAStart;
- for(i=0;i<MAXCHAN;i++) // loop sound channels
- {
- spu.s_chan[i].ADSRX.SustainLevel = 0xf; // -> init sustain
- spu.s_chan[i].ADSRX.SustainIncrease = 1;
- spu.s_chan[i].pLoop=spu.spuMemC;
- spu.s_chan[i].pCurr=spu.spuMemC;
- }
-
ClearWorkingState();
-
- spu.bSpuInit=1; // flag: we are inited
}
// REMOVESTREAMS: free most buffer
-void RemoveStreams(void)
+static void RemoveStreams(void)
{
free(spu.pSpuBuffer); // free mixing buffer
spu.pSpuBuffer = NULL;
- free(spu.RVB); // free reverb buffer
- spu.RVB = NULL;
free(spu.SSumLR);
spu.SSumLR = NULL;
free(spu.XAStart); // free XA buffer
@@ -1365,6 +1377,10 @@ static int thread_get_i_done(void)
return worker->i_done;
}
+static void thread_sync_caches(void)
+{
+}
+
static void *spu_worker_thread(void *unused)
{
struct work_item *work;
@@ -1446,11 +1462,13 @@ static void exit_spu_thread(void)
// SPUINIT: this func will be called first by the main emu
long CALLBACK SPUinit(void)
{
+ int i;
+
spu.spuMemC = calloc(1, 512 * 1024);
- memset((void *)&rvb, 0, sizeof(REVERBInfo));
InitADSR();
spu.s_chan = calloc(MAXCHAN+1, sizeof(spu.s_chan[0])); // channel + 1 infos (1 is security for fmod handling)
+ spu.rvb = calloc(1, sizeof(REVERBInfo));
spu.SB = calloc(MAXCHAN, sizeof(spu.SB[0]) * SB_SIZE);
spu.spuAddr = 0;
@@ -1464,6 +1482,16 @@ long CALLBACK SPUinit(void)
init_spu_thread();
+ for (i = 0; i < MAXCHAN; i++) // loop sound channels
+ {
+ spu.s_chan[i].ADSRX.SustainLevel = 0xf; // -> init sustain
+ spu.s_chan[i].ADSRX.SustainIncrease = 1;
+ spu.s_chan[i].pLoop = spu.spuMemC;
+ spu.s_chan[i].pCurr = spu.spuMemC;
+ }
+
+ spu.bSpuInit=1; // flag: we are inited
+
return 0;
}
@@ -1504,6 +1532,8 @@ long CALLBACK SPUshutdown(void)
spu.SB = NULL;
free(spu.s_chan);
spu.s_chan = NULL;
+ free(spu.rvb);
+ spu.rvb = NULL;
RemoveStreams(); // no more streaming
spu.bSpuInit=0;
diff --git a/plugins/dfsound/spu_c64x.c b/plugins/dfsound/spu_c64x.c
index 200ab38..be10a6b 100644
--- a/plugins/dfsound/spu_c64x.c
+++ b/plugins/dfsound/spu_c64x.c
@@ -43,6 +43,8 @@ static struct {
dsp_mem_region_t region;
dsp_component_id_t compid;
+ unsigned int stale_caches:1;
+ unsigned int req_sent:1;
} f;
static void thread_work_start(void)
@@ -63,7 +65,7 @@ static void thread_work_start(void)
// to start the DSP, dsp_rpc_send() must be used,
// but before that, previous request must be finished
- if (worker->req_sent) {
+ if (f.req_sent) {
if (worker->boot_cnt == worker->last_boot_cnt) {
// hopefully still booting
//printf("booting?\n");
@@ -74,7 +76,7 @@ static void thread_work_start(void)
if (ret != 0) {
fprintf(stderr, "dsp_rpc_recv failed: %d\n", ret);
f.dsp_logbuf_print();
- worker->req_sent = 0;
+ f.req_sent = 0;
spu_config.iUseThread = 0;
return;
}
@@ -94,7 +96,7 @@ static void thread_work_start(void)
spu_config.iUseThread = 0;
return;
}
- worker->req_sent = 1;
+ f.req_sent = 1;
}
static int thread_get_i_done(void)
@@ -108,14 +110,13 @@ static void thread_work_wait_sync(struct work_item *work, int force)
int limit = 1000;
int ns_to;
- ns_to = work->ns_to;
- f.dsp_cache_inv_virt(work->RVB, sizeof(work->RVB[0]) * 2 * ns_to);
- f.dsp_cache_inv_virt(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to);
- __builtin_prefetch(work->RVB);
- __builtin_prefetch(work->SSumLR);
-
while (worker->i_done == worker->i_reaped && limit-- > 0) {
- if (!worker->active) {
+ if (!f.req_sent) {
+ printf("dsp: req not sent?\n");
+ break;
+ }
+
+ if (worker->boot_cnt != worker->last_boot_cnt && !worker->active) {
printf("dsp: broken sync\n");
worker->last_boot_cnt = ~0;
break;
@@ -125,6 +126,13 @@ static void thread_work_wait_sync(struct work_item *work, int force)
f.dsp_cache_inv_virt(&worker->i_done, 64);
}
+ ns_to = work->ns_to;
+ f.dsp_cache_inv_virt(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to);
+ preload(work->SSumLR);
+ preload(work->SSumLR + 64/4);
+
+ f.stale_caches = 1; // SB, spuMem
+
if (limit == 0)
printf("dsp: wait timeout\n");
@@ -132,7 +140,7 @@ static void thread_work_wait_sync(struct work_item *work, int force)
if (worker->i_reaped != worker->i_done - 1)
return;
- if (worker->req_sent && (force || worker->i_done == worker->i_ready)) {
+ if (f.req_sent && (force || worker->i_done == worker->i_ready)) {
dsp_msg_t msg;
int ret;
@@ -142,12 +150,20 @@ static void thread_work_wait_sync(struct work_item *work, int force)
f.dsp_logbuf_print();
spu_config.iUseThread = 0;
}
- worker->req_sent = 0;
+ f.req_sent = 0;
}
+}
- if (force) {
+static void thread_sync_caches(void)
+{
+ if (f.stale_caches) {
f.dsp_cache_inv_virt(spu.SB, sizeof(spu.SB[0]) * SB_SIZE * 24);
f.dsp_cache_inv_virt(spu.spuMemC + 0x800, 0x800);
+ if (spu.rvb->StartAddr) {
+ int left = 0x40000 - spu.rvb->StartAddr;
+ f.dsp_cache_inv_virt(spu.spuMem + spu.rvb->StartAddr, left * 2);
+ }
+ f.stale_caches = 0;
}
}
@@ -220,9 +236,9 @@ static void init_spu_thread(void)
mem->sizeof_region_mem, sizeof(*mem));
goto fail_init;
}
- if (mem->offsetof_s_chan1 != offsetof(typeof(*mem), s_chan[1])) {
+ if (mem->offsetof_s_chan1 != offsetof(typeof(*mem), in.s_chan[1])) {
fprintf(stderr, "error: size mismatch 2: %d vs %zd\n",
- mem->offsetof_s_chan1, offsetof(typeof(*mem), s_chan[1]));
+ mem->offsetof_s_chan1, offsetof(typeof(*mem), in.s_chan[1]));
goto fail_init;
}
if (mem->offsetof_spos_3_20 != offsetof(typeof(*mem), worker.i[3].ch[20])) {
@@ -237,7 +253,9 @@ static void init_spu_thread(void)
free(spu.SB);
spu.SB = mem->SB;
free(spu.s_chan);
- spu.s_chan = mem->s_chan;
+ spu.s_chan = mem->in.s_chan;
+ free(spu.rvb);
+ spu.rvb = &mem->in.rvb;
worker = &mem->worker;
printf("spu: C64x DSP ready (id=%d).\n", (int)f.compid);
@@ -267,8 +285,10 @@ static void exit_spu_thread(void)
if (worker == NULL)
return;
- if (worker->req_sent)
+ if (f.req_sent) {
f.dsp_rpc_recv(&msg);
+ f.req_sent = 0;
+ }
f.dsp_logbuf_print();
f.dsp_shm_free(f.region);
@@ -277,6 +297,7 @@ static void exit_spu_thread(void)
spu.spuMemC = NULL;
spu.SB = NULL;
spu.s_chan = NULL;
+ spu.rvb = NULL;
worker = NULL;
}
diff --git a/plugins/dfsound/spu_c64x.h b/plugins/dfsound/spu_c64x.h
index bb20cc3..8210e63 100644
--- a/plugins/dfsound/spu_c64x.h
+++ b/plugins/dfsound/spu_c64x.h
@@ -11,8 +11,12 @@ struct region_mem {
// careful not to lose ARM writes by DSP overwriting
// with old data when it's writing out neighbor cachelines
int _pad1[128/4 - ((SB_SIZE * 24) & (128/4 - 1))];
- SPUCHAN s_chan[24 + 1];
- int _pad2[128/4 - ((sizeof(SPUCHAN) * 25 / 4) & (128/4 - 1))];
+ struct spu_in {
+ // these are not to be modified by DSP
+ SPUCHAN s_chan[24 + 1];
+ REVERBInfo rvb;
+ } in;
+ int _pad2[128/4 - ((sizeof(struct spu_in) / 4) & (128/4 - 1))];
struct spu_worker worker;
SPUConfig spu_config;
// init/debug
diff --git a/plugins/dfsound/spu_c64x_dspcode.c b/plugins/dfsound/spu_c64x_dspcode.c
index 97d3028..b0352a9 100644
--- a/plugins/dfsound/spu_c64x_dspcode.c
+++ b/plugins/dfsound/spu_c64x_dspcode.c
@@ -31,6 +31,7 @@
/* dummy deps, some bloat but avoids ifdef hell in SPU code.. */
static void thread_work_start(void) {}
static void thread_work_wait_sync(struct work_item *work, int force) {}
+static void thread_sync_caches(void) {}
static int thread_get_i_done(void) { return 0; }
struct out_driver *out_current;
void SetupSound(void) {}
@@ -38,7 +39,8 @@ void SetupSound(void) {}
static void invalidate_cache(struct work_item *work)
{
- syscalls.cache_inv(work, offsetof(typeof(*work), RVB), 1);
+ // see comment in writeout_cache()
+ //syscalls.cache_inv(work, offsetof(typeof(*work), SSumLR), 1);
syscalls.cache_inv(spu.s_chan, sizeof(spu.s_chan[0]) * 24, 0);
syscalls.cache_inv(work->SSumLR,
sizeof(work->SSumLR[0]) * 2 * work->ns_to, 0);
@@ -48,14 +50,16 @@ static void writeout_cache(struct work_item *work)
{
int ns_to = work->ns_to;
- syscalls.cache_wb(work->RVB, sizeof(work->RVB[0]) * 2 * ns_to, 1);
syscalls.cache_wb(work->SSumLR, sizeof(work->SSumLR[0]) * 2 * ns_to, 1);
+ // have to invalidate now, otherwise there is a race between
+ // DSP evicting dirty lines and ARM writing new data to this area
+ syscalls.cache_inv(work, offsetof(typeof(*work), SSumLR), 0);
}
static void do_processing(void)
{
+ int left, dirty = 0, had_rvb = 0;
struct work_item *work;
- int left, dirty = 0;
while (worker->active)
{
@@ -70,6 +74,8 @@ static void do_processing(void)
work = &worker->i[worker->i_done & WORK_I_MASK];
invalidate_cache(work);
+ had_rvb |= work->rvb_addr;
+ spu.spuCtrl = work->ctrl;
do_channel_work(work);
writeout_cache(work);
@@ -82,6 +88,11 @@ static void do_processing(void)
if (dirty) {
syscalls.cache_wb(spu.spuMemC + 0x800, 0x800, 1);
syscalls.cache_wb(spu.SB, sizeof(spu.SB[0]) * SB_SIZE * 24, 1);
+ if (had_rvb) {
+ left = 0x40000 - spu.rvb->StartAddr;
+ syscalls.cache_wb(spu.spuMem + spu.rvb->StartAddr, left * 2, 1);
+ had_rvb = 0;
+ }
dirty = 0;
continue;
}
@@ -105,12 +116,13 @@ static unsigned int exec(dsp_component_cmd_t cmd,
spu.spuMemC = mem->spu_ram;
spu.SB = mem->SB;
- spu.s_chan = mem->s_chan;
+ spu.s_chan = mem->in.s_chan;
+ spu.rvb = &mem->in.rvb;
worker = &mem->worker;
memcpy(&spu_config, &mem->spu_config, sizeof(spu_config));
mem->sizeof_region_mem = sizeof(*mem);
- mem->offsetof_s_chan1 = offsetof(typeof(*mem), s_chan[1]);
+ mem->offsetof_s_chan1 = offsetof(typeof(*mem), in.s_chan[1]);
mem->offsetof_spos_3_20 = offsetof(typeof(*mem), worker.i[3].ch[20]);
// seems to be unneeded, no write-alloc? but just in case..
syscalls.cache_wb(&mem->sizeof_region_mem, 3 * 4, 1);
@@ -126,7 +138,9 @@ static unsigned int exec(dsp_component_cmd_t cmd,
// c64_tools lib does BCACHE_wbInvAll() when it receives mailbox irq,
// but invalidate anyway in case c64_tools is ever fixed..
- syscalls.cache_inv(mem, sizeof(mem->spu_ram) + sizeof(mem->SB), 0);
+ // XXX edit: don't bother as reverb is not handled, will fix if needed
+ //syscalls.cache_inv(mem, sizeof(mem->spu_ram) + sizeof(mem->SB), 0);
+ //syscalls.cache_inv(&mem->in, sizeof(mem->in), 0);
break;
default: