diff -uNr a/m/MANIFEST.TXT b/m/MANIFEST.TXT --- a/m/MANIFEST.TXT b93a4289f9decd7fc48508715afe4f4237a1edd72646fef53ebab319d50f16adf546473b023c657c5c6d16a47cf3dfa1ecfdee1f00e5796126b8a6c727292904 +++ b/m/MANIFEST.TXT d9f1b9e441a1f4e5d314d5c5f43319591ca03904b3ac36e1ede4d7310b5ff30d2e0c720695f95733f1ac3e2e829fa96700b6e8776acc974cfd30c6994edaf4b6 @@ -1,3 +1,4 @@ 586606 m_genesis "Genesis." 586747 errata_slaveirq "Fix of slave IRQ clearing." 586983 tlb_and_exc_speedup "Exc. handler fastpaths and TLB caching." +587480 simd_tlb_lookup "Experimental variant with SIMDistic TLB." diff -uNr a/m/cpustate.asm b/m/cpustate.asm --- a/m/cpustate.asm 1427e0c04c15c733aed037b1a56252c2f7f8ee7960171ab548b6ce1244320ea1e9b4aa00045d94a4478aad0e605e9206aa9e8b92581be252b3e08c893ed372fe +++ b/m/cpustate.asm 1a9cf2b89dff48898abb798d358e65ae567cdb25ca0521f248d4bb1cb388c604f6d9e17b61b25f6f1822ae4304db1ee0059db15b2c36858aca08ec46653cc0a4 @@ -18,7 +18,7 @@ ;----------------------------------------------------------------------------- ; MIPS Processor State. -; Note: PC, nPC, CP0_Status, CP0_Cause, CP0_Compare, are housed in x86 regs. +; Note: PC, nPC, CP0_Status, CP0_Cause, are housed in x86 regs. struc MCPU .Regs resd 32 ; The standard MIPS Register Set .LO resd 1 ; Multiplication/division results - Low Half @@ -33,20 +33,16 @@ .CP0_BadVAddr resd 1 ; Addr. 
of most recent addr.-caused exception .CP0_ErrorEpc resd 1 ; Program counter at last exception .CP0_PageMask resd 1 ; Control variable page sizes in TLB entries - ;; The TLB: - .TLB_Entries resd TLB_ENTRIES_COUNT ; TLB entries (without PFN) - .TLB_PFN_Even resd TLB_ENTRIES_COUNT ; TLB PFN0 - .TLB_PFN_Odd resd TLB_ENTRIES_COUNT ; TLB PFN1 + .CP0_Compare resd 1 ; Timer interrupt control + .TLB_PFN resq TLB_ENTRIES_COUNT ; TLB PFN : |ODD|EVEN| endstruc ;----------------------------------------------------------------------------- ;----------------------------------------------------------------------------- ; Refer to the N-th TLB Entry: ;----------------------------------------------------------------------------- -%define TLB_E(N) dword [M_Base_32 + MCPU.TLB_Entries + 4 * (N)] ; N-th PFN : -%define TLB_PFN_E(N) dword [M_Base_32 + MCPU.TLB_PFN_Even + 4 * (N)] -%define TLB_PFN_O(N) dword [M_Base_32 + MCPU.TLB_PFN_Odd + 4 * (N)] +%define TLB_PFN(N) qword [M_Base_64 + MCPU.TLB_PFN + 8 * (N)] ;----------------------------------------------------------------------------- ;----------------------------------------------------------------------------- @@ -61,7 +57,7 @@ ; TODO: is it possible to use the upper halves of the 64bit regs for anything? ; ... or entirely worthless from intel's idiocy of 'auto-zero on mov' ? 
;----------------------------------------------------------------------------- -%define Flag_Reg edi ; Delay, Exception, etc flags +%define Flag_Reg edi ; Delay, Exception, etc flags and TLB G %define RAM_Floor rsi ; Physical (x86) address of 1st RAM word %define RAM_Ceiling r8 ; Physical (x86) address of last RAM word %define PC r9d ; Current Program Counter @@ -69,23 +65,26 @@ %define CP0_Status r11d ; Processor status and control %define CP0_Cause r12d ; Cause of last general exception %define CP0_Count r13d ; Processor cycle count -%define CP0_Compare r14d ; Timer interrupt control +%define TLB_Flags r14 ; TLB D1/V1/D0/V0 Flags %define AUX r15d ; Additional TMP for certain ops +%define AUX64 r15 ; all 64bits of AUX ; TODO: 'Suspend to RAM' routine for all of the above. ;----------------------------------------------------------------------------- ;----------------------------------------------------------------------------- -; XMM Regs used for TLB Caching: +; XMM Regs: ;----------------------------------------------------------------------------- -%define Rd_E_Last_Tag xmm5 ; Last good Tag on reading Even vAddr -%define Rd_E_Last_PFN xmm6 ; Last good PFN on reading Even vAddr -%define Rd_O_Last_Tag xmm7 ; Last good Tag on reading Odd vAddr -%define Rd_O_Last_PFN xmm8 ; Last good PFN on reading Odd vAddr -%define Wr_E_Last_Tag xmm9 ; Last good Tag on writing Even vAddr -%define Wr_E_Last_PFN xmm10 ; Last good PFN on writing Even vAddr -%define Wr_O_Last_Tag xmm11 ; Last good Tag on writing Odd vAddr -%define Wr_O_Last_PFN xmm12 ; Last good PFN on writing Odd vAddr +;; 16 Tags: +%define TLB_TAG_BYTE_0 xmm5 ; Byte 0 of Tag +%define TLB_TAG_BYTE_1 xmm6 ; Byte 1 of Tag +%define TLB_TAG_BYTE_2 xmm7 ; Byte 2 of Tag +%define XMM_T0 xmm8 ; Temp + +%define R_TLB_Last_Good_Tag xmm9 ; Last good Tag on reading vAddr (|O|E|) +%define W_TLB_Last_Good_Tag xmm10 ; Last good Tag on writing vAddr (|O|E|) +%define R_TLB_Last_Good_PFN xmm11 ; Last good PFN on reading vAddr (|O|E|) 
+%define W_TLB_Last_Good_PFN xmm12 ; Last good PFN on writing vAddr (|O|E|) ;----------------------------------------------------------------------------- ;----------------------------------------------------------------------------- @@ -128,8 +127,8 @@ mov nPC, eax mov CP0_Status, eax mov CP0_Cause, eax - mov CP0_Count, eax - mov CP0_Compare, eax + mov CP0_Count, eax + xor TLB_Flags, TLB_Flags ;; Init 'slow' MIPS Regs: mov ecx, 0 _init_reg: @@ -149,6 +148,7 @@ mov Sr(CP0_Epc), eax mov Sr(CP0_BadVAddr), eax mov Sr(CP0_ErrorEpc), eax + mov Sr(CP0_Compare), eax Flg_Clear_All ; Reset all misc Flags to 0 bts CP0_Status, CP0St_ERL ; Start in kernel mode w/ unmapped useg ret diff -uNr a/m/flags.asm b/m/flags.asm --- a/m/flags.asm 7dedb8135f032539dd5f6c0133070aff4078cadb322fcb7e665a56bdfe7940e13d463b2803ea9c9726e7606579218ba9684d21ae106890c6ef7a285119887364 +++ b/m/flags.asm cb16f8ab1a1e89fce1364577dd83a456e0859379a3a9fa42c883a18bc9962d7b017e5d2c99341c60e1d41b510fd0d588fce6d55d058e49e62d89701099cc8080 @@ -26,6 +26,7 @@ %define TLB_Rd_Cache_Valid 4 %define TLB_Wr_Cache_Valid 5 %define Shutdown 6 +; Positions 31 .. 
15 store TLB's 'G' Flags ;----------------------------------------------------------------------------- ; Set a given Flag: %macro Flg_On 1 diff -uNr a/m/knobs.asm b/m/knobs.asm --- a/m/knobs.asm 3b8e7b9cf4b6b37a941b53f534fa000b523941e5c52747f0ccf92397c5e64fdcf74bbdd241e70e51bef8893954c0cf5f4db5a89066b68349a3de4f24f737bdbc +++ b/m/knobs.asm e75680eee6b4d6dab5e13fd02db2a86702136633846d4e9d9ca17ffaae25ce6c1d0d138db69081802520d9b418b7027a8150271e15e954971ba44d2506f70ad1 @@ -23,21 +23,6 @@ ;----------------------------------------------------------------------------- ;----------------------------------------------------------------------------- -; If TLBWR_CHEAT is enabled, the TLBWR ('Write Random TLB Entry') instruction -; will slide all unwired entries down by one slot and write into the freed -; slot near the top permitted by CP0_Wired, instead of the traditional -; behaviour (where entry indexed by a modulus of the tick counter is killed.) -; No known code (incl. Linux) tries to rely on the absolute position of -; unwired TLB entries after a TLBWR instruction. So this gives faster lookup -; when iterating over TLB, as the newer unwired entries will aggregate near -; the base of the table. Iron MIPSen do not iterate, they look up in parallel, -; ergo the original MIPS designer did not see any reason to attempt to order -; TLB entries by frequency of use. 
-;----------------------------------------------------------------------------- -%define TLBWR_CHEAT 1 -;----------------------------------------------------------------------------- - -;----------------------------------------------------------------------------- ; Alignment Grain ;----------------------------------------------------------------------------- %define GRAIN 32 diff -uNr a/m/mips.asm b/m/mips.asm --- a/m/mips.asm f82ea8febceb149c589262c1387c2a99d641219dac217de1ba1e3e99ed8b815b5fe4f6b68128cb55fefed5bd41b4d764c802b6ebce34bd4580769027ca001cd7 +++ b/m/mips.asm fa0643a3865257c9d5f290e90f980f02c7b6ddf6ed9db24c8a1ea3571a9bab6c69642ef743fe03bae3b48ba2b8811caf69f12606529178da5724c6e617411c0f @@ -23,9 +23,9 @@ ;----------------------------------------------------------------------------- ;----------------------------------------------------------------------------- -; # of TLB entries. Could have more; but would have to change not only here. +; # of TLB entries. ;----------------------------------------------------------------------------- -%define TLB_ENTRIES_COUNT 16 ; in principle could have more. +%define TLB_ENTRIES_COUNT 16 ;----------------------------------------------------------------------------- ;----------------------------------------------------------------------------- @@ -95,23 +95,3 @@ %define CP0St_EXL 1 ; Exception Level (0: Normal, 1: Kernel) %define CP0St_IE 0 ; Interrupt Enable ;----------------------------------------------------------------------------- - -;----------------------------------------------------------------------------- -; MIPS TLB Entry. -; We don't use C0 and C1 anywhere! 
and so we can put all of it in 32bits: -;----------------------------------------------------------------------------- -; 11111111111111110000000000000000 -; FEDCBA9876543210FEDCBA9876543210 -; -------------------------------- -; GVVDDAAAAAAAAVVVVVVVVVVVVVVVVVVV -; |1010| ASID || VPN2 | -;----------------------------------------------------------------------------- -%define TLB_VPN2_Mask 0x7FFFF ; 19 bits -%define TLB_ASID_Mask 0xFF ; 8 bits -%define TLB_ASID_Shift 19 ; sits after VPN2 Mask -%define TLB_D0 27 ; 27th bit -%define TLB_D1 28 ; 28th bit -%define TLB_V0 29 ; 29th bit -%define TLB_V1 30 ; 30th bit -%define TLB_G 31 ; 31st bit (last) -;----------------------------------------------------------------------------- diff -uNr a/m/mips_cpu.asm b/m/mips_cpu.asm --- a/m/mips_cpu.asm 35a5f7d843a515a6301c0d09957d3529f10f0443a50bd54177bcaaecc96054c502d2e14ccf1d5f106247dd2f566839ab49883e97b86cff1d5ad889652e8f5eaf +++ b/m/mips_cpu.asm 12dba93aa8d5a453df085f127856212310f14ab1538be837ce079ce92619cf3c5dbb959e3b38b0b535d9d7ff103cb2dbfa351758fcb131264c2f88d9b2b2ee4f @@ -35,7 +35,7 @@ inc CP0_Count ; Timer: CP0_Count := CP0_Count + 1 ;; Timer Interrupt - cmp CP0_Count, CP0_Compare ; Has timer reached limit? + cmp CP0_Count, Sr(CP0_Compare) ; Has timer reached limit? 
jne _cycle_no_mips_timer ; If not, do not invoke interrupt SetIRQ TIMER_IRQ ; Timer reached limit, invoke timer IRQ diff -uNr a/m/mipsinst/m_instrs.asm b/m/mipsinst/m_instrs.asm --- a/m/mipsinst/m_instrs.asm 931b5fd9ac59730bbcb95e9a9d3dba41483bbe6b3fc204ad8194397191795bacf3ef76df5335f8f17b3479a007de3a808df640fca949a7802b183bc25e7fe0c3 +++ b/m/mipsinst/m_instrs.asm 343ff34a3cbc7cd5d51c465b8b91754c546c841055b6e84dfc8e928262e958534e727dc20ec0900b103f82f57895cbfb372d0789fae1410b593746f76125187a @@ -92,25 +92,6 @@ _m_tlbwr: ; no fields mov ecx, Sr(CP0_Wired) ; ecx := CP0_Wired -%ifdef TLBWR_CHEAT ; 'Cheat' behaviour (see knobs.asm for rationale) : - mov AUX, ecx ; save this index in AUX, we will use - mov edx, TLB_ENTRIES_COUNT - 1 ; index of last entry - ; Move all TLB entries after the Wired entries down by one slot: -.tlbwr_slide: ; Start by replacing the last entry with the next-to-last: - cmp edx, AUX ; time to stop? - je .tlbr_slide_done ; ... then stop. - mov ecx, edx ; ecx := edx - dec ecx ; ecx := ecx - 1 (prev. TLB index) - mov eax, TLB_E(ecx) ; eax := current TLB entry - mov TLB_E(edx), eax ; move the current into the next - mov eax, TLB_PFN_E(ecx) ; eax := current PFN_EVEN entry - mov TLB_PFN_E(edx), eax ; move the current into the next - mov eax, TLB_PFN_O(ecx) ; eax := current PFN_ODD entry - mov TLB_PFN_O(edx), eax ; move the current into the next - dec edx ; move back by one - jmp .tlbwr_slide ; Continue the slide. -.tlbr_slide_done: ; Now we freed up the top-most non-wired slot in TLB table: -%else ; 'Traditional' behaviour per the MIPS Standard: mov ebx, TLB_ENTRIES_COUNT ; ebx := #tlbentries sub ebx, ecx ; ebx := #tlbentries - Wired mov edx, 0 ; edx (upper half of dividend) := 0 @@ -118,7 +99,6 @@ div ebx ; edx:eax / ebx add edx, ecx ; edx (remainder) := edx + wired mov AUX, edx ; make edx the index for tlb write -%endif call _write_tlb_entry ; Write the AUX-indexed TLB entry. 
jmp _end_cycle ;----------------------------------------------------------------------------- @@ -136,38 +116,77 @@ align GRAIN, db 0x90 _m_tlbp: ; no fields - mov Sr(CP0_Index), 0x80000000 ; CP0_Index := 0x80000000 - ;; Get the active ASID: - mov edx, Sr(CP0_EntryHi) ; edx := CP0_EntryHi - mov ecx, edx ; ecx := edx - and edx, 0xFF ; edx := edx & 0xFF (get current ASID) - ;; Get the desired tag: - and ecx, 0xFFFFF000 ; ecx := ecx & 0xFFFFF000 - shr ecx, 13 ; ecx := ecx >> 13 (current Tag) - ;; For each slot in table (0 .. 15), attempt lookup - xor AUX, AUX ; Start with the 0-th entry in table -_m_tlbp_lookup_entry: - mov eax, TLB_E(AUX) ; eax := current TLB entry - mov ebx, eax ; ebx := eax - and ebx, TLB_VPN2_Mask ; get VPN2 of this entry - cmp ebx, ecx ; cmp(entry.VPN2, vAddr.tag) - jne _m_tlbp_lookup_nope ; if entry.VPN2 != vAddr.tag: no match - bt eax, TLB_G ; is entry.G = 1? - jc _m_tlbp_lookup_match ; then match. - shr eax, TLB_ASID_Shift ; eax := eax >> TLB_ASID_Shift - and eax, TLB_ASID_Mask ; eax := entry.ASID - cmp eax, edx ; entry.ASID = current ASID ? - jne _m_tlbp_lookup_nope ; if neither G=1 nor ASID match. - ;; otherwise: -_m_tlbp_lookup_match: ; TLB Match: - mov Sr(CP0_Index), AUX ; Save the index - jmp _end_cycle ; Fin. -_m_tlbp_lookup_nope: ; try next one in the table, if any - inc AUX ; index := index + 1 - cmp AUX, TLB_ENTRIES_COUNT ; see if still in range 0 .. n-1 - jb _m_tlbp_lookup_entry ; if in range, go to next entry - ;; if we found nothing, we end up with CP0_Index = 0x80000000 - jmp _end_cycle ; Fin. 
+ mov AUX, Sr(CP0_EntryHi) ; AUX := CP0_EntryHi + ;; ecx := desired Tag: + mov edx, AUX ; edx := CP0_EntryHi + and edx, 0xFFFFF000 ; edx := edx & 0xFFFFF000 + shr edx, 13 ; edx := edx >> 13 (wanted Tag) + ; Search for Byte 0 of Tag: + mov eax, edx ; eax := edx (wanted Tag) + and eax, 0xFF ; Byte 0 (lowest) of wanted Tag + ; Fill T0 with 16 copies of Tag Byte 0: + movd XMM_T0, eax + punpcklbw XMM_T0, XMM_T0 + punpcklwd XMM_T0, XMM_T0 + pshufd XMM_T0, XMM_T0, 0 + ; Now SIMD-compare: + pcmpeqb XMM_T0, TLB_TAG_BYTE_0 + ; Get the result mask of the compare: + pmovmskb ecx, XMM_T0 ; i-th bit in ecx = 1 where match B0 + test ecx, ecx ; if Byte 0 of Tag not found: + jz ._m_tlbp_lookup_nope ; ... then go straight to 'not found' + ; Search for Byte 1 of Tag: + mov eax, edx ; eax := edx (wanted Tag) + shr eax, 8 ; Byte 1 (middle) of wanted Tag + and eax, 0xFF + ; Fill T0 with 16 copies of Tag Byte 1: + movd XMM_T0, eax + punpcklbw XMM_T0, XMM_T0 + punpcklwd XMM_T0, XMM_T0 + pshufd XMM_T0, XMM_T0, 0 + ; Now SIMD-compare: + pcmpeqb XMM_T0, TLB_TAG_BYTE_1 + ; Get the result mask of the compare: + pmovmskb eax, XMM_T0 ; i-th bit in ecx = 1 where match B1 + and ecx, eax ; Keep only where B0 also matched + test ecx, ecx ; if Bytes 0+1 of Tag not found: + jz ._m_tlbp_lookup_nope ; ... then go straight to 'not found' + ; Search for Byte 2 of Tag: + mov eax, edx ; eax := edx (wanted Tag) + shr eax, 16 ; Byte 2 (top) of wanted Tag + and eax, 0xFF + ; Fill T0 with 16 copies of Tag Byte 2: + movd XMM_T0, eax + punpcklbw XMM_T0, XMM_T0 + punpcklwd XMM_T0, XMM_T0 + pshufd XMM_T0, XMM_T0, 0 + ; Now SIMD-compare: + pcmpeqb XMM_T0, TLB_TAG_BYTE_2 + ; Get the result mask of the compare: + pmovmskb eax, XMM_T0 ; i-th bit in ecx = 1 where match B2 + and ecx, eax ; Keep only where B0,B1 also matched + test ecx, ecx ; if Bytes 0+1+2 of Tag not found: + jz ._m_tlbp_lookup_nope ; ... 
then go straight to 'not found' + ; If we're here, Tag WAS found; so get the TLB index where it lies : + bsf ebx, ecx ; ebx := index of found TLB entry + mov edx, ebx ; edx := ebx + add edx, 16 ; G's start at bit 16 of Flag_Reg + bt Flag_Reg, edx ; See whether i-th G bit is set + jc ._m_tlbp_lookup_found ; Now if G is set, we've found it! + ; Otherwise, index of found Tag is still in bl (0 .. 0xF) + ; G was not set, so get the requested ASID and test whether it matches: + mov eax, AUX ; eax := CP0_EntryHi (al : our ASID) + lea rdx, [TLB_ASID_COPY]; Load address of ASID Copy + cmp byte [rdx + rbx], al ; Compare stored ASID to current + jne ._m_tlbp_lookup_nope ; ... if not equal, 'not found' + ; If we're here, we FOUND a matching TLB entry: +._m_tlbp_lookup_found: + mov Sr(CP0_Index), ebx ; Save the index of the found entry. + jmp _end_cycle ; Fin! +._m_tlbp_lookup_nope: ; not found: + mov Sr(CP0_Index), 0x80000000 ; CP0_Index := 0x80000000 + ; if we found nothing, we end up with CP0_Index = 0x80000000 + jmp _end_cycle ; Fin. ;----------------------------------------------------------------------------- ;----------------------------------------------------------------------------- @@ -279,7 +298,7 @@ _mfc0_r11: ; 0x0b test ecx, ecx ; Sel != 0 ? jnz _mfc0_unknown ; ... then unknown; else: - mov TMP, CP0_Compare ; return CP0_Compare ('fast reg') + mov TMP, Sr(CP0_Compare) ; return CP0_Compare ('fast reg') jmp _mfc0_writeback ; Done ;----------------------------------------------------------------------------- _mfc0_r12: ; 0x0c @@ -478,7 +497,7 @@ test ecx, ecx ; Sel != 0 ? jnz _mtc0_unknown ; ... 
then unknown; else: ClrIRQ TIMER_IRQ ; Clear MIPS Timer IRQ - mov CP0_Compare, ebx ; CP0_Compare := T ('fast reg') + mov Sr(CP0_Compare), ebx ; CP0_Compare := T ('fast reg') jmp _end_cycle ; Done ;----------------------------------------------------------------------------- _mtc0_r12: ; 0x0c diff -uNr a/m/ram.asm b/m/ram.asm --- a/m/ram.asm 016c026dbe4230bd120c0fc4269e61bd8a44b82580289efc90fed0792b5893a5727e069191fbfb0e32c3c40d2700b4a39a5acb0be1fdbfc475274c344368626a +++ b/m/ram.asm 19593abc66ab9ff8a02fa39884524bba012aaf3afab4c0a588272d07b9269bb59140f584519317ba0c2f412fc0b47d31cd4fea28d3d6754a3f5bbf7bacca3e78 @@ -93,6 +93,16 @@ ;----------------------------------------------------------------------------- ;----------------------------------------------------------------------------- +section .bss +align 32 + TLB_TAG_BYTE_0_COPY resb 16 ; Byte-0 of each TLB entry Tag + TLB_TAG_BYTE_1_COPY resb 16 ; Byte-1 of each TLB entry Tag + TLB_TAG_BYTE_2_COPY resb 16 ; Byte-2 of each TLB entry Tag + TLB_ASID_COPY resb 16 ; ASID of each TLB entry +section .text +;----------------------------------------------------------------------------- + +;----------------------------------------------------------------------------- ; Virt2Phys Read : virtual address in eax; output (physical addr) in eax ;----------------------------------------------------------------------------- align GRAIN, db 0x90 @@ -122,99 +132,147 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Save ebx, ecx, edx, AUX, to xmm ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - movd xmm0, ebx - movd xmm1, ecx - movd xmm2, edx - movd xmm3, AUX - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - mov ecx, eax ; ecx := eax (vAddr) - and ecx, 0xFFFFF000 ; ecx := ecx & 0xFFFFF000 - shr ecx, 13 ; ecx := ecx >> 13 (get vAddr's Tag) + movd xmm0, ebx + movd xmm1, ecx + movd xmm2, edx + movd xmm3, AUX ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - ;; Find out whether we actually must do the lookup, or can use cached: - Flg_Get TLB_Rd_Cache_Valid ; Is Read 
TLB Cache valid? - jnc .Lookup_Must ; If Read TLB Cache invalid -- must! - bt eax, 12 ; Test odd/even junior bit - jc .Rd_Cached_Odd ; If odd, look at last Odd vAddr Tag -.Rd_Cached_Even: ; If even, look at last Even vAddr Tag - movd edx, Rd_E_Last_Tag ; edx := last Even vAddr's Tag - cmp ecx, edx ; is the current vAddr's Tag equal? - jne .Lookup_Must ; ... if not, must do the lookup dance; - ;; ... Otherwise, we have an Even cache hit: - movd ebx, Rd_E_Last_PFN ; ebx := last good Even PFN - jmp .Cache_Hit ; apply the PFN and wrap up. -.Rd_Cached_Odd: - movd edx, Rd_O_Last_Tag ; edx := last Odd vAddr's Tag - cmp ecx, edx ; is the current vAddr's Tag equal? - jne .Lookup_Must ; ... if not, must do the lookup dance; - ;; ... Otherwise, we have an Odd cache hit: - movd ebx, Rd_O_Last_PFN ; ebx := last good Odd PFN - jmp .Cache_Hit ; apply the PFN and wrap up. + mov ecx, eax ; ecx := eax (vAddr) + and ecx, 0xFFFFF000 ; ecx := ecx & 0xFFFFF000 + shr ecx, 13 ; ecx := ecx >> 13 (get vAddr's Tag) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - ;; Failing the above, we must actually walk the TLB: + ; Find out whether we actually must do the lookup, or can use cached: + Flg_Get TLB_Rd_Cache_Valid ; Is Read TLB Cache valid? + jnc .Lookup_Must ; If Read TLB Cache invalid -- must! + ; If cache is valid, lookup: + mov AUX, ecx ; AUX := tag + xor ecx, ecx ; ecx := 0 + bt eax, 12 ; Test vAddr's odd/even junior bit + setc cl ; ecx := {1 if a-odd, 0 if a-even} + shl rcx, 6 ; rcx := {64 if a-odd, 0 if a-even} + ; get the last-good-Tags: + movq rbx, R_TLB_Last_Good_Tag ; Get last good R-Tag pair + shr rbx, cl ; if arity is odd, get top half + cmp ebx, AUX ; is current Tag == to last-good ? + jne .Lookup_Must ; ... if not, go to Lookup_Must + ; given Tag matched last-good. So get last-good PFN and wrap up: + movq rbx, R_TLB_Last_Good_PFN ; Get last good PFN pair + shr rbx, cl ; if arity is odd, get top half + jmp .PFN_And_Done ; use ebx as the PFN and wrap up. 
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; .Lookup_Must: - movd xmm4, ecx ; xmm4 := current vAddr's Tag - ;; Get the active ASID: - mov edx, Sr(CP0_EntryHi) ; edx := CP0_EntryHi - and edx, 0xFF ; edx := edx & 0xFF (get current ASID) - ;; For each slot in table (0 .. 15), attempt lookup - xor AUX, AUX ; Start with the 0-th entry in table -.Lookup_TLB_E: - movd ecx, xmm4 ; ecx := current vAddr's Tag - mov ebx, TLB_E(AUX) ; ebx := current TLB entry - and ebx, TLB_VPN2_Mask ; get VPN2 of this entry - cmp ebx, ecx ; cmp(entry.VPN2, vAddr.tag) - jne .Lookup_TLB_E_Not_Here ; if entry.VPN2 != vAddr.tag: no match - mov ebx, TLB_E(AUX) ; ebx := current TLB entry - bt ebx, TLB_G ; is entry.G = 1? - jc .Lookup_TLB_E_Match ; then match. - shr ebx, TLB_ASID_Shift ; ebx := ebx >> TLB_ASID_Shift - and ebx, TLB_ASID_Mask ; ebx := entry.ASID - cmp ebx, edx ; entry.ASID = current ASID ? - jne .Lookup_TLB_E_Not_Here ; if neither G=1 nor ASID match. - mov ebx, TLB_E(AUX) ; ebx := current TLB entry -.Lookup_TLB_E_Match: ; TLB Match: - bt eax, 12 ; Test odd/even junior bit - jc .Lookup_TLB_E_Match_Odd ; If odd: test V1, D1 -.Lookup_TLB_E_Match_Even: ; If even: test V0, D0 - bt ebx, TLB_V0 ; Is entry.V0=1 ? - jnc .Lookup_TLB_E_Invalid ; If not, TLBRET_INVALID - lea ecx, TLB_PFN_E(AUX) ; prepare to load even PFN entry - mov ebx, dword [ecx] ; Actually load the current PFN entry - movd Rd_E_Last_PFN, ebx ; Save the current PFN as last Even - movd ecx, xmm4 ; ecx := the current Tag - movd Rd_E_Last_Tag, ecx ; Save the current Tag as last Even - jmp .Lookup_TLB_E_Match_Yes ; Since we're reading: go to Match Yes -.Lookup_TLB_E_Match_Odd: ; Odd bit: - bt ebx, TLB_V1 ; Is entry.V1=1 ? 
- jnc .Lookup_TLB_E_Invalid ; If not, TLBRET_INVALID - lea ecx, TLB_PFN_O(AUX) ; prepare to load odd PFN entry - mov ebx, dword [ecx] ; Actually load the current PFN entry - movd Rd_O_Last_PFN, ebx ; Save the current PFN as last Odd - movd ecx, xmm4 ; ecx := the current Tag - movd Rd_O_Last_Tag, ecx ; Save the current Tag as last Odd -.Lookup_TLB_E_Match_Yes: ; This is the 'success' case - Flg_On TLB_Rd_Cache_Valid - ; Upon next TLB lookup, if cache is valid, and Tag remains same - ; as before, we can use the same PFN as was obtained last time - ; for the respective 12th bit arity of the vAddr! -.Cache_Hit: - and eax, 0xFFF ; vAddr := vAddr & 0xFFF - or eax, ebx ; vAddr := vAddr | entry.PFN[lowbit] - jmp _Lookup_TLB_Done ; vAddr is now correct pAddr, done. -.Lookup_TLB_E_Not_Here: ; try next one in the table, if any - inc AUX ; index := index + 1 - cmp AUX, TLB_ENTRIES_COUNT ; see if still in range 0 .. n-1 - jb .Lookup_TLB_E ; if in range, go to next entry - ;; ... else: - add rsp, 16 ; squelch return to _Virt_xxx and its caller - push _Handle_Exception_TLB_NoMatch ; 'return' straight to handler. - jmp _Lookup_TLB_E_WriteExtr ; Wrap up -.Lookup_TLB_E_Invalid: - SetEXC EXC_TLBL ; Set the EXC_TLBL Exception - add rsp, 16 ; squelch return to _Virt_xxx and its caller - push _Handle_Exception_Other ; 'return' straight to handler. - jmp _Lookup_TLB_E_WriteExtr ; Go to the common epilogue. 
+ movd xmm4, ecx ; ecx := copy of Tag + ;; Search for B0, B1, B2 of Tag, accumulate result in ebx ;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Search for Byte 0 of Tag: + mov edx, ecx ; edx := ecx (wanted Tag) + and edx, 0xFF ; Byte 0 (lowest) of wanted Tag + ; Fill T0 with 16 copies of Tag Byte 0: + movd XMM_T0, edx + punpcklbw XMM_T0, XMM_T0 + punpcklwd XMM_T0, XMM_T0 + pshufd XMM_T0, XMM_T0, 0 + ; Now SIMD-compare: + pcmpeqb XMM_T0, TLB_TAG_BYTE_0 + ; Get the result mask of the compare: + pmovmskb ebx, XMM_T0 ; i-th bit in ebx = 1 where match B0 + test ebx, ebx ; if Byte 0 of Tag not found: + jz .Not_Found ; ... then go straight to 'not found' + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Search for Byte 1 of Tag: + mov edx, ecx ; edx := ecx (wanted Tag) + shr edx, 8 ; Byte 1 (middle) of wanted Tag + and edx, 0xFF + ; Fill T0 with 16 copies of Tag Byte 1: + movd XMM_T0, edx + punpcklbw XMM_T0, XMM_T0 + punpcklwd XMM_T0, XMM_T0 + pshufd XMM_T0, XMM_T0, 0 + ; Now SIMD-compare: + pcmpeqb XMM_T0, TLB_TAG_BYTE_1 + ; Get the result mask of the compare: + pmovmskb edx, XMM_T0 ; i-th bit in edx = 1 where match B1 + and ebx, edx ; Keep only where B0 also matched + test ebx, ebx ; if Bytes 0+1 of Tag not found: + jz .Not_Found ; ... then go straight to 'not found' + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Search for Byte 2 of Tag: + mov edx, ecx ; eax := edx (wanted Tag) + shr edx, 16 ; Byte 2 (top) of wanted Tag + and edx, 0xFF + ; Fill T0 with 16 copies of Tag Byte 2: + movd XMM_T0, edx + punpcklbw XMM_T0, XMM_T0 + punpcklwd XMM_T0, XMM_T0 + pshufd XMM_T0, XMM_T0, 0 + ; Now SIMD-compare: + pcmpeqb XMM_T0, TLB_TAG_BYTE_2 + ; Get the result mask of the compare: + pmovmskb edx, XMM_T0 ; i-th bit in edx = 1 where match B2 + and ebx, edx ; Keep only where B0,B1 also matched + test ebx, ebx ; if Bytes 0+1+2 of Tag not found: + jz .Not_Found ; ... 
then go straight to 'not found' + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; If we're here, Tag WAS found; so get the TLB index where it lies : + bsf AUX, ebx ; AUX := index of found TLB entry + mov edx, AUX ; edx := AUX + ; Now see whether the corresponding G flag is set: + add edx, 16 ; G's start at bit 16 of Flag_Reg + bt Flag_Reg, edx ; See whether i-th G bit is set + jc .Match ; Now if G is set, we've found it! + ; G was not set, so get the requested ASID and test whether it matches: + ; Get the active ASID: + mov ebx, Sr(CP0_EntryHi); ebx := CP0_EntryHi + and ebx, 0xFF ; ebx := ebx & 0xFF (get current ASID) + ; Determine whether it matches the found Tag entry's : + mov ecx, AUX ; ecx := AUX (index of found entry) + lea rdx, [TLB_ASID_COPY]; Load address of ASID Copy + cmp byte [rdx + rcx], bl ; Compare stored ASID to current + jne .Not_Found ; ... if not equal, 'not found' + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +.Match: + ; If we're here, we have a Match. AUX is index of matching entry; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + xor edx, edx ; edx := 0 + ; Get arity of desired entry: + bt eax, 12 ; Test vAddr's odd/even junior bit + setc dl ; edx := {1 if a-odd, 0 if a-even} + shl edx, 5 ; edx := {32 if a-odd, 0 if a-even} + mov ecx, edx ; ebx := edx (copy of above) + ; Now we know which bit of TLB_Flags is this entry's V. Test it: + add edx, AUX ; add the corresponding index + bt TLB_Flags, rdx ; test if V(Index) is set + jnc .Invalid_R ; ... V == 0, then go to Invalid + ; Now let's load the PFN: + mov rbx, TLB_PFN(AUX64) ; load the PFN pair to rbx + ; ebx is now the PFN. 
Before wrapping up, update the TLB read cache : + movq R_TLB_Last_Good_PFN, rbx ; Set last good PFN to this PFN: + ; now leave only the correct half of PFN, at bottom of rbx: + shr rbx, cl ; if arity is odd, get upper 32bit + ; set correct half of R_TLB_Last_Good_Tag to the found Tag: + mov rdx, 0xFFFFFFFF00000000 ; rdx := 0xFFFFFFFF00000000 + shr rdx, cl ; if arity is odd, keep bottom + movq AUX64, R_TLB_Last_Good_Tag ; get last good Tag + and AUX64, rdx ; zap correct half of last good tag + movq rdx, xmm4 ; get the Tag again : + shl rdx, cl ; if arity if odd, slide into pos: + or AUX64, rdx ; now or it into place + movq R_TLB_Last_Good_Tag, AUX64 ; update last good Tag. +.PFN_And_Done: + and eax, 0xFFF ; vAddr := vAddr & 0xFFF + or eax, ebx ; vAddr := vAddr | entry.PFN[lowbit] + jmp _Lookup_TLB_Done ; vAddr is now correct pAddr, done. + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +.Not_Found: ; Not Found in Table: + add rsp, 16 ; squelch return to _Virt_xxx and its caller + push _Handle_Exception_TLB_NoMatch ; 'return' straight to handler. + jmp _Lookup_TLB_E_WriteExtr ; Wrap up + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +.Invalid_R: + SetEXC EXC_TLBL ; Set the EXC_TLBL Exception + add rsp, 16 ; squelch return to _Virt_xxx and its caller + push _Handle_Exception_Other ; 'return' straight to handler. + jmp _Lookup_TLB_E_WriteExtr ; Go to the common epilogue. 
;----------------------------------------------------------------------------- ;----------------------------------------------------------------------------- @@ -247,111 +305,158 @@ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; Save ebx, ecx, edx, AUX, to xmm ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - movd xmm0, ebx - movd xmm1, ecx - movd xmm2, edx - movd xmm3, AUX - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - mov ecx, eax ; ecx := eax (vAddr) - and ecx, 0xFFFFF000 ; ecx := ecx & 0xFFFFF000 - shr ecx, 13 ; ecx := ecx >> 13 (get vAddr's Tag) + movd xmm0, ebx + movd xmm1, ecx + movd xmm2, edx + movd xmm3, AUX ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - ;; Find out whether we actually must do the lookup, or can use cached: - Flg_Get TLB_Wr_Cache_Valid ; Is Write TLB Cache valid? - jnc .Lookup_Must ; If Write TLB Cache invalid -- must! - bt eax, 12 ; Test odd/even junior bit - jc .Wr_Cached_Odd ; If odd, look at last Odd vAddr Tag -.Wr_Cached_Even: ; If even, look at last Even vAddr Tag - movd edx, Wr_E_Last_Tag ; edx := last Even vAddr's Tag - cmp ecx, edx ; is the current vAddr's Tag equal? - jne .Lookup_Must ; ... if not, must do the lookup dance; - ;; ... Otherwise, we have an Even cache hit: - movd ebx, Wr_E_Last_PFN ; ebx := last good Even PFN - jmp .Cache_Hit ; apply the PFN and wrap up. -.Wr_Cached_Odd: - movd edx, Wr_O_Last_Tag ; edx := last Odd vAddr's Tag - cmp ecx, edx ; is the current vAddr's Tag equal? - jne .Lookup_Must ; ... if not, must do the lookup dance; - ;; ... Otherwise, we have an Odd cache hit: - movd ebx, Wr_O_Last_PFN ; ebx := last good Odd PFN - jmp .Cache_Hit ; apply the PFN and wrap up. + mov ecx, eax ; ecx := eax (vAddr) + and ecx, 0xFFFFF000 ; ecx := ecx & 0xFFFFF000 + shr ecx, 13 ; ecx := ecx >> 13 (get vAddr's Tag) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - ;; Failing the above, we must actually walk the TLB: + ; Find out whether we actually must do the lookup, or can use cached: + Flg_Get TLB_Wr_Cache_Valid ; Is Write TLB Cache valid? 
+ jnc .Lookup_Must ; If Write TLB Cache invalid -- must! + ; If cache is valid, lookup: + mov AUX, ecx ; AUX := tag + xor ecx, ecx ; ecx := 0 + bt eax, 12 ; Test vAddr's odd/even junior bit + setc cl ; ecx := {1 if a-odd, 0 if a-even} + shl rcx, 6 ; rcx := {64 if a-odd, 0 if a-even} + ; get the last-good-Tags: + movq rbx, W_TLB_Last_Good_Tag ; Get last good W-Tag pair + shr rbx, cl ; if arity is odd, get top half + cmp ebx, AUX ; is current Tag == to last-good ? + jne .Lookup_Must ; ... if not, go to Lookup_Must + ; given Tag matched last-good. So get last-good PFN and wrap up: + movq rbx, W_TLB_Last_Good_PFN ; Get last good PFN pair + shr rbx, cl ; if arity is odd, get top half + jmp .PFN_And_Done ; use ebx as the PFN and wrap up. + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; .Lookup_Must: - movd xmm4, ecx ; xmm4 := current vAddr's Tag - ;; Get the active ASID: - mov edx, Sr(CP0_EntryHi) ; edx := CP0_EntryHi - and edx, 0xFF ; edx := edx & 0xFF (get current ASID) - ;; For each slot in table (0 .. 15), attempt lookup - xor AUX, AUX ; Start with the 0-th entry in table -.Lookup_TLB_E: - movd ecx, xmm4 ; ecx := current vAddr's Tag - mov ebx, TLB_E(AUX) ; ebx := current TLB entry - and ebx, TLB_VPN2_Mask ; get VPN2 of this entry - cmp ebx, ecx ; cmp(entry.VPN2, vAddr.tag) - jne .Lookup_TLB_E_Not_Here ; if entry.VPN2 != vAddr.tag: no match - mov ebx, TLB_E(AUX) ; ebx := current TLB entry - bt ebx, TLB_G ; is entry.G = 1? - jc .Lookup_TLB_E_Match ; then match. - shr ebx, TLB_ASID_Shift ; ebx := ebx >> TLB_ASID_Shift - and ebx, TLB_ASID_Mask ; ebx := entry.ASID - cmp ebx, edx ; entry.ASID = current ASID ? - jne .Lookup_TLB_E_Not_Here ; if neither G=1 nor ASID match. - mov ebx, TLB_E(AUX) ; ebx := current TLB entry -.Lookup_TLB_E_Match: ; TLB Match: - bt eax, 12 ; Test odd/even junior bit - jc .Lookup_TLB_E_Match_Odd ; If odd: test V1, D1 -.Lookup_TLB_E_Match_Even: ; If even: test V0, D0 - bt ebx, TLB_V0 ; Is entry.V0=1 ? 
- jnc .Lookup_TLB_E_Invalid ; If not, TLBRET_INVALID - bt ebx, TLB_D0 ; Is entry.D0=1 ? - jnc .Lookup_TLB_E_Dirty ; If not, go to 'Dirty' - ;; Not invalid or dirty: - lea ecx, TLB_PFN_E(AUX) ; prepare to load even PFN entry - mov ebx, dword [ecx] ; Actually load the current PFN entry - movd Wr_E_Last_PFN, ebx ; Save the current PFN as last Even - movd ecx, xmm4 ; ecx := the current Tag - movd Wr_E_Last_Tag, ecx ; Save the current Tag as last Even - jmp .Lookup_TLB_E_Match_Yes ; ;; Proceed to 'match' : -.Lookup_TLB_E_Match_Odd: ; Odd bit: - bt ebx, TLB_V1 ; Is entry.V1=1 ? - jnc .Lookup_TLB_E_Invalid ; If not, TLBRET_INVALID - bt ebx, TLB_D1 ; Is entry.D1=1 ? - jnc .Lookup_TLB_E_Dirty ; If not, go to 'Dirty' - ;; Not invalid or dirty: - lea ecx, TLB_PFN_O(AUX) ; prepare to load odd PFN entry - mov ebx, dword [ecx] ; Actually load the current PFN entry - movd Wr_O_Last_PFN, ebx ; Save the current PFN as last Odd - movd ecx, xmm4 ; ecx := the current Tag - movd Wr_O_Last_Tag, ecx ; Save the current Tag as last Odd - ;; Proceed to 'match' : -.Lookup_TLB_E_Match_Yes: ; This is the 'success' case - Flg_On TLB_Wr_Cache_Valid - ; Upon next TLB lookup, if cache is valid, and Tag remains same - ; as before, we can use the same PFN as was obtained last time - ; for the respective 12th bit arity of the vAddr! -.Cache_Hit: - and eax, 0xFFF ; vAddr := vAddr & 0xFFF - or eax, ebx ; vAddr := vAddr | entry.PFN[lowbit] - jmp _Lookup_TLB_Done ; vAddr is now correct pAddr, done. -.Lookup_TLB_E_Not_Here: ; try next one in the table, if any - inc AUX ; index := index + 1 - cmp AUX, TLB_ENTRIES_COUNT ; see if still in range 0 .. n-1 - jb .Lookup_TLB_E ; if in range, go to next entry - ;; ... else: - add rsp, 16 ; squelch return to _Virt_xxx and its caller - push _Handle_Exception_TLB_NoMatch ; 'return' straight to handler. - jmp _Lookup_TLB_E_WriteExtr ; Wrap up -.Lookup_TLB_E_Dirty: ; ... 
else, Dirty: - SetEXC EXC_Mod ; Set the EXC_Mod Exception - add rsp, 16 ; squelch return to _Virt_xxx and its caller - push _Handle_Exception_Other ; 'return' straight to handler. - jmp _Lookup_TLB_E_WriteExtr ; Write the 'extra data' and finish. -.Lookup_TLB_E_Invalid: ; Invalid Write: - SetEXC EXC_TLBS ; Set the EXC_TLBS Exception - add rsp, 16 ; squelch return to _Virt_xxx and its caller - push _Handle_Exception_Other ; 'return' straight to handler. - ;; then drop down to _Lookup_TLB_E_WriteExtr + movd xmm4, ecx ; ecx := copy of Tag + ;; Search for B0, B1, B2 of Tag, accumulate result in ebx ;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Search for Byte 0 of Tag: + mov edx, ecx ; edx := ecx (wanted Tag) + and edx, 0xFF ; Byte 0 (lowest) of wanted Tag + ; Fill T0 with 16 copies of Tag Byte 0: + movd XMM_T0, edx + punpcklbw XMM_T0, XMM_T0 + punpcklwd XMM_T0, XMM_T0 + pshufd XMM_T0, XMM_T0, 0 + ; Now SIMD-compare: + pcmpeqb XMM_T0, TLB_TAG_BYTE_0 + ; Get the result mask of the compare: + pmovmskb ebx, XMM_T0 ; i-th bit in ebx = 1 where match B0 + test ebx, ebx ; if Byte 0 of Tag not found: + jz .Not_Found ; ... then go straight to 'not found' + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ; Search for Byte 1 of Tag: + mov edx, ecx ; edx := ecx (wanted Tag) + shr edx, 8 ; Byte 1 (middle) of wanted Tag + and edx, 0xFF + ; Fill T0 with 16 copies of Tag Byte 1: + movd XMM_T0, edx + punpcklbw XMM_T0, XMM_T0 + punpcklwd XMM_T0, XMM_T0 + pshufd XMM_T0, XMM_T0, 0 + ; Now SIMD-compare: + pcmpeqb XMM_T0, TLB_TAG_BYTE_1 + ; Get the result mask of the compare: + pmovmskb edx, XMM_T0 ; i-th bit in edx = 1 where match B1 + and ebx, edx ; Keep only where B0 also matched + test ebx, ebx ; if Bytes 0+1 of Tag not found: + jz .Not_Found ; ... 
then go straight to 'not found'
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; Search for Byte 2 of Tag:
+ mov edx, ecx ; edx := ecx (wanted Tag)
+ shr edx, 16 ; Byte 2 (top) of wanted Tag
+ and edx, 0xFF
+ ; Fill T0 with 16 copies of Tag Byte 2:
+ movd XMM_T0, edx
+ punpcklbw XMM_T0, XMM_T0
+ punpcklwd XMM_T0, XMM_T0
+ pshufd XMM_T0, XMM_T0, 0
+ ; Now SIMD-compare:
+ pcmpeqb XMM_T0, TLB_TAG_BYTE_2
+ ; Get the result mask of the compare:
+ pmovmskb edx, XMM_T0 ; i-th bit in edx = 1 where match B2
+ and ebx, edx ; Keep only where B0,B1 also matched
+ test ebx, ebx ; if Bytes 0+1+2 of Tag not found:
+ jz .Not_Found ; ... then go straight to 'not found'
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ ; If we're here, Tag WAS found; so get the TLB index where it lies :
+ bsf AUX, ebx ; AUX := index of found TLB entry
+ mov edx, AUX ; edx := AUX
+ ; Now see whether the corresponding G flag is set:
+ add edx, 16 ; G's start at bit 16 of Flag_Reg
+ bt Flag_Reg, edx ; See whether i-th G bit is set
+ jc .Match ; Now if G is set, we've found it!
+ ; G was not set, so get the requested ASID and test whether it matches:
+ ; Get the active ASID:
+ mov ebx, Sr(CP0_EntryHi); ebx := CP0_EntryHi
+ and ebx, 0xFF ; ebx := ebx & 0xFF (get current ASID)
+ ; Determine whether it matches the found Tag entry's :
+ mov ecx, AUX ; ecx := AUX (index of found entry)
+ lea rdx, [TLB_ASID_COPY]; Load address of ASID Copy
+ cmp byte [rdx + rcx], bl ; Compare stored ASID to current
+ jne .Not_Found ; ... if not equal, 'not found'
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+.Match:
+ ; If we're here, we have a Match. 
AUX is index of matching entry;
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+ xor edx, edx ; edx := 0
+ ; Get arity of desired entry:
+ bt eax, 12 ; Test vAddr's odd/even junior bit
+ setc dl ; edx := {1 if a-odd, 0 if a-even}
+ shl edx, 5 ; edx := {32 if a-odd, 0 if a-even}
+ mov ecx, edx ; ecx := edx (copy of above)
+ ; Now we know which bit of TLB_Flags is this entry's V. Test it:
+ add edx, AUX ; add the corresponding index
+ bt TLB_Flags, rdx ; test if V(Index) is set
+ jnc .Invalid_W ; ... if V == 0, then go to Invalid
+ ; Now, since we're writing, test this entry's D, at pos(V) + 16:
+ add edx, 16
+ bt TLB_Flags, rdx ; test if D(Index) is set
+ jnc .Dirty_W ; ... if D == 0, then go to Dirty
+ ; Now let's load the correct odd or even PFN:
+ mov rbx, TLB_PFN(AUX64) ; load the PFN pair to rbx
+ ; ebx is now the PFN. Before wrapping up, update the TLB write cache :
+ movq W_TLB_Last_Good_PFN, rbx ; Set last good PFN to this PFN:
+ ; now leave only the correct half of PFN, at bottom of rbx:
+ shr rbx, cl ; if arity is odd, get upper 32bit
+ ; set correct half of W_TLB_Last_Good_Tag to the found Tag:
+ mov rdx, 0xFFFFFFFF00000000 ; rdx := 0xFFFFFFFF00000000
+ shr rdx, cl ; if arity is odd, keep bottom
+ movq AUX64, W_TLB_Last_Good_Tag ; get last good Tag
+ and AUX64, rdx ; zap correct half of last good tag
+ movq rdx, xmm4 ; get the Tag again :
+ shl rdx, cl ; if arity is odd, slide into pos:
+ or AUX64, rdx ; now or it into place
+ movq W_TLB_Last_Good_Tag, AUX64 ; update last good Tag.
+.PFN_And_Done:
+ and eax, 0xFFF ; vAddr := vAddr & 0xFFF
+ or eax, ebx ; vAddr := vAddr | entry.PFN[lowbit]
+ jmp _Lookup_TLB_Done ; vAddr is now correct pAddr, done.
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+.Not_Found: ; Not Found in Table:
+ add rsp, 16 ; squelch return to _Virt_xxx and its caller
+ push _Handle_Exception_TLB_NoMatch ; 'return' straight to handler. 
+ jmp _Lookup_TLB_E_WriteExtr ; Wrap up + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +.Invalid_W: + SetEXC EXC_TLBS ; Set the EXC_TLBS Exception + add rsp, 16 ; squelch return to _Virt_xxx and its caller + push _Handle_Exception_Other ; 'return' straight to handler. + jmp _Lookup_TLB_E_WriteExtr ; Go to the common epilogue. + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +.Dirty_W: + SetEXC EXC_Mod ; Set the EXC_Mod Exception + add rsp, 16 ; squelch return to _Virt_xxx and its caller + push _Handle_Exception_Other ; 'return' straight to handler. + ; jmp _Lookup_TLB_E_WriteExtr ; Write the 'extra data' and finish. + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;----------------------------------------------------------------------------- ; Epilogue common to _Virt_To_Phys_Read and _Virt_To_Phys_Write: @@ -526,56 +631,93 @@ ; Kills eax, ebx, ecx, edx. ;----------------------------------------------------------------------------- _write_tlb_entry: - mov edx, Sr(CP0_EntryHi) ; edx := CP0_EntryHi - mov ecx, edx ; ecx := edx - shr ecx, 13 ; ecx := ecx >> 13 to get VPN2 - and edx, TLB_ASID_Mask ; edx := edx & 0xFF to get ASID - shl edx, TLB_ASID_Shift ; edx := edx << 19 to put ASID in place - or ecx, edx ; now we have VPN2 and ASID in ecx - ;; done with edx, can reuse - mov edx, Sr(CP0_EntryLo0) ; edx := CP0_EntryLo0 - mov ebx, Sr(CP0_EntryLo1) ; ebx := CP0_EntryLo1 - ;; get G: - mov eax, edx ; eax := CP0_EntryLo0 - and eax, ebx ; eax := eax & CP0_EntryLo1 - and eax, 0x1 ; eax := eax & 1 to get G - shl eax, TLB_G ; move G bit into position - or ecx, eax ; ecx := ecx | eax to put in G bit - ;; now ecx contains VPN2, ASID, G - ;; Get V0 from CP0_EntryLo0 and put in ecx where belongs: - mov eax, edx ; eax := CP0_EntryLo0 - and eax, 0x2 ; eax := eax & 0x2 to get V0 bit - shl eax, (TLB_V0 - 1) ; put V0 bit in position - or ecx, eax ; ecx := ecx | eax to put in V0 bit - ;; Get D0 from CP0_EntryLo0 and put in ecx where belongs: - 
mov eax, edx ; eax := CP0_EntryLo0 - and eax, 0x4 ; eax := eax & 0x4 to get D0 bit - shl eax, (TLB_D0 - 2) ; put D0 bit in position - or ecx, eax ; ecx := ecx | eax to put in D0 bit - ;; Get V1 from CP0_EntryLo1 and put in ecx where belongs: - mov eax, ebx ; eax := CP0_EntryLo1 - and eax, 0x2 ; eax := eax & 0x2 to get V1 bit - shl eax, (TLB_V1 - 1) ; put V1 bit in position - or ecx, eax ; ecx := ecx | eax to put in V1 bit - ;; Get D1 from CP0_EntryLo1 and put in ecx where belongs: - mov eax, ebx ; eax := CP0_EntryLo1 - and eax, 0x4 ; eax := eax & 0x4 to get D1 bit - shl eax, (TLB_D1 - 2) ; put D1 bit in position - or ecx, eax ; ecx := ecx | eax to put in D1 bit - ;; Write the TLB entry to the given index (in AUX) : - and AUX, 0xF ; Index of TLB entry is bottom 4 bits - mov TLB_E(AUX), ecx ; Write TLB entry in ecx to n-th slot. - ;; Transform CP0_EntryLo0 (edx) into PFN0: - shr edx, 6 - and edx, 0xFFFFF - shl edx, 12 - ;; Transform CP0_EntryLo1 (ebx) into PFN1: - shr ebx, 6 - and ebx, 0xFFFFF - shl ebx, 12 - ;; Store PFN: - mov TLB_PFN_E(AUX), edx ; Store PFN0 - mov TLB_PFN_O(AUX), ebx ; Store PFN1 - Invalidate_TLB_Cache ; Invalidate both R and W TLB Caches - ret ; Done. + and AUX, 0xF ; constrain Index range (0 .. 15) + mov ecx, AUX ; ecx := Index + ; Clear old Flags: V0, D0, V1, D1 : + mov rax, ~(1 | (1 << 16) | (1 << 32) | (1 << 48)) ; zap mask + rol rax, cl ; Rotate into indexed position + and TLB_Flags, rax ; Clear old V0, D0, V1, D1. + ; Clear old G: + add ecx, 16 ; G starts at 16th bit; + btr Flag_Reg, ecx ; Clear old G. 
+ ; Now, set the new values:
+ mov ecx, AUX ; ecx := Index
+ mov edx, Sr(CP0_EntryHi) ; edx := CP0_EntryHi
+ ; First, get this entry's Tag (VPN2) from CP0_EntryHi :
+ mov eax, edx ; eax := edx (CP0_EntryHi)
+ shr eax, 13 ; eax := eax >> 13 to get the Tag
+ ; Now, save this entry's Tag to the selected Index:
+ ; Write Byte 0 of Tag to TLB_TAG_BYTE_0_COPY and TLB_TAG_BYTE_0 :
+ lea rbx, [TLB_TAG_BYTE_0_COPY] ; Load address of B0 Copy
+ mov byte [rbx + rcx], al ; Change the indexed byte
+ movdqa TLB_TAG_BYTE_0, [rbx] ; Update XMM reg with B0 Copy
+ ; Write Byte 1 of Tag to TLB_TAG_BYTE_1_COPY and TLB_TAG_BYTE_1 :
+ lea rbx, [TLB_TAG_BYTE_1_COPY] ; Load address of B1 Copy
+ mov byte [rbx + rcx], ah ; Change the indexed byte
+ movdqa TLB_TAG_BYTE_1, [rbx] ; Update XMM reg with B1 Copy
+ ; Write Byte 2 of Tag to TLB_TAG_BYTE_2_COPY and TLB_TAG_BYTE_2 :
+ shr eax, 16
+ lea rbx, [TLB_TAG_BYTE_2_COPY] ; Load address of B2 Copy
+ mov byte [rbx + rcx], al ; Change the indexed byte
+ movdqa TLB_TAG_BYTE_2, [rbx] ; Update XMM reg with B2 Copy
+ ; Done with Tag. Now, get this entry's ASID from CP0_EntryHi :
+ mov eax, edx ; eax := edx
+ and eax, 0xFF ; Get ASID from CP0_EntryHi
+ ; Store this entry's ASID to the selected Index:
+ lea rbx, [TLB_ASID_COPY] ; Load address of ASID Copy
+ mov byte [rbx + rcx], al ; Change the indexed byte
+ ; Done with contents of CP0_EntryHi. 
Now, get G, V0, D0, V1, D1 :
+ mov edx, Sr(CP0_EntryLo0) ; edx := CP0_EntryLo0
+ mov ebx, Sr(CP0_EntryLo1) ; ebx := CP0_EntryLo1
+ ; Get G using CP0_EntryLo0 and CP0_EntryLo1 :
+ mov eax, edx ; eax := CP0_EntryLo0
+ and eax, ebx ; eax := eax & CP0_EntryLo1
+ and eax, 0x1 ; eax := eax & 1 to get G
+ ; Write the new G(Index) to indexed pos of upper 16 bits of Flag_Reg :
+ add ecx, 16 ; Position of all G's in Flag_Reg
+ shl eax, cl ; Slide new G into final position
+ or Flag_Reg, eax ; Set the new value G(Index)
+ ; Get V0 from CP0_EntryLo0 and write to TLB_Flags :
+ mov ecx, AUX ; ecx := Index
+ mov eax, edx ; eax := CP0_EntryLo0
+ and eax, 0x2 ; eax := eax & 0x2 to get V0 bit
+ shr eax, 1 ; Put V0 into bottom-most pos
+ shl eax, cl ; Slide V0 into final position
+ or TLB_Flags, rax ; Put the new value in V0(Index)
+ ; Get D0 from CP0_EntryLo0 and write to TLB_Flags :
+ add ecx, 16 ; Position where D0 lives:
+ mov eax, edx ; eax := CP0_EntryLo0
+ and eax, 0x4 ; eax := eax & 0x4 to get D0 bit
+ shr eax, 2 ; Put D0 into bottom-most pos
+ shl eax, cl ; Slide D0 into final position
+ or TLB_Flags, rax ; Put the new value in D0(Index)
+ ; Get V1 from CP0_EntryLo1 and write to TLB_Flags :
+ add ecx, 16 ; V1 starts at 32-nd bit
+ mov eax, ebx ; eax := CP0_EntryLo1
+ and eax, 0x2 ; eax := eax & 0x2 to get V1 bit
+ shr eax, 1 ; Put V1 into bottom-most pos
+ shl rax, cl ; Slide V1 into final position
+ or TLB_Flags, rax ; Put the new value in V1(Index)
+ ; Get D1 from CP0_EntryLo1 and write to TLB_Flags :
+ add ecx, 16 ; D1 starts at 48-th bit
+ mov eax, ebx ; eax := CP0_EntryLo1
+ and eax, 0x4 ; eax := eax & 0x4 to get D1 bit
+ shr eax, 2 ; Put D1 into bottom-most pos
+ shl rax, cl ; Slide D1 into final position
+ or TLB_Flags, rax ; Put the new value in D1(Index)
+ ; Transform CP0_EntryLo0 (edx) into PFN0:
+ shr edx, 6
+ and edx, 0xFFFFF
+ shl edx, 12
+ ; Transform CP0_EntryLo1 (ebx) into PFN1:
+ shr ebx, 6
+ and ebx, 0xFFFFF
+ shl ebx, 12
+ ; Store PFN:
+ shl rbx, 32 ; rbx 
:= rbx << 32 (Odd) + and rdx, 0xFFFFFFFF ; rdx := rdx & 0xFFFFFFFF (Even) + or rbx, rdx ; rbx := rbx | rdx (combined PFN) + mov TLB_PFN(AUX64), rbx ; Store PFN + ; Fin. + ret ;-----------------------------------------------------------------------------