User:Myask/MyaGrafx: Difference between revisions

From NESdev Wiki
Jump to navigationJump to search
m (re-section for ease of editing)
(→‎Basic Implementation: Pinocchio to real? verilog)
Line 8: Line 8:
So, for the basicmost case... PPU: --10 NT11 11yy yxxx is where attributes normally reside.  To our advantage, the nametable byte precedes the attribute table byte. This allows at least the 8x8 tile to be detected: NT-fetch is PPU: --10 NTYY YyyX XXxx. (A0, A5 are what we want to know, as they're not in the AT fetch; A1,6 are the 16-px accounted for by which two bits within an attribute byte.). For this simple one, instead of storing the attribute data per-tile, it's arranged as usual: four tiles in a 32x32 are specified each byte, just that the game pak will return different byte depending on the X and Y tile evenness.
So, for the basicmost case... PPU: --10 NT11 11yy yxxx is where attributes normally reside.  To our advantage, the nametable byte precedes the attribute table byte. This allows at least the 8x8 tile to be detected: NT-fetch is PPU: --10 NTYY YyyX XXxx. (A0, A5 are what we want to know, as they're not in the AT fetch; A1,6 are the 16-px accounted for by which two bits within an attribute byte.). For this simple one, instead of storing the attribute data per-tile, it's arranged as usual: four tiles in a 32x32 are specified each byte, just that the game pak will return different byte depending on the X and Y tile evenness.
==Basic Implementation==
==Basic Implementation==
//pseudo-verilog
  /* NES 8x8(8x1?)-Attribute graphics mapper
   
    Created by "Myask", April 2015
  always @(negedge PPU_/RD)
    8x8-mapper:
  if (PPU_A[13] & (~& PPUA[9:8])) begin //NT-fetch
      Accepts input at ppu_a = 14'b11_NT?0_yxYY_YXXX
    AT_8X <= PPUA[0];
        (ppu_a[8] == 0 to avoid conflict with palettes)
    AT_8Y <= PPUA[5];
      Outputs at      ppu_a = 14'b10_NT11_11YY_YXXX
  end  
        using the previous nametable fetch to select which of four possible
        attribute byte to return.
always @(posedge M2)
  */
  if (~CPU_W & CPU_A[15] == 1) //only have one visible register bit, so little decoding necessary: CPU$8xxx.
  module mya_at_mapper{
    Mya_ATRAM_Enable <= CPU_D[0];
    system_clk,
    m2,
assign Mya_ATRAM_A[5:0] = PPU_A[5:0]; //don't really need to go through CPLD?
    cpu_rw, //high=r
assign Mya_ATRAM_A[6] = (PPU_A[12] ? PPU_A[6]: AT_8X); //write to PPU  0011 NT*0 YXyy yxxx, A8=0 is to dodge palettes.
    cpu_a,
assign Mya_ATRAM_A[7] = (PPU_A[12] ? PPU_A[7]: AT_8Y);
    cpu_d,
assign Mya_ATRAM_A[9:8] = PPU_A[11:10]; //NT-select. Also don't need to be routed through.
    romsel_n,
assign Mya_ATRAM_WR = PPU_WR & (&PPU_A[13:12]) & ~PPU_A[8];
    irq_n,
assign UL = ~(AT_8X | AT_8Y); //UpperLeft: replace with 0 to not bother using CIRAM for any attributes at all
    exp,
assign Mya_ATRAM_CE = PPU_A[13] & (PPU_A[12] ? (~PPU_A[8]) : PPU_A[8]) & PPU_A[9] & ~UL & Mya_ATRAM_Enable;
    ppu_wr_n,
assign CIRAM_CE = PPUA[13]    //NT/AT only
    ppu_rd_n,
  & (~PPUA[12] | (& PPUA[11:8]))  //disable for 30xx-3Exx to allow the writes to cart
    ppu_a,
  & ((~&PPUA[9:6]) ? UL : 1); //and enable for the UL AT fetches and all NT fetches.
    ppu_d,
assign Mya_ATRAM_D[7:0] = PPU_D[7:0]
    ppu_a13_n,
    ciram_ce_n,
    ciram_a10,
    mya_atram_a,
    mya_atram_d,
    mya_atram_we_n,
    mya_atram_oe_n,
    mya_atram_cs1_n,
    mya_atram_cs2
  };
  //first, cart-edge signals
  input        system_clk;
  input        m2;
  input        cpu_rw;
  input [14:0] cpu_a;
  inout  [7:0] cpu_d;
  input        romsel_n;
 
  output        irq_n;
 
  inout  [9:0] exp;
 
  input        ppu_wr_n;
  input        ppu_rd_n;
  input [13:0] ppu_a;
  inout  [7:0] ppu_d;
  input        ppu_a13_n;
 
  output        ciram_ce_n;
  output        ciram_a10;
  //then cart-internals: first, the (probably 6264)
  output [12:0] mya_atram_a;
  inout  [7:0] mya_atram_d;
  output        mya_atram_we_n;
  output        mya_atram_oe_n;
  output        mya_atram_cs1_n;
  output        mya_atram_cs2;
  //then make all the variable names (sigh) Cart-external:
  wire          system_clk;
  wire          m2;
  wire          cpu_rw;
  wire  [14:0] cpu_a;
  wire    [7:0] cpu_d;
  wire          romsel_n;
  wire          irq_n;
  wire    [9:0] exp;
 
  wire          ppu_wr_n;
  wire          ppu_rd_n;
  wire  [13:0] ppu_a;
  wire    [7:0] ppu_d;
  wire          ppu_a13_n;
 
  wire          ciram_ce_n;
  wire          ciram_a10;
  //cart-internal:
  wire  [12:0] mya_atram_a;
  wire    [7:0] mya_atram_d;
  wire          mya_atram_we_n;
  wire          mya_atram_oe_n;
  wire          mya_atram_cs1_n;
  reg          mya_atram_cs2;
  //chip-internal:
  reg          at_8x;
  reg          at_8y;
  //reg    [2:0] at_finey;
 
  always @(negedge ppu_rd_n)
    if (ppu_a[13] & (~& ppu_a[9:8])) begin //trap nt-fetch
      at_8x <= ppu_a[0]; //and store the 8px-level X
      at_8y <= ppu_a[5]; //and Y-coordinates.
    end //trap nt-fetch
 
  always @(posedge m2)
    if (~cpu_rw & cpu_a[15] == 1)  
      mya_atram_cs2 <= cpu_d[0];
  //only have one visible register bit, so little decoding necessary: CPU$8xxx.
 
  assign mya_atram_a[5:0] = ppu_a[5:0]; //don't really need to go through CPLD?
  assign mya_atram_a[6] = (ppu_a[12] ? ppu_a[6]: at_8x);  
  assign mya_atram_a[7] = (ppu_a[12] ? ppu_a[7]: at_8y);
  assign mya_atram_a[9:8] = ppu_a[11:10];  
  //ppu_a[11:10] = NT-select. also don't need to be routed through CPLD
  assign mya_atram_a[12:10] = 3'b000; //Doing fineY-AT'd need a new write port
  assign mya_atram_wr = ~ppu_wr_n & (&ppu_a[13:12]) & ~ppu_a[8];
  assign ul = ~(at_8x | at_8y);  
  //replace ul with 0 to not bother using ciram for any attributes at all
  assign mya_atram_oe_n = ~( ppu_a[13]
    & (& ppu_a[9:6]) //Nametable: @PPU 16'b0010_xx11_11xx_xxxx
    & ~ul & ~ppu_rd_n);
  assign mya_atram_we_n = ~( (& ppu_a[13:12])  & ~ppu_a[8] );
    //Write-port: @PPU 14'b11_NT?0_YXYY_YXXX: ~a[8] to avoid palettespace
  assign mya_atram_cs1_n = gnd;
  //assign mya_atram_cs2 = mya_atram_enable;
  assign ciram_ce_n = ~(ppu_a[13]    //nt/at only
    & (~ppu_a[12] | (& ppu_a[11:8]))   
    & ((~& ppu_a[9:6]) ? ul : 1) );//enable for the ul AT- and all NT- fetches.
 
  assign mya_atram_d[7:0] = ppu_d[7:0];//really don't need to go through cpld.
 
  endmodule //mya_at_mapper
 
==Less basic==
==Less basic==
This mode of writing does not work if we want to extend to 8x1 attributes; there are three bits of attribute space to add and we only have three choices (00, 01, 10) of PPUADDR 8-9 for NT3. Even in two-screen mirroring, there is a small problem: but as we are relying on CIRAM for the first sliver of each section, one does not need to have duplicate write-access to those. One could remap $38** to what would have been in $3F**. Four-screen proves more problematic. Also problematic is determining the fine-Y. Brute-force method is to snoop for writes to PPU_SCROLL, as well as reads from PPU_STATUS and writes to PPU_ADDR to know the high-byte latch status. If we don't want to allow raster effects, which seems like a short-sighted decision, perhaps one could somehow divine where to begin from the dummy-fetch prerender scanline. In any case, it would basically require a scanline counter, at which point one would just add a few more bits of state to get a useful scanline-type interrupt, though if it shares the low three bits with the rendering portion it would be more of a NT-relative Y-coordinate interrupt...
This mode of writing does not work if we want to extend to 8x1 attributes; there are three bits of attribute space to add and we only have three choices (00, 01, 10) of PPUADDR 8-9 for NT3. Even in two-screen mirroring, there is a small problem: but as we are relying on CIRAM for the first sliver of each section, one does not need to have duplicate write-access to those. One could remap $38** to what would have been in $3F**. Four-screen proves more problematic. Also problematic is determining the fine-Y. Brute-force method is to snoop for writes to PPU_SCROLL, as well as reads from PPU_STATUS and writes to PPU_ADDR to know the high-byte latch status. If we don't want to allow raster effects, which seems like a short-sighted decision, perhaps one could somehow divine where to begin from the dummy-fetch prerender scanline. In any case, it would basically require a scanline counter, at which point one would just add a few more bits of state to get a useful scanline-type interrupt, though if it shares the low three bits with the rendering portion it would be more of a NT-relative Y-coordinate interrupt...

Revision as of 00:13, 23 April 2015

"Perhaps someone should mock up a specification for a CPLD that only provides 8x8 attributes and nothing else."--Tepples

Sources

PPU rendering, Cartridge connector

Basic

As the cart only has CIRAM A10 piped through, one can't just remap part of CIRAM to supply the 256 bytes of attribute one needs for page 0.

So, for the most basic case... PPU: --10 NT11 11yy yxxx is where attributes normally reside. To our advantage, the nametable byte fetch precedes the attribute table byte fetch. This allows at least the 8x8 tile to be detected: NT-fetch is PPU: --10 NTYY YyyX XXxx. (A0 and A5 are what we want to know, as they're not in the AT fetch; A1 and A6 are the 16-px position, already accounted for by which two bits within an attribute byte are used.) For this simple one, instead of storing the attribute data per-tile, it's arranged as usual: four tiles in a 32x32 are specified by each byte, just that the game pak will return a different byte depending on the X and Y tile evenness.

Basic Implementation

 /* NES 8x8(8x1?)-Attribute graphics mapper
    Created by "Myask", April 2015
    8x8-mapper:
      Accepts input at ppu_a = 14'b11_NT?0_yxYY_YXXX 
         (ppu_a[8] == 0 to avoid conflict with palettes)
      Outputs at       ppu_a = 14'b10_NT11_11YY_YXXX
        using the previous nametable fetch to select which of four possible
        attribute bytes to return.
 
    Operation:
      - Every nametable fetch latches ppu_a[0] (tile X parity) and ppu_a[5]
        (tile Y parity) into at_8x / at_8y.
      - On the following attribute fetch, those two bits become ATRAM address
        bits 6 and 7. The upper-left tile (both parities 0, "ul") is served by
        CIRAM as usual; the other three are served by the attribute RAM.
      - A CPU write to $8000-$FFFF stores D0 into mya_atram_cs2, the RAM's
        active-high chip select, i.e. the enable bit.
 
    NOTE(review): irq_n and ciram_a10 are declared but not driven by this
    module; mirroring (ciram_a10) still has to be chosen by the integrator.
    NOTE(review): CIRAM is disabled on non-ul attribute fetches even while
    the enable bit is 0 -- confirm that is intended.
 */
 module mya_at_mapper(
   system_clk,
   m2,
   cpu_rw, //high=r
   cpu_a,
   cpu_d,
   romsel_n,
   irq_n,
   exp,
   ppu_wr_n,
   ppu_rd_n,
   ppu_a,
   ppu_d,
   ppu_a13_n,
   ciram_ce_n,
   ciram_a10,
   mya_atram_a,
   mya_atram_d,
   mya_atram_we_n,
   mya_atram_oe_n,
   mya_atram_cs1_n,
   mya_atram_cs2
 );
 //first, cart-edge signals
 input         system_clk;
 input         m2;
 input         cpu_rw;
 input  [14:0] cpu_a;
 inout   [7:0] cpu_d;
 input         romsel_n;
 
 output        irq_n;
 
 inout   [9:0] exp;
 
 input         ppu_wr_n;
 input         ppu_rd_n;
 input  [13:0] ppu_a;
 inout   [7:0] ppu_d;
 input         ppu_a13_n;
 
 output        ciram_ce_n;
 output        ciram_a10;
 //then cart-internals: first, the (probably 6264)
 output [12:0] mya_atram_a;
 inout   [7:0] mya_atram_d;
 output        mya_atram_we_n;
 output        mya_atram_oe_n;
 output        mya_atram_cs1_n;
 output        mya_atram_cs2;
 //then make all the variable names (sigh) Cart-external:
 wire          system_clk;
 wire          m2;
 wire          cpu_rw;
 wire   [14:0] cpu_a;
 wire    [7:0] cpu_d;
 wire          romsel_n;
 wire          irq_n;
 wire    [9:0] exp;
 
 wire          ppu_wr_n;
 wire          ppu_rd_n;
 wire   [13:0] ppu_a;
 wire    [7:0] ppu_d;
 wire          ppu_a13_n;
 
 wire          ciram_ce_n;
 wire          ciram_a10;
 //cart-internal:
 wire   [12:0] mya_atram_a;
 wire    [7:0] mya_atram_d;
 wire          mya_atram_we_n;
 wire          mya_atram_oe_n;
 wire          mya_atram_cs1_n;
 reg           mya_atram_cs2;
 //chip-internal:
 reg           at_8x;
 reg           at_8y; 
 //reg     [2:0] at_finey;
 wire          at_region;    //address is in an attribute table (a[9:6]==1111)
 wire          ul;           //last NT fetch was an upper-left (even X, even Y) tile
 wire          mya_atram_wr; //active-high: PPU is writing through the write port
 
 //Attribute tables occupy the last 64 bytes of each 1 KiB nametable.
 assign at_region = ppu_a[13] & (&ppu_a[9:6]);
 
 always @(negedge ppu_rd_n)
   //trap nt-fetch: anything in $2000+ that is NOT an attribute byte.
   //Testing all of a[9:6] (not just a[9:8]) keeps tile rows 24-29 updating
   //the latch; those rows have a[9:8]==11 but are still nametable bytes.
   if (ppu_a[13] & ~(&ppu_a[9:6])) begin
     at_8x <= ppu_a[0]; //and store the 8px-level X
     at_8y <= ppu_a[5]; //and Y-coordinates.
   end //trap nt-fetch
 
 //only have one visible register bit, so little decoding necessary: CPU$8xxx.
 //CPU A15 is not on the cart edge (cpu_a is [14:0]); /ROMSEL carries it,
 //being low only while M2 is high and A15 is high. Sample at the falling
 //edge of M2, the end of the write cycle, when the write data is valid.
 always @(negedge m2)
   if (~cpu_rw & ~romsel_n) 
     mya_atram_cs2 <= cpu_d[0]; 
 
 assign mya_atram_a[5:0] = ppu_a[5:0]; //don't really need to go through CPLD?
 //Write port ($3xxx) supplies a[7:6] directly; fetches use the latched parity.
 assign mya_atram_a[6] = (ppu_a[12] ? ppu_a[6]: at_8x); 
 assign mya_atram_a[7] = (ppu_a[12] ? ppu_a[7]: at_8y);
 assign mya_atram_a[9:8] = ppu_a[11:10]; 
 //ppu_a[11:10] = NT-select. also don't need to be routed through CPLD
 assign mya_atram_a[12:10] = 3'b000; //Doing fineY-AT'd need a new write port
 //Write-port: @PPU 14'b11_NT?0_YXYY_YXXX: ~a[8] to avoid palettespace
 assign mya_atram_wr = ~ppu_wr_n & (&ppu_a[13:12]) & ~ppu_a[8];
 assign ul = ~(at_8x | at_8y); 
 //replace ul with 0 to not bother using ciram for any attributes at all
 assign mya_atram_oe_n = ~( at_region //Attribute table: @PPU 14'b1x_xx11_11xx_xxxx
   & ~ul & ~ppu_rd_n);
 //Gate WE with the PPU write strobe so that the address alone (e.g. a read
 //of $3xxx) can never write-enable the RAM.
 assign mya_atram_we_n = ~mya_atram_wr;
 assign mya_atram_cs1_n = 1'b0; //permanently selected; cs2 is the real enable
 //assign mya_atram_cs2 = mya_atram_enable;
 assign ciram_ce_n = ~(ppu_a[13]    //nt/at only
   & (~ppu_a[12] | (& ppu_a[11:8]))  //off for $3000-$3EFF so writes go to cart
   & (at_region ? ul : 1'b1) );//enable for the ul AT- and all NT- fetches.
 
 //Data path between the PPU bus and the attribute RAM. Each side is driven
 //only in its own direction, so the RAM can answer reads without contention.
 //(really don't need to go through cpld if the two buses are simply wired
 //together on the board.)
 assign mya_atram_d[7:0] = mya_atram_wr    ? ppu_d[7:0]       : 8'bzzzz_zzzz;
 assign ppu_d[7:0]       = ~mya_atram_oe_n ? mya_atram_d[7:0] : 8'bzzzz_zzzz;
 
 endmodule //mya_at_mapper

Less basic

This mode of writing does not work if we want to extend to 8x1 attributes; there are three bits of attribute space to add and we only have three choices (00, 01, 10) of PPUADDR 8-9 for NT3. Even in two-screen mirroring there is a small problem, but as we are relying on CIRAM for the first sliver of each section, one does not need to have duplicate write-access to those. One could remap $38** to what would have been in $3F**. Four-screen proves more problematic. Also problematic is determining the fine-Y. The brute-force method is to snoop for writes to PPU_SCROLL, as well as reads from PPU_STATUS and writes to PPU_ADDR to know the high-byte latch status. If we don't want to allow raster effects, which seems like a short-sighted decision, perhaps one could somehow divine where to begin from the dummy-fetch prerender scanline. In any case, it would basically require a scanline counter, at which point one would just add a few more bits of state to get a useful scanline-type interrupt, though if it shares the low three bits with the rendering portion it would be more of a NT-relative Y-coordinate interrupt...