Diferencias

Muestra las diferencias entre dos versiones de la página.

--- cursos:ensamblador:gfx2_direccionamiento [21-01-2024 17:07] – [Cálculo de posiciones de pixeles mediante composición] sromero
+++ cursos:ensamblador:gfx2_direccionamiento [21-01-2024 17:22] (actual) – [Optimizaciones para Get_Pixel_Offset_HR] sromero
@@ Línea 970: / Línea 970: @@
     ;--- Fin nuevo código ---
-    ret
-</code>
- En ocasiones se puede reescribir una rutina de otra forma para ser ligeramente más eficiente: \\ //Dean Belfield//, en su página //L Break Into Program// nos proporciona la siguiente rutina optimizada que requiere 117 t-estados, a costa de no devolvernos la posición relativa del pixel:
-\\
-<code z80>
-; Get screen address
-;  B = Y pixel position
-;  C = X pixel position
-; Returns address in HL:
-;   H = 0 1 0 Y7 Y6 Y2 Y1 Y0
-;   L = Y5 Y4 Y3 X7 X6 X5 X4 X3
-; Modifies A and flags; B and C are only read.
-; The bit position of the pixel inside the byte (X2..X0) is not returned.
-;
-Get_Pixel_Address:
-    ld a, b                  ; Calculate Y2,Y1,Y0
-    and %00000111            ; Mask out unwanted bits
-    or %01000000             ; Set base address of screen
-    ld h, a                  ; Store in H
-    ld a, b                  ; Calculate Y7,Y6
-    rra                      ; Shift to position
-    rra                      ; (rra rotates through carry; the mask
-    rra                      ;  below discards the rotated-in bits)
-    and %00011000            ; Mask out unwanted bits
-    or h                     ; OR with Y2,Y1,Y0
-    ld h, a                  ; Store in H
-    ld a, b                  ; Calculate Y5,Y4,Y3
-    rla                      ; Shift to position
-    rla
-    and %11100000            ; Mask out unwanted bits
-    ld l, a                  ; Store in L
-    ld a, c                  ; Calculate column bits X7..X3 (pixel X / 8)
-    rra                      ; Shift into position
-    rra
-    rra
-    and %00011111            ; Mask out unwanted bits
-    or l                     ; OR with Y5,Y4,Y3
-    ld l, a                  ; Store in L
-    ret
-</code>
-Finalmente, //David Black// en su web //Overtaken by events// nos ofrece la siguiente rutina de 105 t-estados y 26 bytes:
-\\
-<code z80>
-; Get screen address
-;  B = Y pixel position
-;  C = X position as a byte column (0-31), not a pixel X:
-;      C is used unshifted, only masked to its low 5 bits
-; Returns address in HL:
-;   H = 0 1 0 Y7 Y6 Y2 Y1 Y0
-;   L = Y5 Y4 Y3 C4 C3 C2 C1 C0
-; Modifies A and flags; B and C are only read.
-Get_Screen_Address:
-    ld a,b 	                 ; Work on the upper byte of the address
-    and %00000111                ; a = 0 0 0 0 0 Y2 Y1 Y0
-    or %01000000                 ; first three bits are always 010
-    ld h,a 	                 ; store in h
-    ld a,b 	                 ; get bits Y7, Y6
-    rra 	                 ; move them into place
-    rra
-    rra
-    and %00011000                ; mask off
-    or h 	                 ; a = 0 1 0 Y7 Y6 Y2 Y1 Y0
-    ld h,a 	                 ; calculation of h is now complete
-    ld a,b 	                 ; get y
-    rla
-    rla
-    and %11100000                ; a = Y5 Y4 Y3 0 0 0 0 0
-    ld l,a 	                 ; store in l
-    ld a,c
-    and %00011111                ; a = 0 0 0 C4 C3 C2 C1 C0
-    or l 	                 ; a = Y5 Y4 Y3 C4 C3 C2 C1 C0
-    ld l,a 	                 ; calculation of l is complete
     ret
 </code>
@@ Línea 2002: / Línea 1934: @@
  Llamando a la anterior rutina con unas coordenadas (c,f) en C y B obtenemos la dirección de memoria de imagen (HL) y de atributo (DE) de dicho carácter, así como el valor del atributo en sí mismo (A).
+\\
+===== Optimizaciones para Get_Pixel_Offset_HR =====
+ En ocasiones se puede reescribir una rutina de otra forma para que sea ligeramente más eficiente, y las rutinas relacionadas con los gráficos (tanto "dibujar" gráficos como calcular la posición de dibujado) son firmes candidatas a ser optimizadas todo lo posible.
+\\ //Dean Belfield//, en su página //L Break Into Program// nos proporciona la siguiente rutina optimizada para obtener la dirección de memoria de un pixel dadas sus coordenadas (x,y) que requiere 117 t-estados, a costa de no devolvernos la posición relativa del pixel:
+\\
+<code z80>
+; Get screen address - by Dean Belfield
+;
+; Builds the address of the screen byte that holds pixel (X,Y):
+;   H = 0 1 0 Y7 Y6 Y2 Y1 Y0
+;   L = Y5 Y4 Y3 X7 X6 X5 X4 X3
+;
+;  B = Y pixel position
+;  C = X pixel position
+;  Returns address in HL
+;  Modifies A and flags; B and C are only read.
+;  The bit position of the pixel inside the byte (X2..X0) is not returned.
+Get_Pixel_Address:
+    ld a, b                  ; Calculate Y2,Y1,Y0
+    and %00000111            ; Mask out unwanted bits
+    or %01000000             ; Set base address of screen
+    ld h, a                  ; Store in H
+    ld a, b                  ; Calculate Y7,Y6
+    rra                      ; Shift to position
+    rra                      ; (rra rotates through carry; the mask
+    rra                      ;  below discards the rotated-in bits)
+    and %00011000            ; Mask out unwanted bits
+    or h                     ; OR with Y2,Y1,Y0
+    ld h, a                  ; Store in H
+    ld a, b                  ; Calculate Y5,Y4,Y3
+    rla                      ; Shift to position
+    rla
+    and %11100000            ; Mask out unwanted bits
+    ld l, a                  ; Store in L
+    ld a, c                  ; Calculate column bits X7..X3 (pixel X / 8)
+    rra                      ; Shift into position
+    rra
+    rra
+    and %00011111            ; Mask out unwanted bits
+    or l                     ; OR with Y5,Y4,Y3
+    ld l, a                  ; Store in L
+    ret
+</code>
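+Finalmente, //David Black// en su web //Overtaken by events// nos ofrece la siguiente rutina de 105 t-estados y 26 bytes. El ahorro se debe a que el registro C se usa sin desplazar, por lo que debe contener la columna (0-31) y no la coordenada X en pixeles: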
+\\
+<code z80>
+; Get screen address - by David Black
+;  B = Y pixel position
+;  C = X position as a byte column (0-31), not a pixel X:
+;      C is used unshifted, only masked to its low 5 bits
+; Returns address in HL:
+;   H = 0 1 0 Y7 Y6 Y2 Y1 Y0
+;   L = Y5 Y4 Y3 C4 C3 C2 C1 C0
+; Modifies A and flags; B and C are only read.
+Get_Screen_Address:
+    ld a,b 	                 ; Work on the upper byte of the address
+    and %00000111                ; a = 0 0 0 0 0 Y2 Y1 Y0
+    or %01000000                 ; first three bits are always 010
+    ld h,a 	                 ; store in h
+    ld a,b 	                 ; get bits Y7, Y6
+    rra 	                 ; move them into place
+    rra
+    rra
+    and %00011000                ; mask off
+    or h 	                 ; a = 0 1 0 Y7 Y6 Y2 Y1 Y0
+    ld h,a 	                 ; calculation of h is now complete
+    ld a,b 	                 ; get y
+    rla
+    rla
+    and %11100000                ; a = Y5 Y4 Y3 0 0 0 0 0
+    ld l,a 	                 ; store in l
+    ld a,c
+    and %00011111                ; a = 0 0 0 C4 C3 C2 C1 C0
+    or l 	                 ; a = Y5 Y4 Y3 C4 C3 C2 C1 C0
+    ld l,a 	                 ; calculation of l is complete
+    ret
+</code>
+Utilizando tablas, en esta misma web podemos ver las siguientes 2 aproximaciones de //Patrick Prendergast//, basadas en organizar los datos en memoria alineándolos de forma que "desperdiciamos" algo de memoria a cambio de que las rutinas sean más rápidas:
+<code z80>
+; Option 1: a single 192-byte lookup table indexed by Y.
+; Store the LUT table in the format "y5 y4 y3 y7 y6 y2 y1 y0":
+; the lower 5 bits are where you need them for the high byte of
+; the address (its upper 3 bits are replaced with 010 anyway) and
+; the upper 3 bits are where you need them to OR with X.
+; The table only needs 192 bytes, but it MUST start on a 256-byte
+; boundary: the code points HL at the entry with H = tbl >> 8 and
+; L = y, so the low byte of tbl has to be 0.
+; You'd be looking at 69 cycles per request and 16 + 192 bytes for
+; the code + table (plus whatever padding the alignment inserts).
+;
+; By Patrick Prendergast.
+; b = y, c = x (x is ORed in unshifted and unmasked, so it must be 0-31)
+; Returns the address in hl. Modifies a and flags.
+getScreenAddress:
+    ld h,tbl >> 8            ; hl = tbl + y (valid only if tbl is page-aligned)
+    ld l,b
+    ld h,(hl)                ; h = y5 y4 y3 y7 y6 y2 y1 y0
+    ld a,%11100000
+    and h                    ; a = y5 y4 y3 0 0 0 0 0
+    or c                     ; a = y5 y4 y3 + column
+    ld l,a                   ; low byte of the address is complete
+    ld a,%00011111
+    and h                    ; a = 0 0 0 y7 y6 y2 y1 y0
+    or %01000000             ; a = 0 1 0 y7 y6 y2 y1 y0
+    ld h,a                   ; high byte of the address is complete
+    ret
+    ALIGN 256
+tbl: ; y5 y4 y3 y7 y6 y2 y1 y0
+    .db 0,1,2,3,4,5,6,7,32,33...
+; Option 2: if you are willing to [potentially] sacrifice
+; some space for speed, you can divide the table so that
+; you have the low and high bytes of your address list in
+; 2 independent tables and have them both page aligned -
+; with the low byte first in memory.
+; This would completely remove the need to calc y*2 to get
+; to your table offset.
+; This would require 64 bytes of padding after the 1st table
+; (due to both tables being page aligned) meaning you would
+; need 448 bytes all up. That being said the 64 bytes of
+; padding space is not needed so you can include any other
+; data you might need there so it's not wasted.
+; Then you would only need 47 cycles to lookup your address!
+;
+; By Patrick Prendergast.
+; b = y, c = x (x is ORed in unshifted and unmasked, so it must be 0-31)
+; Returns the address in hl. Modifies a and flags.
+; NOTE: this reuses the label of the single-table version above;
+; the two are alternatives, so only one of them can be assembled.
+getScreenAddress:
+    ld h,tblLow >> 8         ; hl = tblLow + y (tblLow is page-aligned)
+    ld l,b
+    ld a,(hl)                ; a = low byte of the row address
+    inc h                    ; hl = tblHigh + y; relies on tblHigh
+                             ; starting exactly 256 bytes after tblLow
+    ld h,(hl)                ; h = high byte, already in final form
+    or c                     ; merge the column into the low byte
+    ld l,a
+    ret
+    ALIGN 256
+tblLow: ; (ADDR & 0xFF)
+    .db 0,0,0,0,0,0,0,0,32,32,32...
+    ALIGN 256
+tblHigh: ; (ADDR >> 8)
+    .db 64,65,66,67,68,69,70,71,64,65,66...
+</code>
+Estas rutinas son realmente rápidas, teniendo la segunda un coste de sólo 47 t-estados por cálculo de dirección, a costa de ocupar más espacio por separar la parte alta y la parte baja de la tabla precalculada, y alinearlas en memoria en un múltiplo de 256 para evitar cálculos.
 \\
@@ Línea 2020: / Línea 2094: @@
   * [[http://www.worldofspectrum.org/faq/reference/z80reference.htm|Z80 Reference de WOS]].
   * [[http://www.speccy.org/trastero/cosas/Fichas/fichas.htm|Microfichas de CM de MicroHobby]].
+  * [[https://www.overtakenbyevents.com/lets-talk-about-the-zx-specrum-screen-layout-part-three/|Overtaken by Events - the screen layout (III)]].
 \\
 **[ [[.:indice|⬉]] | [[.:gfx1_vram|⬅]] | [[.:gfx3_sprites_lowres|➡]] ]**