Code optimisation or translate to assembly?
Posted: Thu Sep 29, 2016 9:44 pm
				
				I am writing a converter from a ZX Spectrum screen to RGBA.
I need it to be as fast as possible. I created a table of all variants of pixel and color, but I think it can be accelerated further?
Original code, without optimisation:
Perhaps it makes sense to use SSE2 instructions?
			I need it to be as fast as possible. I created a table of all variants of pixel and color, but I think it can be accelerated further?
Code: Select all
; Fills the lookup table at attributes_table with one 32-bit pixel value per
; (pixel bit, attribute byte) pair.
;
; Table layout, 4 bytes per entry:
;   entry index = ((pixels & bit) << 8) | attr
; where (pixels & bit) is either 0 (pixel clear) or the bit mask itself
; (128, 64, ... 1) and attr is 0..255. The highest index written is
; (128 << 8) | 255 = 33023, so the buffer at attributes_table must be at
; least 33024 * 4 = 132096 bytes.
;
; Entry value: $FF in bits 24-31, then Red/Green/Blue of the palette colour
; in bits 16-23 / 8-15 / 0-7.
;
; Uses attributes_table and color() from outside this procedure.
Procedure InitAttributesTable()

  ; Native integers are used for the counters: an unsigned byte (.a) can never
  ; hold a value above 255, so it cannot signal the end of a 0 To 255 loop.
  Protected attr.i, shift.i, mask.i
  Protected paper.i, ink.i
  Protected rgb.i, paperPixel.l, inkPixel.l

  ; The entry depends only on attr and on whether the tested bit is set, so
  ; each entry is written exactly once instead of once per pixel byte value.
  For attr = 0 To 255

    ; Palette indices 0..15: bit 6 of the attribute becomes bit 3 of the index.
    paper = (attr >> 3) & $0F
    ink   = (attr & 7) | ((attr & 64) >> 3)

    rgb        = color(paper)
    paperPixel = $FF000000 | (Red(rgb) << 16) | (Green(rgb) << 8) | Blue(rgb)

    rgb        = color(ink)
    inkPixel   = $FF000000 | (Red(rgb) << 16) | (Green(rgb) << 8) | Blue(rgb)

    ; Pixel bit clear: (pixels & bit) = 0, so the index is just attr.
    PokeL(attributes_table + (attr << 2), paperPixel)

    ; Pixel bit set: one entry per bit mask 128, 64, ... 1.
    For shift = 0 To 7
      mask = 128 >> shift
      PokeL(attributes_table + (((mask << 8) | attr) << 2), inkPixel)
    Next shift

  Next attr

EndProcedure
Code: Select all
; Expands the screen image at *mem into 32-bit pixels, written consecutively
; starting at texture (4 bytes per pixel, 256 pixels per line).
;
; Lines are emitted from y = 191 down to y = 0. For each line, 32 cells are
; read: one pixel byte from the bitmap area and one attribute byte from the
; area starting at offset 6144. Each of the 8 pixel bits (most significant
; first) selects a ready-made 32-bit value from the table at attributes_table.
Procedure scr2texture (texture, *mem)

  Protected row.i, cell.i
  Protected bitmapOffset.l, attrOffset.l
  Protected cellAttr.l, cellBits.l
  Protected *entry

  For row = 191 To 0 Step -1

    ; Offset of this line's 32 pixel bytes (bits of the line number are
    ; shuffled) and of the attribute row shared by each group of 8 lines.
    bitmapOffset = 32 * ((row & $C0) | ((row << 3) & $38) | ((row >> 3) & $07))
    attrOffset   = 6144 + ((row & $F8) << 2)

    For cell = 0 To 31

      cellAttr = PeekA(*mem + attrOffset + cell)
      cellBits = PeekA(*mem + bitmapOffset + cell)

      ; Table entry index is ((bits & mask) << 8) | attr, 4 bytes per entry.
      ; attr << 2 is below 1024 and (bits & mask) << 10 is 0 or >= 1024, so
      ; the two parts can simply be added.
      *entry = attributes_table + (cellAttr << 2)

      PokeL(texture, PeekL(*entry + ((cellBits & 128) << 10))) : texture + 4
      PokeL(texture, PeekL(*entry + ((cellBits & 64)  << 10))) : texture + 4
      PokeL(texture, PeekL(*entry + ((cellBits & 32)  << 10))) : texture + 4
      PokeL(texture, PeekL(*entry + ((cellBits & 16)  << 10))) : texture + 4
      PokeL(texture, PeekL(*entry + ((cellBits & 8)   << 10))) : texture + 4
      PokeL(texture, PeekL(*entry + ((cellBits & 4)   << 10))) : texture + 4
      PokeL(texture, PeekL(*entry + ((cellBits & 2)   << 10))) : texture + 4
      PokeL(texture, PeekL(*entry + ((cellBits & 1)   << 10))) : texture + 4

    Next cell

  Next row

EndProcedure
Code: Select all
  ; Reference (unoptimised) renderer: plots every pixel of the screen image at
  ; *mem with Plot(). The highest offset read from *mem is 6911 (last attribute
  ; byte); color() supplies the 16 palette colours.
  For y = 0 To 191 ; 192 lines of screen

    ; Offset of this line's pixel bytes and of its attribute row.
    pixelLine = 32 * ((y & $C0) | ((y << 3) & $38) | ((y >> 3) & $07))
    attr      = 6144 + ((y & $F8) << 2)

    For x = 0 To 31 ; 32 columns of screen

      ; PeekA reads exactly one unsigned byte. PeekC reads a whole character,
      ; which is two bytes in a Unicode build and would read past offset 6911
      ; on the last cell.
      chr_attr   = PeekA(*mem + attr + x)
      chr_pixels = PeekA(*mem + pixelLine + x)

      ; Palette indices 0..15: bit 6 of the attribute becomes bit 3.
      Paper = (chr_attr >> 3) & $0F
      ink   = (chr_attr & 7) | ((chr_attr & 64) >> 3)

      ; Look the two colours up once per cell instead of once per pixel.
      inkColor   = color( ink )
      paperColor = color( Paper )

      bit = 128
      For z = 0 To 7 ; 8 pixels of column

        If chr_pixels & bit
          Plot( (x << 3) + z, y, inkColor )   ; pixel color
        Else
          Plot( (x << 3) + z, y, paperColor ) ; background color
        EndIf

        bit >> 1
      Next z

    Next x

  Next y
Perhaps it makes sense to use SSE2 instructions?