/* vim:set ft=cpp ts=4 sw=4 sts=4 sta ai bs=2: */
// DDRender.cpp: implementation of the CDDRender class.
//
//////////////////////////////////////////////////////////////////////

#include "stdafx.h"
#include "DDRender.h"

#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif



//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

// Puts the renderer into a well-defined "not initialized" state.
// No DirectDraw objects are created here; that is the job of Init().
CDDRender::CDDRender()
{
	// COM interface pointers. DestroyDDraw() releases whichever are non-NULL.
	m_pDD          = NULL;
	m_pDDSPrimary  = NULL;
	m_pDDSVideo    = NULL;
	m_pClipper     = NULL;

	// Target window and frame geometry; real values are supplied by Init().
	m_hVideoWnd     = NULL;
	m_nWidth        = 0;
	m_nHeight       = 0;
	m_nDoubleWidth  = 0;
	m_nMemoryHeight = 0;

	// Give the blit flags a defined starting value.
	m_BltFlags = 0;

	// Source rectangle inside the video surface; recomputed on every Render().
	SetRectEmpty(&m_rcMemory);

	// Render() reads m_rcDisplay[nCh] even when SetDisplaySize() has never been
	// called, so every slot starts out as an empty rectangle.
	for (UINT i = 0; i < MAX_CHANNEL; i++)
		SetRectEmpty(&m_rcDisplay[i]);

	m_bIsInitialized = FALSE;
	m_bProgressive   = TRUE;
}

// Releases every DirectDraw object the renderer still holds.
CDDRender::~CDDRender()
{
	this->DestroyDDraw();
}

// Releases all DirectDraw objects owned by this renderer and flags it as
// uninitialized. Every pointer is NULL afterwards, so a second call finds
// nothing left to release.
//
// Release order: video surface, clipper, primary surface, DirectDraw object.
void CDDRender::DestroyDDraw()
{
	// Off-screen surface that receives the video frames.
	if (NULL != m_pDDSVideo)
	{
		m_pDDSVideo->Release();
	}
	m_pDDSVideo = NULL;

	// Clipper attached to the primary surface.
	if (NULL != m_pClipper)
	{
		m_pClipper->Release();
	}
	m_pClipper = NULL;

	// Primary (screen) surface.
	if (NULL != m_pDDSPrimary)
	{
		m_pDDSPrimary->Release();
	}
	m_pDDSPrimary = NULL;

	// The DirectDraw object itself goes last.
	if (NULL != m_pDD)
	{
		m_pDD->Release();
	}
	m_pDD = NULL;

	m_bIsInitialized = FALSE;
}

// Converts a GDI COLORREF into the raw pixel value the given surface uses for
// that colour.
//
// The colour is written to pixel (0,0) through GDI, the surface is locked and
// the raw value is read back, then the original pixel is restored.
//
// pdds : surface whose pixel format is used for the conversion.
// rgb  : colour to convert. With CLR_INVALID nothing is written and the value
//        currently stored at pixel (0,0) is returned instead.
//
// Returns the raw pixel value (masked to the surface bit depth when that is
// below 32), or CLR_INVALID when pdds is NULL or the surface cannot be locked.
DWORD CDDRender::DDColorMatch(IDirectDrawSurface7 * pdds, COLORREF rgb)
{
	COLORREF                rgbSaved    = CLR_INVALID;
	BOOL                    bPixelSaved = FALSE;
	HDC                     hdc;
	DWORD                   dw = CLR_INVALID;
	DDSURFACEDESC2          ddsd;
	HRESULT                 hres;

	if (pdds == NULL)
		return dw;

	//
	//  Use GDI SetPixel to color match for us
	//
	if (rgb != CLR_INVALID && pdds->GetDC(&hdc) == DD_OK)
	{
		rgbSaved = GetPixel(hdc, 0, 0);     // Save current pixel value
		SetPixel(hdc, 0, 0, rgb);           // Set our value
		pdds->ReleaseDC(hdc);
		bPixelSaved = TRUE;                 // Only now is there something to restore
	}

	//
	// Now lock the surface so we can read back the converted color
	//
	ZeroMemory(&ddsd, sizeof(ddsd));        // do not hand Lock() stack garbage
	ddsd.dwSize = sizeof(ddsd);
	while ((hres = pdds->Lock(NULL, &ddsd, 0, NULL)) == DDERR_WASSTILLDRAWING)
		;                                   // retry while the surface is busy

	if (hres == DD_OK)
	{
		dw = *(DWORD *) ddsd.lpSurface;     // Get DWORD
		// Mask it to bpp. The "< 32" guard keeps the shift count in range.
		if (ddsd.ddpfPixelFormat.dwRGBBitCount < 32)
			dw &= (1UL << ddsd.ddpfPixelFormat.dwRGBBitCount) - 1;
		pdds->Unlock(NULL);
	}

	//
	//  Now put the color that was there back -- but only when the first step
	//  really saved one. Otherwise rgbSaved holds no pixel from the surface.
	//
	if (bPixelSaved && pdds->GetDC(&hdc) == DD_OK)
	{
		SetPixel(hdc, 0, 0, rgbSaved);
		pdds->ReleaseDC(hdc);
	}
	return dw;
}

// Creates the DirectDraw objects needed to show video in a window:
// the DirectDraw interface, the primary surface with a clipper bound to
// hwnd, and an off-screen YUY2 surface of nwidth x nheight in video memory.
//
// hwnd    : window the video is drawn into (also used for the clipper).
// nwidth  : frame width in pixels.
// nheight : frame height in pixels. Heights of 480 and above select the
//           de-interlacing path in Render(); smaller frames are copied as-is.
//
// Returns TRUE when everything was created, FALSE otherwise. On failure every
// object created so far is released again. Calling Init() on an already
// initialized renderer first releases the previous objects.
BOOL CDDRender::Init( HWND hwnd, int nwidth, int nheight)
{
	// Drop whatever an earlier Init() created; otherwise the interface
	// pointers below would be overwritten and leaked, and a failed
	// re-initialisation would still report the old TRUE.
	DestroyDDraw();

	m_hVideoWnd = hwnd;
	m_nWidth = nwidth;
	m_nHeight = nheight;

	m_bProgressive = (m_nHeight >= 480) ? FALSE : TRUE;

	m_nDoubleWidth = m_nWidth*2;		// YUY2: two bytes per pixel
	m_nMemoryHeight = m_nHeight;

	if (nwidth <= 0 || nheight <= 0)
	{
		TRACE("Invalid video size passed to Init.\n");
		return FALSE;
	}

	DDSURFACEDESC2       ddsd;
	DDPIXELFORMAT        ddpfBltFormat =
	{sizeof(DDPIXELFORMAT), DDPF_FOURCC,MAKEFOURCC('Y','U','Y','2'),0,0,0,0,0}; // YUY2

	// Create DirectDraw interface
	if( DD_OK != DirectDrawCreateEx(NULL, (VOID**)&m_pDD, IID_IDirectDraw7, NULL) )
	{
		m_pDD = NULL;
		TRACE("Failed to create DirectDraw interface\n");
		return FALSE;
	}

	// Set Cooperative Level
	if( DD_OK != m_pDD->SetCooperativeLevel(m_hVideoWnd, DDSCL_NORMAL) )
	{
		TRACE("Failed to set cooperative level\n");
		DestroyDDraw();
		return FALSE;
	}

	// Create Primary Surface
	memset(&ddsd, 0, sizeof(ddsd));
	ddsd.dwSize = sizeof(ddsd);
	ddsd.dwFlags = DDSD_CAPS;
	ddsd.ddsCaps.dwCaps = DDSCAPS_PRIMARYSURFACE;
	if( DD_OK != m_pDD->CreateSurface(&ddsd, &m_pDDSPrimary, NULL) )
	{
		m_pDDSPrimary = NULL;
		TRACE("Failed to primary surface");
		DestroyDDraw();
		return FALSE;
	}

	// Create a clipper for the primary surface. Every step is checked:
	// m_pClipper must not be used when CreateClipper() failed.
	if( DD_OK != m_pDD->CreateClipper(0, &m_pClipper, NULL) )
	{
		m_pClipper = NULL;
		TRACE("Failed to create clipper\n");
		DestroyDDraw();
		return FALSE;
	}
	if( DD_OK != m_pClipper->SetHWnd(0, m_hVideoWnd) ||
		DD_OK != m_pDDSPrimary->SetClipper(m_pClipper) )
	{
		TRACE("Failed to attach clipper\n");
		DestroyDDraw();
		return FALSE;
	}

	//--------------------------------------------------------------------------
	// Create Offscreen Surface for video
	ZeroMemory(&ddsd, sizeof(ddsd));

	ddsd.dwSize = sizeof(ddsd);
	ddsd.dwFlags = DDSD_CAPS | DDSD_HEIGHT | DDSD_WIDTH | DDSD_PIXELFORMAT;

	ddsd.dwWidth = m_nWidth;          // Surface memory width...
	ddsd.dwHeight = m_nMemoryHeight;  // Surface memory height...

	ddsd.ddsCaps.dwCaps = DDSCAPS_OFFSCREENPLAIN | DDSCAPS_VIDEOMEMORY;
	ddsd.ddpfPixelFormat = ddpfBltFormat;

	HRESULT hResult = m_pDD->CreateSurface(&ddsd, &m_pDDSVideo, NULL);
	if ( hResult != DD_OK )
	{
		m_pDDSVideo = NULL;
		if(hResult == DDERR_OUTOFVIDEOMEMORY)
			TRACE("DirectDraw does not have enough display memory to perform the operation.");
		else if(hResult == DDERR_INVALIDPIXELFORMAT)
			TRACE("The pixel format was invalid as specified.");
		else
			TRACE("Failed to offscreen surface for video.");
		DestroyDDraw();
		return FALSE;
	}

	// Setup ColorKey: magenta, converted once to the primary surface format.
	memset(&m_DDBltFx, 0, sizeof(m_DDBltFx));
	m_DDBltFx.dwSize = sizeof(m_DDBltFx);

	const DWORD dwColorKey = DDColorMatch(m_pDDSPrimary, RGB(255, 0, 255));
	m_DDBltFx.ddckDestColorkey.dwColorSpaceLowValue = dwColorKey;
	m_DDBltFx.ddckDestColorkey.dwColorSpaceHighValue = dwColorKey;

	// Assigned rather than OR-ed so the result does not depend on whatever
	// m_BltFlags held before.
	m_BltFlags = DDBLT_DDFX | DDBLT_KEYDESTOVERRIDE;

	m_bIsInitialized = TRUE;
	return m_bIsInitialized;
}

// Copies one YUY2 frame into the off-screen video surface and blits it to the
// display rectangle of channel nCh on the primary surface.
//
// nCh     : channel index; selects m_rcDisplay[nCh] as the target rectangle
//           (client coordinates of m_hVideoWnd). Must be below MAX_CHANNEL.
// pbuffer : source frame, read as m_nMemoryHeight lines of m_nDoubleWidth
//           bytes each, stored back to back.
//
// When m_bProgressive is FALSE, every second line (1, 3, 5, ...) is filtered:
// depending on its two neighbouring lines it is either kept or replaced by
// their average (MMX code below). Otherwise the frame is copied line by line.
//
// Returns -1 when the renderer is not initialized or an argument is unusable,
// 0 when the video surface could not be locked (both surfaces are restored so
// a later call can succeed), TRUE after the frame was copied and blitted.
int CDDRender::Render(UINT nCh, unsigned char *pbuffer)
{
	POINT pt = {0,0};
	RECT rcDisplay;
	DDSURFACEDESC2  ddsd;
	unsigned char *src, *dst;
	int StepSize;

	if(!m_bIsInitialized)		return -1;

	// m_rcDisplay only has MAX_CHANNEL entries, and a missing frame or an
	// empty geometry would make the copies below read or write out of range.
	if(nCh >= MAX_CHANNEL || pbuffer == NULL)			return -1;
	if(m_nMemoryHeight <= 0 || m_nDoubleWidth <= 0)		return -1;

	rcDisplay = m_rcDisplay[nCh];
	m_nHeight = m_nMemoryHeight;

	// Memory rectangle...
	m_rcMemory.left = 0;
	m_rcMemory.top = 0;
	m_rcMemory.right  = m_nWidth;
	m_rcMemory.bottom  = m_nHeight;

	// The primary surface uses screen coordinates, so move the channel
	// rectangle by the screen position of the window's client origin.
	::ClientToScreen(m_hVideoWnd, &pt);
	OffsetRect(&rcDisplay, pt.x, pt.y);

	ZeroMemory(&ddsd, sizeof(DDSURFACEDESC2));
	ddsd.dwSize = sizeof(DDSURFACEDESC2);

	src = pbuffer;

	StepSize = m_nDoubleWidth;		// bytes per source line

	if ( DD_OK != m_pDDSVideo->Lock( NULL, &ddsd, DDLOCK_SURFACEMEMORYPTR|DDLOCK_WAIT, NULL) )
	{
		m_pDDSVideo->Restore();
		m_pDDSPrimary->Restore();

		TRACE("Failed to Lock surface memory...\n");
		return 0;
	}

	dst = (unsigned char *)ddsd.lpSurface;

	// The MMX loop handles 8 bytes per pass and counts down from StepSize / 8;
	// a line shorter than 8 bytes would make that counter wrap around, so such
	// frames take the plain copy path instead.
	if( m_bProgressive == FALSE && StepSize >= 8) {
		long EdgeDetect			= 625;
		long JaggieThreshold	= 73;

		int Line;
		BYTE* YVal1;
		BYTE* YVal2;
		BYTE* YVal3;
		BYTE* Dest;

		// In YUY2 the luma samples are the low byte of every 16-bit word.
		const __int64 YMask    = 0x00ff00ff00ff00ff;

		__int64 qwEdgeDetect;
		__int64 qwThreshold;
		// Clears the bit that a word-wise shift right by one carries from the
		// high byte into the low byte.
		const __int64 Mask = 0x7f7f7f7f7f7f7f7f;

		// Replicate both constants into all four 16-bit lanes of a qword.
		qwEdgeDetect = EdgeDetect;
		qwEdgeDetect += (qwEdgeDetect << 48) + (qwEdgeDetect << 32) + (qwEdgeDetect << 16);
		qwThreshold = JaggieThreshold;
		qwThreshold += (qwThreshold << 48) + (qwThreshold << 32) + (qwThreshold << 16);

		// Bytes at the end of a line that the 8-byte MMX loop does not reach.
		const int TailBytes = StepSize & 7;

		// Line 0 is always copied unchanged.
		CopyMemory(dst, src, StepSize);

		for (Line = 0; Line < m_nMemoryHeight/2 - 1; Line++)
		{
			YVal1	= (BYTE *)src+((Line*2 + 0)*StepSize);		// line above
			YVal2	= (BYTE *)src+((Line*2 + 1)*StepSize);		// line being filtered
			YVal3	= (BYTE *)src+((Line*2 + 2)*StepSize);		// line below
			Dest	= dst + (Line * 2 + 1) * ddsd.lPitch;

			// The line below (2*Line + 2) goes to the surface verbatim.
			CopyMemory(Dest + ddsd.lPitch, YVal3, StepSize);

			// The line in between (2*Line + 1) is produced by the MMX loop.
			// In the comments there, O1/O2 are the lines above/below
			// (YVal1/YVal3) and E is the line in between (YVal2). Per 16-bit
			// lane the output is the average of O1 and O2 where the computed
			// measure exceeds the threshold, and the unchanged E sample
			// everywhere else.
			_asm
			{
				mov ecx, StepSize
				mov eax, dword ptr [YVal1]
				mov ebx, dword ptr [YVal2]
				mov edx, dword ptr [YVal3]
				mov edi, dword ptr [Dest]
				shr ecx, 3       // there are LineLength / 8 qwords

				align 8
			DoNext8Bytes:           
				movq mm0, qword ptr[eax] 
				movq mm1, qword ptr[ebx] 
				movq mm2, qword ptr[edx]

				// get intensities in mm3 - mm5
				movq mm3, mm0
				movq mm4, mm1
				movq mm5, mm2

				pand mm3, YMask
				pand mm4, YMask
				pand mm5, YMask

				// get average in mm0
				psrlw mm0, 01
				psrlw mm2, 01
				pand  mm0, Mask
				pand  mm2, Mask
				paddw mm0, mm2

				// work out (O1 - E) * (O2 - E) / 2 - EdgeDetect * (O1 - O2) ^ 2 >> 12
				// result will be in mm6

				psrlw mm3, 01
				psrlw mm4, 01
				psrlw mm5, 01

				movq mm6, mm3
				psubw mm6, mm4  //mm6 = O1 - E

				movq mm7, mm5
				psubw mm7, mm4  //mm7 = O2 - E

				pmullw mm6, mm7     // mm6 = (O1 - E) * (O2 - E)

				movq mm7, mm3
				psubw mm7, mm5      // mm7 = (O1 - O2)
				pmullw mm7, mm7     // mm7 = (O1 - O2) ^ 2
				psrlw mm7, 12       // mm7 = (O1 - O2) ^ 2 >> 12
				pmullw mm7, qwEdgeDetect        // mm7  = EdgeDetect * (O1 - O2) ^ 2 >> 12

				psubw mm6, mm7      // mm6 is what we want

				pcmpgtw mm6, qwThreshold

				movq mm7, mm6

				pand mm0, mm6

				pandn mm7, mm1

				por mm7, mm0

				movq qword ptr[edi], mm7

				add eax, 8
				add ebx, 8
				add edx, 8
				add edi, 8
				dec ecx
				jne near DoNext8Bytes
				emms
			}

			// Finish the line when its length is not a multiple of 8 bytes:
			// the remaining bytes are taken unchanged from the source line.
			if (TailBytes != 0)
				CopyMemory(Dest + (StepSize - TailBytes), YVal2 + (StepSize - TailBytes), TailBytes);
		}

		// The last line is always copied unchanged.
		CopyMemory(dst+((m_nMemoryHeight-1)*ddsd.lPitch), src+((m_nMemoryHeight-1)*StepSize), StepSize);

	} else {
		// Plain copy of all m_nMemoryHeight lines; source lines are packed,
		// surface lines are ddsd.lPitch bytes apart.
		for (int i=0; i<m_nMemoryHeight; i++)
		{
			CopyMemory(dst, src, StepSize);

			src += StepSize;
			dst += ddsd.lPitch;
		}
	}

	m_pDDSVideo->Unlock(NULL);
	m_pDDSPrimary->Blt(&rcDisplay, m_pDDSVideo, &m_rcMemory, 0, NULL);

	return TRUE;
}

void CDDRender::SetDisplaySize(int nwidth, int nheight)
{
	UINT ncount;
	for(UINT i=0; i<MAX_CHANNEL; i++)
	{
		UINT n;
		n = nheight / 4;
		if((i % 4) == 0)
		{
			ncount = i;
			m_rcDisplay[i].left = 0;
			m_rcDisplay[i].top  = (nheight / 4) * ((i-(i/(4*4)*(4*4))) / 4);
		}
		else
		{
			m_rcDisplay[i].left = m_rcDisplay[i - 1].right;
			m_rcDisplay[i].top = m_rcDisplay[ncount].top;
		}

		m_rcDisplay[i].right  = m_rcDisplay[i].left + (nwidth / 4);
		m_rcDisplay[i].bottom  = m_rcDisplay[i].top + (nheight / 4);
	}
}

// Stores a new frame size: nWidth becomes m_nWidth, nHeight becomes both
// m_nHeight and m_nMemoryHeight.
//
// NOTE(review): only these three members change here. m_nDoubleWidth,
// m_bProgressive and the surfaces created by Init() are left as they are --
// confirm that callers re-run Init() when the size really changes.
void CDDRender::SettingResolution(UINT nWidth, UINT nHeight)
{
	m_nWidth = nWidth;
	m_nMemoryHeight = m_nHeight = nHeight;
}
