Image duplicate detection
rings,
when it works it works very well, when it doesn't it's a long way off
When one pic was a resized version of the same pic it matched exact but when something was slightly lightened or in some cases poorer quality it guessed other pics that were completely different were the similar.
when it works it works very well, when it doesn't it's a long way off
When one pic was a resized version of the same pic it matched exact but when something was slightly lightened or in some cases poorer quality it guessed other pics that were completely different were the similar.
Paul Dwyer
“In nature, it’s not the strongest nor the most intelligent who survives. It’s the most adaptable to change” - Charles Darwin
“If you can't explain it to a six-year old you really don't understand it yourself.” - Albert Einstein
“In nature, it’s not the strongest nor the most intelligent who survives. It’s the most adaptable to change” - Charles Darwin
“If you can't explain it to a six-year old you really don't understand it yourself.” - Albert Einstein
Okay, this is starting to collect some useful double ups and similarities
0 = Exact
1-25 = likely to be the same pic just resaved or resized
After about 50 it gets a little harder to pic, very similar content pics or pics that have colouring changes (lightening etc) can be as high as 200.
Run this, point it at a dir of Pics and it loads them and compares them and then shows you a list with in order that they are close up to a distance of 300 (higher than this and I'm not catching anything interesting). There are some duplicate comparisons to be filtered out but a comparison is very fast so not too big a deal. If the directory has a thousand images or so it could take a while
I think I can tweak the comparison further to lower the score of two pics that are the same but have had colour changes.
Yes, the interface is a mixed mess, it prompts for a dir (doesn't recurse) and outputs to console. The SQLite stuff is there as I'm playing with some other grouping and sorting ideas and it saves me coding it.
0 = Exact
1-25 = likely to be the same pic just resaved or resized
After about 50 it gets a little harder to pic, very similar content pics or pics that have colouring changes (lightening etc) can be as high as 200.
Run this, point it at a dir of Pics and it loads them and compares them and then shows you a list with in order that they are close up to a distance of 300 (higher than this and I'm not catching anything interesting). There are some duplicate comparisons to be filtered out but a comparison is very fast so not too big a deal. If the directory has a thousand images or so it could take a while
I think I can tweak the comparison further to lower the score of two pics that are the same but have had colour changes.
Yes, the interface is a mixed mess, it prompts for a dir (doesn't recurse) and outputs to console. The SQLite stuff is there as I'm playing with some other grouping and sorting ideas and it saves me coding it.
Code: Select all
UseJPEGImageDecoder()
UsePNGImageDecoder()
UseJPEG2000ImageDecoder()
UseSQLiteDatabase()
#SigPtCount = 16
Structure ImgSig
ImgPath.s
PtAvg.w[#SigPtCount]
EndStructure
;================================================================
Procedure.l GetImgDist(*Img1.ImgSig, *Img2.ImgSig)
Distance.l = 0
For i = 0 To #SigPtCount -1
Distance = Distance + Abs(*Img1\PtAvg[i]-*Img2\PtAvg[i])
Next
ProcedureReturn Distance
EndProcedure
;================================================================
Procedure GetImgSig(*Img.ImgSig); path populated, pt's to be added
;Debug *Img\ImgPath
Dim Colours.l(#SigPtCount)
Protected ImgWidth.l
Protected ImgHeight.l
Protected PointNo.l = 0
If FileSize(*Img\ImgPath) < 0
ProcedureReturn 0
EndIf
ImgID.l = LoadImage( #PB_Any , *Img\ImgPath)
If IMGID
ResizeImage(ImgID, Int(Sqr(#SigPtCount)), Int(Sqr(#SigPtCount)))
StartDrawing(ImageOutput(ImgID))
Ystep.l = Int(Round(ImgHeight/(Sqr(#SigPtCount) +1 ),#PB_Round_Nearest))
XStep.l = Int(Round(ImgWidth/(Sqr(#SigPtCount) +1),#PB_Round_Nearest))
For y = 1 To Int(Sqr(#SigPtCount))
For x = 1 To Int(Sqr(#SigPtCount))
Colour.l = Point(x, y)
*Img\PtAvg[PointNo] = Red(Colour) + Green(Colour) + Blue(Colour) / 3
PointNo = PointNo + 1
Next
Next
StopDrawing()
EndIf
EndProcedure
;================================================================
Dim Images.ImgSig(0)
OpenConsole()
ImageFile.s = ""
FilePath.s = PathRequester("Select Images Directory", "c:\")
PrintN("Loading Images...")
If Len(filepath) > 0
FileCount = 0
hDir.i = ExamineDirectory(#PB_Any , FilePath, "*.jpg")
If hDir
While NextDirectoryEntry(hDir)
If DirectoryEntryType(hDir) = #PB_DirectoryEntry_File
ReDim Images(FileCount)
Images(FileCount)\ImgPath = FilePath + DirectoryEntryName(hDir)
GetImgSig(@Images(FileCount))
FileCount = FileCount + 1
Print(".")
EndIf
Wend
FinishDirectory(hDir)
EndIf
EndIf
PrintN("")
PrintN("Comparing Images...")
If OpenDatabase(0, ":memory:", "", "")
DatabaseUpdate(0, "CREATE TABLE ImgDistances (File1 text, File2 text, Distance INT)")
For i = 0 To FileCount - 1
Print(".")
For j = 0 To FileCount - 1
If i = j Or j < i
;skip self compare or prev compared
Else
DatabaseUpdate(0, "INSERT INTO ImgDistances (File1,File2,Distance) VALUES ('" + Images(i)\ImgPath + "','" + Images(j)\ImgPath + "',"+ Str(GetImgDist(@Images(i),@Images(j)))+")")
EndIf
Next
Next
PrintN("")
PrintN("Displaying short distance Images...")
sSQL.s = "select * from ImgDistances where Distance < 300 order by distance"
If DatabaseQuery(0, sSQL)
While NextDatabaseRow(0)
PrintN(GetDatabaseString(0, 0) + "," + GetDatabaseString(0, 1) + "," + Str(GetDatabaseLong(0, 2)))
Wend
FinishDatabaseQuery(0)
EndIf
EndIf
PrintN("Press Enter to quit")
Input()
CloseConsole()
Paul Dwyer
“In nature, it’s not the strongest nor the most intelligent who survives. It’s the most adaptable to change” - Charles Darwin
“If you can't explain it to a six-year old you really don't understand it yourself.” - Albert Einstein
“In nature, it’s not the strongest nor the most intelligent who survives. It’s the most adaptable to change” - Charles Darwin
“If you can't explain it to a six-year old you really don't understand it yourself.” - Albert Einstein
I'm talking to myself here but that's fine, thinking "out loud" helps me.
Here's a gui version so you can see the results of the image distance calculations I have so far, it shows the images in a listview in order that they are similar. If you click on a record it displays the two images so you can see for yourself. If you photographed a fireworks night or something then most will seem pretty similar
. In the select statement on line 144 you can put in a "where" clause as per the comment to only list the most similar.
I haven't added recursion at this stage but I probably will later.
This "ProportionalImgResize()" proc I just pulled out of my arse, I don't know if that's quite correct but it seems okay
Here's a gui version so you can see the results of the image distance calculations I have so far, it shows the images in a listview in order that they are similar. If you click on a record it displays the two images so you can see for yourself. If you photographed a fireworks night or something then most will seem pretty similar
I haven't added recursion at this stage but I probably will later.
This "ProportionalImgResize()" proc I just pulled out of my arse, I don't know if that's quite correct but it seems okay
Code: Select all
UseJPEGImageDecoder()
UsePNGImageDecoder()
UseJPEG2000ImageDecoder()
UseSQLiteDatabase()
#LVSICF_NOINVALIDATEALL = 1
#LVN_ODCACHEHINT = #LVN_FIRST - 13
#SigPtCount = 16
#Img1 = 11
#Img2 = 12
Structure ImgSig
ImgPath.s
PtAvg.w[#SigPtCount]
EndStructure
Declare WinCallback(hwnd, msg, wParam, lParam)
Declare.l GetImgDist(*Img1.ImgSig, *Img2.ImgSig)
Declare GetImgSig(*Img.ImgSig); path populated, pt's to be added
Declare.l LoadImages(Path.s, Array Images.ImgSig(1)) ;return count
Declare.l CompareImages(Array Images.ImgSig(1), FileCount.l);return count
Declare DoEvents()
Declare LoadForm()
Declare ProportionalImgResize(ImgID.i, MaxDimension.l)
;... Array to hold data
Global Dim LvData.s(2,0) ;(3 rows, varible columns)
;Application Entry Point
LoadForm()
;================================================================
Procedure DoEvents()
While WindowEvent() : Wend
EndProcedure
;================================================================
Procedure.l GetImgDist(*Img1.ImgSig, *Img2.ImgSig)
Distance.l = 0
For i = 0 To #SigPtCount -1
Distance = Distance + Abs(*Img1\PtAvg[i]-*Img2\PtAvg[i])
Next
ProcedureReturn Distance
EndProcedure
;================================================================
Procedure GetImgSig(*Img.ImgSig); path populated, pt's to be added
;Debug *Img\ImgPath
Dim Colours.l(#SigPtCount)
Protected ImgWidth.l
Protected ImgHeight.l
Protected PointNo.l = 0
If FileSize(*Img\ImgPath) < 0
ProcedureReturn 0
EndIf
ImgID.l = LoadImage( #PB_Any , *Img\ImgPath)
If IMGID
ResizeImage(ImgID, Int(Sqr(#SigPtCount)), Int(Sqr(#SigPtCount)))
StartDrawing(ImageOutput(ImgID))
Ystep.l = Int(Round(ImgHeight/(Sqr(#SigPtCount) +1 ),#PB_Round_Nearest))
XStep.l = Int(Round(ImgWidth/(Sqr(#SigPtCount) +1),#PB_Round_Nearest))
For y = 1 To Int(Sqr(#SigPtCount))
For x = 1 To Int(Sqr(#SigPtCount))
Colour.l = Point(x, y)
*Img\PtAvg[PointNo] = Red(Colour) + Green(Colour) + Blue(Colour) / 3
PointNo = PointNo + 1
Next
Next
StopDrawing()
EndIf
EndProcedure
;================================================================
Procedure.l LoadImages(Path.s, Array Images.ImgSig(1))
FileCount.i = 0
hDir.i = ExamineDirectory(#PB_Any , Path, "*.jpg")
If hDir
While NextDirectoryEntry(hDir)
If DirectoryEntryType(hDir) = #PB_DirectoryEntry_File
ReDim Images(FileCount)
Images(FileCount)\ImgPath = Path + DirectoryEntryName(hDir)
FileCount = FileCount + 1
EndIf
Wend
FinishDirectory(hDir)
EndIf
For i = 0 To FileCount -1
GetImgSig(@Images(i))
StatusBarText(0, 0, "Loading Image: " + Images(i)\ImgPath )
StatusBarText(0, 1, Str(i) + " of " + Str(FileCount-1))
DoEvents()
Next
ProcedureReturn FileCount
EndProcedure
;================================================================
Procedure.l CompareImages(Array Images.ImgSig(1), FileCount.l)
If OpenDatabase(0, ":memory:", "", "")
DatabaseUpdate(0, "CREATE TABLE ImgDistances (File1 text, File2 text, Distance INT)")
For i = 0 To FileCount - 1
For j = 0 To FileCount - 1
If i = j Or j < i
;skip self compare or prev compared
Else
DatabaseUpdate(0, "INSERT INTO ImgDistances (File1,File2,Distance) VALUES ('" + Images(i)\ImgPath + "','" + Images(j)\ImgPath + "',"+ Str(GetImgDist(@Images(i),@Images(j)))+")")
EndIf
Next
DoEvents()
Next
sSQL.s = "select * from ImgDistances order by distance" ;where Distance < 200
RetRowCount = 0
If DatabaseQuery(0, sSQL)
While NextDatabaseRow(0)
;PrintN(GetDatabaseString(0, 0) + "," + GetDatabaseString(0, 1) + "," + Str(GetDatabaseLong(0, 2)))
ReDim LvData(2,RetRowCount)
LvData(0,RetRowCount) = GetDatabaseString(0, 0)
LvData(1,RetRowCount) = GetDatabaseString(0, 1)
LvData(2,RetRowCount) = Str(GetDatabaseLong(0, 2))
Debug RetRowCount
RetRowCount = RetRowCount + 1
Wend
FinishDatabaseQuery(0)
EndIf
EndIf
ProcedureReturn RetRowCount
EndProcedure
;================================================================
Procedure WinCallback(hwnd, msg, wParam, lParam)
result = #PB_ProcessPureBasicEvents
Select msg
Case #WM_NOTIFY
*pnmh.NMHDR = lParam
Select *pnmh\code
Case #LVN_ODCACHEHINT
result = 0
Case #LVN_GETDISPINFO
*pnmlvdi.NMLVDISPINFO = lParam
If *pnmlvdi\item\mask & #LVIF_TEXT
;... Item text is being requested
;Debug Str(*pnmlvdi\item\iSubItem) + "," + Str(*pnmlvdi\item\iItem)
*pnmlvdi\item\pszText = @LvData(*pnmlvdi\item\iSubItem,*pnmlvdi\item\iItem)
EndIf
EndSelect
EndSelect
ProcedureReturn result
EndProcedure
;================================================================
Procedure LoadForm()
Dim Images.ImgSig(0)
ImgSz.l = 300
If OpenWindow(0, 0, 0, 612, 620, "Image Distance", #PB_Window_SystemMenu | #PB_Window_MinimizeGadget | #PB_Window_ScreenCentered) ;And CreateGadgetList(WindowID(0))
SetWindowCallback(@WinCallback())
; left column
ImageGadget(#Img1, 2, 280, ImgSz,ImgSz,0,#PB_Image_Border)
ImageGadget(#Img2, 310, 280, ImgSz,ImgSz,0,#PB_Image_Border)
If CreateStatusBar(0, WindowID(0))
AddStatusBarField(450) ; autosize this field
AddStatusBarField(#PB_Ignore)
EndIf
hWndList.i=ListIconGadget(#PB_Any,2,2,608,270,"Image 1",250,#LVS_OWNERDATA | #PB_ListIcon_FullRowSelect )
AddGadgetColumn(hWndList,1,"Image 2",250)
AddGadgetColumn(hWndList,2,"Distance",80)
If CreateMenu(0, WindowID(0))
MenuTitle("File")
MenuItem(1, "Load From Dir")
MenuBar()
MenuItem(2, "Exit")
; MenuTitle("Options")
; MenuItem(11, "Recurse")
EndIf
Repeat
Event = WaitWindowEvent()
Select Event
Case #PB_Event_Gadget
Select EventGadget()
Case hWndList
hWndImg1.i = LoadImage(#PB_Any, LvData(0,GetGadgetState(hWndList)))
hWndImg2.i = LoadImage(#PB_Any, LvData(1,GetGadgetState(hWndList)))
ProportionalImgResize(hWndImg1, ImgSz)
ProportionalImgResize(hWndImg2, ImgSz)
SetGadgetState(#Img1, ImageID(hWndImg1))
SetGadgetState(#Img2, ImageID(hWndImg2))
FreeImage(hWndImg1)
FreeImage(hWndImg2)
EndSelect
Case #PB_Event_Menu
Select EventMenu()
Case 1
FilePath.s = PathRequester("Select Images Directory", "f:\photos")
If Len(FilePath) > 0
ImgCount.i = LoadImages(FilePath.s, Images())
ItemCount.i = CompareImages(Images(),imgcount)
SendMessage_(GadgetID(hWndList), #LVM_SETITEMCOUNT, ItemCount, #LVSICF_NOINVALIDATEALL)
EndIf
Case 2
Event = #PB_Event_CloseWindow
EndSelect
EndSelect
Until Event = #PB_Event_CloseWindow
EndIf
EndProcedure
;================================================================
Procedure ProportionalImgResize(ImgID.i, MaxDimension.l)
Protected MinDimension.f
ImgHeight.l = ImageHeight(ImgID)
ImgWidth.l = ImageWidth(ImgID)
If ImgHeight > ImgWidth
MinDimension = (MaxDimension/ImgHeight) * ImgWidth
ResizeImage(ImgID,MinDimension,MaxDimension)
Else
MinDimension = (MaxDimension/ImgWidth) * ImgHeight
ResizeImage(ImgID,MaxDimension,MinDimension)
EndIf
EndProcedure
Paul Dwyer
“In nature, it’s not the strongest nor the most intelligent who survives. It’s the most adaptable to change” - Charles Darwin
“If you can't explain it to a six-year old you really don't understand it yourself.” - Albert Einstein
“In nature, it’s not the strongest nor the most intelligent who survives. It’s the most adaptable to change” - Charles Darwin
“If you can't explain it to a six-year old you really don't understand it yourself.” - Albert Einstein
