Well, I guess it's time to answer my own post, so that the result can also be found when searching for "decomposed to precomposed".
I wrote a basic iconv wrapper specifically for this, with the option of expanding on it later. It looks like this:
Code: Select all
; Everything up to the matching CompilerEndIf is macOS specific: it links
; against the system libiconv dylib and the macOS errno accessor, and the
; callers invoke it only when compiling for macOS. The guard therefore has to
; select macOS (it previously excluded it).
CompilerIf #PB_Compiler_OS = #PB_OS_MacOS
ImportC ""
  ; Accessor whose result is read with PeekL() to obtain the current errno.
  ; NOTE(review): if the linker cannot resolve this symbol, check whether the
  ; decorated name needs an extra leading underscore ("___error") - confirm
  ; against the PureBasic Import documentation for macOS.
  errno_location() As "__error"
EndImport
ImportC "/usr/lib/libiconv.2.dylib"
  ; iconv_open takes the TARGET charset first, then the SOURCE charset
  iconv_open(tocode.p-ascii, fromcode.p-ascii)
  ; the four pointer arguments are addresses of variables that iconv updates
  iconv(cd.i, *pInbuf, *inbytesleft, *pOutbuf, *outbytesleft)
  iconv_close(cd.i)
EndImport
; Debug output hook of the wrapper; redefine the body to silence or redirect it
Macro iconv_dbg(msg)
  Debug msg
EndMacro

; errno values as defined by the macOS <sys/errno.h>
#ic_E2BIG  = 7  ; Argument list too long (iconv: output buffer is full)
#ic_EINVAL = 22 ; Invalid argument (iconv: incomplete sequence at end of input)
#ic_EILSEQ = 92 ; Illegal byte sequence (92 on macOS; 84 is the Linux value)

; charset names understood by iconv_open
#ic_UTF8     = "UTF8"     ; UTF-8
#ic_UTF8_MAC = "UTF8-MAC" ; the decomposed UTF-8 variant used for macOS file names
Procedure iconv_new(srcCharset.s, targetCharset.s)
  ; Opens a conversion handle that converts from srcCharset to targetCharset.
  ; Returns the handle, or 0 if the conversion is not available.
  ;
  ; iconv_open expects (to, from), hence the swapped argument order.
  Protected ih = iconv_open(targetCharset, srcCharset)
  ; iconv_open signals failure with (iconv_t)-1, but callers test for 0,
  ; so normalise the failure value here.
  If ih = -1
    ProcedureReturn 0
  EndIf
  ProcedureReturn ih
EndProcedure
Procedure iconv_convert(ih.i, *srcBuf, bufLen, *resulLen_i)
  ; Converts bufLen bytes starting at *srcBuf using the iconv handle ih.
  ;
  ; Returns a buffer obtained from AllocateMemory() that the caller has to
  ; release with FreeMemory(), or #Null on failure. The integer at
  ; *resulLen_i receives the number of converted bytes (0 on failure).
  ; The converted bytes are followed by one 0 byte that is NOT included in
  ; the reported length, so the result can also be read as a terminated string.

  ; iconv advances the pointers and decrements the counters it is given,
  ; so work on copies and keep *srcBuf / *out as the fixed start addresses
  Protected *p = *srcBuf
  Protected inLeft = bufLen

  ; initial guess for the output size; never request an empty block
  Protected outLen = bufLen * 2
  If outLen < 16
    outLen = 16
  EndIf
  Protected outLeft = outLen

  ; +1 keeps room for the terminating 0 byte without telling iconv about it
  Protected *out = AllocateMemory(outLen + 1)
  If *out = #Null
    PokeI(*resulLen_i, 0)
    ProcedureReturn #Null
  EndIf
  Protected *op = *out
  Protected ret, errno, *newOut

  iconv_dbg("ih:0x" + Hex(ih) + " *srcBuf:0x" + Hex(*srcBuf) +
            " buf length:" + Str(bufLen) + " *resulLen_i:0x" + Hex(*resulLen_i))

  Repeat
    iconv_dbg("@out:0x" + Hex(*out) + " inLeft:" + Str(inLeft) +
              " outLeft:" + Str(outLeft))

    ret = iconv(ih, @*p, @inLeft, @*op, @outLeft)
    iconv_dbg("iconv:" + Str(ret))

    If ret <> -1
      ; all input consumed: terminate and hand the buffer to the caller
      PokeA(*op, 0)
      PokeI(*resulLen_i, (*op - *out))
      iconv_dbg("resultLen:" + Str((*op - *out)))
      ProcedureReturn *out
    EndIf

    errno = PeekL(errno_location())
    iconv_dbg("errno:" + Str(errno))

    If errno <> #ic_E2BIG
      ; anything but "output buffer full" is a real conversion error
      iconv_dbg("iconv failed at input offset:" + Str(*p-*srcBuf))
      FreeMemory(*out)
      PokeI(*resulLen_i, 0)
      ProcedureReturn #Null
    EndIf

    ; Output buffer is full: double it. Doubling adds exactly the OLD size
    ; in free bytes, so credit outLeft before outLen is changed - crediting
    ; the doubled size would let iconv write past the end of the block.
    outLeft + outLen
    outLen * 2
    *newOut = ReAllocateMemory(*out, outLen + 1, #PB_Memory_NoClear)
    If *newOut = #Null
      ; the old block is still valid when resizing fails, so release it
      FreeMemory(*out)
      PokeI(*resulLen_i, 0)
      ProcedureReturn #Null
    EndIf
    ; the block may have moved: rebase the write cursor onto the new block
    *op = *newOut + (*op - *out)
    *out = *newOut
  ForEver
EndProcedure
Procedure iconv_free(ih.i)
  ; Closes a conversion handle obtained from iconv_new().
  ; Both "no handle" values are ignored: 0 (what the callers test for) and
  ; -1 (what iconv_open itself reports on failure), so a failed open can be
  ; passed in safely.
  If ih <> 0 And ih <> -1
    iconv_close(ih)
  EndIf
EndProcedure
Procedure.s iconv_preCompose(text.s)
  ; This is a mac animal. It converts decomposed unicode, as returned by
  ; filesystem functions like ExamineDirectory, into precomposed unicode
  ; as used everywhere else.
  ; Returns "" in case of an error (and for an empty input).

  ; nothing to convert; also avoids working on a zero-length buffer
  If text = ""
    ProcedureReturn ""
  EndIf

  Protected ih = iconv_new(#ic_UTF8_MAC, #ic_UTF8)
  ; 0 is the documented failure value; -1 is what iconv_open itself reports
  If ih = 0 Or ih = -1
    ProcedureReturn ""
  EndIf

  ; Encode the text as UTF-8 into a raw memory block sized from the actual
  ; UTF-8 byte count, instead of borrowing a string variable whose capacity
  ; depends on the native string format.
  Protected len = StringByteLength(text, #PB_UTF8)
  Protected *buf = AllocateMemory(len + 1) ; +1 for the terminator PokeS writes
  If *buf = #Null
    iconv_free(ih)
    ProcedureReturn ""
  EndIf
  PokeS(*buf, text, -1, #PB_UTF8)

  Protected outLen = 0
  Protected *res = iconv_convert(ih, *buf, len, @outLen)
  FreeMemory(*buf)
  iconv_free(ih)
  If *res = #Null
    ProcedureReturn ""
  EndIf

  Protected result.s = PeekS(*res, outLen, #PB_UTF8)
  FreeMemory(*res)
  ProcedureReturn result
EndProcedure
CompilerEndIf
The wrapper already allows a conversion handle to be kept open and reused for several conversions, but it does not yet support partial input or proper error reporting.
A usage example could look like this:
Code: Select all
; Enumerate every entry below somPath; on macOS each name is normalised to
; precomposed form before it is used.
Protected name.s
Protected dirHandle = ExamineDirectory(#PB_Any, somPath, "*")
If dirHandle = 0
  ; the directory could not be opened
  Debug "Fail"
  End
EndIf
While NextDirectoryEntry(dirHandle)
  name = DirectoryEntryName(dirHandle)
  CompilerIf #PB_Compiler_OS = #PB_OS_MacOS
    ; the filesystem hands out decomposed names
    name = iconv_preCompose(name)
  CompilerEndIf
  ; do stuff
Wend
FinishDirectory(dirHandle)
Also, anybody using the native Mac OS X API will probably want to call something like this with a memory pointer to the UTF-8 file path:
Code: Select all
Procedure.s iconv_preComposeFromUtf8Mem(*path)
  ; Converts the 0-terminated, decomposed UTF-8 string at *path into a
  ; precomposed PureBasic string.
  ; Returns "" in case of an error, for a null pointer and for an empty input.

  If *path = #Null
    ProcedureReturn ""
  EndIf

  ; find the length in bytes by scanning for the terminator
  Protected *p = *path
  While PeekA(*p) <> 0 : *p + 1 : Wend
  Protected len = *p-*path ; no need for the terminator
  If len = 0
    ; nothing to convert, so do not open a conversion handle at all
    ProcedureReturn ""
  EndIf

  Protected ih = iconv_new(#ic_UTF8_MAC, #ic_UTF8)
  ; 0 is the documented failure value; -1 is what iconv_open itself reports
  If ih = 0 Or ih = -1
    ProcedureReturn ""
  EndIf

  Protected outLen = 0
  Protected *out = iconv_convert(ih, *path, len, @outLen)
  iconv_free(ih)
  If *out = 0
    ProcedureReturn ""
  EndIf

  Protected result.s = PeekS(*out, outLen, #PB_UTF8)
  FreeMemory(*out)
  ProcedureReturn result
EndProcedure
Hope this helps the next victim caught by this.