diff options
| author | Junio C Hamano <gitster@pobox.com> | 2022-05-20 15:26:54 -0700 |
|---|---|---|
| committer | Junio C Hamano <gitster@pobox.com> | 2022-05-20 15:26:54 -0700 |
| commit | 804ec0301fddb68d5170939084d79e2c0c672028 (patch) | |
| tree | 2a59846b4ac9122d38675d8610543900e77806d1 | |
| parent | 2e55151800531fea805bbe3a655d73627c1ea194 (diff) | |
| parent | fbe5f6b80437adbcd58af1b3751b830910a2ddaa (diff) | |
| download | git-804ec0301fddb68d5170939084d79e2c0c672028.tar.xz | |
Merge branch 'tk/p4-utf8-bom'

"git p4" update.

* tk/p4-utf8-bom:
  git-p4: preserve utf8 BOM when importing from p4 to git

| mode | file | lines changed |
|---|---|---|
| -rwxr-xr-x | git-p4.py | 10 |
| -rwxr-xr-x | t/t9802-git-p4-filetype.sh | 34 |

2 files changed, 44 insertions, 0 deletions

```diff
diff --git a/git-p4.py b/git-p4.py
--- a/git-p4.py
+++ b/git-p4.py
@@ -3046,6 +3046,16 @@ class P4Sync(Command, P4UserMap):
             print("\nIgnoring apple filetype file %s" % file['depotFile'])
             return
 
+        if type_base == "utf8":
+            # The type utf8 explicitly means utf8 *with BOM*. These are
+            # streamed just like regular text files, however, without
+            # the BOM in the stream.
+            # Therefore, to accurately import these files into git, we
+            # need to explicitly re-add the BOM before writing.
+            # 'contents' is a set of bytes in this case, so create the
+            # BOM prefix as a b'' literal.
+            contents = [b'\xef\xbb\xbf' + contents[0]] + contents[1:]
+
         # Note that we do not try to de-mangle keywords on utf16 files,
         # even though in theory somebody may want that.
         regexp = p4_keywords_regexp_for_type(type_base, type_mods)
diff --git a/t/t9802-git-p4-filetype.sh b/t/t9802-git-p4-filetype.sh
index 19073c6e9f..2a6ee2a467 100755
--- a/t/t9802-git-p4-filetype.sh
+++ b/t/t9802-git-p4-filetype.sh
@@ -333,4 +333,38 @@ test_expect_success SYMLINKS 'empty symlink target' '
 	)
 '
 
+test_expect_success SYMLINKS 'utf-8 with and without BOM in text file' '
+	(
+		cd "$cli" &&
+
+		# some utf8 content
+		echo some tǣxt >utf8-nobom-test &&
+
+		# same utf8 content as before but with bom
+		echo some tǣxt | sed '\''s/^/\xef\xbb\xbf/'\'' >utf8-bom-test &&
+
+		# bom only
+		dd bs=1 count=3 if=utf8-bom-test of=utf8-bom-empty-test &&
+
+		p4 add utf8-nobom-test utf8-bom-test utf8-bom-empty-test &&
+		p4 submit -d "add utf8 test files"
+	) &&
+	test_when_finished cleanup_git &&
+
+	git p4 clone --dest="$git" //depot@all &&
+	(
+		cd "$git" &&
+		git checkout refs/remotes/p4/master &&
+
+		echo some tǣxt >utf8-nobom-check &&
+		test_cmp utf8-nobom-check utf8-nobom-test &&
+
+		echo some tǣxt | sed '\''s/^/\xef\xbb\xbf/'\'' >utf8-bom-check &&
+		test_cmp utf8-bom-check utf8-bom-test &&
+
+		dd bs=1 count=3 if=utf8-bom-check of=utf8-bom-empty-check &&
+		test_cmp utf8-bom-empty-check utf8-bom-empty-test
+	)
+'
+
 test_done
```
