diff options
| -rw-r--r-- | debian/changelog | 5 | ||||
| -rw-r--r-- | doc/source/library/apt_pkg.rst | 6 | ||||
| -rw-r--r-- | python/tag.cc | 128 | ||||
| -rw-r--r-- | tests/test_tagfile.py | 93 |
4 files changed, 201 insertions, 31 deletions
diff --git a/debian/changelog b/debian/changelog index f04326eb..60a573f0 100644 --- a/debian/changelog +++ b/debian/changelog @@ -35,6 +35,11 @@ python-apt (0.8.5) UNRELEASED; urgency=low * po/sl.po: Slovenian translation updated by Matej Urbančič * po/tl.po: Tagalog translation updated by Ariel S. Betan + [ Julian Andres Klode ] + * Merge patch from Colin Watson to handle non-UTF8 tag files in + Python 3, by using bytes instead of str when requested; and + document this in the RST documentation (Closes: #656288) + -- Michael Vogt <mvo@debian.org> Tue, 17 Apr 2012 14:09:24 +0200 python-apt (0.8.4) unstable; urgency=low diff --git a/doc/source/library/apt_pkg.rst b/doc/source/library/apt_pkg.rst index bba7491f..b5ee1a53 100644 --- a/doc/source/library/apt_pkg.rst +++ b/doc/source/library/apt_pkg.rst @@ -1904,7 +1904,7 @@ thereof and provides a function :func:`RewriteSection` which takes a :class:`TagSection()` object and sorting information and outputs a sorted section as a string. -.. class:: TagFile(file) +.. class:: TagFile(file, bytes: bool = False) An object which represents a typical debian control file. Can be used for Packages, Sources, control, Release, etc. Such an object provides two @@ -1921,6 +1921,10 @@ section as a string. .. versionchanged:: 0.7.100 Added support for using gzip files, via :class:`gzip.GzipFile` or any file containing a compressed gzip stream. + + .. versionadded:: 0.8.5 + + Added support for using bytes instead of str in Python 3 .. method:: next() diff --git a/python/tag.cc b/python/tag.cc index 49f53c31..bf79debf 100644 --- a/python/tag.cc +++ b/python/tag.cc @@ -38,6 +38,10 @@ using namespace std; struct TagSecData : public CppPyObject<pkgTagSection> { char *Data; + bool Bytes; +#if PY_MAJOR_VERSION >= 3 + PyObject *Encoding; +#endif }; // The owner of the TagFile is a Python file object. @@ -45,6 +49,10 @@ struct TagFileData : public CppPyObject<pkgTagFile> { TagSecData *Section; FileFd Fd; + bool Bytes; +#if PY_MAJOR_VERSION >= 3 + PyObject *Encoding; +#endif }; // Traversal and Clean for owned objects @@ -60,6 +68,35 @@ int TagFileClear(PyObject *self) { return 0; } +// Helpers to return Unicode or bytes as appropriate. +#if PY_MAJOR_VERSION < 3 +#define TagSecString_FromStringAndSize(self, v, len) \ + PyString_FromStringAndSize((v), (len)) +#define TagSecString_FromString(self, v) PyString_FromString(v) +#else +PyObject *TagSecString_FromStringAndSize(PyObject *self, const char *v, + Py_ssize_t len) { + TagSecData *Self = (TagSecData *)self; + if (Self->Bytes) + return PyBytes_FromStringAndSize(v, len); + else if (Self->Encoding) + return PyUnicode_Decode(v, len, PyUnicode_AsString(Self->Encoding), 0); + else + return PyUnicode_FromStringAndSize(v, len); +} + +PyObject *TagSecString_FromString(PyObject *self, const char *v) { + TagSecData *Self = (TagSecData *)self; + if (Self->Bytes) + return PyBytes_FromString(v); + else if (Self->Encoding) + return PyUnicode_Decode(v, strlen(v), + PyUnicode_AsString(Self->Encoding), 0); + else + return PyUnicode_FromString(v); +} +#endif + /*}}}*/ // TagSecFree - Free a Tag Section /*{{{*/ @@ -107,9 +144,9 @@ static PyObject *TagSecFind(PyObject *Self,PyObject *Args) { if (Default == 0) Py_RETURN_NONE; - return PyString_FromString(Default); + return TagSecString_FromString(Self,Default); } - return PyString_FromStringAndSize(Start,Stop-Start); + return TagSecString_FromStringAndSize(Self,Start,Stop-Start); } static char *doc_FindRaw = @@ -128,14 +165,14 @@ static PyObject *TagSecFindRaw(PyObject *Self,PyObject *Args) { if (Default == 0) Py_RETURN_NONE; - return PyString_FromString(Default); + return TagSecString_FromString(Self,Default); } const char *Start; const char *Stop; GetCpp<pkgTagSection>(Self).Get(Start,Stop,Pos); - return PyString_FromStringAndSize(Start,Stop-Start); + return TagSecString_FromStringAndSize(Self,Start,Stop-Start); } static char *doc_FindFlag = @@ -161,21 +198,18 @@ static PyObject *TagSecFindFlag(PyObject *Self,PyObject *Args) // Map access, operator [] static PyObject *TagSecMap(PyObject *Self,PyObject *Arg) { - if (PyString_Check(Arg) == 0) - { - PyErr_SetNone(PyExc_TypeError); + const char *Name = PyObject_AsString(Arg); + if (Name == 0) return 0; - } - const char *Start; const char *Stop; - if (GetCpp<pkgTagSection>(Self).Find(PyString_AsString(Arg),Start,Stop) == false) + if (GetCpp<pkgTagSection>(Self).Find(Name,Start,Stop) == false) { - PyErr_SetString(PyExc_KeyError,PyString_AsString(Arg)); + PyErr_SetString(PyExc_KeyError,Name); return 0; } - return PyString_FromStringAndSize(Start,Stop-Start); + return TagSecString_FromStringAndSize(Self,Start,Stop-Start); } // len() operation @@ -230,9 +264,9 @@ static PyObject *TagSecExists(PyObject *Self,PyObject *Args) static int TagSecContains(PyObject *Self,PyObject *Arg) { - if (PyString_Check(Arg) == 0) - return 0; - const char *Name = PyString_AsString(Arg); + const char *Name = PyObject_AsString(Arg); + if (Name == 0) + return 0; const char *Start; const char *Stop; if (GetCpp<pkgTagSection>(Self).Find(Name,Start,Stop) == false) @@ -256,7 +290,7 @@ static PyObject *TagSecStr(PyObject *Self) const char *Start; const char *Stop; GetCpp<pkgTagSection>(Self).GetSection(Start,Stop); - return PyString_FromStringAndSize(Start,Stop-Start); + return TagSecString_FromStringAndSize(Self,Start,Stop-Start); } /*}}}*/ // TagFile Wrappers /*{{{*/ @@ -286,6 +320,12 @@ static PyObject *TagFileNext(PyObject *Self) Obj.Section->Owner = Self; Py_INCREF(Obj.Section->Owner); Obj.Section->Data = 0; + Obj.Section->Bytes = Obj.Bytes; +#if PY_MAJOR_VERSION >= 3 + // We don't need to incref Encoding as the previous Section object already + // held a reference to it. + Obj.Section->Encoding = Obj.Encoding; +#endif if (Obj.Object.Step(Obj.Section->Object) == false) return HandleErrors(NULL); @@ -347,11 +387,12 @@ static PyObject *TagFileJump(PyObject *Self,PyObject *Args) static PyObject *TagSecNew(PyTypeObject *type,PyObject *Args,PyObject *kwds) { char *Data; int Len; - char *kwlist[] = {"text", 0}; + char Bytes = 0; + char *kwlist[] = {"text", "bytes", 0}; // this allows reading "byte" types from python3 - but we don't // make (much) use of it yet - if (PyArg_ParseTupleAndKeywords(Args,kwds,"s#",kwlist,&Data,&Len) == 0) + if (PyArg_ParseTupleAndKeywords(Args,kwds,"s#|b",kwlist,&Data,&Len,&Bytes) == 0) return 0; // Create the object.. @@ -359,6 +400,10 @@ static PyObject *TagSecNew(PyTypeObject *type,PyObject *Args,PyObject *kwds) { new (&New->Object) pkgTagSection(); New->Data = new char[strlen(Data)+2]; snprintf(New->Data,strlen(Data)+2,"%s\n",Data); + New->Bytes = Bytes; +#if PY_MAJOR_VERSION >= 3 + New->Encoding = 0; +#endif if (New->Object.Scan(New->Data,strlen(New->Data)) == false) { @@ -391,19 +436,21 @@ PyObject *ParseSection(PyObject *self,PyObject *Args) static PyObject *TagFileNew(PyTypeObject *type,PyObject *Args,PyObject *kwds) { TagFileData *New; - PyObject *File; + PyObject *File = 0; + char Bytes = 0; - char *kwlist[] = {"file", 0}; - if (PyArg_ParseTupleAndKeywords(Args,kwds,"O",kwlist,&File) == 0) + char *kwlist[] = {"file", "bytes", 0}; + if (PyArg_ParseTupleAndKeywords(Args,kwds,"O|b",kwlist,&File,&Bytes) == 0) return 0; // check if we got a filename or a file object int fileno = -1; const char *filename = NULL; - if (PyString_Check(File)) - filename = PyObject_AsString(File); - else + filename = PyObject_AsString(File); + if (filename == NULL) { + PyErr_Clear(); fileno = PyObject_AsFileDescriptor(File); + } // handle invalid arguments if (fileno == -1 && filename == NULL) @@ -432,8 +479,18 @@ static PyObject *TagFileNew(PyTypeObject *type,PyObject *Args,PyObject *kwds) new (&New->Fd) FileFd(filename, FileFd::ReadOnly, false); #endif } + New->Bytes = Bytes; New->Owner = File; Py_INCREF(New->Owner); +#if PY_MAJOR_VERSION >= 3 + if (fileno > 0) { + New->Encoding = PyObject_GetAttr(File, PyUnicode_FromString("encoding")); + if (New->Encoding && !PyUnicode_Check(New->Encoding)) + New->Encoding = 0; + } else + New->Encoding = 0; + Py_XINCREF(New->Encoding); +#endif new (&New->Object) pkgTagFile(&New->Fd); // Create the section @@ -442,6 +499,11 @@ static PyObject *TagFileNew(PyTypeObject *type,PyObject *Args,PyObject *kwds) New->Section->Owner = New; Py_INCREF(New->Section->Owner); New->Section->Data = 0; + New->Section->Bytes = Bytes; +#if PY_MAJOR_VERSION >= 3 + New->Section->Encoding = New->Encoding; + Py_XINCREF(New->Section->Encoding); +#endif return HandleErrors(New); } @@ -519,7 +581,7 @@ PyObject *RewriteSection(PyObject *self,PyObject *Args) } // Return the string - PyObject *ResObj = PyString_FromStringAndSize(bp,size); + PyObject *ResObj = TagSecString_FromStringAndSize(Section,bp,size); free(bp); return HandleErrors(ResObj); } @@ -548,11 +610,15 @@ PySequenceMethods TagSecSeqMeth = {0,0,0,0,0,0,0,TagSecContains,0,0}; PyMappingMethods TagSecMapMeth = {TagSecLength,TagSecMap,0}; -static char *doc_TagSec = "TagSection(text: str)\n\n" +static char *doc_TagSec = "TagSection(text: str, [bytes: bool = False])\n\n" "Provide methods to access RFC822-style header sections, like those\n" "found in debian/control or Packages files.\n\n" "TagSection() behave like read-only dictionaries and also provide access\n" - "to the functions provided by the C++ class (e.g. find)"; + "to the functions provided by the C++ class (e.g. find).\n\n" + "By default, text read from files is treated as strings (binary data in\n" + "Python 2, Unicode strings in Python 3). Use bytes=True to cause all\n" + "header values read from this TagSection to be bytes even in Python 3.\n" + "Header names are always treated as Unicode."; PyTypeObject PyTagSection_Type = { PyVarObject_HEAD_INIT(&PyType_Type, 0) @@ -623,7 +689,7 @@ static PyGetSetDef TagFileGetSet[] = { }; -static char *doc_TagFile = "TagFile(file)\n\n" +static char *doc_TagFile = "TagFile(file, [bytes: bool = False])\n\n" "TagFile() objects provide access to debian control files, which consist\n" "of multiple RFC822-style sections.\n\n" "To provide access to those sections, TagFile objects provide an iterator\n" @@ -635,7 +701,11 @@ static char *doc_TagFile = "TagFile(file)\n\n" "It is important to not mix the use of both APIs, because this can have\n" "unwanted effects.\n\n" "The parameter 'file' refers to an object providing a fileno() method or\n" - "a file descriptor (an integer)"; + "a file descriptor (an integer).\n\n" + "By default, text read from files is treated as strings (binary data in\n" + "Python 2, Unicode strings in Python 3). Use bytes=True to cause all\n" + "header values read from this TagFile to be bytes even in Python 3.\n" + "Header names are always treated as Unicode."; // Type for a Tag File PyTypeObject PyTagFile_Type = diff --git a/tests/test_tagfile.py b/tests/test_tagfile.py index 371cc6ee..33197e6a 100644 --- a/tests/test_tagfile.py +++ b/tests/test_tagfile.py @@ -1,18 +1,26 @@ #!/usr/bin/python +# -*- coding: utf-8 -*- # # Copyright (C) 2010 Michael Vogt <mvo@ubuntu.com> +# Copyright (C) 2012 Canonical Ltd. +# Author: Colin Watson <cjwatson@ubuntu.com> # # Copying and distribution of this file, with or without modification, # are permitted in any medium without royalty provided the copyright # notice and this notice are preserved. """Unit tests for verifying the correctness of apt_pkg.TagFile""" +from __future__ import print_function, unicode_literals + +import io import glob import os +import shutil +import sys +import tempfile import unittest from test_all import get_library_dir -import sys sys.path.insert(0, get_library_dir()) import apt_pkg @@ -20,6 +28,13 @@ import apt_pkg class TestTagFile(unittest.TestCase): """ test the apt_pkg.TagFile """ + def setUp(self): + apt_pkg.init() + self.temp_dir = tempfile.mkdtemp() + + def tearDown(self): + shutil.rmtree(self.temp_dir) + def test_tag_file(self): basepath = os.path.dirname(__file__) tagfilepath = os.path.join(basepath, "./data/tagfile/*") @@ -38,5 +53,81 @@ class TestTagFile(unittest.TestCase): # Raises Type error self.assertRaises(TypeError, apt_pkg.TagFile, object()) + def test_utf8(self): + value = "Tést Persön <test@example.org>" + packages = os.path.join(self.temp_dir, "Packages") + with io.open(packages, "w", encoding="UTF-8") as packages_file: + print("Maintainer: %s" % value, file=packages_file) + print("", file=packages_file) + if sys.version < '3': + # In Python 2, test the traditional file interface. + with open(packages) as packages_file: + tagfile = apt_pkg.TagFile(packages_file) + tagfile.step() + self.assertEqual( + value.encode("UTF-8"), tagfile.section["Maintainer"]) + with io.open(packages, encoding="UTF-8") as packages_file: + tagfile = apt_pkg.TagFile(packages_file) + tagfile.step() + if sys.version < '3': + self.assertEqual( + value.encode("UTF-8"), tagfile.section["Maintainer"]) + else: + self.assertEqual(value, tagfile.section["Maintainer"]) + + def test_latin1(self): + value = "Tést Persön <test@example.org>" + packages = os.path.join(self.temp_dir, "Packages") + with io.open(packages, "w", encoding="ISO-8859-1") as packages_file: + print("Maintainer: %s" % value, file=packages_file) + print("", file=packages_file) + if sys.version < '3': + # In Python 2, test the traditional file interface. + with open(packages) as packages_file: + tagfile = apt_pkg.TagFile(packages_file) + tagfile.step() + self.assertEqual( + value.encode("ISO-8859-1"), tagfile.section["Maintainer"]) + with io.open(packages) as packages_file: + tagfile = apt_pkg.TagFile(packages_file, bytes=True) + tagfile.step() + self.assertEqual( + value.encode("ISO-8859-1"), tagfile.section["Maintainer"]) + if sys.version >= '3': + # In Python 3, TagFile can pick up the encoding of the file + # object. + with io.open(packages, encoding="ISO-8859-1") as packages_file: + tagfile = apt_pkg.TagFile(packages_file) + tagfile.step() + self.assertEqual(value, tagfile.section["Maintainer"]) + + def test_mixed(self): + value = "Tést Persön <test@example.org>" + packages = os.path.join(self.temp_dir, "Packages") + with io.open(packages, "w", encoding="UTF-8") as packages_file: + print("Maintainer: %s" % value, file=packages_file) + print("", file=packages_file) + with io.open(packages, "a", encoding="ISO-8859-1") as packages_file: + print("Maintainer: %s" % value, file=packages_file) + print("", file=packages_file) + if sys.version < '3': + # In Python 2, test the traditional file interface. + with open(packages) as packages_file: + tagfile = apt_pkg.TagFile(packages_file) + tagfile.step() + self.assertEqual( + value.encode("UTF-8"), tagfile.section["Maintainer"]) + tagfile.step() + self.assertEqual( + value.encode("ISO-8859-1"), tagfile.section["Maintainer"]) + with io.open(packages) as packages_file: + tagfile = apt_pkg.TagFile(packages_file, bytes=True) + tagfile.step() + self.assertEqual( + value.encode("UTF-8"), tagfile.section["Maintainer"]) + tagfile.step() + self.assertEqual( + value.encode("ISO-8859-1"), tagfile.section["Maintainer"]) + if __name__ == "__main__": unittest.main() |
