summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--debian/changelog5
-rw-r--r--doc/source/library/apt_pkg.rst6
-rw-r--r--python/tag.cc128
-rw-r--r--tests/test_tagfile.py93
4 files changed, 201 insertions, 31 deletions
diff --git a/debian/changelog b/debian/changelog
index f04326eb..60a573f0 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -35,6 +35,11 @@ python-apt (0.8.5) UNRELEASED; urgency=low
* po/sl.po: Slovenian translation updated by Matej Urbančič
* po/tl.po: Tagalog translation updated by Ariel S. Betan
+ [ Julian Andres Klode ]
+ * Merge patch from Colin Watson to handle non-UTF8 tag files in
+ Python 3, by using bytes instead of str when requested; and
+ document this in the RST documentation (Closes: #656288)
+
-- Michael Vogt <mvo@debian.org> Tue, 17 Apr 2012 14:09:24 +0200
python-apt (0.8.4) unstable; urgency=low
diff --git a/doc/source/library/apt_pkg.rst b/doc/source/library/apt_pkg.rst
index bba7491f..b5ee1a53 100644
--- a/doc/source/library/apt_pkg.rst
+++ b/doc/source/library/apt_pkg.rst
@@ -1904,7 +1904,7 @@ thereof and provides a function :func:`RewriteSection` which takes a
:class:`TagSection()` object and sorting information and outputs a sorted
section as a string.
-.. class:: TagFile(file)
+.. class:: TagFile(file, bytes: bool = False)
An object which represents a typical debian control file. Can be used for
Packages, Sources, control, Release, etc. Such an object provides two
@@ -1921,6 +1921,10 @@ section as a string.
.. versionchanged:: 0.7.100
Added support for using gzip files, via :class:`gzip.GzipFile` or any
file containing a compressed gzip stream.
+
+ .. versionadded:: 0.8.5
+
+ Added support for using bytes instead of str in Python 3
.. method:: next()
diff --git a/python/tag.cc b/python/tag.cc
index 49f53c31..bf79debf 100644
--- a/python/tag.cc
+++ b/python/tag.cc
@@ -38,6 +38,10 @@ using namespace std;
struct TagSecData : public CppPyObject<pkgTagSection>
{
char *Data;
+ bool Bytes;
+#if PY_MAJOR_VERSION >= 3
+ PyObject *Encoding;
+#endif
};
// The owner of the TagFile is a Python file object.
@@ -45,6 +49,10 @@ struct TagFileData : public CppPyObject<pkgTagFile>
{
TagSecData *Section;
FileFd Fd;
+ bool Bytes;
+#if PY_MAJOR_VERSION >= 3
+ PyObject *Encoding;
+#endif
};
// Traversal and Clean for owned objects
@@ -60,6 +68,35 @@ int TagFileClear(PyObject *self) {
return 0;
}
+// Helpers to return Unicode or bytes as appropriate.
+#if PY_MAJOR_VERSION < 3
+#define TagSecString_FromStringAndSize(self, v, len) \
+ PyString_FromStringAndSize((v), (len))
+#define TagSecString_FromString(self, v) PyString_FromString(v)
+#else
+PyObject *TagSecString_FromStringAndSize(PyObject *self, const char *v,
+ Py_ssize_t len) {
+ TagSecData *Self = (TagSecData *)self;
+ if (Self->Bytes)
+ return PyBytes_FromStringAndSize(v, len);
+ else if (Self->Encoding)
+ return PyUnicode_Decode(v, len, PyUnicode_AsString(Self->Encoding), 0);
+ else
+ return PyUnicode_FromStringAndSize(v, len);
+}
+
+PyObject *TagSecString_FromString(PyObject *self, const char *v) {
+ TagSecData *Self = (TagSecData *)self;
+ if (Self->Bytes)
+ return PyBytes_FromString(v);
+ else if (Self->Encoding)
+ return PyUnicode_Decode(v, strlen(v),
+ PyUnicode_AsString(Self->Encoding), 0);
+ else
+ return PyUnicode_FromString(v);
+}
+#endif
+
/*}}}*/
// TagSecFree - Free a Tag Section /*{{{*/
@@ -107,9 +144,9 @@ static PyObject *TagSecFind(PyObject *Self,PyObject *Args)
{
if (Default == 0)
Py_RETURN_NONE;
- return PyString_FromString(Default);
+ return TagSecString_FromString(Self,Default);
}
- return PyString_FromStringAndSize(Start,Stop-Start);
+ return TagSecString_FromStringAndSize(Self,Start,Stop-Start);
}
static char *doc_FindRaw =
@@ -128,14 +165,14 @@ static PyObject *TagSecFindRaw(PyObject *Self,PyObject *Args)
{
if (Default == 0)
Py_RETURN_NONE;
- return PyString_FromString(Default);
+ return TagSecString_FromString(Self,Default);
}
const char *Start;
const char *Stop;
GetCpp<pkgTagSection>(Self).Get(Start,Stop,Pos);
- return PyString_FromStringAndSize(Start,Stop-Start);
+ return TagSecString_FromStringAndSize(Self,Start,Stop-Start);
}
static char *doc_FindFlag =
@@ -161,21 +198,18 @@ static PyObject *TagSecFindFlag(PyObject *Self,PyObject *Args)
// Map access, operator []
static PyObject *TagSecMap(PyObject *Self,PyObject *Arg)
{
- if (PyString_Check(Arg) == 0)
- {
- PyErr_SetNone(PyExc_TypeError);
+ const char *Name = PyObject_AsString(Arg);
+ if (Name == 0)
return 0;
- }
-
const char *Start;
const char *Stop;
- if (GetCpp<pkgTagSection>(Self).Find(PyString_AsString(Arg),Start,Stop) == false)
+ if (GetCpp<pkgTagSection>(Self).Find(Name,Start,Stop) == false)
{
- PyErr_SetString(PyExc_KeyError,PyString_AsString(Arg));
+ PyErr_SetString(PyExc_KeyError,Name);
return 0;
}
- return PyString_FromStringAndSize(Start,Stop-Start);
+ return TagSecString_FromStringAndSize(Self,Start,Stop-Start);
}
// len() operation
@@ -230,9 +264,9 @@ static PyObject *TagSecExists(PyObject *Self,PyObject *Args)
static int TagSecContains(PyObject *Self,PyObject *Arg)
{
- if (PyString_Check(Arg) == 0)
- return 0;
- const char *Name = PyString_AsString(Arg);
+ const char *Name = PyObject_AsString(Arg);
+ if (Name == 0)
+ return 0;
const char *Start;
const char *Stop;
if (GetCpp<pkgTagSection>(Self).Find(Name,Start,Stop) == false)
@@ -256,7 +290,7 @@ static PyObject *TagSecStr(PyObject *Self)
const char *Start;
const char *Stop;
GetCpp<pkgTagSection>(Self).GetSection(Start,Stop);
- return PyString_FromStringAndSize(Start,Stop-Start);
+ return TagSecString_FromStringAndSize(Self,Start,Stop-Start);
}
/*}}}*/
// TagFile Wrappers /*{{{*/
@@ -286,6 +320,12 @@ static PyObject *TagFileNext(PyObject *Self)
Obj.Section->Owner = Self;
Py_INCREF(Obj.Section->Owner);
Obj.Section->Data = 0;
+ Obj.Section->Bytes = Obj.Bytes;
+#if PY_MAJOR_VERSION >= 3
+ // We don't need to incref Encoding as the previous Section object already
+ // held a reference to it.
+ Obj.Section->Encoding = Obj.Encoding;
+#endif
if (Obj.Object.Step(Obj.Section->Object) == false)
return HandleErrors(NULL);
@@ -347,11 +387,12 @@ static PyObject *TagFileJump(PyObject *Self,PyObject *Args)
static PyObject *TagSecNew(PyTypeObject *type,PyObject *Args,PyObject *kwds) {
char *Data;
int Len;
- char *kwlist[] = {"text", 0};
+ char Bytes = 0;
+ char *kwlist[] = {"text", "bytes", 0};
// this allows reading "byte" types from python3 - but we don't
// make (much) use of it yet
- if (PyArg_ParseTupleAndKeywords(Args,kwds,"s#",kwlist,&Data,&Len) == 0)
+ if (PyArg_ParseTupleAndKeywords(Args,kwds,"s#|b",kwlist,&Data,&Len,&Bytes) == 0)
return 0;
// Create the object..
@@ -359,6 +400,10 @@ static PyObject *TagSecNew(PyTypeObject *type,PyObject *Args,PyObject *kwds) {
new (&New->Object) pkgTagSection();
New->Data = new char[strlen(Data)+2];
snprintf(New->Data,strlen(Data)+2,"%s\n",Data);
+ New->Bytes = Bytes;
+#if PY_MAJOR_VERSION >= 3
+ New->Encoding = 0;
+#endif
if (New->Object.Scan(New->Data,strlen(New->Data)) == false)
{
@@ -391,19 +436,21 @@ PyObject *ParseSection(PyObject *self,PyObject *Args)
static PyObject *TagFileNew(PyTypeObject *type,PyObject *Args,PyObject *kwds)
{
TagFileData *New;
- PyObject *File;
+ PyObject *File = 0;
+ char Bytes = 0;
- char *kwlist[] = {"file", 0};
- if (PyArg_ParseTupleAndKeywords(Args,kwds,"O",kwlist,&File) == 0)
+ char *kwlist[] = {"file", "bytes", 0};
+ if (PyArg_ParseTupleAndKeywords(Args,kwds,"O|b",kwlist,&File,&Bytes) == 0)
return 0;
// check if we got a filename or a file object
int fileno = -1;
const char *filename = NULL;
- if (PyString_Check(File))
- filename = PyObject_AsString(File);
- else
+ filename = PyObject_AsString(File);
+ if (filename == NULL) {
+ PyErr_Clear();
fileno = PyObject_AsFileDescriptor(File);
+ }
// handle invalid arguments
if (fileno == -1 && filename == NULL)
@@ -432,8 +479,18 @@ static PyObject *TagFileNew(PyTypeObject *type,PyObject *Args,PyObject *kwds)
new (&New->Fd) FileFd(filename, FileFd::ReadOnly, false);
#endif
}
+ New->Bytes = Bytes;
New->Owner = File;
Py_INCREF(New->Owner);
+#if PY_MAJOR_VERSION >= 3
+ if (fileno > 0) {
+ New->Encoding = PyObject_GetAttr(File, PyUnicode_FromString("encoding"));
+ if (New->Encoding && !PyUnicode_Check(New->Encoding))
+ New->Encoding = 0;
+ } else
+ New->Encoding = 0;
+ Py_XINCREF(New->Encoding);
+#endif
new (&New->Object) pkgTagFile(&New->Fd);
// Create the section
@@ -442,6 +499,11 @@ static PyObject *TagFileNew(PyTypeObject *type,PyObject *Args,PyObject *kwds)
New->Section->Owner = New;
Py_INCREF(New->Section->Owner);
New->Section->Data = 0;
+ New->Section->Bytes = Bytes;
+#if PY_MAJOR_VERSION >= 3
+ New->Section->Encoding = New->Encoding;
+ Py_XINCREF(New->Section->Encoding);
+#endif
return HandleErrors(New);
}
@@ -519,7 +581,7 @@ PyObject *RewriteSection(PyObject *self,PyObject *Args)
}
// Return the string
- PyObject *ResObj = PyString_FromStringAndSize(bp,size);
+ PyObject *ResObj = TagSecString_FromStringAndSize(Section,bp,size);
free(bp);
return HandleErrors(ResObj);
}
@@ -548,11 +610,15 @@ PySequenceMethods TagSecSeqMeth = {0,0,0,0,0,0,0,TagSecContains,0,0};
PyMappingMethods TagSecMapMeth = {TagSecLength,TagSecMap,0};
-static char *doc_TagSec = "TagSection(text: str)\n\n"
+static char *doc_TagSec = "TagSection(text: str, [bytes: bool = False])\n\n"
"Provide methods to access RFC822-style header sections, like those\n"
"found in debian/control or Packages files.\n\n"
"TagSection() behave like read-only dictionaries and also provide access\n"
- "to the functions provided by the C++ class (e.g. find)";
+ "to the functions provided by the C++ class (e.g. find).\n\n"
+ "By default, text read from files is treated as strings (binary data in\n"
+ "Python 2, Unicode strings in Python 3). Use bytes=True to cause all\n"
+ "header values read from this TagSection to be bytes even in Python 3.\n"
+ "Header names are always treated as Unicode.";
PyTypeObject PyTagSection_Type =
{
PyVarObject_HEAD_INIT(&PyType_Type, 0)
@@ -623,7 +689,7 @@ static PyGetSetDef TagFileGetSet[] = {
};
-static char *doc_TagFile = "TagFile(file)\n\n"
+static char *doc_TagFile = "TagFile(file, [bytes: bool = False])\n\n"
"TagFile() objects provide access to debian control files, which consist\n"
"of multiple RFC822-style sections.\n\n"
"To provide access to those sections, TagFile objects provide an iterator\n"
@@ -635,7 +701,11 @@ static char *doc_TagFile = "TagFile(file)\n\n"
"It is important to not mix the use of both APIs, because this can have\n"
"unwanted effects.\n\n"
"The parameter 'file' refers to an object providing a fileno() method or\n"
- "a file descriptor (an integer)";
+ "a file descriptor (an integer).\n\n"
+ "By default, text read from files is treated as strings (binary data in\n"
+ "Python 2, Unicode strings in Python 3). Use bytes=True to cause all\n"
+ "header values read from this TagFile to be bytes even in Python 3.\n"
+ "Header names are always treated as Unicode.";
// Type for a Tag File
PyTypeObject PyTagFile_Type =
diff --git a/tests/test_tagfile.py b/tests/test_tagfile.py
index 371cc6ee..33197e6a 100644
--- a/tests/test_tagfile.py
+++ b/tests/test_tagfile.py
@@ -1,18 +1,26 @@
#!/usr/bin/python
+# -*- coding: utf-8 -*-
#
# Copyright (C) 2010 Michael Vogt <mvo@ubuntu.com>
+# Copyright (C) 2012 Canonical Ltd.
+# Author: Colin Watson <cjwatson@ubuntu.com>
#
# Copying and distribution of this file, with or without modification,
# are permitted in any medium without royalty provided the copyright
# notice and this notice are preserved.
"""Unit tests for verifying the correctness of apt_pkg.TagFile"""
+from __future__ import print_function, unicode_literals
+
+import io
import glob
import os
+import shutil
+import sys
+import tempfile
import unittest
from test_all import get_library_dir
-import sys
sys.path.insert(0, get_library_dir())
import apt_pkg
@@ -20,6 +28,13 @@ import apt_pkg
class TestTagFile(unittest.TestCase):
""" test the apt_pkg.TagFile """
+ def setUp(self):
+ apt_pkg.init()
+ self.temp_dir = tempfile.mkdtemp()
+
+ def tearDown(self):
+ shutil.rmtree(self.temp_dir)
+
def test_tag_file(self):
basepath = os.path.dirname(__file__)
tagfilepath = os.path.join(basepath, "./data/tagfile/*")
@@ -38,5 +53,81 @@ class TestTagFile(unittest.TestCase):
# Raises Type error
self.assertRaises(TypeError, apt_pkg.TagFile, object())
+ def test_utf8(self):
+ value = "Tést Persön <test@example.org>"
+ packages = os.path.join(self.temp_dir, "Packages")
+ with io.open(packages, "w", encoding="UTF-8") as packages_file:
+ print("Maintainer: %s" % value, file=packages_file)
+ print("", file=packages_file)
+ if sys.version < '3':
+ # In Python 2, test the traditional file interface.
+ with open(packages) as packages_file:
+ tagfile = apt_pkg.TagFile(packages_file)
+ tagfile.step()
+ self.assertEqual(
+ value.encode("UTF-8"), tagfile.section["Maintainer"])
+ with io.open(packages, encoding="UTF-8") as packages_file:
+ tagfile = apt_pkg.TagFile(packages_file)
+ tagfile.step()
+ if sys.version < '3':
+ self.assertEqual(
+ value.encode("UTF-8"), tagfile.section["Maintainer"])
+ else:
+ self.assertEqual(value, tagfile.section["Maintainer"])
+
+ def test_latin1(self):
+ value = "Tést Persön <test@example.org>"
+ packages = os.path.join(self.temp_dir, "Packages")
+ with io.open(packages, "w", encoding="ISO-8859-1") as packages_file:
+ print("Maintainer: %s" % value, file=packages_file)
+ print("", file=packages_file)
+ if sys.version < '3':
+ # In Python 2, test the traditional file interface.
+ with open(packages) as packages_file:
+ tagfile = apt_pkg.TagFile(packages_file)
+ tagfile.step()
+ self.assertEqual(
+ value.encode("ISO-8859-1"), tagfile.section["Maintainer"])
+ with io.open(packages) as packages_file:
+ tagfile = apt_pkg.TagFile(packages_file, bytes=True)
+ tagfile.step()
+ self.assertEqual(
+ value.encode("ISO-8859-1"), tagfile.section["Maintainer"])
+ if sys.version >= '3':
+ # In Python 3, TagFile can pick up the encoding of the file
+ # object.
+ with io.open(packages, encoding="ISO-8859-1") as packages_file:
+ tagfile = apt_pkg.TagFile(packages_file)
+ tagfile.step()
+ self.assertEqual(value, tagfile.section["Maintainer"])
+
+ def test_mixed(self):
+ value = "Tést Persön <test@example.org>"
+ packages = os.path.join(self.temp_dir, "Packages")
+ with io.open(packages, "w", encoding="UTF-8") as packages_file:
+ print("Maintainer: %s" % value, file=packages_file)
+ print("", file=packages_file)
+ with io.open(packages, "a", encoding="ISO-8859-1") as packages_file:
+ print("Maintainer: %s" % value, file=packages_file)
+ print("", file=packages_file)
+ if sys.version < '3':
+ # In Python 2, test the traditional file interface.
+ with open(packages) as packages_file:
+ tagfile = apt_pkg.TagFile(packages_file)
+ tagfile.step()
+ self.assertEqual(
+ value.encode("UTF-8"), tagfile.section["Maintainer"])
+ tagfile.step()
+ self.assertEqual(
+ value.encode("ISO-8859-1"), tagfile.section["Maintainer"])
+ with io.open(packages) as packages_file:
+ tagfile = apt_pkg.TagFile(packages_file, bytes=True)
+ tagfile.step()
+ self.assertEqual(
+ value.encode("UTF-8"), tagfile.section["Maintainer"])
+ tagfile.step()
+ self.assertEqual(
+ value.encode("ISO-8859-1"), tagfile.section["Maintainer"])
+
if __name__ == "__main__":
unittest.main()