From 1c5834df7a0494f25ab47bd2066a2246845ae8c1 Mon Sep 17 00:00:00 2001 From: Ulf Rompe Date: Sat, 13 Jul 2019 16:55:50 +0200 Subject: [PATCH 1/3] Align bytearray.strip, .rstrip, .lstrip to their bytes counterparts. The previous implementations differed a lot from the ones found in bytesobject.c and all three of them included hardcoded lists of whitespace characters. Knowledge about whitespace already exists in pyctype.c and should not be duplicated. --- Objects/bytearrayobject.c | 161 ++++++++++++++++++-------------------- 1 file changed, 74 insertions(+), 87 deletions(-) diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 9dd67127b614647..5ff5298c8ce6ae1 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -1780,26 +1780,78 @@ bytearray_remove_impl(PyByteArrayObject *self, int value) Py_RETURN_NONE; } -/* XXX These two helpers could be optimized if argsize == 1 */ +#define LEFTSTRIP 0 +#define RIGHTSTRIP 1 +#define BOTHSTRIP 2 -static Py_ssize_t -lstrip_helper(const char *myptr, Py_ssize_t mysize, - const void *argptr, Py_ssize_t argsize) +static PyObject * +do_xstrip(PyByteArrayObject *self, int striptype, PyObject *sepobj) { - Py_ssize_t i = 0; - while (i < mysize && memchr(argptr, (unsigned char) myptr[i], argsize)) - i++; - return i; + Py_buffer vsep; + char *s = PyByteArray_AS_STRING(self); + Py_ssize_t len = PyByteArray_GET_SIZE(self); + char *sep; + Py_ssize_t seplen; + Py_ssize_t i, j; + + if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0) + return NULL; + sep = vsep.buf; + seplen = vsep.len; + + i = 0; + if (striptype != RIGHTSTRIP) { + while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) { + i++; + } + } + + j = len; + if (striptype != LEFTSTRIP) { + do { + j--; + } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen)); + j++; + } + + PyBuffer_Release(&vsep); + + return PyByteArray_FromStringAndSize(s+i, j-i); } -static Py_ssize_t -rstrip_helper(const char *myptr, Py_ssize_t mysize, - const 
void *argptr, Py_ssize_t argsize) + +static PyObject * +do_strip(PyByteArrayObject *self, int striptype) +{ + char *s = PyByteArray_AS_STRING(self); + Py_ssize_t len = PyByteArray_GET_SIZE(self), i, j; + + i = 0; + if (striptype != RIGHTSTRIP) { + while (i < len && Py_ISSPACE(s[i])) { + i++; + } + } + + j = len; + if (striptype != LEFTSTRIP) { + do { + j--; + } while (j >= i && Py_ISSPACE(s[j])); + j++; + } + + return PyByteArray_FromStringAndSize(s+i, j-i); +} + + +static PyObject * +do_argstrip(PyByteArrayObject *self, int striptype, PyObject *bytes) { - Py_ssize_t i = mysize - 1; - while (i >= 0 && memchr(argptr, (unsigned char) myptr[i], argsize)) - i--; - return i + 1; + if (bytes != NULL && bytes != Py_None) { + return do_xstrip(self, striptype, bytes); + } + return do_strip(self, striptype); } /*[clinic input] @@ -1815,33 +1867,9 @@ If the argument is omitted or None, strip leading and trailing ASCII whitespace. static PyObject * bytearray_strip_impl(PyByteArrayObject *self, PyObject *bytes) -/*[clinic end generated code: output=760412661a34ad5a input=ef7bb59b09c21d62]*/ +/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/ { - Py_ssize_t left, right, mysize, byteslen; - char *myptr; - const char *bytesptr; - Py_buffer vbytes; - - if (bytes == Py_None) { - bytesptr = "\t\n\r\f\v "; - byteslen = 6; - } - else { - if (PyObject_GetBuffer(bytes, &vbytes, PyBUF_SIMPLE) != 0) - return NULL; - bytesptr = (const char *) vbytes.buf; - byteslen = vbytes.len; - } - myptr = PyByteArray_AS_STRING(self); - mysize = Py_SIZE(self); - left = lstrip_helper(myptr, mysize, bytesptr, byteslen); - if (left == mysize) - right = left; - else - right = rstrip_helper(myptr, mysize, bytesptr, byteslen); - if (bytes != Py_None) - PyBuffer_Release(&vbytes); - return PyByteArray_FromStringAndSize(myptr + left, right - left); + return do_argstrip(self, BOTHSTRIP, bytes); } /*[clinic input] @@ -1852,35 +1880,14 @@ bytearray.lstrip Strip leading bytes contained in 
the argument. -If the argument is omitted or None, strip leading ASCII whitespace. +If the argument is omitted or None, strip leading ASCII whitespace. [clinic start generated code]*/ static PyObject * bytearray_lstrip_impl(PyByteArrayObject *self, PyObject *bytes) -/*[clinic end generated code: output=d005c9d0ab909e66 input=80843f975dd7c480]*/ +/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/ { - Py_ssize_t left, right, mysize, byteslen; - char *myptr; - const char *bytesptr; - Py_buffer vbytes; - - if (bytes == Py_None) { - bytesptr = "\t\n\r\f\v "; - byteslen = 6; - } - else { - if (PyObject_GetBuffer(bytes, &vbytes, PyBUF_SIMPLE) != 0) - return NULL; - bytesptr = (const char *) vbytes.buf; - byteslen = vbytes.len; - } - myptr = PyByteArray_AS_STRING(self); - mysize = Py_SIZE(self); - left = lstrip_helper(myptr, mysize, bytesptr, byteslen); - right = mysize; - if (bytes != Py_None) - PyBuffer_Release(&vbytes); - return PyByteArray_FromStringAndSize(myptr + left, right - left); + return do_argstrip(self, LEFTSTRIP, bytes); } /*[clinic input] @@ -1896,29 +1903,9 @@ If the argument is omitted or None, strip trailing ASCII whitespace. 
static PyObject * bytearray_rstrip_impl(PyByteArrayObject *self, PyObject *bytes) -/*[clinic end generated code: output=030e2fbd2f7276bd input=e728b994954cfd91]*/ +/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/ { - Py_ssize_t right, mysize, byteslen; - char *myptr; - const char *bytesptr; - Py_buffer vbytes; - - if (bytes == Py_None) { - bytesptr = "\t\n\r\f\v "; - byteslen = 6; - } - else { - if (PyObject_GetBuffer(bytes, &vbytes, PyBUF_SIMPLE) != 0) - return NULL; - bytesptr = (const char *) vbytes.buf; - byteslen = vbytes.len; - } - myptr = PyByteArray_AS_STRING(self); - mysize = Py_SIZE(self); - right = rstrip_helper(myptr, mysize, bytesptr, byteslen); - if (bytes != Py_None) - PyBuffer_Release(&vbytes); - return PyByteArray_FromStringAndSize(myptr, right); + return do_argstrip(self, RIGHTSTRIP, bytes); } /*[clinic input] From c016eb76698748f44ccb8918be9d242cbcbaf715 Mon Sep 17 00:00:00 2001 From: Ulf Rompe Date: Sun, 14 Jul 2019 10:48:15 +0200 Subject: [PATCH 2/3] Update clinic signatures. --- Objects/bytearrayobject.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Objects/bytearrayobject.c b/Objects/bytearrayobject.c index 5ff5298c8ce6ae1..7e09f0a7a561df2 100644 --- a/Objects/bytearrayobject.c +++ b/Objects/bytearrayobject.c @@ -1867,7 +1867,7 @@ If the argument is omitted or None, strip leading and trailing ASCII whitespace. static PyObject * bytearray_strip_impl(PyByteArrayObject *self, PyObject *bytes) -/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/ +/*[clinic end generated code: output=760412661a34ad5a input=ef7bb59b09c21d62]*/ { return do_argstrip(self, BOTHSTRIP, bytes); } @@ -1880,12 +1880,12 @@ bytearray.lstrip Strip leading bytes contained in the argument. -If the argument is omitted or None, strip leading ASCII whitespace. +If the argument is omitted or None, strip leading ASCII whitespace. 
[clinic start generated code]*/ static PyObject * bytearray_lstrip_impl(PyByteArrayObject *self, PyObject *bytes) -/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/ +/*[clinic end generated code: output=d005c9d0ab909e66 input=80843f975dd7c480]*/ { return do_argstrip(self, LEFTSTRIP, bytes); } @@ -1903,7 +1903,7 @@ If the argument is omitted or None, strip trailing ASCII whitespace. static PyObject * bytearray_rstrip_impl(PyByteArrayObject *self, PyObject *bytes) -/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/ +/*[clinic end generated code: output=030e2fbd2f7276bd input=e728b994954cfd91]*/ { return do_argstrip(self, RIGHTSTRIP, bytes); } From a6962a0d3f7f035792d765c11c23c4077781422e Mon Sep 17 00:00:00 2001 From: Ulf Rompe Date: Sun, 14 Jul 2019 14:50:37 +0200 Subject: [PATCH 3/3] Add NEWS entry for the bytearray.strip, .lstrip, .rstrip alignments. --- .../2019-07-14-14-48-50.bpo-25433.T1O9mC.rst | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2019-07-14-14-48-50.bpo-25433.T1O9mC.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-07-14-14-48-50.bpo-25433.T1O9mC.rst b/Misc/NEWS.d/next/Core and Builtins/2019-07-14-14-48-50.bpo-25433.T1O9mC.rst new file mode 100644 index 000000000000000..a249e65fcfef489 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2019-07-14-14-48-50.bpo-25433.T1O9mC.rst @@ -0,0 +1,5 @@ +Align ``bytearray.strip``, ``.lstrip``, ``.rstrip`` to their ``bytes`` +counterparts. + +Speedup ranges between 0% when stripping custom bytes and 8% when stripping +whitespace.