mirror of https://github.com/bvn13/soupselect.git
Added monkeypatch and unmonkeypatch, for patching a findSelect method onto the Soup class itself.
parent
9a6bf542fe
commit
2960a5f11b
|
@ -12,8 +12,6 @@ select(soup, 'div#main ul a')
|
|||
|
||||
"""
|
||||
|
||||
from BeautifulSoup import BeautifulSoup as Soup
|
||||
|
||||
import re
|
||||
|
||||
tag_re = re.compile('^[a-z0-9]+$')
|
||||
|
@ -53,6 +51,10 @@ def attribute_checker(operator, attribute, value=''):
|
|||
|
||||
|
||||
def select(soup, selector):
|
||||
"""
|
||||
soup should be a BeautifulSoup instance; selector is a CSS selector
|
||||
specifying the elements you want to retrieve.
|
||||
"""
|
||||
tokens = selector.split()
|
||||
current_context = [soup]
|
||||
for token in tokens:
|
||||
|
@ -107,3 +109,17 @@ def select(soup, selector):
|
|||
found.extend(context.findAll(token))
|
||||
current_context = found
|
||||
return current_context
|
||||
|
||||
def monkeypatch(BeautifulSoupClass=None):
    """
    Install select() on a soup class as its findSelect attribute.

    If you don't explicitly state the class to patch, defaults to the most
    common import location for BeautifulSoup.
    """
    target = BeautifulSoupClass
    if not target:
        # Imported lazily so the default is only resolved when needed.
        from BeautifulSoup import BeautifulSoup as target
    setattr(target, 'findSelect', select)
|
||||
|
||||
def unmonkeypatch(BeautifulSoupClass=None):
    """
    Remove the findSelect attribute from a soup class.

    If no class is given, the BeautifulSoup class from the BeautifulSoup
    module is used. delattr raises AttributeError when the class has no
    findSelect attribute of its own.
    """
    target = BeautifulSoupClass
    if not target:
        from BeautifulSoup import BeautifulSoup as target
    del target.findSelect
|
||||
|
|
|
@ -1,9 +1,10 @@
|
|||
import unittest
|
||||
from BeautifulSoup import BeautifulSoup
|
||||
|
||||
from soupselect import select
|
||||
from soupselect import select, monkeypatch, unmonkeypatch
|
||||
|
||||
class TestBasicSelectors(unittest.TestCase):
|
||||
class BaseTest(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.soup = BeautifulSoup(HTML)
|
||||
|
||||
|
@ -22,6 +23,8 @@ class TestBasicSelectors(unittest.TestCase):
|
|||
def assertSelectMultiple(self, *tests):
|
||||
for selector, expected_ids in tests:
|
||||
self.assertSelect(selector, expected_ids)
|
||||
|
||||
class TestBasicSelectors(BaseTest):
|
||||
|
||||
def test_one_tag_one(self):
|
||||
els = select(self.soup, 'title')
|
||||
|
@ -91,6 +94,8 @@ class TestBasicSelectors(unittest.TestCase):
|
|||
'.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
|
||||
self.assertSelects(selector, ['pmulti'])
|
||||
|
||||
class TestAttributeSelectors(BaseTest):
|
||||
|
||||
def test_attribute_equals(self):
|
||||
self.assertSelectMultiple(
|
||||
('p[class="onep"]', ['p1']),
|
||||
|
@ -197,6 +202,54 @@ class TestBasicSelectors(unittest.TestCase):
|
|||
('p[blah]', []),
|
||||
)
|
||||
|
||||
class TestMonkeyPatch(BaseTest):
    """
    Checks that monkeypatch() makes findSelect usable on a soup instance
    and that unmonkeypatch() removes it again.
    """

    def assertSelectMultipleExplicit(self, soup, *tests):
        """
        For each (selector, expected_ids) pair in tests, assert that
        soup.findSelect(selector) returns elements whose 'id' attributes
        match expected_ids. Both sides are compared in sorted order.
        """
        for selector, expected_ids in tests:
            # sorted() instead of list.sort() so the caller's list is not
            # mutated as a side effect of the assertion.
            expected = sorted(expected_ids)
            el_ids = sorted(el['id'] for el in soup.findSelect(selector))
            self.assertEqual(expected, el_ids,
                "Selector %s, expected [%s], got [%s]" % (
                    selector, ', '.join(expected), ', '.join(el_ids)
                )
            )

    def _assertPatchRoundTrip(self, *patch_args):
        """
        Shared body of the monkeypatch tests: verify findSelect fails before
        patching, works while patched, and fails again after unpatching.
        patch_args is forwarded unchanged to monkeypatch()/unmonkeypatch().
        """
        soup = BeautifulSoup(HTML)
        # Unpatched, calling soup.findSelect is expected to raise TypeError.
        self.assertRaises(TypeError, soup.findSelect, '*')

        monkeypatch(*patch_args)
        try:
            self.assertTrue(soup.findSelect('*'))
            self.assertSelectMultipleExplicit(soup,
                ('link', ['l1']),
                ('div#main', ['main']),
                ('div div', ['inner']),
            )
        finally:
            # Always undo the class-level patch, even when an assertion
            # above fails; otherwise it would leak into later tests.
            unmonkeypatch(*patch_args)

        self.assertRaises(TypeError, soup.findSelect, '*')

    def test_monkeypatch_explicit(self):
        """Patch and unpatch with the BeautifulSoup class passed explicitly."""
        self._assertPatchRoundTrip(BeautifulSoup)

    def test_monkeypatch_implicit(self):
        """Patch and unpatch relying on the default class lookup."""
        self._assertPatchRoundTrip()
|
||||
|
||||
|
||||
HTML = """
|
||||
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
||||
"http://www.w3.org/TR/html4/strict.dtd">
|
||||
|
|
Loading…
Reference in New Issue