Added monkeypatch and unmonkeypatch, for patching a findSelect method onto the Soup class itself.

master
swillison 2007-02-28 13:42:08 +00:00
parent 9a6bf542fe
commit 2960a5f11b
2 changed files with 73 additions and 4 deletions

View File

@ -12,8 +12,6 @@ select(soup, 'div#main ul a')
"""
from BeautifulSoup import BeautifulSoup as Soup
import re
tag_re = re.compile('^[a-z0-9]+$')
@ -53,6 +51,10 @@ def attribute_checker(operator, attribute, value=''):
def select(soup, selector):
"""
soup should be a BeautifulSoup instance; selector is a CSS selector
specifying the elements you want to retrieve.
"""
tokens = selector.split()
current_context = [soup]
for token in tokens:
@ -107,3 +109,17 @@ def select(soup, selector):
found.extend(context.findAll(token))
current_context = found
return current_context
def monkeypatch(BeautifulSoupClass=None):
    """
    Attach select() to a BeautifulSoup class as its findSelect method.

    If you don't explicitly state the class to patch, defaults to the most
    common import location for BeautifulSoup.

    BeautifulSoupClass is the class to patch; the assignment is made on the
    class itself, so it is visible through every instance of that class.
    """
    # Compare against None instead of testing truthiness, so a class that
    # was passed in explicitly is always the one that gets patched.
    if BeautifulSoupClass is None:
        from BeautifulSoup import BeautifulSoup as BeautifulSoupClass
    BeautifulSoupClass.findSelect = select
def unmonkeypatch(BeautifulSoupClass=None):
    """
    Remove the findSelect attribute from a BeautifulSoup class.

    If you don't explicitly state the class to unpatch, defaults to the most
    common import location for BeautifulSoup.

    Raises AttributeError (from delattr) if the class has no findSelect
    attribute of its own to remove.
    """
    # Compare against None instead of testing truthiness, so a class that
    # was passed in explicitly is always the one that gets unpatched.
    if BeautifulSoupClass is None:
        from BeautifulSoup import BeautifulSoup as BeautifulSoupClass
    delattr(BeautifulSoupClass, 'findSelect')

View File

@ -1,9 +1,10 @@
import unittest
from BeautifulSoup import BeautifulSoup
from soupselect import select
from soupselect import select, monkeypatch, unmonkeypatch
class TestBasicSelectors(unittest.TestCase):
class BaseTest(unittest.TestCase):
def setUp(self):
self.soup = BeautifulSoup(HTML)
@ -22,6 +23,8 @@ class TestBasicSelectors(unittest.TestCase):
def assertSelectMultiple(self, *tests):
for selector, expected_ids in tests:
self.assertSelect(selector, expected_ids)
class TestBasicSelectors(BaseTest):
def test_one_tag_one(self):
els = select(self.soup, 'title')
@ -91,6 +94,8 @@ class TestBasicSelectors(unittest.TestCase):
'.class3', 'p.class3', 'html p.class2', 'div#inner .class2'):
self.assertSelects(selector, ['pmulti'])
class TestAttributeSelectors(BaseTest):
def test_attribute_equals(self):
self.assertSelectMultiple(
('p[class="onep"]', ['p1']),
@ -197,6 +202,54 @@ class TestBasicSelectors(unittest.TestCase):
('p[blah]', []),
)
class TestMonkeyPatch(BaseTest):
    """
    Checks that monkeypatch() installs findSelect on the BeautifulSoup class
    and that unmonkeypatch() removes it again.
    """

    def assertSelectMultipleExplicit(self, soup, *tests):
        """
        Assert, for each (selector, expected_ids) pair in tests, that
        soup.findSelect(selector) returns elements with exactly those ids.

        Ids are compared in sorted order, so result order does not matter.
        """
        for selector, expected_ids in tests:
            el_ids = sorted([el['id'] for el in soup.findSelect(selector)])
            # Sort a copy so the caller's list is never mutated.
            expected = sorted(expected_ids)
            self.assertEqual(expected, el_ids,
                "Selector %s, expected [%s], got [%s]" % (
                    selector, ', '.join(expected), ', '.join(el_ids)
                )
            )

    def _assert_patch_cycle(self, *patch_args):
        """
        Run the unpatched -> patched -> unpatched cycle on a fresh soup.

        patch_args is forwarded unchanged to both monkeypatch() and
        unmonkeypatch(), so the same checks cover the explicit-class and the
        default-class call styles.
        """
        soup = BeautifulSoup(HTML)
        # Before patching, calling findSelect is expected to raise TypeError.
        self.assertRaises(TypeError, soup.findSelect, '*')
        monkeypatch(*patch_args)
        try:
            self.assertTrue(soup.findSelect('*'))
            self.assertSelectMultipleExplicit(soup,
                ('link', ['l1']),
                ('div#main', ['main']),
                ('div div', ['inner']),
            )
        finally:
            # Always undo the class-level patch; otherwise one failing
            # assertion would leave findSelect installed and break the
            # "unpatched" precondition of every later test.
            unmonkeypatch(*patch_args)
        self.assertRaises(TypeError, soup.findSelect, '*')

    def test_monkeypatch_explicit(self):
        """Patch and unpatch with the BeautifulSoup class passed explicitly."""
        self._assert_patch_cycle(BeautifulSoup)

    def test_monkeypatch_implicit(self):
        """Patch and unpatch relying on the default BeautifulSoup import."""
        self._assert_patch_cycle()
HTML = """
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
"http://www.w3.org/TR/html4/strict.dtd">