jjlee · erichydrick · Jul 27, 2012 · Jul 27, 2012 · Jul 27, 2012
diff --git a/mechanize/_headersutil.py b/mechanize/_headersutil.py
@@ -18,25 +18,27 @@
 import _rfc3986
 
 
-def is_html_file_extension(url, allow_xhtml):
+def is_html_file_extension(url, allow_xhtml, allow_json=False):
     ext = os.path.splitext(_rfc3986.urlsplit(url)[2])[1]
     html_exts = [".htm", ".html"]
     if allow_xhtml:
         html_exts += [".xhtml"]
+    if allow_json:
+        html_exts += [".json"]
     return ext in html_exts
 
 
-def is_html(ct_headers, url, allow_xhtml=False):
+def is_html(ct_headers, url, allow_xhtml=False, allow_json=False):
     """
     ct_headers: Sequence of Content-Type headers
     url: Response URL
 
     """
     if not ct_headers:
-        return is_html_file_extension(url, allow_xhtml)
+        return is_html_file_extension(url, allow_xhtml, allow_json)
     headers = split_header_words(ct_headers)
     if len(headers) < 1:
-        return is_html_file_extension(url, allow_xhtml)
+        return is_html_file_extension(url, allow_xhtml, allow_json)
     first_header = headers[0]
     first_parameter = first_header[0]
     ct = first_parameter[0]
@@ -46,6 +48,10 @@ def is_html(ct_headers, url, allow_xhtml=False):
             "text/xhtml", "text/xml",
             "application/xml", "application/xhtml+xml",
             ]
+    if allow_json:
+        html_types += [
+            "application/json",
+            ]
     return ct in html_types
 
 
@@ -234,8 +240,8 @@ def parse_ns_headers(ns_headers):
 
 
 def _test():
-   import doctest, _headersutil
-   return doctest.testmod(_headersutil)
+    import doctest, _headersutil
+    return doctest.testmod(_headersutil)
 
 if __name__ == "__main__":
-   _test()
+    _test()
diff --git a/mechanize/_html.py b/mechanize/_html.py
@@ -65,13 +65,14 @@ def encoding(self, response):
 
 
 class ResponseTypeFinder:
-    def __init__(self, allow_xhtml):
+    def __init__(self, allow_xhtml, allow_json=False):
         self._allow_xhtml = allow_xhtml
+        self._allow_json = allow_json
     def is_html(self, response, encoding):
         ct_hdrs = response.info().getheaders("content-type")
         url = response.geturl()
         # XXX encoding
-        return _is_html(ct_hdrs, url, self._allow_xhtml)
+        return _is_html(ct_hdrs, url, self._allow_xhtml, self._allow_json)
 
 
 class Args(object):
@@ -463,8 +464,8 @@ class Factory:
     encoding: string specifying the encoding of response if it contains a text
      document (this value is left unspecified for documents that do not have
      an encoding, e.g. an image file)
-    is_html: true if response contains an HTML document (XHTML may be
-     regarded as HTML too)
+    is_html: true if response contains an HTML document (XHTML and/or JSON
+     may be regarded as HTML too)
     title: page title, or None if no title or not HTML
     global_form: form object containing all controls that are not descendants
      of any FORM element, or None if the forms_factory does not support
@@ -476,7 +477,7 @@ class Factory:
 
     def __init__(self, forms_factory, links_factory, title_factory,
                  encoding_finder=EncodingFinder(DEFAULT_ENCODING),
-                 response_type_finder=ResponseTypeFinder(allow_xhtml=False),
+                 response_type_finder=ResponseTypeFinder(allow_xhtml=False, allow_json=False),
                  ):
         """
 
@@ -578,14 +579,15 @@ def links(self):
 
 class DefaultFactory(Factory):
     """Based on sgmllib."""
-    def __init__(self, i_want_broken_xhtml_support=False):
+    def __init__(self, i_want_broken_xhtml_support=False, i_want_broken_json_support=False):
         Factory.__init__(
             self,
             forms_factory=FormsFactory(),
             links_factory=LinksFactory(),
             title_factory=TitleFactory(),
             response_type_finder=ResponseTypeFinder(
-                allow_xhtml=i_want_broken_xhtml_support),
+                allow_xhtml=i_want_broken_xhtml_support,
+                allow_json=i_want_broken_json_support),
             )
 
     def set_response(self, response):
@@ -604,14 +606,16 @@ class RobustFactory(Factory):
 
     """
     def __init__(self, i_want_broken_xhtml_support=False,
+                 i_want_broken_json_support=False,
                  soup_class=None):
         Factory.__init__(
             self,
             forms_factory=RobustFormsFactory(),
             links_factory=RobustLinksFactory(),
             title_factory=RobustTitleFactory(),
             response_type_finder=ResponseTypeFinder(
-                allow_xhtml=i_want_broken_xhtml_support),
+                allow_xhtml=i_want_broken_xhtml_support,
+                allow_json=i_want_broken_json_support),
             )
         if soup_class is None:
             soup_class = MechanizeBs

diff --git a/mechanize/_http.py b/mechanize/_http.py
@@ -184,17 +184,19 @@ class HTTPEquivProcessor(BaseHandler):
 
     def __init__(self, head_parser_class=HeadParser,
                  i_want_broken_xhtml_support=False,
+                 i_want_broken_json_support=False
                  ):
         self.head_parser_class = head_parser_class
         self._allow_xhtml = i_want_broken_xhtml_support
+        self._allow_json = i_want_broken_json_support
 
     def http_response(self, request, response):
         if not hasattr(response, "seek"):
             response = response_seek_wrapper(response)
         http_message = response.info()
         url = response.geturl()
         ct_hdrs = http_message.getheaders("content-type")
-        if is_html(ct_hdrs, url, self._allow_xhtml):
+        if is_html(ct_hdrs, url, self._allow_xhtml, self._allow_json):
             try:
                 try:
                     html_headers = parse_head(response,