diff options
Diffstat (limited to 'tests/bytecode/mp-tests/ptex.py')
-rw-r--r-- | tests/bytecode/mp-tests/ptex.py | 269 |
1 file changed, 0 insertions, 269 deletions
import sys
import os
import os.path
import datetime
import argparse
from xml.etree.ElementTree import Element, SubElement, tostring

from log import Log
from texparser import TexParser
from latexparser import LatexParser
from gettexfile import file_has_suffix
from gettexfile import get_tex_file

from xiwi.common.misc import buildFileList
from xiwi.common import arxivid
from xiwi.common.stats import Statistics


def str_contains(s1, s2):
    """Return True if s2 occurs anywhere within s1."""
    return s1.find(s2) != -1


def str_contains_one_of(st, st_list):
    """Return True if st contains at least one of the strings in st_list."""
    for st2 in st_list:
        if str_contains(st, st2):
            return True
    return False


def detect_file_kind(file_obj):
    """Simple detection of kind of source file.

    Returns one of 'PS', 'auto-ignore', 'tex', 'latex' or 'unknown'.
    The file position is rewound to the start before returning.
    """
    kind = 'unknown'
    # skip leading blank lines to find the first meaningful line
    firstline = file_obj.readline()
    while firstline.isspace():
        firstline = file_obj.readline()
    if firstline.startswith('%!PS'):
        kind = 'PS'
    elif firstline.startswith('%auto-ignore'):
        kind = 'auto-ignore'
    else:
        file_obj.seek(0)
        for line in file_obj:
            if str_contains(line, '\\def'):
                # might be tex, if we don't find anything else
                kind = 'tex'
            if str_contains(line, '\\input'):
                # might be tex, if we don't find anything else
                kind = 'tex'
            if str_contains(line, 'amstex') or str_contains(line, 'harvmac'):
                # definitely tex
                kind = 'tex'
                break
            if str_contains(line, '\\documentclass'):
                # definitely latex
                kind = 'latex'
                break
            if str_contains(line, '\\documentstyle'):
                # could be tex or latex
                if str_contains(line, 'amsppt'):
                    kind = 'tex'
                    break
                else:
                    kind = 'latex'
                    break
    file_obj.seek(0)
    return kind


class WithdrawnPaper(object):
    """Stand-in parser for withdrawn papers: no references, always 'succeeds'."""

    def __init__(self):
        pass

    def __getitem__(self, item):
        if item == 'refs':
            return []
        elif item == 'success':
            return True

    def parse(self):
        # nothing to parse for a withdrawn paper
        pass


def process_article(filename):
    """Returns TexParserBase derived object on success, None on failure."""

    # get the tex file
    filename, file_obj, tarfile_obj = get_tex_file(filename)
    if file_obj is None:
        return None

    # detect the type of file
    kind = detect_file_kind(file_obj)

    # act on the type of file
    parser = None
    if kind == 'PS':
        print('skipping postscript file')
    elif kind == 'auto-ignore':
        print('asked to ignore file, most likely it was withdrawn')
        parser = WithdrawnPaper()
    # BUG FIX: this was a bare 'if', which broke the chain so the trailing
    # 'else' fired (printing "cannot determine kind of file") even for
    # 'PS' and 'auto-ignore' files.
    elif kind == 'tex':
        print('parsing as TeX')
        parser = TexParser(filename, file_obj, tarfile_obj)
    elif kind == 'latex':
        print('parsing as LaTeX')
        parser = LatexParser(filename, file_obj, tarfile_obj)
    else:
        print('cannot determine kind of file')

    # attempt to parse the document; a failed parse yields None
    try:
        if parser is not None:
            parser.parse()
    except Exception as e:
        print('exception while trying to parse file:')
        print(str(e))
        parser = None

    # close the files
    file_obj.close()
    if tarfile_obj is not None:
        tarfile_obj.close()

    # return the parsed document
    return parser


arxiv_classes = [
    'acc-phys', 'adap-org', 'alg-geom', 'ao-sci', 'astro-ph', 'atom-ph',
    'bayes-an', 'chao-dyn', 'chem-ph', 'cmp-lg', 'comp-gas', 'cond-mat',
    'cs', 'dg-ga', 'funct-an', 'gr-qc', 'hep-ex', 'hep-lat',
    'hep-ph', 'hep-th', 'math', 'math-ph', 'mtrl-th', 'nlin',
    'nucl-ex', 'nucl-th', 'patt-sol', 'physics', 'plasm-ph', 'q-alg',
    'q-bio', 'quant-ph', 'solv-int', 'supr-con'
]


def do_single_file(file_name, print_xml, write_xml_dir):
    """Process one source file: parse it, record statistics, optionally
    emit per-article XML (to stdout and/or write_xml_dir).

    Returns True on success, False on failure.
    """
    arxiv_id, arxiv_version = arxivid.filenameToArxivAndVersion(file_name)
    if arxiv_id is None:
        print('WARN: could not determine arXiv identifier for', file_name)
        arxiv_id = '<unknown>'
        arxiv_version = 0

    Log.reset()
    Statistics.begin_item(arxiv_id)

    if file_has_suffix(file_name, '.pdf'):
        Statistics.count('1) pdf')
        succ = True
        # BUG FIX: bib_refs was left unassigned on this path, raising
        # NameError below when XML output was requested for a PDF.
        bib_refs = []
    else:
        Statistics.count('2) processed')

        parser = process_article(file_name)

        if parser is not None:
            succ = parser['success']
            bib_refs = parser['refs']
        else:
            succ = False
            bib_refs = []

    if str_contains_one_of(arxiv_id, ['gr-qc', 'hep-']):
        Statistics.count('hep-processed')
        if succ:
            Statistics.count('hep-success')
    if succ:
        print('-success--------')
        Statistics.count('3) success')
    else:
        print('-fail-----------')
        Statistics.count('4) fail')

    # debugging aid: dump each reference when enabled
    show_ref = False

    if succ and show_ref:
        for bib_ref in bib_refs:
            print(bib_ref.key, 'with', bib_ref.cite_count, 'citations in paper')
            if len(bib_ref.bib_info) == 0:
                print('no reference')
            else:
                print(bib_ref.bib_info_as_str(keep_comments=True))

    if succ and (print_xml or write_xml_dir):
        xml = Element('article')
        SubElement(xml, 'id').text = arxiv_id
        if arxiv_version > 0:
            SubElement(xml, 'version').text = str(arxiv_version)
        refs = SubElement(xml, 'refs')
        for bib_ref in bib_refs:
            bib_text = bib_ref.bib_info_as_str(keep_comments=True)
            if len(bib_text) != 0:
                # every emitted reference counts as cited at least once
                ncites = bib_ref.cite_count
                if ncites < 1:
                    ncites = 1
                ref = SubElement(refs, 'ref', order=str(bib_ref.ref_order_num), freq=str(ncites))
                ref.text = bib_text
        if print_xml:
            print(tostring(xml))
        if isinstance(write_xml_dir, str):
            if arxiv_id != '<unknown>':
                xml_file_name = os.path.join(write_xml_dir, arxiv_id.replace('/', '') + '.xml')
            else:
                # no arXiv id: fall back to the input file's base name
                fname = os.path.split(file_name)[1]
                if fname.rfind('.') > 0:
                    fname = fname[:fname.rfind('.')]
                xml_file_name = write_xml_dir + '/' + fname + '.xml'
            # use a context manager so the file is closed even on error
            with open(xml_file_name, 'wb') as file_obj:
                file_obj.write(tostring(xml, encoding='utf-8'))

    Statistics.end_item()

    return succ


# summary lines accumulated for the end-of-run email report
summaryStrs = []

if __name__ == "__main__":
    cmd_parser = argparse.ArgumentParser(description='Parse TeX/LaTeX to find references.')
    cmd_parser.add_argument('--filelist', action='store_true', help='file names on the command line each contain a list of files to process')
    cmd_parser.add_argument('--print-xml', action='store_true', help='print XML output to stdout')
    cmd_parser.add_argument('--write-xml', metavar='<dir>', help='destination directory to write XML output files')
    cmd_parser.add_argument('--failed', metavar='<file>', help='output file to write list of failed files')
    cmd_parser.add_argument('files', nargs='+', help='input files')
    args = cmd_parser.parse_args()

    # print date stamp
    timeStart = datetime.datetime.now()
    print('[ptex] started processing at', str(timeStart))

    print('given', len(args.files), 'files, first file:', args.files[0])
    print('================')

    Statistics.clear('article')

    # build list of files to process
    file_list = buildFileList(args.filelist, args.files)

    # ensure the destination directory exists
    # BUG FIX: the original condition was inverted (it only called makedirs
    # when the directory already existed), so a missing directory was never
    # created.
    if args.write_xml is not None and not os.path.exists(args.write_xml):
        try:
            os.makedirs(args.write_xml)
        except OSError:
            # a concurrent run may have created it; writing will fail later
            # if the directory is genuinely unavailable
            pass

    # process the files
    failed_files = []
    for file_name in file_list:
        success = do_single_file(file_name, args.print_xml, args.write_xml)
        if not success:
            failed_files.append(file_name)

    # write the failed files to an output file, if requested
    if args.failed is not None:
        with open(args.failed, 'w') as file_obj:
            file_obj.writelines(f + '\n' for f in failed_files)

    print('================')
    Statistics.show()
    Statistics.show_detail('fail')
    #Statistics.show_detail('cite-range')
    #Statistics.show_detail('bad-ascii')
    #Statistics.show_detail('non-ascii')

    print('================')

    # print date stamp
    timeEnd = datetime.datetime.now()
    print('[ptex] finished processing at', str(timeEnd))

    # print summary for email
    summaryStrs.extend(Statistics.get_summary())
    summaryStrs.insert(0, 'started processing at %s, took %.1f minutes' % (timeStart.strftime('%H:%M'), (timeEnd - timeStart).total_seconds() / 60))
    for s in summaryStrs:
        print('**SUMMARY** [ptex]', s)