漢数字を数値に変換する(JParsec版)

一応Java版も作った。jparsec 2.0用です。jparsecはここから落とせるのが本物です。だからどのjparsecだよ。

package sample.janumber;

import junit.framework.TestCase;

import org.codehaus.jparsec.Parser;
import org.codehaus.jparsec.Parsers;
import org.codehaus.jparsec.Scanners;
import org.codehaus.jparsec.error.ParserException;
import org.codehaus.jparsec.functors.Map;
import org.codehaus.jparsec.functors.Map3;
import org.codehaus.jparsec.pattern.CharPredicates;
import org.codehaus.jparsec.pattern.Pattern;
import org.codehaus.jparsec.pattern.Patterns;

/**
 * Converts numbers written in kanji numerals (e.g. "三億四千九百万二百三") into
 * {@code long} values using a jparsec parser, together with JUnit 3 tests for it.
 *
 * <p>Grammar implemented by {@link #createParser()}:
 * <pre>
 * &lt;digit&gt;      ::= "一" | "二" | "三" | "四" | "五" | "六" | "七" | "八" | "九"
 * &lt;class&gt;      ::= "千" | "百" | "十"
 * &lt;unit&gt;       ::= "万" | "億" | "兆"
 * &lt;singlet&gt;    ::= &lt;digit&gt;
 * &lt;quadruplet&gt; ::= &lt;digit&gt;? &lt;class&gt; &lt;quadruplet&gt;? | &lt;singlet&gt;
 * &lt;number&gt;     ::= &lt;quadruplet&gt; &lt;unit&gt; &lt;number&gt;? | &lt;quadruplet&gt;
 * </pre>
 */
public class JaNumberJParsecTest extends TestCase {
    /** Value of the unit 万 (10^4). */
    private static final long MAN = 10000L;
    /** Value of the unit 億 (10^8). */
    private static final long OKU = MAN * MAN;
    /**
     * Value of the unit 兆 (10^12). It must be computed in {@code long} arithmetic:
     * the int expression {@code 10000*10000*10000} overflows to -727379968.
     */
    private static final long CHO = OKU * MAN;

    /**
     * Builds a parser that accepts exactly one character out of {@code symbols} and
     * yields the entry of {@code values} at the same index.
     *
     * @param symbols the accepted characters
     * @param name    the label handed to {@code Scanners.pattern}
     * @param values  the numeric value of each character, parallel to {@code symbols}
     * @return a parser producing the value of the matched character
     */
    private static Parser<Long> symbolParser(
            final String symbols, String name, final long[] values) {
        Pattern pattern = Patterns.isChar(CharPredicates.among(symbols));
        return Scanners.pattern(pattern, name).source()
            .map(new Map<String, Long>() {
                public Long map(String s) {
                    // The pattern only matches characters contained in symbols,
                    // so indexOf cannot return -1 here.
                    return values[symbols.indexOf(s.charAt(0))];
                }
            });
    }

    /**
     * Creates the kanji-numeral parser.
     *
     * @return a newly built parser that yields the numeric value of its input
     */
    public static Parser<Long> createParser() {
        // <digit> ::= "一" | "二" | "三" | "四" | "五" | "六" | "七" | "八" | "九"
        Parser<Long> digits = symbolParser(
            "一二三四五六七八九", "Digits",
            new long[] {1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L});

        // <class> ::= "千" | "百" | "十"
        Parser<Long> classes = symbolParser(
            "千百十", "Classes",
            new long[] {1000L, 100L, 10L});

        // <unit> ::= "万" | "億" | "兆"
        Parser<Long> units = symbolParser(
            "万億兆", "Units",
            new long[] {MAN, OKU, CHO});

        // <singlet> ::= <digit>
        Parser<Long> singlet = digits;

        // <quadruplet> ::= <digit>? <class> <quadruplet>? | <singlet>
        // The rule is recursive, so it is tied together through a Reference.
        Parser.Reference<Long> quadrupletRef = Parser.newReference();
        Parser<Long> quadruplet =
            Parsers.or(
                Parsers.sequence(
                    // A missing digit counts as 1 (e.g. "百" == 100);
                    // a missing tail contributes 0.
                    digits.optional(1L), classes, quadrupletRef.lazy().optional(0L),
                    new Map3<Long, Long, Long, Long>() {
                        public Long map(Long digitValue, Long classValue, Long nextValue) {
                            return digitValue * classValue + nextValue;
                        }
                    }),
                singlet);
        quadrupletRef.set(quadruplet);

        // <number> ::= <quadruplet> <unit> <number>? | <quadruplet>
        Parser.Reference<Long> numberRef = Parser.newReference();
        Parser<Long> number =
            Parsers.or(
                Parsers.sequence(
                    quadruplet,
                    units,
                    numberRef.lazy().optional(0L),
                    new Map3<Long, Long, Long, Long>() {
                        public Long map(Long quadrupletValue, Long unitValue, Long nextValue) {
                            return quadrupletValue * unitValue + nextValue;
                        }
                    }),
                quadruplet);
        numberRef.set(number);

        return number;
    }

    /** Parser under test, built once per test instance. */
    private final Parser<Long> parser = createParser();

    /**
     * Parses a kanji-numeral string.
     *
     * @param target the text to parse
     * @return the numeric value produced by the parser
     */
    public Long parse(String target) {
        return parser.parse(target);
    }

    /*
     * The tests continue below (some are omitted).
     */

    /** Checks a number that combines two units with a unit-less remainder. */
    public void testJaNumber349000203() {
        long result = parse("三億四千九百万二百三");
        assertEquals("三億四千九百万二百三==349000203", 349000203L, result);
    }

    /** Regression test: 兆 must map to 10^12 rather than an overflowed int product. */
    public void testJaNumberOneCho() {
        long result = parse("一兆");
        assertEquals("一兆==1000000000000", 1000000000000L, result);
    }

    /** Checks that a unit directly following another unit is rejected. */
    public void testJaNumberFormat() {
        try {
            parse("一億万");
            fail("億の次はDigitsかClassesかEOF");
        } catch (ParserException e) {
            // Expected failure; the trace is printed on purpose to show the diagnostic.
            // According to the original author it reads:
            // "Digits, Classes or EOF expected, 万 encountered."
            e.printStackTrace();
        }
    }
}