漢数字を数値に変換する(JParsec版)
一応Java版も作った。jparsec 2.0用です。jparsecはここから落とせるのが本物です。だからどのjparsecだよ。
package sample.janumber; import junit.framework.TestCase; import org.codehaus.jparsec.Parser; import org.codehaus.jparsec.Parsers; import org.codehaus.jparsec.Scanners; import org.codehaus.jparsec.functors.Map; import org.codehaus.jparsec.functors.Map3; import org.codehaus.jparsec.pattern.CharPredicates; import org.codehaus.jparsec.pattern.Pattern; import org.codehaus.jparsec.pattern.Patterns; public class JaNumberJParsecTest extends TestCase { /** * @return パーサを生成して戻す */ public static Parser<Long> createParser() { // <digit> ::= "一" | "二" | "三" | "四" | "五" | "六" | "七" | "八" | "九" Pattern digitsPattern = Patterns.isChar(CharPredicates.among("一二三四五六七八九")); Parser<Long> digits = Scanners.pattern(digitsPattern, "Digits").source() .map(new Map<String, Long>(){ public Long map(String s) { long longValue = 0; switch (s.charAt(0)) { case '一': longValue = 1; break; case '二': longValue = 2; break; case '三': longValue = 3; break; case '四': longValue = 4; break; case '五': longValue = 5; break; case '六': longValue = 6; break; case '七': longValue = 7; break; case '八': longValue = 8; break; case '九': longValue = 9; break; } return longValue; }}); // <class> ::= "千" | "百" | "十" Pattern classesPattern = Patterns.isChar(CharPredicates.among("千百十")); Parser<Long> classes = Scanners.pattern(classesPattern, "Classes").source() .map(new Map<String, Long>(){ public Long map(String s) { long longValue = 0; switch (s.charAt(0)) { case '十': longValue = 10; break; case '百': longValue = 100; break; case '千': longValue = 1000; break; } return longValue; }}); // <unit> ::= "万" | "億" | "兆" Pattern unitsPattern = Patterns.isChar(CharPredicates.among("万億兆")); Parser<Long> units = Scanners.pattern(unitsPattern, "Units").source() .map(new Map<String, Long>(){ public Long map(String s) { long longValue = 0; switch (s.charAt(0)) { case '万': longValue = 10000; break; case '億': longValue = 10000*10000; break; case '兆': longValue = 10000*10000*10000; break; } return longValue; }}); // <singlet> ::= <digit> Parser<Long> singlet = digits; // <quadruplet> ::= <digit>? <class> <quadruplet>? | <singlet> Parser.Reference<Long> quadrupletRef = Parser.newReference(); Parser<Long> quadruplet = Parsers.or( Parsers.sequence( digits.optional(1L), classes, quadrupletRef.lazy().optional(0L), new Map3<Long, Long, Long, Long>() { public Long map(Long digitValue, Long classValue, Long nextValue) { return digitValue * classValue + nextValue; }}), singlet); quadrupletRef.set(quadruplet); // <number> ::= <quadruplet> <unit> <number>? | <quadruplet> Parser.Reference<Long> numberRef = Parser.newReference(); Parser<Long> number = Parsers.or( Parsers.sequence( quadruplet, units, numberRef.lazy().optional(0L), new Map3<Long, Long, Long, Long>() { public Long map(Long quadrupletValue, Long unitValue, Long nextValue) { return quadrupletValue * unitValue + nextValue; } }), quadruplet); numberRef.set(number); return number; } private Parser<Long> parser = createParser(); public Long parse(String target) { return parser.parse(target); } /*
続きでテスト(略)
*/ public void testJaNumber349000203() { long result = parse("三億四千九百万二百三"); assertEquals("三億四千九百万二百三==349000203", 349000203L, result); } public void testJaNumberFormat() { try { long result = parse("一億万"); fail("億の次はDigitsかClassesかEOF"); } catch (ParserException e) { e.printStackTrace(); // 「Digits, Classes or EOF expected, 万 encountered.」と表示される } } }