Coverage Report - com.legstar.cobol.utils.PictureUtil
 
Classes in this File Line Coverage Branch Coverage Complexity
PictureUtil
98 %
121/123
92 %
46/50
6,2
 
 1  
 /*******************************************************************************
 2  
  * Copyright (c) 2010 LegSem.
 3  
  * All rights reserved. This program and the accompanying materials
 4  
  * are made available under the terms of the GNU Lesser Public License v2.1
 5  
  * which accompanies this distribution, and is available at
 6  
  * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
 7  
  * 
 8  
  * Contributors:
 9  
  *     LegSem - initial API and implementation
 10  
  ******************************************************************************/
 11  
 package com.legstar.cobol.utils;
 12  
 
 13  
 import java.util.HashMap;
 14  
 import java.util.LinkedList;
 15  
 import java.util.List;
 16  
 import java.util.Map;
 17  
 
 18  
 import com.legstar.cob2xsd.PictureSymbol;
 19  
 
 20  
 /**
 21  
  * Utility class provides methods to introspect COBOL picture clauses.
 22  
  *
 23  
  */
 24  
 public final class PictureUtil {
 25  
     
 26  
     /**
 27  
      * Utility class.
 28  
      */
 29  0
     private PictureUtil() {
 30  
         
 31  0
     }
 32  
     
 33  
     /**
 34  
      * Determines how many times a given character occurs in a picture string.
 35  
      * A character can appear standalone or as a factored sequence like X(nn).
 36  
      * Unlike all other picture symbols, currency symbols are case sensitive.
 37  
      * For example, �D� and �d� specify different currency symbols.
 38  
      * @param picture the picture string
 39  
      * @param currencySymbol the currency symbol
 40  
      * @return a map of all characters to search for
 41  
      */
 42  
     public static Map < Character, Integer > getPictureCharOccurences(
 43  
             final String picture,
 44  
             final char currencySymbol) {
 45  
 
 46  871
         Map < Character, Integer > charNum = new HashMap < Character, Integer >();
 47  871
         charNum.put('A', 0);
 48  871
         charNum.put('B', 0);
 49  871
         charNum.put('G', 0);
 50  871
         charNum.put('N', 0);
 51  871
         charNum.put('X', 0);
 52  871
         charNum.put('P', 0);
 53  871
         charNum.put('Z', 0);
 54  871
         charNum.put('0', 0);
 55  871
         charNum.put('/', 0);
 56  871
         charNum.put('+', 0);
 57  871
         charNum.put('-', 0);
 58  871
         charNum.put('*', 0);
 59  871
         charNum.put('C', 0);
 60  871
         charNum.put('D', 0);
 61  871
         charNum.put('.', 0);
 62  871
         charNum.put(',', 0);
 63  871
         charNum.put('9', 0);
 64  871
         charNum.put('E', 0);
 65  871
         charNum.put('S', 0);
 66  871
         charNum.put('V', 0);
 67  871
         charNum.put(currencySymbol, 0);
 68  
         
 69  871
         List < PictureSymbol > pictureSymbols = parsePicture(picture, currencySymbol);
 70  871
         for (PictureSymbol pictureSymbol : pictureSymbols) {
 71  1542
             Integer number = charNum.get(pictureSymbol.getSymbol());
 72  1542
             if (number != null) {
 73  1542
                 number += pictureSymbol.getNumber();
 74  1542
                 charNum.put(pictureSymbol.getSymbol(), number);
 75  
             }
 76  1542
         }
 77  
 
 78  871
         return charNum;
 79  
     }
 80  
     
 81  
     /**
 82  
      * The COBOL picture clause determines the length, in number of characters,
 83  
      * for all alphanumeric and numeric-edited data items.
 84  
      * <p/>
 85  
      * The length evaluated here is either the number of character positions
 86  
      * (which corresponds to the size constraint on the client side) or the
 87  
      * byte size of the storage needed on z/OS for the data item. You select between
 88  
      * one or the other with the calcStorageLength parameter.
 89  
      * <p/>
 90  
      * When the currency sign is more than a single character, then the first
 91  
      * occurrence of the currency symbol counts for more than one byte of storage.
 92  
      * 
 93  
      * @param charNum map of all characters in the picture string
 94  
      * @param isSignSeparate if sign occupies a separated position (no overpunch)
 95  
      * @param currencySign the currency sign
 96  
      * @param currencySymbol the currency symbol
 97  
      * @param calcStorageLength when true the length returned is the z/OS storage length
 98  
      * @return the length, in number of characters, of the data item
 99  
      */
 100  
     public static int calcLengthFromPicture(
 101  
             final Map < Character, Integer > charNum,
 102  
             final boolean isSignSeparate,
 103  
             final String currencySign,
 104  
             final char currencySymbol,
 105  
             final boolean calcStorageLength) {
 106  
 
 107  809
         int length = 0;
 108  
         
 109  
         /* character position occupied by each picture symbol */
 110  809
         Map < Character, Integer > charLen = new HashMap < Character, Integer >();
 111  809
         charLen.put('A', 1);
 112  809
         charLen.put('B', 1);
 113  809
         charLen.put('G', (calcStorageLength) ? 2 : 1);
 114  809
         charLen.put('N', (calcStorageLength) ? 2 : 1);
 115  809
         charLen.put('X', 1);
 116  809
         charLen.put('P', 0);
 117  809
         charLen.put('Z', 1);
 118  809
         charLen.put('0', 1);
 119  809
         charLen.put('/', 1);
 120  809
         charLen.put('+', 1);
 121  809
         charLen.put('-', 1);
 122  809
         charLen.put('*', 1);
 123  809
         charLen.put('C', 2);
 124  809
         charLen.put('D', 2);
 125  809
         charLen.put('.', 1);
 126  809
         charLen.put(',', 1);
 127  809
         charLen.put('9', 1);
 128  809
         charLen.put('E', 1);
 129  809
         charLen.put('S', (isSignSeparate) ? 1 : 0);
 130  809
         charLen.put('V', 0);
 131  809
         charLen.put(currencySymbol, 1);
 132  
         
 133  809
         for (Map.Entry < Character, Integer > entry : charNum.entrySet()) {
 134  16989
             length += entry.getValue() * charLen.get(entry.getKey());
 135  
         }
 136  809
         if (currencySign.length() > 1 && charNum.get(currencySymbol) > 1) {
 137  2
             length += currencySign.length() - 1;
 138  
         }
 139  809
         return length;
 140  
     }
 141  
     
 142  
     /**
 143  
      * Try to infer a regular expression to match a COBOL picture clause.
 144  
      * <p/>
 145  
      * The objective is to build a string that would fit the internal representation
 146  
      * of a picture edited COBOL field.
 147  
      * <p/>
 148  
      * If a picture is not restrictive, for instance PIC X does not impose
 149  
      * any restriction, then we return null (no pattern).
 150  
      * <p/>
 151  
      * Regular expressions in XML Schema are more like PERL than Java regex.
 152  
      * @param picture the picture clause
 153  
      * @param currencySign the currency sign
 154  
      * @param currencySymbol the currency symbol
 155  
      * @return a regular expression
 156  
      */
 157  
     public static String getRegexFromPicture(
 158  
             final String picture,
 159  
             final String currencySign,
 160  
             final char currencySymbol) {
 161  542
         StringBuilder result = new StringBuilder();
 162  
         
 163  
         /* Table that associate a picture symbol to a regex atom */
 164  542
         Map < Character, String > charRegex = new HashMap < Character, String >();
 165  542
         charRegex.put('A', "[\\p{L}\\s]"); // any letter or space character
 166  542
         charRegex.put('B', "\\s"); // space
 167  542
         charRegex.put('G', "."); // TODO does not reflect the double byte nature
 168  542
         charRegex.put('N', "."); // TODO does not reflect the double byte nature
 169  542
         charRegex.put('X', "."); // Any byte
 170  542
         charRegex.put('P', "[\\d\\.]"); // Floating decimal point
 171  542
         charRegex.put('Z', "[1-9\\s]"); // Numeric or space
 172  542
         charRegex.put('0', "0"); // Zero character
 173  542
         charRegex.put('/', "/"); // Forward slash character
 174  542
         charRegex.put('+', "[\\+\\-\\d]"); // Position can be a sign or a digit
 175  542
         charRegex.put('-', "[\\+\\-\\d]"); // Position can be a sign or a digit
 176  542
         charRegex.put('*', "[1-9\\*]"); // Position can be an asterisk or a digit
 177  542
         charRegex.put('C', "(CR|\\s\\s)"); // Credit or spaces
 178  542
         charRegex.put('D', "(DB|\\s\\s)"); // Debit or spaces
 179  542
         charRegex.put('.', "\\."); // Decimal point character
 180  542
         charRegex.put(',', ","); // Comma character
 181  542
         charRegex.put('9', "\\d"); // A digit
 182  542
         charRegex.put('E', "E"); // Exponent
 183  542
         charRegex.put('S', "[\\+\\-]"); // A numeric sign
 184  542
         charRegex.put('V', ""); // A virtual decimal point
 185  542
         charRegex.put(currencySymbol, "(" + currencySign.replace(" ", "\\s") + "|\\d|\\s)");
 186  
         
 187  542
         List < PictureSymbol > pictureSymbols = parsePicture(picture, currencySymbol);
 188  
 
 189  
         /* If there is only one symbol and it is non restrictive, no pattern*/
 190  542
         if (pictureSymbols.size() == 1) {
 191  401
             String symbol =  charRegex.get(pictureSymbols.get(0).getSymbol());
 192  401
             if (symbol == null || symbol.equals(".")) {
 193  246
                 return null;
 194  
             }
 195  
         }
 196  
         
 197  
         /* Add quantifiers */
 198  296
         for (PictureSymbol pictureSymbol : pictureSymbols) {
 199  555
             String regex = charRegex.get(pictureSymbol.getSymbol());
 200  555
             if (charRegex != null) {
 201  555
                 result.append(regex);
 202  555
                 int occurs = pictureSymbol.getNumber();
 203  555
                 if (occurs > 1) {
 204  332
                     result.append("{0," + occurs + "}");
 205  
                 } else {
 206  223
                     result.append("?");
 207  
                 }
 208  
             }
 209  555
         }
 210  
         
 211  296
         return result.toString();
 212  
     }
 213  
     
 214  
     /**
 215  
      * Parse a COBOL picture clause. Character symbols are returned in the order
 216  
      * where they are found in the picture clause. All factoring is resolved and
 217  
      * each character is associated with its occurrence number.
 218  
      * <p/>
 219  
      * For instance: 9(3)V99XX becomes 4 entries in the list for characters 9, V, 9 and X.
 220  
      * First 9 occurs 3 times, V occurs 1 time, 9 occurs 2 and X occurs 2.  
 221  
      * @param currencySymbol the currency symbol
 222  
      * @param picture the COBOL picture clause
 223  
      * @return ordered list of symbols appearing in the picture clause with their
 224  
      * number of occurrences.
 225  
      */
 226  
     public static List < PictureSymbol > parsePicture(
 227  
             final String picture,
 228  
             final char currencySymbol) {
 229  1420
         int factoredNumber = 0;
 230  1420
         boolean factorSequence = false;
 231  1420
         char lastChar = 0;
 232  1420
         PictureSymbol pictureSymbol = null;
 233  1420
         List < PictureSymbol > result = new LinkedList < PictureSymbol >();
 234  8276
         for (int i = 0; i < picture.length(); i++) {
 235  6856
             char c = picture.charAt(i);
 236  6856
             if (c != currencySymbol) {
 237  6806
                 c = Character.toUpperCase(c);
 238  
             }
 239  6856
             if (factorSequence) {
 240  2862
                 if (c == ')') {
 241  1222
                     pictureSymbol.setNumber(pictureSymbol.getNumber() + factoredNumber - 1);
 242  1222
                     factorSequence = false;
 243  
                 } else {
 244  1640
                     if  (Character.isDigit(c)) {
 245  1636
                         factoredNumber = factoredNumber * 10 + Character.getNumericValue(c);
 246  
                     }
 247  
                 }
 248  
             } else {
 249  3994
                 if (c == '(') {
 250  1222
                     factoredNumber = 0;
 251  1222
                     factorSequence = true;
 252  
                 } else {
 253  
                     /* CR and DB are special cases where we need to ignore,
 254  
                      * the second character R or B.*/
 255  2772
                     if ((c != 'B' || lastChar != 'D') && (c != 'R' || lastChar != 'C')) {
 256  2740
                         if (c == lastChar) {
 257  388
                             pictureSymbol.setNumber(pictureSymbol.getNumber() + 1);
 258  
                         } else {
 259  2352
                             pictureSymbol = new PictureSymbol(c, 1);
 260  2352
                             result.add(pictureSymbol);
 261  2352
                             lastChar = c;
 262  
                         }
 263  
                     }
 264  
                 }
 265  
             }
 266  
         }
 267  1420
         return result;
 268  
     }
 269  
     
 270  
 
 271  
 }