#!/usr/bin/perl
# Developed by Alberto Simões on 26.6.2012
# for questions, please contact albie@alfarrabio.di.uminho.pt
# converts JRC-Acquis bilingual file format to TMX format, used in DGT-TM

use XML::DT::Sequence ;
use XML::TMX::Writer;
use warnings;
use strict;

# Command line: <input> [<output>]
#   $filename - JRC-Acquis alignment file to convert (required).
#   $output   - value handed to the TMX writer as its -output option; it is
#               left undefined when the argument is omitted (presumably the
#               writer then uses its default destination - confirm against
#               the XML::TMX::Writer documentation).
my $filename = shift;
my $output = shift;

# Validate the input explicitly instead of relying on truthiness: a file
# literally named "0" is a legal input, while an empty string is not, even
# when an output name follows it.
die "$0 input output\n" unless defined $filename && length $filename;

# TMX writer: the document is started right away on the requested output.
my $tmx = XML::TMX::Writer->new();
$tmx->start_tmx(
    id      => 'ambs',
    -output => $output,
);

# Placeholder language codes, used until the input header supplies its own.
my @langs = qw(L1 L2);

# Sequence processor and the running count of <link> elements handled.
my ($proc, $i) = (XML::DT::Sequence->new(), 0);

# First progress marker; later overwritten in place by the counter.
print STDERR "..";
# Walk the input as a sequence of <link> elements: the -head handlers see what
# precedes the first <link>, the -body handlers see each <link> in turn.
# $c, %v and father are the XML::DT handler helpers (element content, element
# attributes, and the parent's attribute hash respectively).
$proc->process($filename,
               -tag => 'link',
               -head => {
                         # The select attribute of <text> is expected to hold
                         # the two language codes separated by whitespace.
                         text => sub {
                             # split ' ' discards leading whitespace, whereas
                             # split /\s+/ would produce an empty first code
                             # for a padded attribute value.
                             my @codes = defined $v{select}
                                       ? split(' ', $v{select})
                                       : ();
                             # Keep the current codes unless both languages
                             # are present, so add_tu never receives an
                             # undefined language key.
                             @langs = @codes[0, 1] if @codes >= 2;
                         },
                        },
               -body => {
                         'link' => sub {
                             ++$i;
                             # Refresh the progress counter every 100 links.
                             print STDERR "\r$i" unless $i % 100;
                             # s1/s2 were stored on this element by the child
                             # handlers below.
                             $tmx->add_tu(
                                          $langs[0] => $v{s1},
                                          $langs[1] => $v{s2},
                                         );
                         },
                         # Paragraph markup is dropped; only the text remains.
                         'p' => sub{ $c },
                         # Hand each side of the pair up to the enclosing link.
                         's1' => sub{ father->{s1} = $c },
                         's2' => sub{ father->{s2} = $c },
                        },
              );
# Final count, then close the TMX document.
print STDERR "\r$i\n";
$tmx->end_tmx();