01-13-2015 02:52 AM
Hi,
I am glad to see that you have resolved all your issues.
Concerning the column-count limitation, maybe you can do the test in the while condition:
while(*carattere && !error && (stringa_in_corso<numero_stringhe)) ==> this stops the parsing as soon as the maximum number of columns is reached
Regards,
Stef
01-13-2015 02:57 AM
I did it this way because I had already tried that with the old version and ran into this problem:
if a special (quoted) column contains a CR LF and I do not parse it because I have already reached the maximum number of readable columns, then when I parse the next line I get an error, because I start reading in the middle of an "inquote" value.
01-15-2015 02:28 AM
I have made some corrections to correctly handle an inquote column with CR LF inside it. This seems to be the final version of the reader.
The old version is left commented out.
I have also written a column counter; it may be useful for allocating the destination strings.
/*
 * Store one character in the current destination column.
 *
 * The write is skipped when the column lies beyond numero_stringhe, but the
 * position counter is always advanced so the caller can still tell whether
 * it is at the very first character of a field.
 */
static void leggi_riga_csv_accoda(char *stringa_destinazione[], int numero_stringhe,
                                  int colonna, int *indice, char c)
{
    if (colonna < numero_stringhe) {
        stringa_destinazione[colonna][*indice] = c;
    }
    (*indice)++;
}

/*
 * NUL-terminate the current destination column (no-op for columns beyond
 * numero_stringhe).
 */
static void leggi_riga_csv_termina(char *stringa_destinazione[], int numero_stringhe,
                                   int colonna, int indice)
{
    if (colonna < numero_stringhe) {
        stringa_destinazione[colonna][indice] = 0;
    }
}

/*
 * Parse one CSV record starting at lines[riga_partenza] and copy its fields
 * into stringa_destinazione[0 .. numero_stringhe-1].
 *
 * Parameters:
 *   lines                 array of line strings; a quoted field that contains
 *                         a line break continues on the following entry.
 *                         A NULL entry is treated as the end of the input.
 *   riga_partenza         index of the first line of the record.
 *   stringa_destinazione  caller-provided buffers, one per column. Their
 *                         capacity is not known here, so the caller must make
 *                         them large enough for the longest field.
 *   numero_stringhe       number of destination buffers; extra columns in the
 *                         record are parsed but not stored.
 *   formato               0: ',' is the delimiter;
 *                         1: ';' is the delimiter and every ',' is copied as
 *                            '.' (decimal comma -> decimal point).
 *                         Any other value is handled like 0.
 *
 * Returns the index of the last line consumed by the record (equal to
 * riga_partenza unless a quoted field spanned several lines).
 *
 * Quoting rules: a field is quoted when its first character is '"'. Inside a
 * quoted field the delimiter is literal text, "" stands for one '"', and a
 * line break is stored as "\r\n". Parsing stops at the end of the record or
 * at the first malformed field (a quote inside an unquoted field, or a closing
 * quote not followed by a delimiter / end of line); in both cases the field
 * being written is NUL-terminated.
 */
int leggi_riga_csv_v2(char **lines, int riga_partenza, char *stringa_destinazione[], int numero_stringhe, int formato)
{
    const char delimitatore = (formato == 1) ? ';' : ',';
    int stringa_in_corso = 0;        /* column currently being filled */
    int index_stringa_in_corso = 0;  /* position inside that column */
    int inquote = 0;                 /* non-zero while inside a quoted field */
    int finito = 0;
    int i;
    const char *carattere;

    /* Every destination starts out as an empty string. */
    for (i = 0; i < numero_stringhe; i++) {
        stringa_destinazione[i][0] = 0;
    }

    if (lines == NULL || lines[riga_partenza] == NULL) {
        return riga_partenza;
    }
    carattere = lines[riga_partenza];

    while (!finito) {
        const char c = *carattere;

        if (c == '\0' || c == '\r' || c == '\n') {
            if (inquote && lines[riga_partenza + 1] != NULL) {
                /* Line break inside a quoted field: keep it and continue
                 * with the next line of the input. */
                leggi_riga_csv_accoda(stringa_destinazione, numero_stringhe,
                                      stringa_in_corso, &index_stringa_in_corso, '\r');
                leggi_riga_csv_accoda(stringa_destinazione, numero_stringhe,
                                      stringa_in_corso, &index_stringa_in_corso, '\n');
                carattere = lines[++riga_partenza];
            } else {
                /* End of the record (or input exhausted inside a quote). */
                leggi_riga_csv_termina(stringa_destinazione, numero_stringhe,
                                       stringa_in_corso, index_stringa_in_corso);
                finito = 1;
            }
        } else if (c == '"') {
            if (inquote) {
                carattere++; /* look at what follows the quote */
                if (*carattere == '"') {
                    /* Doubled quote: one literal quote character. */
                    leggi_riga_csv_accoda(stringa_destinazione, numero_stringhe,
                                          stringa_in_corso, &index_stringa_in_corso, '"');
                    carattere++;
                } else if (*carattere == delimitatore) {
                    /* Closing quote followed by a delimiter: next column.
                     * The quoted state must end here, otherwise the end of a
                     * later unquoted field is mistaken for an embedded line
                     * break. */
                    leggi_riga_csv_termina(stringa_destinazione, numero_stringhe,
                                           stringa_in_corso, index_stringa_in_corso);
                    inquote = 0;
                    stringa_in_corso++;
                    index_stringa_in_corso = 0;
                    carattere++;
                } else {
                    /* Closing quote at end of line, or followed by garbage:
                     * either way the record stops here. */
                    leggi_riga_csv_termina(stringa_destinazione, numero_stringhe,
                                           stringa_in_corso, index_stringa_in_corso);
                    finito = 1;
                }
            } else if (index_stringa_in_corso == 0) {
                /* A quote as first character opens a quoted field. */
                inquote = 1;
                carattere++;
            } else {
                /* Quote inside an unquoted field: malformed, stop. */
                leggi_riga_csv_termina(stringa_destinazione, numero_stringhe,
                                       stringa_in_corso, index_stringa_in_corso);
                finito = 1;
            }
        } else if (c == delimitatore && !inquote) {
            /* Unquoted delimiter: close this column, start the next one. */
            leggi_riga_csv_termina(stringa_destinazione, numero_stringhe,
                                   stringa_in_corso, index_stringa_in_corso);
            stringa_in_corso++;
            index_stringa_in_corso = 0;
            carattere++;
        } else {
            /* Ordinary character. With the ';' format a ',' is a decimal
             * comma and is stored as '.'. */
            const char da_copiare = (c == ',' && formato == 1) ? '.' : c;
            leggi_riga_csv_accoda(stringa_destinazione, numero_stringhe,
                                  stringa_in_corso, &index_stringa_in_corso, da_copiare);
            carattere++;
        }
    }

    return riga_partenza;
}
/*
 * Count the columns of the first CSV record in lines (starting at lines[0]).
 *
 * Parameters:
 *   lines    array of line strings; a quoted field that contains a line break
 *            continues on the following entry. A NULL entry is treated as the
 *            end of the input.
 *   formato  0: ',' is the delimiter; 1: ';' is the delimiter.
 *            Any other value is handled like 0.
 *
 * Returns the number of columns in the record. An empty line counts as one
 * (empty) column. If the record is malformed (a quote inside an unquoted
 * field, or a closing quote not followed by a delimiter / end of line) the
 * count of the columns completed before the bad field is returned. Returns 0
 * when lines or lines[0] is NULL.
 *
 * Quoting rules: a field is quoted when its first character is '"'. Inside a
 * quoted field the delimiter and line breaks are part of the text and ""
 * stands for one '"'.
 */
int contacolonne_csv(char **lines, int formato)
{
    const char delimitatore = (formato == 1) ? ';' : ',';
    int numero_colonne = 0;
    int index_stringa_in_corso = 0;  /* position inside the current field */
    int riga_partenza = 0;           /* line currently being scanned */
    int inquote = 0;                 /* non-zero while inside a quoted field */
    int finito = 0;
    const char *carattere;

    if (lines == NULL || lines[0] == NULL) {
        return 0;
    }
    carattere = lines[riga_partenza];

    while (!finito) {
        const char c = *carattere;

        if (c == '\0' || c == '\r' || c == '\n') {
            if (inquote && lines[riga_partenza + 1] != NULL) {
                /* Line break inside a quoted field: it stands for the two
                 * characters CR LF; continue on the next line. */
                index_stringa_in_corso += 2;
                carattere = lines[++riga_partenza];
            } else {
                /* End of the record (or input exhausted inside a quote). */
                numero_colonne++;
                finito = 1;
            }
        } else if (c == '"') {
            if (inquote) {
                carattere++; /* look at what follows the quote */
                if (*carattere == '"') {
                    /* Doubled quote: one literal quote character. */
                    index_stringa_in_corso++;
                    carattere++;
                } else if (*carattere == delimitatore) {
                    /* Closing quote followed by a delimiter: next column.
                     * The quoted state must end here, otherwise the end of a
                     * later unquoted field is mistaken for an embedded line
                     * break. */
                    numero_colonne++;
                    inquote = 0;
                    index_stringa_in_corso = 0;
                    carattere++;
                } else if (*carattere == '\0' || *carattere == '\r' || *carattere == '\n') {
                    /* Closing quote at end of line: last column. */
                    numero_colonne++;
                    finito = 1;
                } else {
                    /* Closing quote followed by garbage: malformed, stop
                     * without counting the current column. */
                    finito = 1;
                }
            } else if (index_stringa_in_corso == 0) {
                /* A quote as first character opens a quoted field. */
                inquote = 1;
                carattere++;
            } else {
                /* Quote inside an unquoted field: malformed, stop. */
                finito = 1;
            }
        } else if (c == delimitatore && !inquote) {
            /* Unquoted delimiter: one more completed column. */
            numero_colonne++;
            index_stringa_in_corso = 0;
            carattere++;
        } else {
            /* Ordinary character (including a delimiter inside quotes). */
            index_stringa_in_corso++;
            carattere++;
        }
    }

    return numero_colonne;
}
01-15-2015 03:19 AM
Hi holly7787,
The best approach would be to allocate the destination strings while parsing a line.
Each time you encounter a new separator you allocate a new string; your function can then return the allocated strings and the number of columns found.
The next part of your code will process the strings and then free them.
This avoids walking through the data twice (once to count the columns and once to parse the lines).
As a last optimization, line splitting is not required either: you can just put the whole content of your file in a buffer and pass the buffer pointer and the offset of the last character read to your function.
Something like this :
int leggi_riga_csv_v3(char *buffer, int *offset ...
*carattere will become buffer[*offset]
carattere++ will become (*offset)++
Best regards,
Stef