diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..1ff0c42 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,63 @@ +############################################################################### +# Set default behavior to automatically normalize line endings. +############################################################################### +* text=auto + +############################################################################### +# Set default behavior for command prompt diff. +# +# This is need for earlier builds of msysgit that does not have it on by +# default for csharp files. +# Note: This is only used by command line +############################################################################### +#*.cs diff=csharp + +############################################################################### +# Set the merge driver for project and solution files +# +# Merging from the command prompt will add diff markers to the files if there +# are conflicts (Merging from VS is not affected by the settings below, in VS +# the diff markers are never inserted). Diff markers may cause the following +# file extensions to fail to load in VS. An alternative would be to treat +# these files as binary and thus will always conflict and require user +# intervention with every merge. To do so, just uncomment the entries below +############################################################################### +#*.sln merge=binary +#*.csproj merge=binary +#*.vbproj merge=binary +#*.vcxproj merge=binary +#*.vcproj merge=binary +#*.dbproj merge=binary +#*.fsproj merge=binary +#*.lsproj merge=binary +#*.wixproj merge=binary +#*.modelproj merge=binary +#*.sqlproj merge=binary +#*.wwaproj merge=binary + +############################################################################### +# behavior for image files +# +# image files are treated as binary by default. 
+############################################################################### +#*.jpg binary +#*.png binary +#*.gif binary + +############################################################################### +# diff behavior for common document formats +# +# Convert binary document formats to text before diffing them. This feature +# is only available from the command line. Turn it on by uncommenting the +# entries below. +############################################################################### +#*.doc diff=astextplain +#*.DOC diff=astextplain +#*.docx diff=astextplain +#*.DOCX diff=astextplain +#*.dot diff=astextplain +#*.DOT diff=astextplain +#*.pdf diff=astextplain +#*.PDF diff=astextplain +#*.rtf diff=astextplain +#*.RTF diff=astextplain diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b06e864 --- /dev/null +++ b/.gitignore @@ -0,0 +1,212 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+ +# User-specific files +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +build/ +bld/ +[Bb]in/ +[Oo]bj/ + +# Visual Studio 2015 cache/options directory +.vs/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# DNX +project.lock.json +artifacts/ + +*_i.c +*_p.c +*_i.h +*.ilk +*.meta +*.obj +*.pch +*.pdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opensdf +*.sdf +*.cachefile + +# Visual Studio profiler +*.psess +*.vsp +*.vspx + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# JustCode is a .NET coding add-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# NCrunch +_NCrunch_* +.*crunch*.local.xml + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +## TODO: Comment the next line if you want to checkin your +## web deploy settings but do note that will include unencrypted +## passwords +#*.pubxml + +*.publishproj + +# NuGet Packages +*.nupkg +# The packages folder can be ignored because of Package Restore +**/packages/* +# except 
build/, which is used as an MSBuild target. +!**/packages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/packages/repositories.config + +# Windows Azure Build Output +csx/ +*.build.csdef + +# Windows Store app package directory +AppPackages/ + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!*.[Cc]ache/ + +# Others +ClientBin/ +[Ss]tyle[Cc]op.* +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.pfx +*.publishsettings +node_modules/ +orleans.codegen.cs + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm + +# SQL Server files +*.mdf +*.ldf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings + +# Microsoft Fakes +FakesAssemblies/ + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# LightSwitch generated files +GeneratedArtifacts/ +_Pvt_Extensions/ +ModelManifest.xml diff --git a/AruuzCore.sln b/AruuzCore.sln new file mode 100644 index 0000000..b986eb4 --- /dev/null +++ b/AruuzCore.sln @@ -0,0 +1,20 @@ + +Microsoft Visual Studio Solution File, Format Version 11.00 +# Visual Studio 2010 +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "AruuzCore", "AruuzCore\AruuzCore.vcxproj", "{00A900B2-176E-4D01-B877-B4EC1F0D6FBA}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Win32 = Debug|Win32 + Release|Win32 = Release|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {00A900B2-176E-4D01-B877-B4EC1F0D6FBA}.Debug|Win32.ActiveCfg = Debug|Win32 + {00A900B2-176E-4D01-B877-B4EC1F0D6FBA}.Debug|Win32.Build.0 = Debug|Win32 + 
/*
 * Entry point of the AruuzCore console test driver.
 *
 * Tries to switch the C locale to "URDU", reports the outcome, runs the two
 * test routines declared in test_bed.h and then waits for a key press so a
 * console window opened by the IDE does not close immediately.
 *
 * argc/argv are accepted for the standard signature but are not used.
 * Always returns 0.
 */
int main(int argc, char** argv)
{
    char *locale;

    (void)argc;
    (void)argv;

    /*
     * setlocale() returns NULL when the requested locale is not available.
     * Handing that NULL to "%s" is undefined behaviour, so test it first.
     */
    locale = setlocale( LC_ALL, "URDU" );
    if(locale != NULL)
    {
        printf("The thread locale is now set to %s.\n", locale);
    }
    else
    {
        printf("The thread locale could not be set to %s.\n", "URDU");
    }

    code_test();
    tree_test();

    /*
     * Keep the console open. The original used an empty while(1) loop, which
     * pins a CPU core and never lets main return; blocking on one character
     * of input gives the same "window stays open" effect without spinning.
     */
    (void)getchar();

    return 0;
}
rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms + + + + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + \ No newline at end of file diff --git a/AruuzCore/ReadMe.txt b/AruuzCore/ReadMe.txt new file mode 100644 index 0000000..0596104 --- /dev/null +++ b/AruuzCore/ReadMe.txt @@ -0,0 +1,40 @@ +======================================================================== + CONSOLE APPLICATION : AruuzCore Project Overview +======================================================================== + +AppWizard has created this AruuzCore application for you. + +This file contains a summary of what you will find in each of the files that +make up your AruuzCore application. + + +AruuzCore.vcxproj + This is the main project file for VC++ projects generated using an Application Wizard. + It contains information about the version of Visual C++ that generated the file, and + information about the platforms, configurations, and project features selected with the + Application Wizard. + +AruuzCore.vcxproj.filters + This is the filters file for VC++ projects generated using an Application Wizard. + It contains information about the association between the files in your project + and the filters. This association is used in the IDE to show grouping of files with + similar extensions under a specific node (for e.g. ".cpp" files are associated with the + "Source Files" filter). + +AruuzCore.cpp + This is the main application source file. 
+ +///////////////////////////////////////////////////////////////////////////// +Other standard files: + +StdAfx.h, StdAfx.cpp + These files are used to build a precompiled header (PCH) file + named AruuzCore.pch and a precompiled types file named StdAfx.obj. + +///////////////////////////////////////////////////////////////////////////// +Other notes: + +AppWizard uses "TODO:" comments to indicate parts of the source code you +should add to or customize. + +///////////////////////////////////////////////////////////////////////////// diff --git a/AruuzCore/error_handling.c b/AruuzCore/error_handling.c new file mode 100644 index 0000000..f7428b3 --- /dev/null +++ b/AruuzCore/error_handling.c @@ -0,0 +1,15 @@ +#include"error_handling.h" + +struct _errordesc errordesc[] = { + { E_SUCCESS, "No error" }, + { E_INVALID_INPUT, "Invalid input" }, + { E_ZERO_LEN_INIT, "Zero length initilization" } +}; + +struct _errordesc sourcedesc[] = { + { S_DEFUALT, "No source specified" }, + { S_MET_NEW, "meter_new, [meters.c]"} +}; + +error_t err_code = E_SUCCESS; +source_t err_source = S_DEFUALT; \ No newline at end of file diff --git a/AruuzCore/error_handling.h b/AruuzCore/error_handling.h new file mode 100644 index 0000000..ea69155 --- /dev/null +++ b/AruuzCore/error_handling.h @@ -0,0 +1,27 @@ +#ifndef _errorhandling_h +#define _errorhandling_h + +typedef enum _config_error +{ + E_SUCCESS = 0, + E_INVALID_INPUT, + E_ZERO_LEN_INIT +}error_t; + +typedef enum _error_source +{ + S_DEFUALT = 0, + S_MET_NEW +}source_t; + +extern struct _errordesc { + int code; + char *message; +}; + +extern error_t err_code; +extern source_t err_source; +extern struct _errordesc errordesc[]; +extern struct _errordesc sourcedesc[]; + +#endif \ No newline at end of file diff --git a/AruuzCore/globals.h b/AruuzCore/globals.h new file mode 100644 index 0000000..f91d9c0 --- /dev/null +++ b/AruuzCore/globals.h @@ -0,0 +1,20 @@ +/*contains global variables and preprocessor directives +*/ +#ifndef _globals_h 
+#define _globals_h + +#define bool int +#define true 1 +#define false 0 +#define SHORT_SYLLABLE "-" +#define LONG_SYLLABLE "=" +#define WORD_END_SHORT "~" +#define UNKNOWN_SYLLABLE "x" +#define REGULAR_METER 0 +#define RUBAI_METER 1 +#define HINDI_METER 2 + + + + +#endif \ No newline at end of file diff --git a/AruuzCore/meters.c b/AruuzCore/meters.c new file mode 100644 index 0000000..f5ef66e --- /dev/null +++ b/AruuzCore/meters.c @@ -0,0 +1,102 @@ +#include +#include +#include +#include"globals.h" +#include"error_handling.h" +#include"meters_def.h" +#include"utility.h" +#include"meters.h" + +meter *meter_new(int id, bool usage, char **patterns, char *orig_pattern, wchar_t *name, int *length, int numPatterns, int type) +{ + meter *retVal; + if(length <= 0) + { + err_code = E_ZERO_LEN_INIT; + err_source = S_MET_NEW; + return NULL; + } + retVal = (meter *)malloc(sizeof(meter)); + retVal->id = id; + retVal->length = length; + retVal->name = name; + retVal->numPatterns = numPatterns; + retVal->patterns = patterns; + retVal->usage = usage; + retVal->type = type; + retVal->original_pattern = orig_pattern; + + return retVal; +} + +void meter_delete(meter *met) +{ + int i; + if(met != NULL) + { + for(i = 0; i < met->numPatterns; i++) + free(met->patterns[i]); + free(met->name); + free(met); + } +} + +meters_list meters_list_new() +{ + int i; + meter met_lst[TOTAL_METERS]; + meters_list lst; + char *pattern_t,*pattern_o; + + for(i = 0; i < NUM_METERS; i++) + { + met_lst[i].id = id[i]; + met_lst[i].name = meterNames[i]; + met_lst[i].type = REGULAR_METER; + met_lst[i].usage = usage[i]; + met_lst[i].original_pattern = meters[i]; + if(strstr(meters[i],"+") != NULL) + { + met_lst[i].numPatterns = 4; + met_lst[i].patterns = (char **)malloc(4*sizeof(char)); + pattern_o = (char *)malloc((strlen(meters[i]))*sizeof(char)); + pattern_o = strcpy(pattern_o,meters[i]); + pattern_o = remove_char(pattern_o,'/'); + pattern_t = (char *)malloc((strlen(meters[i]) + 1)*sizeof(char)); + 
pattern_t = strcpy(pattern_t,pattern_o); + pattern_t = remove_char(pattern_t,'+'); + met_lst[i].patterns[0] = pattern_t; + met_lst[i].length = (int)strlen(pattern_t); + pattern_t = (char *)malloc((strlen(meters[i]) + 1)*sizeof(char)); + pattern_t = strcpy(pattern_t,pattern_o); + pattern_t = remove_char(pattern_t,'+'); + pattern_t = strcat(pattern_t,"~"); + met_lst[i].patterns[1] = pattern_t; + pattern_t = (char *)malloc((strlen(meters[i]) + 1)*sizeof(char)); + pattern_t = strcpy(pattern_t,pattern_o); + pattern_t = replace_char(pattern_t,'+','~'); + met_lst[i].patterns[2] = pattern_t; + pattern_t = (char *)malloc((strlen(meters[i]) + 1)*sizeof(char)); + pattern_t = strcpy(pattern_t,pattern_o); + pattern_t = replace_char(pattern_t,'+','~'); + pattern_t = strcat(pattern_t,"~"); + met_lst[i].patterns[3] = pattern_t; + } + else + { + met_lst[i].numPatterns = 2; + met_lst[i].patterns = (char **)malloc(2*sizeof(char)); + pattern_o = (char *)malloc((strlen(meters[i]))*sizeof(char)); + pattern_o = strcpy(pattern_o,meters[i]); + pattern_o = remove_char(pattern_o,'/'); + met_lst[i].patterns[0] = pattern_o; + met_lst[i].length = (int)strlen(pattern_o); + pattern_t = (char *)malloc((strlen(meters[i]) + 1)*sizeof(char)); + pattern_t = strcpy(pattern_t,pattern_o); + pattern_t = strcat(pattern_t,"~"); + met_lst[i].patterns[1] = pattern_t; + } + } + lst.meters = met_lst; + lst.length = NUM_METERS; +} \ No newline at end of file diff --git a/AruuzCore/meters.h b/AruuzCore/meters.h new file mode 100644 index 0000000..100348e --- /dev/null +++ b/AruuzCore/meters.h @@ -0,0 +1,40 @@ +#include +#include "globals.h" +#ifndef _meters_h +#define _meters_h + +typedef struct +{ + int id; + bool usage; + char **patterns; + char *original_pattern; + wchar_t *name; + int length; + int numPatterns; + int type; +}meter; + +typedef struct +{ + char *name; + char *pattern; + int length; +}foot; + +typedef struct +{ + meter *meters; + int length; +}meters_list; + +meter *meter_new(int id, bool usage, 
char **patterns, char *orig_pattern, wchar_t *name, int *length, int numPatterns, int type); +void meter_delete(meter *met); +meters_list meters_list_new(void); + + +extern meters_list METERS; + + + +#endif \ No newline at end of file diff --git a/AruuzCore/meters_def.c b/AruuzCore/meters_def.c new file mode 100644 index 0000000..e69de29 diff --git a/AruuzCore/meters_def.h b/AruuzCore/meters_def.h new file mode 100644 index 0000000..a0092ed --- /dev/null +++ b/AruuzCore/meters_def.h @@ -0,0 +1,663 @@ +#include +#ifndef _metersdef_h +#define _metersdef_h + +#define NUM_METERS 129 +#define NUM_RUBAI_METERS 12 +#define NUM_HINDI_METERS 11 +#define NUM_FEET 32 +#define TOTAL_METERS (NUM_METERS + NUM_RUBAI_METERS + NUM_HINDI_METERS) + +static char *feet[] = { + "===", + "==-=", + "==-", + "==", + "=-==", + "=-=-", + "=-=", + "=--=", + "=-", + "=", + "-===", + "-==-", + "-==", + "-=-=", + "-=-", + "-=", + "--==", + "--=-=", + "--=-", + "--=", + "-=-==", + "===-", + "-=--=", + "==-=-", + "=-==-", + "=--=-", + "-===-", + "-=-=-", + "--==-", + "--=-=-", + "-=-==-", + "-=--=-" +}; +static char *feetNames[] = { + "مفعولن", + "مستفعلن", + "مفعول", + "فِعْلن", + "فاعلاتن", + "فاعلاتُ", + "فاعلن", + "مفتَعِلن", + "فِعْل", + "فِع", + "مفاعیلن", + "مفاعیل", + "فعولن", + "مفاعلن", + "فعول", + "فَعَل", + "فَعِلاتن", + "متَفاعلن", + "فَعِلات", + "فَعِلن", + "مَفاعلاتن", + "مفعولاتُ", + "مفاعِلَتن", + "مستفعلان", + "فاعلاتان", + "مفتَعِلان", + "مفاعیلان", + "مفاعلان", + "فَعِلاتان", + "متَفاعلان", + "مَفاعلاتان", + "مفاعِلَتان" +}; + +static char *rubaiMeters[] = { + "==-/-==-/-==-/-=", + "==-/-==-/-===/=", + "==-/-=-=/-===/=", + "==-/-=-=/-==-/-=", + "===/=-=/-==-/-=", + "===/=-=/-===/=", + "==-/-===/===/=", + "==-/-===/==-/-=", + "===/===/==-/-=", + "===/===/===/=", + "===/==-/-===/=", + "===/==-/-==-/-=" +}; + +static char *rubaiMeterNames[] = { + "ہزج مثمّن اخرب مکفوف مجبوب", + "ہزج مثمّن اخرب مکفوف ابتر", + "ہزج مثمّن اخرب مقبوض ابتر", + "ہزج مثمّن اخرب مقبوض مکفوف مجبوب", + "ہزج 
مثمّن اخرم اشتر مکفوف مجبوب", + "ہزج مثمّن اخرم اشتر ابتر", + "ہزج مثمّن اخرب اخرم ابتر", + "ہزج مثمّن اخرب مجبوب", + "ہزج مثمّن اخرم اخرب مجبوب", + "ہزج مثمّن اخرم ابتر", + "ہزج مثمّن اخرم اخرب ابتر", + "ہزج مثمّن اخرم اخرب مکفوف مجبوب" +}; + +static char * hindiMeters[] = { + "=(=)/=(=)/=(=)/=(=)/=(=)/=(=)/=(=)/=", + "=(=)/=(=)/=(=)/=(=)/=(=)/=", + "=(=)/=(=)/=(=)/=(=)/=(=)/=(=)/=(=)/==", + "=(=)/=(=)/=(=)/=", + "=(=)/=(=)/=(=)/==", + "=(=)/=(=)/=", + "=(=)/=(=)/=(=)/=(=)/=(=)/==", + "=(=)/=(=)", + "(=)=/(=)=/(=)=/(=)=/(=)=/(=)=/(=)=/(=)=", + "(=)=/(=)=/(=)=/(=)=/(=)=/(=)=", + "(=)=/(=)=/(=)=/(=)" +}; +static char* hindiMeterNames[] = { + "بحرِ ہندی/ متقارب مثمن مضاعف", + "بحرِ ہندی/ متقارب مسدس مضاعف", + "بحرِ ہندی/ متقارب اثرم مقبوض محذوف مضاعف", + "بحرِ ہندی/ متقارب مربع مضاعف", + "بحرِ ہندی/ متقارب اثرم مقبوض محذوف", + "بحرِ ہندی/ متقارب مثمن محذوف", + "بحرِ ہندی/ متقارب مسدس محذوف", + "بحرِ ہندی/ متقارب مربع محذوف", + "بحرِ زمزمہ/ متدارک مثمن مضاعف", + "بحرِ زمزمہ/ متدارک مسدس مضاعف", + "بحرِ زمزمہ/ متدارک مربع مضاعف" +}; + +static int id[] = { + 13, + 14, + 15, + 16, + 17, + 2, + 2, + 4, + 4, + 4, + 4, + 18, + 19, + 3, + 3, + 20, + 21, + 22, + 23, + 5, + 5, + 5, + 24, + 25, + 26, + 27, + 6, + 6, + 6, + 6, + 30, + 31, + 32, + 33, + 34, + 35, + 35, + 35, + 35, + 36, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 7, + 103, + 64, + 65, + 8, + 8, + 8, + 8, + 9, + 9, + 9, + 9, + 10, + 10, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 1, + 1, + 1, + 1, + 11, + 11, + 78, + 79, + 80, + 81, + 12, + 12, + 12, + 12, + 12, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 36, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104 +}; + +static int usage[] ={ + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 
1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 1, + 0, + 1, + 0, + 1, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + 0, + 0, + 0, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1 +}; + +static char* meters[] = { + "-===/-===/-===/-===", + "-===/-===/-===/-==", + "-=-=/-=-=/-=-=/-=-=", + "=-=/-===+=-=/-===", + "-=-=/-===/-=-=/-===", + "==-/-==-/-==-/-===", + "==-/-===+==-/-===", + "==-/-==-/-==-/-==", + "===/==-/-==-/-==", + "==-/-===/==-/-==", + "==-/-==-/-===/==", + "-===/-===/-===", + "-===/-===/-==", + "==-/-=-=/-==", + "===/=-=/-==", + "=-=/-=-=+=-=/-=-=", + "-===/-==", + "-===/-==+-===/-==", + "==-=/==-=/==-=/==-=", + "=--=/=--=/=--=/=--=", + "=--=/-=-=+=--=/-=-=", + "-=-=/=--=+-=-=/=--=", + "==-=/==-=/==-=", + "=--=/=--=/=--=", + "=-==/=-==/=-==/=-==", + "=-==/=-==/=-==/=-=", + "=-==/--==/--==/--=", + "--==/--==/--==/--=", + "=-==/--==/--==/==", + "--==/--==/--==/==", + "--=-/=-==+--=-/=-==", + "==-/=-==+==-/=-==", + "--==/--==/--==/--==", + "=-==/=-==/=-==", + "=-==/=-==/=-=", + "=-==/--==/--=", + "=-==/--==/==", + "--==/--==/--=", + "--==/--==/==", + "--==/--==/--==", + "-==/-==/-==/-==", + "-==/-==/-==/-==/-==/-==/-==/-==", + "-==/-==/-==/-=", + "=-/-=-/-=-/-==", + "=-/-=-/-=-/-=", + "=-/-=-/-=-/-=-/-=-/-=-/-=-/-=", + "=-/-=-/-=-/-=-/-=-/-=-/-=-/-==", + "-==/-==/-==", + "-==/-==/-=", + "==/-==/==/-==", + "=-=/=-=/=-=/=-=", + "--=/--=/--=/--=", + "--=/--=/--=/--=/--=/--=/--=/--=", + "=-=/=-=/=-=/--=", + "=-=/=-=/=-=", + "=-=/-=/=-=/-=", + "--=-=/--=-=/--=-=/--=-=", + "--=-=/--=-=/--=-=", + "-=--=/-=--=/-=--=/-=--=", + "-=--=/-=--=/-=--=", + "-=--=/-=--=/-==", + "-===/=-==/-===/=-==", + "-==-/=-=-/-==-/=-=", + "==-/=-==/==-/=-==", + 
"==-/=-=-/-==-/=-=", + "==-/=-==/==-/=-=", + "==-/=-=-/-===", + "==-=/=-==/==-=/=-==", + "-=-=/--==/-=-=/--==", + "-=-=/===/-=-=/--==", + "-=-=/--==/-=-=/===", + "-=-=/===/-=-=/===", + "-=-=/--==/-=-=/--=", + "-=-=/===/-=-=/--=", + "-=-=/--==/-=-=/==", + "-=-=/===/-=-=/==", + "-=-=/--==/-=-=", + "-=-=/===/-=-=", + "==-=/===-/==-=/===-", + "=--=/=-=+=--=/=-=", + "=--=/=-=-/=--=/=", + "=--=/=-=/=--=", + "===-/==-=/===-/==-=", + "=-=-/=--=/=-=-/=--=", + "==-=/==-=/===-", + "=--=/=--=/=-=", + "==-=/==-=/-==", + "=-==/==-=/=-==/==-=", + "=-==/==-=/=-==", + "--==/-=-=/--==", + "=-==/-=-=/--=", + "--==/-=-=/--=", + "=-==/-=-=/==", + "--==/-=-=/==", + "=-==/-=-=/=", + "--==/-=-=/=", + "-===/-==/-===", + "-==/-===/-==/-=-=", + "-==/-=-=/-==/-=-=", + "=-==/=-=/=-==/=-=", + "--==/--=/--==/--=", + "--==/==/--==/--=", + "===/--=/--==/--=", + "--==/--=/===/--=", + "--==/--=/--==/==", + "=-==/--=/=-==/--=", + "==-=/=-=/==-=/=-=", + "-=-=/--=/-=-=/--=", + "-===/-===/=-==", + "==-/-==-/=-==", + "=-==/=-==/==-=", + "--==/--==/-=-=", + "=-==/-===/-===", + "=-=-/-==-/-==", + "-=-==/-=-==/-=-==/-=-==", + "=-=/-===", + "=-=/-=-=", + "-===/-===", + "-=-=/-=-=/-=-=/-=", + "=-==/--==/--==", + "-===/-===", + "=-==/=-==", + "=-==/=-=", + "-==/-==", + "--=-=/--=-=", + "-==/-===", + "=-==/=-=", + "-===/-===/-===/-===/-===/-===/-===/-===", + "-=-==/-=-==" +}; +static wchar_t* meterNames[] = { + L"ہزج مثمن سالم", + L"ہزج مثمن محذوف", + L"ہزج مثمن مقبوض", + L"ہزج مثمن اشتر", + L"ہزج مثمن مقبوض سالم", + L"ہزج مثمن اخرب مکفوف سالم", + L"ہزج مثمن اخرب سالم", + L"ہزج مثمن اخرب مکفوف محذوف", + L"ہزج مثمن اخرب مکفوف محذوف", + L"ہزج مثمن اخرب مکفوف محذوف", + L"ہزج مثمن اخرب مکفوف محذوف", + L"ہزج مسدس سالم", + L"ہزج مسدس محذوف", + L"ہزج مسدس اخرب مقبوض محذوف", + L"ہزج مسدس اخرم اشتر محذوف", + L"ہزج مربع اشتر مقبوض مضاعف", + L"ہزج مربع محذوف", + L"ہزج مربع محذوف مضاعف", + L"رجز مثمن سالم", + L"رجز مثمن مطوی", + L"رجز مثمن مطوی مخبون", + L"رجز مثمن مخبون مطوی", + L"رجز مسدس سالم", + L"رجز مسدس مطوی", + 
L"رمل مثمن سالم", + L"رمل مثمن محذوف", + L"رمل مثمن سالم مخبون محذوف", + L"رمل مثمن سالم مخبون محذوف", + L"رمل مثمن مخبون محذوف مقطوع", + L"رمل مثمن مخبون محذوف مقطوع", + L"رمل مثمن مشکول", + L"رمل مثمن مشکول مسکّن", + L"رمل مثمن مخبون", + L"رمل مسدس سالم", + L"رمل مسدس محذوف", + L"رمل مسدس مخبون محذوف", + L"رمل مسدس مخبون محذوف مسکن", + L"رمل مسدس مخبون محذوف", + L"رمل مسدس مخبون محذوف مسکن", + L"رمل مسدس مخبون", + L"متقارب مثمن سالم", + L"متقارب مثمن سالم مضاعف", + L"متقارب مثمن محذوف", + L"متقارب مثمن اثرم مقبوض", + L"متقارب مثمن اثرم مقبوض محذوف", + L"متقارب مثمن اثرم مقبوض مضاعف", + L"متقارب مثمن اثرم مقبوض محذوف مضاعف", + L"متقارب مسدس سالم", + L"متقارب مسدس محذوف", + L"متقارب مربع اثلم سالم مضاعف", + L"متدارک مثمن سالم", + L"متدارک مثمن مخبون", + L"متدارک مثمن مخبون مضاعف", + L"متدارک مثمن سالم مقطوع", + L"متدارک مسدس سالم", + L"متدارک مربع مخلع مضاعف", + L"کامل مثمن سالم", + L"کامل مسدس سالم", + L"وافر مثمن سالم", + L"وافر مسدس سالم", + L"وافر مسدس مقطوف", + L"مضارع مثمن سالم", + L"مضارع مثمن مکفوف محذوف", + L"مضارع مثمن اخرب", + L"مضارع مثمن اخرب مکفوف محذوف", + L"مضارع مثمن اخرب محذوف", + L"مضارع مسدس اخرب مکفوف", + L"مجتث مثمن سالم", + L"مجتث مثمن مخبون", + L"مجتث مثمن مخبون", + L"مجتث مثمن مخبون", + L"مجتث مثمن مخبون", + L"مجتث مثمن مخبون محذوف", + L"مجتث مثمن مخبون محذوف", + L"مجتث مثمن مخبون محذوف مسکن", + L"مجتث مثمن مخبون محذوف مسکن", + L"مجتث مسدس مخبون", + L"مجتث مسدس مخبون", + L"منسرح مثمن سالم", + L"منسرح مثمن مطوی مکسوف", + L"منسرح مثمن مطوی منحور", + L"منسرح مسدس مطوی مکسوف", + L"مقتضب مثمن سالم", + L"مقتضب مثمن مطوی", + L"سریع مسدس سالم", + L"سریع مسدس مطوی مکسوف", + L"سریع مسدس مخبون مکسوف ", + L"خفیف مثمن سالم", + L"خفیف مسدس سالم", + L"خفیف مسدس مخبون", + L"خفیف مسدس مخبون محذوف", + L"خفیف مسدس مخبون محذوف", + L"خفیف مسدس مخبون محذوف مقطوع", + L"خفیف مسدس مخبون محذوف مقطوع", + L"خفیف مسدس سالم مخبون محجوف", + L"خفیف مسدس مخبون محجوف", + L"طویل مثمن سالم", + L"طویل مثمن سالم مقبوض", + L"طویل مثمن مقبوض", + L"مدید مثمن سالم", + L"مدید مثمن 
مخبون", + L"مدید مثمن مخبون", + L"مدید مثمن مخبون", + L"مدید مثمن مخبون", + L"مدید مثمن مخبون", + L"مدید مثمن سالم مخبون", + L"بسیط مثمن سالم", + L"بسیط مثمن مخبون", + L"قریب مسدس سالم", + L"قریب مسدس اخرب مکفوف", + L"جدید مسدس سالم", + L"جدید مسدس مخبون", + L"مشاکل مسدس سالم", + L"مشاکل مسدس مکفوف محذوف", + L"جمیل مثمن سالم", + L"ہزج مربع اشتر", + L"ہزج مربع اشتر مقبوض", + L"ہزج مربع سالم", + L"ہزج مثمن مقبوض محذوف", + L"رمل مسدس مخبون", + L"ہزج مربع سالم", + L"رمل مربع سالم", + L"ہزج مربع محذوف", + L"متقارب مربع سالم", + L"کامل مربع سالم", + L"طویل مربع سالم", + L"مدید مربع سالم", + L"ہزج مثمن سالم مضاعف", + L"جمیل مربع سالم" +}; +#endif \ No newline at end of file diff --git a/AruuzCore/stdafx.cpp b/AruuzCore/stdafx.cpp new file mode 100644 index 0000000..9dbdd34 --- /dev/null +++ b/AruuzCore/stdafx.cpp @@ -0,0 +1,8 @@ +// stdafx.cpp : source file that includes just the standard includes +// AruuzCore.pch will be the pre-compiled header +// stdafx.obj will contain the pre-compiled type information + +#include "stdafx.h" + +// TODO: reference any additional headers you need in STDAFX.H +// and not in this file diff --git a/AruuzCore/stdafx.h b/AruuzCore/stdafx.h new file mode 100644 index 0000000..b005a83 --- /dev/null +++ b/AruuzCore/stdafx.h @@ -0,0 +1,15 @@ +// stdafx.h : include file for standard system include files, +// or project specific include files that are used frequently, but +// are changed infrequently +// + +#pragma once + +#include "targetver.h" + +#include +#include + + + +// TODO: reference additional headers your program requires here diff --git a/AruuzCore/targetver.h b/AruuzCore/targetver.h new file mode 100644 index 0000000..87c0086 --- /dev/null +++ b/AruuzCore/targetver.h @@ -0,0 +1,8 @@ +#pragma once + +// Including SDKDDKVer.h defines the highest available Windows platform. 
+ +// If you wish to build your application for a previous Windows platform, include WinSDKVer.h and +// set the _WIN32_WINNT macro to the platform you wish to support before including SDKDDKVer.h. + +#include diff --git a/AruuzCore/test_bed.c b/AruuzCore/test_bed.c new file mode 100644 index 0000000..285975e --- /dev/null +++ b/AruuzCore/test_bed.c @@ -0,0 +1,33 @@ +#include +#include +#include +#include +#include"globals.h" +#include"meters.h" +#include"tree.h" +#include"test_bed.h" + +void code_test() +{ + meters_list lst = meters_list_new(); + printf("Short: %s\n",SHORT_SYLLABLE); + printf("Long: %s\n",LONG_SYLLABLE); + printf("End-Short: %s\n",WORD_END_SHORT); + printf("Unknown: %s\n",UNKNOWN_SYLLABLE); + wprintf(L": %s\n",lst.meters[0].name); +} +void tree_test(void) +{ + int i; + struct binary_tree *code_tree = NULL; + char pattern[] = "x===-==x-=x"; + char *val; + insert(&code_tree,"root"); + for(i = 0; i < strlen(pattern); i++) + { + val = (char*)malloc(2*sizeof(char)); + memcpy(val,&pattern[i],1); + val[1] = '\0'; + insert(&code_tree,val); + } +} \ No newline at end of file diff --git a/AruuzCore/test_bed.h b/AruuzCore/test_bed.h new file mode 100644 index 0000000..eed8027 --- /dev/null +++ b/AruuzCore/test_bed.h @@ -0,0 +1,7 @@ +#ifndef _testbed_h +#define _testbed_h + +void code_test(void); +void tree_test(void); + +#endif \ No newline at end of file diff --git a/AruuzCore/tree.c b/AruuzCore/tree.c new file mode 100644 index 0000000..795fe89 --- /dev/null +++ b/AruuzCore/tree.c @@ -0,0 +1,52 @@ +#include +#include +#include +#include"tree.h" + +int insert(struct binary_tree **root, char *key) +{ + if(*root == NULL) + { + *root = (struct binary_tree *)malloc(sizeof(struct binary_tree)); + (*root)->left = NULL; + (*root)->right = NULL; + (*root)->value = (char*)malloc(sizeof(key)); + memcpy((*root)->value,key,strlen(key) + 1); + } + else if((*root)->right == NULL && (*root)->left == NULL) + { + if(strcmp(key,UNKNOWN_SYLLABLE) == 0) + { + (*root)->left = 
(struct binary_tree *)malloc(sizeof(struct binary_tree)); + (*root)->left->value= (char*)malloc(sizeof(SHORT_SYLLABLE)); + memcpy((*root)->left->value,SHORT_SYLLABLE,strlen(SHORT_SYLLABLE) + 1); + (*root)->left->left = NULL; + (*root)->left->right = NULL; + (*root)->right = (struct binary_tree *)malloc(sizeof(struct binary_tree)); + (*root)->right->value= (char*)malloc(sizeof(LONG_SYLLABLE) + 1); + memcpy((*root)->right->value,LONG_SYLLABLE,strlen(LONG_SYLLABLE) + 1); + (*root)->right->left = NULL; + (*root)->right->right = NULL; + } + else + { + (*root)->left = (struct binary_tree *)malloc(sizeof(struct binary_tree)); + (*root)->left->value= (char*)malloc(sizeof(key)); + memcpy((*root)->left->value,key,strlen(key) + 1); + (*root)->left->left = NULL; + (*root)->left->right = NULL; + } + } + else + { + if((*root)->right != NULL) + { + insert(&(*root)->right,key); + } + if((*root)->left != NULL) + { + insert(&(*root)->left,key); + } + } + return 0; +} \ No newline at end of file diff --git a/AruuzCore/tree.h b/AruuzCore/tree.h new file mode 100644 index 0000000..d1996c3 --- /dev/null +++ b/AruuzCore/tree.h @@ -0,0 +1,12 @@ +#include"globals.h" +#ifndef _tree_h +#define _tree_h + +struct binary_tree +{ + char *value; + struct binary_tree *left; + struct binary_tree *right; +}; +int insert(struct binary_tree **root, char *key); +#endif \ No newline at end of file diff --git a/AruuzCore/utf8.c b/AruuzCore/utf8.c new file mode 100644 index 0000000..4c6595d --- /dev/null +++ b/AruuzCore/utf8.c @@ -0,0 +1,476 @@ +/* + Basic UTF-8 manipulation routines + by Jeff Bezanson + placed in the public domain Fall 2005 + + This code is designed to provide the utilities you need to manipulate + UTF-8 as an internal string encoding. These functions do not perform the + error checking normally needed when handling UTF-8 data, so if you happen + to be from the Unicode Consortium you will want to flay me alive. 
+ I do this because error checking can be performed at the boundaries (I/O), + with these routines reserved for higher performance on data known to be + valid. +*/ +#include +#include +#include +#include +#ifdef WIN32 +#include +#else +#include +#endif + +#include "utf8.h" + +static const u_int32_t offsetsFromUTF8[6] = { + 0x00000000UL, 0x00003080UL, 0x000E2080UL, + 0x03C82080UL, 0xFA082080UL, 0x82082080UL +}; + +static const char trailingBytesForUTF8[256] = { + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 +}; + +/* returns length of next utf-8 sequence */ +int u8_seqlen(char *s) +{ + return trailingBytesForUTF8[(unsigned int)(unsigned char)s[0]] + 1; +} + +/* conversions without error checking + only works for valid UTF-8, i.e. no 5- or 6-byte sequences + srcsz = source size in bytes, or -1 if 0-terminated + sz = dest size in # of wide characters + + returns # characters converted + dest will always be L'\0'-terminated, even if there isn't enough room + for all the characters. + if sz = srcsz+1 (i.e. 4*srcsz+4 bytes), there will always be enough space. 
+*/ +int u8_toucs(u_int32_t *dest, int sz, char *src, int srcsz) +{ + u_int32_t ch; + char *src_end = src + srcsz; + int nb; + int i=0; + + while (i < sz-1) { + nb = trailingBytesForUTF8[(unsigned char)*src]; + if (srcsz == -1) { + if (*src == 0) + goto done_toucs; + } + else { + if (src + nb >= src_end) + goto done_toucs; + } + ch = 0; + switch (nb) { + /* these fall through deliberately */ + case 3: ch += (unsigned char)*src++; ch <<= 6; + case 2: ch += (unsigned char)*src++; ch <<= 6; + case 1: ch += (unsigned char)*src++; ch <<= 6; + case 0: ch += (unsigned char)*src++; + } + ch -= offsetsFromUTF8[nb]; + dest[i++] = ch; + } + done_toucs: + dest[i] = 0; + return i; +} + +/* srcsz = number of source characters, or -1 if 0-terminated + sz = size of dest buffer in bytes + + returns # characters converted + dest will only be '\0'-terminated if there is enough space. this is + for consistency; imagine there are 2 bytes of space left, but the next + character requires 3 bytes. in this case we could NUL-terminate, but in + general we can't when there's insufficient space. therefore this function + only NUL-terminates if all the characters fit, and there's space for + the NUL as well. + the destination string will never be bigger than the source string. +*/ +int u8_toutf8(char *dest, int sz, u_int32_t *src, int srcsz) +{ + u_int32_t ch; + int i = 0; + char *dest_end = dest + sz; + + while (srcsz<0 ? 
src[i]!=0 : i < srcsz) { + ch = src[i]; + if (ch < 0x80) { + if (dest >= dest_end) + return i; + *dest++ = (char)ch; + } + else if (ch < 0x800) { + if (dest >= dest_end-1) + return i; + *dest++ = (ch>>6) | 0xC0; + *dest++ = (ch & 0x3F) | 0x80; + } + else if (ch < 0x10000) { + if (dest >= dest_end-2) + return i; + *dest++ = (ch>>12) | 0xE0; + *dest++ = ((ch>>6) & 0x3F) | 0x80; + *dest++ = (ch & 0x3F) | 0x80; + } + else if (ch < 0x110000) { + if (dest >= dest_end-3) + return i; + *dest++ = (ch>>18) | 0xF0; + *dest++ = ((ch>>12) & 0x3F) | 0x80; + *dest++ = ((ch>>6) & 0x3F) | 0x80; + *dest++ = (ch & 0x3F) | 0x80; + } + i++; + } + if (dest < dest_end) + *dest = '\0'; + return i; +} + +int u8_wc_toutf8(char *dest, u_int32_t ch) +{ + if (ch < 0x80) { + dest[0] = (char)ch; + return 1; + } + if (ch < 0x800) { + dest[0] = (ch>>6) | 0xC0; + dest[1] = (ch & 0x3F) | 0x80; + return 2; + } + if (ch < 0x10000) { + dest[0] = (ch>>12) | 0xE0; + dest[1] = ((ch>>6) & 0x3F) | 0x80; + dest[2] = (ch & 0x3F) | 0x80; + return 3; + } + if (ch < 0x110000) { + dest[0] = (ch>>18) | 0xF0; + dest[1] = ((ch>>12) & 0x3F) | 0x80; + dest[2] = ((ch>>6) & 0x3F) | 0x80; + dest[3] = (ch & 0x3F) | 0x80; + return 4; + } + return 0; +} + +/* charnum => byte offset */ +int u8_offset(char *str, int charnum) +{ + int offs=0; + + while (charnum > 0 && str[offs]) { + (void)(isutf(str[++offs]) || isutf(str[++offs]) || + isutf(str[++offs]) || ++offs); + charnum--; + } + return offs; +} + +/* byte offset => charnum */ +int u8_charnum(char *s, int offset) +{ + int charnum = 0, offs=0; + + while (offs < offset && s[offs]) { + (void)(isutf(s[++offs]) || isutf(s[++offs]) || + isutf(s[++offs]) || ++offs); + charnum++; + } + return charnum; +} + +/* number of characters */ +int u8_strlen(char *s) +{ + int count = 0; + int i = 0; + + while (u8_nextchar(s, &i) != 0) + count++; + + return count; +} + +/* reads the next utf-8 sequence out of a string, updating an index */ +u_int32_t u8_nextchar(char *s, int *i) +{ + 
u_int32_t ch = 0; + int sz = 0; + + do { + ch <<= 6; + ch += (unsigned char)s[(*i)++]; + sz++; + } while (s[*i] && !isutf(s[*i])); + ch -= offsetsFromUTF8[sz-1]; + + return ch; +} + +void u8_inc(char *s, int *i) +{ + (void)(isutf(s[++(*i)]) || isutf(s[++(*i)]) || + isutf(s[++(*i)]) || ++(*i)); +} + +void u8_dec(char *s, int *i) +{ + (void)(isutf(s[--(*i)]) || isutf(s[--(*i)]) || + isutf(s[--(*i)]) || --(*i)); +} + +int octal_digit(char c) +{ + return (c >= '0' && c <= '7'); +} + +int hex_digit(char c) +{ + return ((c >= '0' && c <= '9') || + (c >= 'A' && c <= 'F') || + (c >= 'a' && c <= 'f')); +} + +/* assumes that src points to the character after a backslash + returns number of input characters processed */ +int u8_read_escape_sequence(char *str, u_int32_t *dest) +{ + u_int32_t ch; + char digs[9]="\0\0\0\0\0\0\0\0"; + int dno=0, i=1; + + ch = (u_int32_t)str[0]; /* take literal character */ + if (str[0] == 'n') + ch = L'\n'; + else if (str[0] == 't') + ch = L'\t'; + else if (str[0] == 'r') + ch = L'\r'; + else if (str[0] == 'b') + ch = L'\b'; + else if (str[0] == 'f') + ch = L'\f'; + else if (str[0] == 'v') + ch = L'\v'; + else if (str[0] == 'a') + ch = L'\a'; + else if (octal_digit(str[0])) { + i = 0; + do { + digs[dno++] = str[i++]; + } while (octal_digit(str[i]) && dno < 3); + ch = strtol(digs, NULL, 8); + } + else if (str[0] == 'x') { + while (hex_digit(str[i]) && dno < 2) { + digs[dno++] = str[i++]; + } + if (dno > 0) + ch = strtol(digs, NULL, 16); + } + else if (str[0] == 'u') { + while (hex_digit(str[i]) && dno < 4) { + digs[dno++] = str[i++]; + } + if (dno > 0) + ch = strtol(digs, NULL, 16); + } + else if (str[0] == 'U') { + while (hex_digit(str[i]) && dno < 8) { + digs[dno++] = str[i++]; + } + if (dno > 0) + ch = strtol(digs, NULL, 16); + } + *dest = ch; + + return i; +} + +/* convert a string with literal \uxxxx or \Uxxxxxxxx characters to UTF-8 + example: u8_unescape(mybuf, 256, "hello\\u220e") + note the double backslash is needed if called on a C 
string literal */ +int u8_unescape(char *buf, int sz, char *src) +{ + int c=0, amt; + u_int32_t ch; + char temp[4]; + + while (*src && c < sz) { + if (*src == '\\') { + src++; + amt = u8_read_escape_sequence(src, &ch); + } + else { + ch = (u_int32_t)*src; + amt = 1; + } + src += amt; + amt = u8_wc_toutf8(temp, ch); + if (amt > sz-c) + break; + memcpy(&buf[c], temp, amt); + c += amt; + } + if (c < sz) + buf[c] = '\0'; + return c; +} + +int u8_escape_wchar(char *buf, int sz, u_int32_t ch) +{ + if (ch == L'\n') + return snprintf(buf, sz, "\\n"); + else if (ch == L'\t') + return snprintf(buf, sz, "\\t"); + else if (ch == L'\r') + return snprintf(buf, sz, "\\r"); + else if (ch == L'\b') + return snprintf(buf, sz, "\\b"); + else if (ch == L'\f') + return snprintf(buf, sz, "\\f"); + else if (ch == L'\v') + return snprintf(buf, sz, "\\v"); + else if (ch == L'\a') + return snprintf(buf, sz, "\\a"); + else if (ch == L'\\') + return snprintf(buf, sz, "\\\\"); + else if (ch < 32 || ch == 0x7f) + return snprintf(buf, sz, "\\x%hhX", (unsigned char)ch); + else if (ch > 0xFFFF) + return snprintf(buf, sz, "\\U%.8X", (u_int32_t)ch); + else if (ch >= 0x80 && ch <= 0xFFFF) + return snprintf(buf, sz, "\\u%.4hX", (unsigned short)ch); + + return snprintf(buf, sz, "%c", (char)ch); +} + +int u8_escape(char *buf, int sz, char *src, int escape_quotes) +{ + int c=0, i=0, amt; + + while (src[i] && c < sz) { + if (escape_quotes && src[i] == '"') { + amt = snprintf(buf, sz - c, "\\\""); + i++; + } + else { + amt = u8_escape_wchar(buf, sz - c, u8_nextchar(src, &i)); + } + c += amt; + buf += amt; + } + if (c < sz) + *buf = '\0'; + return c; +} + +char *u8_strchr(char *s, u_int32_t ch, int *charn) +{ + int i = 0, lasti=0; + u_int32_t c; + + *charn = 0; + while (s[i]) { + c = u8_nextchar(s, &i); + if (c == ch) { + return &s[lasti]; + } + lasti = i; + (*charn)++; + } + return NULL; +} + +char *u8_memchr(char *s, u_int32_t ch, size_t sz, int *charn) +{ + int i = 0, lasti=0; + u_int32_t c; + int csz; + 
+ *charn = 0; + while (i < sz) { + c = csz = 0; + do { + c <<= 6; + c += (unsigned char)s[i++]; + csz++; + } while (i < sz && !isutf(s[i])); + c -= offsetsFromUTF8[csz-1]; + + if (c == ch) { + return &s[lasti]; + } + lasti = i; + (*charn)++; + } + return NULL; +} + +int u8_is_locale_utf8(char *locale) +{ + /* this code based on libutf8 */ + const char* cp = locale; + + for (; *cp != '\0' && *cp != '@' && *cp != '+' && *cp != ','; cp++) { + if (*cp == '.') { + const char* encoding = ++cp; + for (; *cp != '\0' && *cp != '@' && *cp != '+' && *cp != ','; cp++) + ; + if ((cp-encoding == 5 && !strncmp(encoding, "UTF-8", 5)) + || (cp-encoding == 4 && !strncmp(encoding, "utf8", 4))) + return 1; /* it's UTF-8 */ + break; + } + } + return 0; +} + +int u8_vprintf(char *fmt, va_list ap) +{ + int cnt, sz=0; + char *buf; + u_int32_t *wcs; + + sz = 512; + buf = (char*)alloca(sz); + try_print: + cnt = vsnprintf(buf, sz, fmt, ap); + if (cnt >= sz) { + buf = (char*)alloca(cnt - sz + 1); + sz = cnt + 1; + goto try_print; + } + wcs = (u_int32_t*)alloca((cnt+1) * sizeof(u_int32_t)); + cnt = u8_toucs(wcs, cnt+1, buf, cnt); + printf("%ls", (wchar_t*)wcs); + return cnt; +} + +int u8_printf(char *fmt, ...) +{ + int cnt; + va_list args; + + va_start(args, fmt); + + cnt = u8_vprintf(fmt, args); + + va_end(args); + return cnt; +} diff --git a/AruuzCore/utf8.h b/AruuzCore/utf8.h new file mode 100644 index 0000000..b538ac3 --- /dev/null +++ b/AruuzCore/utf8.h @@ -0,0 +1,71 @@ +#include + +/* is c the start of a utf8 sequence? 
*/ +#define isutf(c) (((c)&0xC0)!=0x80) + +/* convert UTF-8 data to wide character */ +int u8_toucs(u_int32_t *dest, int sz, char *src, int srcsz); + +/* the opposite conversion */ +int u8_toutf8(char *dest, int sz, u_int32_t *src, int srcsz); + +/* single character to UTF-8 */ +int u8_wc_toutf8(char *dest, u_int32_t ch); + +/* character number to byte offset */ +int u8_offset(char *str, int charnum); + +/* byte offset to character number */ +int u8_charnum(char *s, int offset); + +/* return next character, updating an index variable */ +u_int32_t u8_nextchar(char *s, int *i); + +/* move to next character */ +void u8_inc(char *s, int *i); + +/* move to previous character */ +void u8_dec(char *s, int *i); + +/* returns length of next utf-8 sequence */ +int u8_seqlen(char *s); + +/* assuming src points to the character after a backslash, read an + escape sequence, storing the result in dest and returning the number of + input characters processed */ +int u8_read_escape_sequence(char *src, u_int32_t *dest); + +/* given a wide character, convert it to an ASCII escape sequence stored in + buf, where buf is "sz" bytes. returns the number of characters output. */ +int u8_escape_wchar(char *buf, int sz, u_int32_t ch); + +/* convert a string "src" containing escape sequences to UTF-8 */ +int u8_unescape(char *buf, int sz, char *src); + +/* convert UTF-8 "src" to ASCII with escape sequences. + if escape_quotes is nonzero, quote characters will be preceded by + backslashes as well. */ +int u8_escape(char *buf, int sz, char *src, int escape_quotes); + +/* utility predicates used by the above */ +int octal_digit(char c); +int hex_digit(char c); + +/* return a pointer to the first occurrence of ch in s, or NULL if not + found. character index of found character returned in *charn. */ +char *u8_strchr(char *s, u_int32_t ch, int *charn); + +/* same as the above, but searches a buffer of a given size instead of + a NUL-terminated string. 
*/ +char *u8_memchr(char *s, u_int32_t ch, size_t sz, int *charn); + +/* count the number of characters in a UTF-8 string */ +int u8_strlen(char *s); + +int u8_is_locale_utf8(char *locale); + +/* printf where the format string and arguments may be in UTF-8. + you can avoid this function and just use ordinary printf() if the current + locale is UTF-8. */ +int u8_vprintf(char *fmt, va_list ap); +int u8_printf(char *fmt, ...); diff --git a/AruuzCore/utility.c b/AruuzCore/utility.c new file mode 100644 index 0000000..aa7a9f4 --- /dev/null +++ b/AruuzCore/utility.c @@ -0,0 +1,27 @@ +#include +#include"utility.h" + +char *remove_char(char *src, char key) +{ + int idxToDel = -1; + char *subStr; + while(strchr(src,key) != NULL) + { + subStr = strchr(src,key); + idxToDel = subStr - src; + memmove(&src[idxToDel], &src[idxToDel + 1], strlen(src) - idxToDel); + } + return src; +} +char *replace_char(char *src, char to_replace, char replace_with) +{ + int idxToReplace = -1; + char *subStr; + while(strchr(src,to_replace) != NULL) + { + subStr = strchr(src,to_replace); + idxToReplace = subStr - src; + src[idxToReplace] = replace_with; + } + return src; +} \ No newline at end of file diff --git a/AruuzCore/utility.h b/AruuzCore/utility.h new file mode 100644 index 0000000..c626e31 --- /dev/null +++ b/AruuzCore/utility.h @@ -0,0 +1,7 @@ +#ifndef _utility_h +#define utility_h + +char *remove_char(char *src, char key); +char *replace_char(char *src, char to_replace, char replace_with); + +#endif \ No newline at end of file